From d4a1a6536c3d99d4128206c52e672b8d564a1536 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 28 Feb 2022 21:32:12 -0500 Subject: [PATCH 001/154] move np models to np directory --- hyperion/bin/apply-mvn-select-frames.py | 4 +- hyperion/bin/compute-energy-vad.py | 2 +- hyperion/bin/compute-mfcc-feats.py | 2 +- hyperion/bin/eval-cos-1vs1.py | 2 +- hyperion/bin/eval-linear-gbe-up.py | 4 +- hyperion/bin/eval-linear-gbe.py | 4 +- hyperion/bin/eval-linear-svmc.py | 4 +- hyperion/bin/eval-logistic-regression.py | 4 +- hyperion/bin/eval-plda-1vs1.py | 2 +- hyperion/bin/eval-plda-nvs1.py | 2 +- hyperion/bin/plot-vector-hist.py | 2 +- hyperion/bin/plot-vector-tsne.py | 2 +- hyperion/bin/torch-compute-mfcc-feats.py | 1 - hyperion/bin/torch-eval-vae.py | 2 +- ...osine-scoring-from-adv-test-wav-wavegan.py | 2 +- ...l-xvec-cosine-scoring-from-adv-test-wav.py | 2 +- ...l-xvec-cosine-scoring-from-art-test-wav.py | 2 +- ...-eval-xvec-cosine-scoring-from-test-wav.py | 2 +- ...sine-scoring-from-transfer-adv-test-wav.py | 2 +- ...sine-scoring-from-transfer-art-test-wav.py | 2 +- .../bin/torch-eval-xvec-logits-from-wav.py | 2 +- ...rch-extract-xvectors-from-wav-with-rttm.py | 2 +- .../bin/torch-extract-xvectors-from-wav.py | 2 +- ...torch-extract-xvectors-slidwin-from-wav.py | 2 +- .../bin/torch-extract-xvectors-slidwin.py | 2 +- .../bin/torch-extract-xvectors-vae-preproc.py | 2 +- hyperion/bin/torch-extract-xvectors.py | 2 +- ...orch-generate-adv-attacks-xvector-verif.py | 2 +- hyperion/bin/train-cw-up.py | 4 +- hyperion/bin/train-cw.py | 4 +- hyperion/bin/train-gaussianizer.py | 4 +- hyperion/bin/train-lda.py | 2 +- hyperion/bin/train-linear-gbe-up.py | 4 +- hyperion/bin/train-linear-gbe.py | 4 +- hyperion/bin/train-linear-svmc.py | 4 +- hyperion/bin/train-logistic-regression.py | 4 +- hyperion/bin/train-mvn.py | 4 +- hyperion/bin/train-nda.py | 2 +- hyperion/bin/train-pca.py | 2 +- hyperion/bin/train-plda.py | 2 +- hyperion/helpers/__init__.py | 4 - hyperion/helpers/classif_trial_data_reader.py | 2 +- .../helpers/multi_test_trial_data_reader.py | 2 +- .../multi_test_trial_data_reader_v2.py | 2 +- hyperion/helpers/plda_factory.py | 2 +- hyperion/helpers/tracking_data_reader.py | 2 +- hyperion/helpers/trial_data_reader.py | 2 +- hyperion/helpers/vector_class_reader.py | 2 +- hyperion/helpers/vector_reader.py | 2 +- hyperion/np/__init__.py | 7 + hyperion/{ => np}/augment/__init__.py | 0 hyperion/{ => np}/augment/noise_augment.py | 4 +- hyperion/{ => np}/augment/reverb_augment.py | 2 +- hyperion/{ => np}/augment/speech_augment.py | 2 +- hyperion/{ => np}/augment/speed_augment.py | 2 +- hyperion/{ => np}/calibration/__init__.py | 0 .../{ => np}/calibration/gauss_calibration.py | 4 +- .../calibration/unsup_gauss_calibration.py | 0 hyperion/{ => np}/classifiers/__init__.py | 0 .../classifiers/binary_logistic_regression.py | 0 .../{ => np}/classifiers/greedy_fusion.py | 6 +- hyperion/{ => np}/classifiers/linear_gbe.py | 8 +- hyperion/{ => np}/classifiers/linear_gbe1.py | 8 +- .../{ => np}/classifiers/linear_gbe_up.py | 6 +- hyperion/{ => np}/classifiers/linear_svmc.py | 8 +- .../classifiers/logistic_regression.py | 8 +- .../classifiers/q_scoring_homo_gbe.py | 8 +- hyperion/{ => np}/clustering/__init__.py | 0 hyperion/{ => np}/clustering/ahc.py | 6 +- hyperion/{ => np}/clustering/kmeans.py | 6 +- hyperion/{ => np}/diarization/__init__.py | 0 .../{ => np}/diarization/diar_ahc_plda.py | 0 hyperion/{ => np}/feats/__init__.py | 0 hyperion/{ => np}/feats/energy_vad.py | 4 +- .../{ => 
np}/feats/feature_normalization.py | 2 +- hyperion/{ => np}/feats/feature_windows.py | 2 +- hyperion/{ => np}/feats/filter_banks.py | 3 +- hyperion/{ => np}/feats/frame_selector.py | 0 hyperion/{ => np}/feats/mfcc.py | 4 +- hyperion/{ => np}/feats/stft.py | 2 +- hyperion/{ => np}/metrics/__init__.py | 0 hyperion/{ => np}/metrics/acc.py | 0 hyperion/{ => np}/metrics/cllr.py | 0 hyperion/{ => np}/metrics/confidence.py | 0 hyperion/{ => np}/metrics/confusion_matrix.py | 0 hyperion/{ => np}/metrics/dcf.py | 0 hyperion/{ => np}/metrics/dcf_plot.py | 0 hyperion/{ => np}/metrics/det_plot.py | 0 hyperion/{ => np}/metrics/eer.py | 0 hyperion/{ => np}/metrics/roc.py | 0 hyperion/{ => np}/metrics/utils.py | 2 +- .../metrics/verification_evaluator.py | 6 +- hyperion/{hyp_model.py => np/np_model.py} | 4 +- .../np_model_loader.py} | 9 +- hyperion/{ => np}/pdfs/__init__.py | 0 hyperion/{ => np}/pdfs/core/__init__.py | 0 hyperion/{ => np}/pdfs/core/exp_family.py | 0 hyperion/{ => np}/pdfs/core/normal.py | 6 +- .../{ => np}/pdfs/core/normal_diag_cov.py | 4 +- hyperion/{ => np}/pdfs/core/pdf.py | 4 +- hyperion/{ => np}/pdfs/hmm/__init__.py | 0 hyperion/{ => np}/pdfs/hmm/hmm.py | 4 +- hyperion/{ => np}/pdfs/jfa/__init__.py | 0 hyperion/{ => np}/pdfs/jfa/jfa_total.py | 4 +- hyperion/{ => np}/pdfs/mixtures/__init__.py | 0 .../pdfs/mixtures/exp_family_mixture.py | 6 +- hyperion/{ => np}/pdfs/mixtures/gmm.py | 6 +- .../{ => np}/pdfs/mixtures/gmm_diag_cov.py | 6 +- .../pdfs/mixtures/gmm_tied_diag_cov.py | 6 +- hyperion/{ => np}/pdfs/plda/__init__.py | 0 hyperion/{ => np}/pdfs/plda/frplda.py | 4 +- hyperion/{ => np}/pdfs/plda/plda.py | 4 +- hyperion/{ => np}/pdfs/plda/plda_base.py | 2 +- hyperion/{ => np}/pdfs/plda/splda.py | 4 +- hyperion/{ => np}/score_norm/__init__.py | 0 hyperion/{ => np}/score_norm/adapt_s_norm.py | 0 hyperion/{ => np}/score_norm/s_norm.py | 0 hyperion/{ => np}/score_norm/score_norm.py | 4 +- hyperion/{ => np}/score_norm/t_norm.py | 0 hyperion/{ => np}/score_norm/tz_norm.py | 0 hyperion/{ => np}/score_norm/z_norm.py | 0 hyperion/{ => np}/score_norm/zt_norm.py | 0 hyperion/{ => np}/transforms/__init__.py | 0 hyperion/{ => np}/transforms/cent_whiten.py | 4 +- .../{ => np}/transforms/cent_whiten_up.py | 2 +- hyperion/{ => np}/transforms/coral.py | 4 +- hyperion/{ => np}/transforms/gaussianizer.py | 6 +- hyperion/{ => np}/transforms/lda.py | 4 +- hyperion/{ => np}/transforms/lnorm.py | 0 hyperion/{ => np}/transforms/lnorm_up.py | 1 - hyperion/{ => np}/transforms/mvn.py | 4 +- hyperion/{ => np}/transforms/nap.py | 4 +- hyperion/{ => np}/transforms/nda.py | 6 +- hyperion/{ => np}/transforms/pca.py | 4 +- hyperion/{ => np}/transforms/sb_sw.py | 6 +- hyperion/{ => np}/transforms/skl_tsne.py | 4 +- .../{ => np}/transforms/transform_list.py | 0 hyperion/pipeline/pipeline.py | 63 -------- hyperion/torch/data/audio_dataset.py | 2 +- hyperion/torch/layers/audio_feats.py | 4 +- hyperion/vb_pdfs/core/exponential_family.py | 139 ------------------ hyperion/vb_pdfs/core/pdf.py | 32 ---- requirements.txt | 1 + 143 files changed, 184 insertions(+), 418 deletions(-) create mode 100644 hyperion/np/__init__.py rename hyperion/{ => np}/augment/__init__.py (100%) rename hyperion/{ => np}/augment/noise_augment.py (98%) rename hyperion/{ => np}/augment/reverb_augment.py (99%) rename hyperion/{ => np}/augment/speech_augment.py (99%) rename hyperion/{ => np}/augment/speed_augment.py (99%) rename hyperion/{ => np}/calibration/__init__.py (100%) rename hyperion/{ => np}/calibration/gauss_calibration.py (98%) rename 
hyperion/{ => np}/calibration/unsup_gauss_calibration.py (100%) rename hyperion/{ => np}/classifiers/__init__.py (100%) rename hyperion/{ => np}/classifiers/binary_logistic_regression.py (100%) rename hyperion/{ => np}/classifiers/greedy_fusion.py (99%) rename hyperion/{ => np}/classifiers/linear_gbe.py (98%) rename hyperion/{ => np}/classifiers/linear_gbe1.py (97%) rename hyperion/{ => np}/classifiers/linear_gbe_up.py (98%) rename hyperion/{ => np}/classifiers/linear_svmc.py (98%) rename hyperion/{ => np}/classifiers/logistic_regression.py (99%) rename hyperion/{ => np}/classifiers/q_scoring_homo_gbe.py (97%) rename hyperion/{ => np}/clustering/__init__.py (100%) rename hyperion/{ => np}/clustering/ahc.py (97%) rename hyperion/{ => np}/clustering/kmeans.py (95%) rename hyperion/{ => np}/diarization/__init__.py (100%) rename hyperion/{ => np}/diarization/diar_ahc_plda.py (100%) rename hyperion/{ => np}/feats/__init__.py (100%) rename hyperion/{ => np}/feats/energy_vad.py (99%) rename hyperion/{ => np}/feats/feature_normalization.py (99%) rename hyperion/{ => np}/feats/feature_windows.py (98%) rename hyperion/{ => np}/feats/filter_banks.py (98%) rename hyperion/{ => np}/feats/frame_selector.py (100%) rename hyperion/{ => np}/feats/mfcc.py (99%) rename hyperion/{ => np}/feats/stft.py (98%) rename hyperion/{ => np}/metrics/__init__.py (100%) rename hyperion/{ => np}/metrics/acc.py (100%) rename hyperion/{ => np}/metrics/cllr.py (100%) rename hyperion/{ => np}/metrics/confidence.py (100%) rename hyperion/{ => np}/metrics/confusion_matrix.py (100%) rename hyperion/{ => np}/metrics/dcf.py (100%) rename hyperion/{ => np}/metrics/dcf_plot.py (100%) rename hyperion/{ => np}/metrics/det_plot.py (100%) rename hyperion/{ => np}/metrics/eer.py (100%) rename hyperion/{ => np}/metrics/roc.py (100%) rename hyperion/{ => np}/metrics/utils.py (99%) rename hyperion/{ => np}/metrics/verification_evaluator.py (99%) rename hyperion/{hyp_model.py => np/np_model.py} (98%) rename hyperion/{model_loader.py => np/np_model_loader.py} (78%) rename hyperion/{ => np}/pdfs/__init__.py (100%) rename hyperion/{ => np}/pdfs/core/__init__.py (100%) rename hyperion/{ => np}/pdfs/core/exp_family.py (100%) rename hyperion/{ => np}/pdfs/core/normal.py (99%) rename hyperion/{ => np}/pdfs/core/normal_diag_cov.py (99%) rename hyperion/{ => np}/pdfs/core/pdf.py (93%) rename hyperion/{ => np}/pdfs/hmm/__init__.py (100%) rename hyperion/{ => np}/pdfs/hmm/hmm.py (99%) rename hyperion/{ => np}/pdfs/jfa/__init__.py (100%) rename hyperion/{ => np}/pdfs/jfa/jfa_total.py (99%) rename hyperion/{ => np}/pdfs/mixtures/__init__.py (100%) rename hyperion/{ => np}/pdfs/mixtures/exp_family_mixture.py (99%) rename hyperion/{ => np}/pdfs/mixtures/gmm.py (99%) rename hyperion/{ => np}/pdfs/mixtures/gmm_diag_cov.py (99%) rename hyperion/{ => np}/pdfs/mixtures/gmm_tied_diag_cov.py (98%) rename hyperion/{ => np}/pdfs/plda/__init__.py (100%) rename hyperion/{ => np}/pdfs/plda/frplda.py (99%) rename hyperion/{ => np}/pdfs/plda/plda.py (99%) rename hyperion/{ => np}/pdfs/plda/plda_base.py (99%) rename hyperion/{ => np}/pdfs/plda/splda.py (99%) rename hyperion/{ => np}/score_norm/__init__.py (100%) rename hyperion/{ => np}/score_norm/adapt_s_norm.py (100%) rename hyperion/{ => np}/score_norm/s_norm.py (100%) rename hyperion/{ => np}/score_norm/score_norm.py (85%) rename hyperion/{ => np}/score_norm/t_norm.py (100%) rename hyperion/{ => np}/score_norm/tz_norm.py (100%) rename hyperion/{ => np}/score_norm/z_norm.py (100%) rename hyperion/{ => 
np}/score_norm/zt_norm.py (100%) rename hyperion/{ => np}/transforms/__init__.py (100%) rename hyperion/{ => np}/transforms/cent_whiten.py (98%) rename hyperion/{ => np}/transforms/cent_whiten_up.py (96%) rename hyperion/{ => np}/transforms/coral.py (98%) rename hyperion/{ => np}/transforms/gaussianizer.py (96%) rename hyperion/{ => np}/transforms/lda.py (98%) rename hyperion/{ => np}/transforms/lnorm.py (100%) rename hyperion/{ => np}/transforms/lnorm_up.py (99%) rename hyperion/{ => np}/transforms/mvn.py (94%) rename hyperion/{ => np}/transforms/nap.py (97%) rename hyperion/{ => np}/transforms/nda.py (94%) rename hyperion/{ => np}/transforms/pca.py (98%) rename hyperion/{ => np}/transforms/sb_sw.py (98%) rename hyperion/{ => np}/transforms/skl_tsne.py (99%) rename hyperion/{ => np}/transforms/transform_list.py (100%) delete mode 100644 hyperion/pipeline/pipeline.py delete mode 100644 hyperion/vb_pdfs/core/exponential_family.py delete mode 100644 hyperion/vb_pdfs/core/pdf.py diff --git a/hyperion/bin/apply-mvn-select-frames.py b/hyperion/bin/apply-mvn-select-frames.py index 71c52cda..4f73628e 100755 --- a/hyperion/bin/apply-mvn-select-frames.py +++ b/hyperion/bin/apply-mvn-select-frames.py @@ -23,8 +23,8 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import RandomAccessDataReaderFactory as RDRF -from hyperion.feats import MeanVarianceNorm as MVN -from hyperion.feats import FrameSelector as FSel +from hyperion.np.feats import MeanVarianceNorm as MVN +from hyperion.np.feats import FrameSelector as FSel def process_feats( diff --git a/hyperion/bin/compute-energy-vad.py b/hyperion/bin/compute-energy-vad.py index 397aea80..99f562cf 100755 --- a/hyperion/bin/compute-energy-vad.py +++ b/hyperion/bin/compute-energy-vad.py @@ -19,7 +19,7 @@ from hyperion.hyp_defs import config_logger from hyperion.io import SequentialAudioReader as AR from hyperion.io import DataWriterFactory as DWF -from hyperion.feats import EnergyVAD +from hyperion.np.feats import EnergyVAD def compute_vad(input_path, output_path, write_num_frames, **kwargs): diff --git a/hyperion/bin/compute-mfcc-feats.py b/hyperion/bin/compute-mfcc-feats.py index 589d3188..b7e90056 100755 --- a/hyperion/bin/compute-mfcc-feats.py +++ b/hyperion/bin/compute-mfcc-feats.py @@ -21,7 +21,7 @@ from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import DataWriterFactory as DWF from hyperion.io import compression_methods -from hyperion.feats import MFCC +from hyperion.np.feats import MFCC def compute_mfcc_feats( diff --git a/hyperion/bin/eval-cos-1vs1.py b/hyperion/bin/eval-cos-1vs1.py index 123221f2..16c9122a 100755 --- a/hyperion/bin/eval-cos-1vs1.py +++ b/hyperion/bin/eval-cos-1vs1.py @@ -19,7 +19,7 @@ from hyperion.utils.trial_ndx import TrialNdx from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR -from hyperion.transforms import TransformList, LNorm +from hyperion.np.transforms import TransformList, LNorm def eval_cos( diff --git a/hyperion/bin/eval-linear-gbe-up.py b/hyperion/bin/eval-linear-gbe-up.py index 287117fd..a8c3d999 100755 --- a/hyperion/bin/eval-linear-gbe-up.py +++ b/hyperion/bin/eval-linear-gbe-up.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.io import HypDataWriter as HDW from hyperion.helpers import ClassifTrialDataReader as TDR -from hyperion.transforms import TransformList -from hyperion.classifiers import LinearGBEUP as GBE +from 
hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LinearGBEUP as GBE def eval_linear_gbe( diff --git a/hyperion/bin/eval-linear-gbe.py b/hyperion/bin/eval-linear-gbe.py index a93b6c39..0970bb5e 100755 --- a/hyperion/bin/eval-linear-gbe.py +++ b/hyperion/bin/eval-linear-gbe.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.io import HypDataWriter as HDW from hyperion.helpers import ClassifTrialDataReader as TDR -from hyperion.transforms import TransformList -from hyperion.classifiers import LinearGBE as GBE +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LinearGBE as GBE def eval_linear_gbe( diff --git a/hyperion/bin/eval-linear-svmc.py b/hyperion/bin/eval-linear-svmc.py index ff7b1faa..d6c96c11 100755 --- a/hyperion/bin/eval-linear-svmc.py +++ b/hyperion/bin/eval-linear-svmc.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.io import HypDataWriter as HDW from hyperion.helpers import ClassifTrialDataReader as TDR -from hyperion.transforms import TransformList -from hyperion.classifiers import LinearSVMC as SVM +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LinearSVMC as SVM def eval_svm( diff --git a/hyperion/bin/eval-logistic-regression.py b/hyperion/bin/eval-logistic-regression.py index d96e2473..91a092ea 100755 --- a/hyperion/bin/eval-logistic-regression.py +++ b/hyperion/bin/eval-logistic-regression.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.io import HypDataWriter as HDW from hyperion.helpers import ClassifTrialDataReader as TDR -from hyperion.transforms import TransformList -from hyperion.classifiers import LogisticRegression as LR +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LogisticRegression as LR def eval_lr( diff --git a/hyperion/bin/eval-plda-1vs1.py b/hyperion/bin/eval-plda-1vs1.py index 715d043a..eadf4a87 100755 --- a/hyperion/bin/eval-plda-1vs1.py +++ b/hyperion/bin/eval-plda-1vs1.py @@ -20,7 +20,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/hyperion/bin/eval-plda-nvs1.py b/hyperion/bin/eval-plda-nvs1.py index 30ea2606..5a63e5a5 100755 --- a/hyperion/bin/eval-plda-nvs1.py +++ b/hyperion/bin/eval-plda-nvs1.py @@ -20,7 +20,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/hyperion/bin/plot-vector-hist.py b/hyperion/bin/plot-vector-hist.py index cd86b1c1..60560a80 100755 --- a/hyperion/bin/plot-vector-hist.py +++ b/hyperion/bin/plot-vector-hist.py @@ -18,7 +18,7 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def plot_vector_hist( diff --git a/hyperion/bin/plot-vector-tsne.py b/hyperion/bin/plot-vector-tsne.py index 030d7e39..c4c30302 100755 --- a/hyperion/bin/plot-vector-tsne.py +++ b/hyperion/bin/plot-vector-tsne.py @@ -22,7 +22,7 @@ from hyperion.hyp_defs import config_logger from hyperion.io import 
DataWriterFactory as DWF from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList, PCA +from hyperion.np.transforms import TransformList, PCA colors = ["b", "g", "r", "c", "m", "y", "k"] markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"] diff --git a/hyperion/bin/torch-compute-mfcc-feats.py b/hyperion/bin/torch-compute-mfcc-feats.py index 5f7d9f7d..4fc6bec2 100755 --- a/hyperion/bin/torch-compute-mfcc-feats.py +++ b/hyperion/bin/torch-compute-mfcc-feats.py @@ -22,7 +22,6 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import compression_methods from hyperion.torch.layers import AudioFeatsFactory as AFF -from hyperion.feats import MFCC def compute_mfcc_feats( diff --git a/hyperion/bin/torch-eval-vae.py b/hyperion/bin/torch-eval-vae.py index dfcdaa38..44ed0bfb 100755 --- a/hyperion/bin/torch-eval-vae.py +++ b/hyperion/bin/torch-eval-vae.py @@ -29,7 +29,7 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF -from hyperion.feats import MeanVarianceNorm as MVN +from hyperion.np.feats import MeanVarianceNorm as MVN from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py index 8d55b719..411873ac 100755 --- a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py +++ b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py @@ -27,7 +27,7 @@ from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores from hyperion.utils.list_utils import ismember from hyperion.io import VADReaderFactory as VRF -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch.utils import open_device from hyperion.torch.layers import LinBinCalibrator as Calibrator diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py index a5783654..18d6843f 100755 --- a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py +++ b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py @@ -27,7 +27,7 @@ from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores from hyperion.utils.list_utils import ismember from hyperion.io import VADReaderFactory as VRF -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch.utils import open_device from hyperion.torch.layers import LinBinCalibrator as Calibrator diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-art-test-wav.py b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-art-test-wav.py index 44a3b98f..73da6088 100755 --- a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-art-test-wav.py +++ b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-art-test-wav.py @@ -28,7 +28,7 @@ from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores from hyperion.utils.list_utils import ismember from hyperion.io import VADReaderFactory as VRF -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch.utils import open_device from hyperion.torch.layers import 
LinBinCalibrator as Calibrator diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-test-wav.py b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-test-wav.py index c7bcc50a..a8b4b962 100755 --- a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-test-wav.py +++ b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-test-wav.py @@ -26,7 +26,7 @@ from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores from hyperion.utils.list_utils import ismember from hyperion.io import VADReaderFactory as VRF -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch.utils import open_device from hyperion.torch.layers import LinBinCalibrator as Calibrator diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py index 4b08c7ab..51a8afbb 100755 --- a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py +++ b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py @@ -27,7 +27,7 @@ from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores from hyperion.utils.list_utils import ismember from hyperion.io import VADReaderFactory as VRF -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch.utils import open_device from hyperion.torch.layers import LinBinCalibrator as Calibrator diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py index 9d9d4666..9fcc8f30 100755 --- a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py +++ b/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py @@ -28,7 +28,7 @@ from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores from hyperion.utils.list_utils import ismember from hyperion.io import VADReaderFactory as VRF -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch.utils import open_device from hyperion.torch.layers import LinBinCalibrator as Calibrator diff --git a/hyperion/bin/torch-eval-xvec-logits-from-wav.py b/hyperion/bin/torch-eval-xvec-logits-from-wav.py index 58cc9005..61acebd4 100755 --- a/hyperion/bin/torch-eval-xvec-logits-from-wav.py +++ b/hyperion/bin/torch-eval-xvec-logits-from-wav.py @@ -25,7 +25,7 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF -from hyperion.augment import SpeechAugment +from hyperion.np.augment import SpeechAugment from hyperion.torch.utils import open_device from hyperion.torch.narchs import AudioFeatsMVN as AF diff --git a/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py b/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py index bf227045..c4f1ba9a 100755 --- a/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py +++ b/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py @@ -25,7 +25,7 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF -from hyperion.augment import SpeechAugment +from hyperion.np.augment import SpeechAugment from hyperion.torch.utils import open_device from hyperion.torch.narchs import 
AudioFeatsMVN as AF diff --git a/hyperion/bin/torch-extract-xvectors-from-wav.py b/hyperion/bin/torch-extract-xvectors-from-wav.py index 0aea084e..48c23687 100755 --- a/hyperion/bin/torch-extract-xvectors-from-wav.py +++ b/hyperion/bin/torch-extract-xvectors-from-wav.py @@ -25,7 +25,7 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF -from hyperion.augment import SpeechAugment +from hyperion.np.augment import SpeechAugment from hyperion.torch.utils import open_device from hyperion.torch.narchs import AudioFeatsMVN as AF diff --git a/hyperion/bin/torch-extract-xvectors-slidwin-from-wav.py b/hyperion/bin/torch-extract-xvectors-slidwin-from-wav.py index e3ab70e9..ecf65037 100755 --- a/hyperion/bin/torch-extract-xvectors-slidwin-from-wav.py +++ b/hyperion/bin/torch-extract-xvectors-slidwin-from-wav.py @@ -26,7 +26,7 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF -from hyperion.augment import SpeechAugment +from hyperion.np.augment import SpeechAugment from hyperion.torch.utils import open_device from hyperion.torch.narchs import AudioFeatsMVN as AF diff --git a/hyperion/bin/torch-extract-xvectors-slidwin.py b/hyperion/bin/torch-extract-xvectors-slidwin.py index 0e2f0173..7d6d9f11 100755 --- a/hyperion/bin/torch-extract-xvectors-slidwin.py +++ b/hyperion/bin/torch-extract-xvectors-slidwin.py @@ -24,7 +24,7 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF -from hyperion.feats import MeanVarianceNorm as MVN +from hyperion.np.feats import MeanVarianceNorm as MVN from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML diff --git a/hyperion/bin/torch-extract-xvectors-vae-preproc.py b/hyperion/bin/torch-extract-xvectors-vae-preproc.py index 376de911..64f6359d 100755 --- a/hyperion/bin/torch-extract-xvectors-vae-preproc.py +++ b/hyperion/bin/torch-extract-xvectors-vae-preproc.py @@ -24,7 +24,7 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF -from hyperion.feats import MeanVarianceNorm as MVN +from hyperion.np.feats import MeanVarianceNorm as MVN from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML diff --git a/hyperion/bin/torch-extract-xvectors.py b/hyperion/bin/torch-extract-xvectors.py index 18bab96f..f36e35e2 100755 --- a/hyperion/bin/torch-extract-xvectors.py +++ b/hyperion/bin/torch-extract-xvectors.py @@ -24,7 +24,7 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF -from hyperion.feats import MeanVarianceNorm as MVN +from hyperion.np.feats import MeanVarianceNorm as MVN from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML diff --git a/hyperion/bin/torch-generate-adv-attacks-xvector-verif.py b/hyperion/bin/torch-generate-adv-attacks-xvector-verif.py index 58f73b00..c13bd815 100755 --- a/hyperion/bin/torch-generate-adv-attacks-xvector-verif.py +++ b/hyperion/bin/torch-generate-adv-attacks-xvector-verif.py @@ -29,7 +29,7 @@ from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores from hyperion.utils.list_utils import ismember from 
hyperion.io import VADReaderFactory as VRF -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch.utils import open_device from hyperion.torch.layers import LinBinCalibrator as Calibrator diff --git a/hyperion/bin/train-cw-up.py b/hyperion/bin/train-cw-up.py index 48b8dfc4..a7392a32 100755 --- a/hyperion/bin/train-cw-up.py +++ b/hyperion/bin/train-cw-up.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR -from hyperion.pdfs.core import Normal -from hyperion.transforms import TransformList, CentWhitenUP, LNormUP +from hyperion.np.pdfs.core import Normal +from hyperion.np.transforms import TransformList, CentWhitenUP, LNormUP def load_model(input_path, with_lnorm, name, **kwargs): diff --git a/hyperion/bin/train-cw.py b/hyperion/bin/train-cw.py index c64d4892..a70485a6 100755 --- a/hyperion/bin/train-cw.py +++ b/hyperion/bin/train-cw.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR -from hyperion.pdfs.core import Normal -from hyperion.transforms import TransformList, CentWhiten, LNorm +from hyperion.np.pdfs.core import Normal +from hyperion.np.transforms import TransformList, CentWhiten, LNorm def load_model(input_path, with_lnorm, name, **kwargs): diff --git a/hyperion/bin/train-gaussianizer.py b/hyperion/bin/train-gaussianizer.py index eefd2456..a265403e 100755 --- a/hyperion/bin/train-gaussianizer.py +++ b/hyperion/bin/train-gaussianizer.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR -from hyperion.pdfs.core import Normal -from hyperion.transforms import TransformList, Gaussianizer +from hyperion.np.pdfs.core import Normal +from hyperion.np.transforms import TransformList, Gaussianizer def load_model(input_path, **kwargs): diff --git a/hyperion/bin/train-lda.py b/hyperion/bin/train-lda.py index 17cd5ab6..36217c8f 100755 --- a/hyperion/bin/train-lda.py +++ b/hyperion/bin/train-lda.py @@ -16,7 +16,7 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList, LDA, SbSw +from hyperion.np.transforms import TransformList, LDA, SbSw def train_lda( diff --git a/hyperion/bin/train-linear-gbe-up.py b/hyperion/bin/train-linear-gbe-up.py index 3e102b1f..5accb785 100755 --- a/hyperion/bin/train-linear-gbe-up.py +++ b/hyperion/bin/train-linear-gbe-up.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList -from hyperion.classifiers import LinearGBEUP as GBE +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LinearGBEUP as GBE def train_linear_gbe(iv_file, train_list, preproc_file, output_path, **kwargs): diff --git a/hyperion/bin/train-linear-gbe.py b/hyperion/bin/train-linear-gbe.py index 1428358e..a7ac5236 100755 --- a/hyperion/bin/train-linear-gbe.py +++ b/hyperion/bin/train-linear-gbe.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList -from hyperion.classifiers import LinearGBE as GBE +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LinearGBE as GBE def train_linear_gbe(iv_file, train_list, preproc_file, output_path, **kwargs): diff 
--git a/hyperion/bin/train-linear-svmc.py b/hyperion/bin/train-linear-svmc.py index 6c0e2de2..6b589491 100755 --- a/hyperion/bin/train-linear-svmc.py +++ b/hyperion/bin/train-linear-svmc.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList -from hyperion.classifiers import LinearSVMC as SVM +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LinearSVMC as SVM def train_svm(iv_file, train_list, preproc_file, output_path, **kwargs): diff --git a/hyperion/bin/train-logistic-regression.py b/hyperion/bin/train-logistic-regression.py index 6a409119..1d657dc4 100755 --- a/hyperion/bin/train-logistic-regression.py +++ b/hyperion/bin/train-logistic-regression.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList -from hyperion.classifiers import LogisticRegression as LR +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LogisticRegression as LR def train_lr(iv_file, train_list, preproc_file, output_path, **kwargs): diff --git a/hyperion/bin/train-mvn.py b/hyperion/bin/train-mvn.py index 8ddc5e92..a0204fd5 100755 --- a/hyperion/bin/train-mvn.py +++ b/hyperion/bin/train-mvn.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR -from hyperion.pdfs.core import Normal -from hyperion.transforms import TransformList, MVN, SbSw +from hyperion.np.pdfs.core import Normal +from hyperion.np.transforms import TransformList, MVN, SbSw def train_mvn( diff --git a/hyperion/bin/train-nda.py b/hyperion/bin/train-nda.py index dcc856ed..11cd7da3 100755 --- a/hyperion/bin/train-nda.py +++ b/hyperion/bin/train-nda.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList, NDA, NSbSw +from hyperion.np.transforms import TransformList, NDA, NSbSw def train_nda( diff --git a/hyperion/bin/train-pca.py b/hyperion/bin/train-pca.py index b82a7772..d1ab1c7e 100755 --- a/hyperion/bin/train-pca.py +++ b/hyperion/bin/train-pca.py @@ -16,7 +16,7 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, PCA +from hyperion.np.transforms import TransformList, PCA def load_model(input_path, name, **kwargs): diff --git a/hyperion/bin/train-plda.py b/hyperion/bin/train-plda.py index ba9a40c2..26f6e0a8 100755 --- a/hyperion/bin/train-plda.py +++ b/hyperion/bin/train-plda.py @@ -18,7 +18,7 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def train_plda( diff --git a/hyperion/helpers/__init__.py b/hyperion/helpers/__init__.py index eeaf2cce..48bf1476 100644 --- a/hyperion/helpers/__init__.py +++ b/hyperion/helpers/__init__.py @@ -11,8 +11,4 @@ from .multi_test_trial_data_reader_v2 import MultiTestTrialDataReaderV2 from .classif_trial_data_reader import ClassifTrialDataReader -# from .sequence_reader import SequenceReader -# from .sequence_class_reader import SequenceClassReader -# from .sequence_post_reader import SequencePostReader -# from .sequence_post_class_reader import SequencePostClassReader from 
.plda_factory import PLDAFactory diff --git a/hyperion/helpers/classif_trial_data_reader.py b/hyperion/helpers/classif_trial_data_reader.py index f5d74640..f7aeb727 100644 --- a/hyperion/helpers/classif_trial_data_reader.py +++ b/hyperion/helpers/classif_trial_data_reader.py @@ -14,7 +14,7 @@ from ..io import HypDataReader from ..utils import TrialNdx, SCPList -from ..transforms import TransformList +from ..np.transforms import TransformList class ClassifTrialDataReader(object): diff --git a/hyperion/helpers/multi_test_trial_data_reader.py b/hyperion/helpers/multi_test_trial_data_reader.py index 57355cd0..eeea60f2 100644 --- a/hyperion/helpers/multi_test_trial_data_reader.py +++ b/hyperion/helpers/multi_test_trial_data_reader.py @@ -13,7 +13,7 @@ from ..io import RandomAccessDataReaderFactory as DRF from ..utils import TrialNdx, TrialKey, Utt2Info -from ..transforms import TransformList +from ..np.transforms import TransformList class MultiTestTrialDataReader(object): diff --git a/hyperion/helpers/multi_test_trial_data_reader_v2.py b/hyperion/helpers/multi_test_trial_data_reader_v2.py index 306f75ae..43fd1254 100644 --- a/hyperion/helpers/multi_test_trial_data_reader_v2.py +++ b/hyperion/helpers/multi_test_trial_data_reader_v2.py @@ -13,7 +13,7 @@ from ..io import RandomAccessDataReaderFactory as DRF from ..utils import Utt2Info, TrialNdx, TrialKey -from ..transforms import TransformList +from ..np.transforms import TransformList class MultiTestTrialDataReaderV2(object): diff --git a/hyperion/helpers/plda_factory.py b/hyperion/helpers/plda_factory.py index b9c2ec60..0fdd2609 100644 --- a/hyperion/helpers/plda_factory.py +++ b/hyperion/helpers/plda_factory.py @@ -5,7 +5,7 @@ import numpy as np -from ..pdfs.plda import FRPLDA, SPLDA, PLDA +from ..np.pdfs.plda import FRPLDA, SPLDA, PLDA class PLDAFactory(object): diff --git a/hyperion/helpers/tracking_data_reader.py b/hyperion/helpers/tracking_data_reader.py index 6dfc9a19..4bac5be2 100644 --- a/hyperion/helpers/tracking_data_reader.py +++ b/hyperion/helpers/tracking_data_reader.py @@ -13,7 +13,7 @@ from ..io import RandomAccessDataReaderFactory as DRF from ..utils import Utt2Info, TrialNdx, ExtSegmentList -from ..transforms import TransformList +from ..np.transforms import TransformList class TrackingDataReader(object): diff --git a/hyperion/helpers/trial_data_reader.py b/hyperion/helpers/trial_data_reader.py index 984cdb1f..e6da5b7d 100644 --- a/hyperion/helpers/trial_data_reader.py +++ b/hyperion/helpers/trial_data_reader.py @@ -13,7 +13,7 @@ from ..io import RandomAccessDataReaderFactory as DRF from ..utils.utt2info import Utt2Info from ..utils import TrialNdx, TrialKey -from ..transforms import TransformList +from ..np.transforms import TransformList class TrialDataReader(object): diff --git a/hyperion/helpers/vector_class_reader.py b/hyperion/helpers/vector_class_reader.py index 4f893aac..0c6f346d 100644 --- a/hyperion/helpers/vector_class_reader.py +++ b/hyperion/helpers/vector_class_reader.py @@ -14,7 +14,7 @@ from ..io import RandomAccessDataReaderFactory as DRF from ..utils.utt2info import Utt2Info from ..utils.tensors import to3D_by_class -from ..transforms import TransformList +from ..np.transforms import TransformList class VectorClassReader(object): diff --git a/hyperion/helpers/vector_reader.py b/hyperion/helpers/vector_reader.py index 3f0fa1d2..0ac1b11a 100644 --- a/hyperion/helpers/vector_reader.py +++ b/hyperion/helpers/vector_reader.py @@ -13,7 +13,7 @@ from ..io import RandomAccessDataReaderFactory as DRF from 
..utils.scp_list import SCPList -from ..transforms import TransformList +from ..np.transforms import TransformList class VectorReader(object): diff --git a/hyperion/np/__init__.py b/hyperion/np/__init__.py new file mode 100644 index 00000000..d2774314 --- /dev/null +++ b/hyperion/np/__init__.py @@ -0,0 +1,7 @@ +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from .np_model import NPModel +from .np_model_loader import NPModelLoader diff --git a/hyperion/augment/__init__.py b/hyperion/np/augment/__init__.py similarity index 100% rename from hyperion/augment/__init__.py rename to hyperion/np/augment/__init__.py diff --git a/hyperion/augment/noise_augment.py b/hyperion/np/augment/noise_augment.py similarity index 98% rename from hyperion/augment/noise_augment.py rename to hyperion/np/augment/noise_augment.py index ad88ff08..e180a292 100644 --- a/hyperion/augment/noise_augment.py +++ b/hyperion/np/augment/noise_augment.py @@ -11,8 +11,8 @@ import numpy as np -from ..hyp_defs import float_cpu -from ..io import RandomAccessAudioReader as AR +from ...hyp_defs import float_cpu +from ...io import RandomAccessAudioReader as AR class SingleNoiseAugment(object): diff --git a/hyperion/augment/reverb_augment.py b/hyperion/np/augment/reverb_augment.py similarity index 99% rename from hyperion/augment/reverb_augment.py rename to hyperion/np/augment/reverb_augment.py index 9f80c168..fe225e53 100644 --- a/hyperion/augment/reverb_augment.py +++ b/hyperion/np/augment/reverb_augment.py @@ -14,7 +14,7 @@ import numpy as np from scipy import signal -from ..hyp_defs import float_cpu +from ...hyp_defs import float_cpu from ..io import RandomAccessDataReaderFactory as DRF diff --git a/hyperion/augment/speech_augment.py b/hyperion/np/augment/speech_augment.py similarity index 99% rename from hyperion/augment/speech_augment.py rename to hyperion/np/augment/speech_augment.py index b6756ce7..e3eab4ea 100644 --- a/hyperion/augment/speech_augment.py +++ b/hyperion/np/augment/speech_augment.py @@ -9,7 +9,7 @@ import numpy as np -from ..hyp_defs import float_cpu +from ...hyp_defs import float_cpu from .noise_augment import NoiseAugment from .reverb_augment import ReverbAugment diff --git a/hyperion/augment/speed_augment.py b/hyperion/np/augment/speed_augment.py similarity index 99% rename from hyperion/augment/speed_augment.py rename to hyperion/np/augment/speed_augment.py index b72bf338..7fdaab3c 100644 --- a/hyperion/augment/speed_augment.py +++ b/hyperion/np/augment/speed_augment.py @@ -9,7 +9,7 @@ import numpy as np from librosa.effects import time_stretch -from ..hyp_defs import float_cpu +from ...hyp_defs import float_cpu class SpeedAugment(object): diff --git a/hyperion/calibration/__init__.py b/hyperion/np/calibration/__init__.py similarity index 100% rename from hyperion/calibration/__init__.py rename to hyperion/np/calibration/__init__.py diff --git a/hyperion/calibration/gauss_calibration.py b/hyperion/np/calibration/gauss_calibration.py similarity index 98% rename from hyperion/calibration/gauss_calibration.py rename to hyperion/np/calibration/gauss_calibration.py index 07d882ed..630d5e95 100644 --- a/hyperion/calibration/gauss_calibration.py +++ b/hyperion/np/calibration/gauss_calibration.py @@ -4,10 +4,10 @@ """ import numpy as np -from ..hyp_model import HypModel +from ..np_model import NPModel -class GaussCalibration(HypModel): +class GaussCalibration(NPModel): """Class for supervised Gaussian calibration. 
The model assumes that target and non-target score distributions are Gaussians with shared covariance. diff --git a/hyperion/calibration/unsup_gauss_calibration.py b/hyperion/np/calibration/unsup_gauss_calibration.py similarity index 100% rename from hyperion/calibration/unsup_gauss_calibration.py rename to hyperion/np/calibration/unsup_gauss_calibration.py diff --git a/hyperion/classifiers/__init__.py b/hyperion/np/classifiers/__init__.py similarity index 100% rename from hyperion/classifiers/__init__.py rename to hyperion/np/classifiers/__init__.py diff --git a/hyperion/classifiers/binary_logistic_regression.py b/hyperion/np/classifiers/binary_logistic_regression.py similarity index 100% rename from hyperion/classifiers/binary_logistic_regression.py rename to hyperion/np/classifiers/binary_logistic_regression.py diff --git a/hyperion/classifiers/greedy_fusion.py b/hyperion/np/classifiers/greedy_fusion.py similarity index 99% rename from hyperion/classifiers/greedy_fusion.py rename to hyperion/np/classifiers/greedy_fusion.py index 6eff32ad..2102bc22 100644 --- a/hyperion/classifiers/greedy_fusion.py +++ b/hyperion/np/classifiers/greedy_fusion.py @@ -6,14 +6,14 @@ import logging import numpy as np -from ..hyp_defs import float_cpu, float_save -from ..hyp_model import HypModel +from ...hyp_defs import float_cpu, float_save +from ..np_model import NPModel from ..metrics import dcf from .binary_logistic_regression import BinaryLogisticRegression as BLR -class GreedyFusionBinaryLR(HypModel): +class GreedyFusionBinaryLR(NPModel): """Greedy score fusion based on binary logistic regression. It computes ``max_systems`` fusions. The best system, the best fusion of two, diff --git a/hyperion/classifiers/linear_gbe.py b/hyperion/np/classifiers/linear_gbe.py similarity index 98% rename from hyperion/classifiers/linear_gbe.py rename to hyperion/np/classifiers/linear_gbe.py index 075ea751..c786cb50 100644 --- a/hyperion/classifiers/linear_gbe.py +++ b/hyperion/np/classifiers/linear_gbe.py @@ -7,12 +7,12 @@ import numpy as np from scipy.special import gammaln -from ..hyp_defs import float_cpu -from ..hyp_model import HypModel -from ..utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax +from ...hyp_defs import float_cpu +from ..np_model import NPModel +from ...utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax -class LinearGBE(HypModel): +class LinearGBE(NPModel): """Linear Gaussian Back-end. 
Attributes: diff --git a/hyperion/classifiers/linear_gbe1.py b/hyperion/np/classifiers/linear_gbe1.py similarity index 97% rename from hyperion/classifiers/linear_gbe1.py rename to hyperion/np/classifiers/linear_gbe1.py index 71edd606..8c5df381 100644 --- a/hyperion/classifiers/linear_gbe1.py +++ b/hyperion/np/classifiers/linear_gbe1.py @@ -5,12 +5,12 @@ import numpy as np -from ..hyp_defs import float_cpu -from ..hyp_model import HypModel -from ..utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax +from ...hyp_defs import float_cpu +from ..np_model import NPModel +from ...utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax -class LinearGBE(HypModel): +class LinearGBE(NPModel): def __init__( self, mu=None, diff --git a/hyperion/classifiers/linear_gbe_up.py b/hyperion/np/classifiers/linear_gbe_up.py similarity index 98% rename from hyperion/classifiers/linear_gbe_up.py rename to hyperion/np/classifiers/linear_gbe_up.py index 8c855dfa..4a489639 100644 --- a/hyperion/classifiers/linear_gbe_up.py +++ b/hyperion/np/classifiers/linear_gbe_up.py @@ -7,9 +7,9 @@ import numpy as np from scipy.special import gammaln -from ..hyp_defs import float_cpu -from ..hyp_model import HypModel -from ..utils.math import ( +from ...hyp_defs import float_cpu +from ..np_model import NPModel +from ...utils.math import ( int2onehot, logdet_pdmat, invert_pdmat, diff --git a/hyperion/classifiers/linear_svmc.py b/hyperion/np/classifiers/linear_svmc.py similarity index 98% rename from hyperion/classifiers/linear_svmc.py rename to hyperion/np/classifiers/linear_svmc.py index 244e0dc0..df14a16e 100644 --- a/hyperion/classifiers/linear_svmc.py +++ b/hyperion/np/classifiers/linear_svmc.py @@ -8,12 +8,12 @@ from sklearn.svm import LinearSVC as SVC -from ..hyp_defs import float_cpu -from ..hyp_model import HypModel -from ..utils.math import softmax +from ...hyp_defs import float_cpu +from ..np_model import NPModel +from ...utils.math import softmax -class LinearSVMC(HypModel): +class LinearSVMC(NPModel): """Linear Support Vector Machine for Classification. Attributes: diff --git a/hyperion/classifiers/logistic_regression.py b/hyperion/np/classifiers/logistic_regression.py similarity index 99% rename from hyperion/classifiers/logistic_regression.py rename to hyperion/np/classifiers/logistic_regression.py index ad845170..932a28e3 100644 --- a/hyperion/classifiers/logistic_regression.py +++ b/hyperion/np/classifiers/logistic_regression.py @@ -8,12 +8,12 @@ from sklearn.linear_model import LogisticRegression as LR -from ..hyp_defs import float_cpu -from ..hyp_model import HypModel -from ..utils.math import softmax +from ...hyp_defs import float_cpu +from ..np_model import NPModel +from ...utils.math import softmax -class LogisticRegression(HypModel): +class LogisticRegression(NPModel): """Multi-class logistic regression. This is a wrapper that adds functionalities to sklearn logistic regression. 
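A minimal before/after sketch of the import change this patch applies across the tree; the class name comes from the hunks above, and the commented "old" lines are illustrative of the pre-patch layout:

    # Old layout: numpy-based models were imported from the package root:
    #   from hyperion.classifiers import LogisticRegression as LR
    # New layout: the same classes live under the np subpackage:
    from hyperion.np.classifiers import LogisticRegression as LR

    # Inside the moved modules, relative imports to package-level utilities
    # gain one level, since the code now sits one directory deeper:
    #   old (hyperion/classifiers/):    from ..hyp_defs import float_cpu
    #   new (hyperion/np/classifiers/): from ...hyp_defs import float_cpu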
diff --git a/hyperion/classifiers/q_scoring_homo_gbe.py b/hyperion/np/classifiers/q_scoring_homo_gbe.py similarity index 97% rename from hyperion/classifiers/q_scoring_homo_gbe.py rename to hyperion/np/classifiers/q_scoring_homo_gbe.py index 83f2408b..8ef42052 100644 --- a/hyperion/classifiers/q_scoring_homo_gbe.py +++ b/hyperion/np/classifiers/q_scoring_homo_gbe.py @@ -7,12 +7,12 @@ import numpy as np from scipy.special import gammaln -from ..hyp_defs import float_cpu -from ..hyp_model import HypModel -from ..utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax +from ...hyp_defs import float_cpu +from ..np_model import NPModel +from ...utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax -class QScoringHomoGBE(HypModel): +class QScoringHomoGBE(NPModel): def __init__( self, mu=None, diff --git a/hyperion/clustering/__init__.py b/hyperion/np/clustering/__init__.py similarity index 100% rename from hyperion/clustering/__init__.py rename to hyperion/np/clustering/__init__.py diff --git a/hyperion/clustering/ahc.py b/hyperion/np/clustering/ahc.py similarity index 97% rename from hyperion/clustering/ahc.py rename to hyperion/np/clustering/ahc.py index 2f8dbe08..fc5dbb30 100644 --- a/hyperion/clustering/ahc.py +++ b/hyperion/np/clustering/ahc.py @@ -10,11 +10,11 @@ from scipy.cluster.hierarchy import linkage from sklearn.metrics import homogeneity_score, completeness_score -from ..hyp_defs import float_cpu -from ..hyp_model import HypModel +from ...hyp_defs import float_cpu +from ..np_model import NPModel -class AHC(HypModel): +class AHC(NPModel): def __init__(self, method="average", metric="llr", **kwargs): super().__init__(**kwargs) self.method = method diff --git a/hyperion/clustering/kmeans.py b/hyperion/np/clustering/kmeans.py similarity index 95% rename from hyperion/clustering/kmeans.py rename to hyperion/np/clustering/kmeans.py index 7da2bd01..9d8758e1 100644 --- a/hyperion/clustering/kmeans.py +++ b/hyperion/np/clustering/kmeans.py @@ -8,11 +8,11 @@ import numpy as np import h5py -from ..hyp_defs import float_cpu -from ..hyp_model import HypModel +from ...hyp_defs import float_cpu +from ..np_model import NPModel -class KMeans(HypModel): +class KMeans(NPModel): def __init__(self, num_clusters, mu=None, rtol=0.001, **kwargs): super(KMeans, self).__init__(**kwargs) self.num_clusters = num_clusters diff --git a/hyperion/diarization/__init__.py b/hyperion/np/diarization/__init__.py similarity index 100% rename from hyperion/diarization/__init__.py rename to hyperion/np/diarization/__init__.py diff --git a/hyperion/diarization/diar_ahc_plda.py b/hyperion/np/diarization/diar_ahc_plda.py similarity index 100% rename from hyperion/diarization/diar_ahc_plda.py rename to hyperion/np/diarization/diar_ahc_plda.py diff --git a/hyperion/feats/__init__.py b/hyperion/np/feats/__init__.py similarity index 100% rename from hyperion/feats/__init__.py rename to hyperion/np/feats/__init__.py diff --git a/hyperion/feats/energy_vad.py b/hyperion/np/feats/energy_vad.py similarity index 99% rename from hyperion/feats/energy_vad.py rename to hyperion/np/feats/energy_vad.py index 734e86bb..7785318f 100644 --- a/hyperion/feats/energy_vad.py +++ b/hyperion/np/feats/energy_vad.py @@ -7,8 +7,8 @@ import numpy as np from scipy.signal import lfilter -from ..hyp_defs import float_cpu -from ..utils.misc import str2bool +from ...hyp_defs import float_cpu +from ...utils.misc import str2bool from .stft import st_logE diff --git a/hyperion/feats/feature_normalization.py 
b/hyperion/np/feats/feature_normalization.py similarity index 99% rename from hyperion/feats/feature_normalization.py rename to hyperion/np/feats/feature_normalization.py index 313d027b..38f7b766 100644 --- a/hyperion/feats/feature_normalization.py +++ b/hyperion/np/feats/feature_normalization.py @@ -7,7 +7,7 @@ from jsonargparse import ArgumentParser, ActionParser from scipy.signal import convolve2d -from ..hyp_defs import float_cpu +from ...hyp_defs import float_cpu class MeanVarianceNorm(object): diff --git a/hyperion/feats/feature_windows.py b/hyperion/np/feats/feature_windows.py similarity index 98% rename from hyperion/feats/feature_windows.py rename to hyperion/np/feats/feature_windows.py index 3e880f7e..ae5d07d2 100644 --- a/hyperion/feats/feature_windows.py +++ b/hyperion/np/feats/feature_windows.py @@ -8,7 +8,7 @@ import numpy as np from scipy.signal import blackman, hamming, hann -from ..hyp_defs import float_cpu +from ...hyp_defs import float_cpu class FeatureWindowFactory(object): diff --git a/hyperion/feats/filter_banks.py b/hyperion/np/feats/filter_banks.py similarity index 98% rename from hyperion/feats/filter_banks.py rename to hyperion/np/feats/filter_banks.py index b92535da..3b0da644 100644 --- a/hyperion/feats/filter_banks.py +++ b/hyperion/np/feats/filter_banks.py @@ -9,8 +9,7 @@ import numpy as np from librosa.filters import mel as make_mel_librosa -from ..hyp_defs import float_cpu -from ..utils.misc import str2bool +from ...hyp_defs import float_cpu class FilterBankFactory(object): diff --git a/hyperion/feats/frame_selector.py b/hyperion/np/feats/frame_selector.py similarity index 100% rename from hyperion/feats/frame_selector.py rename to hyperion/np/feats/frame_selector.py diff --git a/hyperion/feats/mfcc.py b/hyperion/np/feats/mfcc.py similarity index 99% rename from hyperion/feats/mfcc.py rename to hyperion/np/feats/mfcc.py index 94af5c2e..cf517ee5 100644 --- a/hyperion/feats/mfcc.py +++ b/hyperion/np/feats/mfcc.py @@ -9,8 +9,8 @@ from scipy.fftpack import dct from scipy.signal import lfilter -from ..hyp_defs import float_cpu -from ..utils.misc import str2bool +from ...hyp_defs import float_cpu +from ...utils.misc import str2bool from .feature_windows import FeatureWindowFactory as FWF from .filter_banks import FilterBankFactory as FBF from .stft import strft, st_logE diff --git a/hyperion/feats/stft.py b/hyperion/np/feats/stft.py similarity index 98% rename from hyperion/feats/stft.py rename to hyperion/np/feats/stft.py index 7f22bdee..0a55453c 100644 --- a/hyperion/feats/stft.py +++ b/hyperion/np/feats/stft.py @@ -7,7 +7,7 @@ import numpy as np -from ..hyp_defs import float_cpu +from ...hyp_defs import float_cpu def stft(x, frame_length, frame_shift, fft_length, window=None): diff --git a/hyperion/metrics/__init__.py b/hyperion/np/metrics/__init__.py similarity index 100% rename from hyperion/metrics/__init__.py rename to hyperion/np/metrics/__init__.py diff --git a/hyperion/metrics/acc.py b/hyperion/np/metrics/acc.py similarity index 100% rename from hyperion/metrics/acc.py rename to hyperion/np/metrics/acc.py diff --git a/hyperion/metrics/cllr.py b/hyperion/np/metrics/cllr.py similarity index 100% rename from hyperion/metrics/cllr.py rename to hyperion/np/metrics/cllr.py diff --git a/hyperion/metrics/confidence.py b/hyperion/np/metrics/confidence.py similarity index 100% rename from hyperion/metrics/confidence.py rename to hyperion/np/metrics/confidence.py diff --git a/hyperion/metrics/confusion_matrix.py b/hyperion/np/metrics/confusion_matrix.py similarity 
index 100% rename from hyperion/metrics/confusion_matrix.py rename to hyperion/np/metrics/confusion_matrix.py diff --git a/hyperion/metrics/dcf.py b/hyperion/np/metrics/dcf.py similarity index 100% rename from hyperion/metrics/dcf.py rename to hyperion/np/metrics/dcf.py diff --git a/hyperion/metrics/dcf_plot.py b/hyperion/np/metrics/dcf_plot.py similarity index 100% rename from hyperion/metrics/dcf_plot.py rename to hyperion/np/metrics/dcf_plot.py diff --git a/hyperion/metrics/det_plot.py b/hyperion/np/metrics/det_plot.py similarity index 100% rename from hyperion/metrics/det_plot.py rename to hyperion/np/metrics/det_plot.py diff --git a/hyperion/metrics/eer.py b/hyperion/np/metrics/eer.py similarity index 100% rename from hyperion/metrics/eer.py rename to hyperion/np/metrics/eer.py diff --git a/hyperion/metrics/roc.py b/hyperion/np/metrics/roc.py similarity index 100% rename from hyperion/metrics/roc.py rename to hyperion/np/metrics/roc.py diff --git a/hyperion/metrics/utils.py b/hyperion/np/metrics/utils.py similarity index 99% rename from hyperion/metrics/utils.py rename to hyperion/np/metrics/utils.py index 8a764c3d..4f06bb18 100644 --- a/hyperion/metrics/utils.py +++ b/hyperion/np/metrics/utils.py @@ -7,7 +7,7 @@ import numpy as np -from ..hyp_defs import float_cpu +from ...hyp_defs import float_cpu def effective_prior(p_tar, c_miss, c_fa): diff --git a/hyperion/metrics/verification_evaluator.py b/hyperion/np/metrics/verification_evaluator.py similarity index 99% rename from hyperion/metrics/verification_evaluator.py rename to hyperion/np/metrics/verification_evaluator.py index d2b26ed6..9c9c3208 100644 --- a/hyperion/metrics/verification_evaluator.py +++ b/hyperion/np/metrics/verification_evaluator.py @@ -17,9 +17,9 @@ matplotlib.rc("text", usetex=True) import matplotlib.pyplot as plt -from ..hyp_defs import float_cpu -from ..utils import TrialKey, TrialScores -from ..utils.trial_stats import TrialStats +from ...hyp_defs import float_cpu +from ...utils import TrialKey, TrialScores +from ...utils.trial_stats import TrialStats from .utils import effective_prior from .dcf import fast_eval_dcf_eer diff --git a/hyperion/hyp_model.py b/hyperion/np/np_model.py similarity index 98% rename from hyperion/hyp_model.py rename to hyperion/np/np_model.py index 0ffd2285..35717a82 100644 --- a/hyperion/hyp_model.py +++ b/hyperion/np/np_model.py @@ -10,10 +10,10 @@ import numpy as np import h5py -from .hyp_defs import float_save, float_cpu +from ..hyp_defs import float_save, float_cpu -class HypModel(object): +class NPModel(object): __metaclass__ = ABCMeta def __init__(self, name=None, **kwargs): diff --git a/hyperion/model_loader.py b/hyperion/np/np_model_loader.py similarity index 78% rename from hyperion/model_loader.py rename to hyperion/np/np_model_loader.py index 30780d7b..efdd27a9 100644 --- a/hyperion/model_loader.py +++ b/hyperion/np/np_model_loader.py @@ -3,12 +3,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .hyp_model import HypModel +from .np_model import NPModel from .pdfs import * from .transforms import * -class ModelLoader(object): +class NPModelLoader(object): @staticmethod def get_object(): obj_dict = { @@ -18,6 +18,7 @@ def get_object(): "GMM": GMM, "FRPLDA": FRPLDA, "SPLDA": SPLDA, + "PLDA": PLDA, "CentWhiten": CentWhiten, "LNorm": LNorm, "PCA": PCA, @@ -31,6 +32,6 @@ def get_object(): @staticmethod def load(file_path): - class_name = HypModel.load_config(file_path)["class_name"] - class_obj = ModelLoader.get_object()[class_name] + class_name = 
diff --git a/hyperion/model_loader.py b/hyperion/np/np_model_loader.py
similarity index 78%
rename from hyperion/model_loader.py
rename to hyperion/np/np_model_loader.py
index 30780d7b..efdd27a9 100644
--- a/hyperion/model_loader.py
+++ b/hyperion/np/np_model_loader.py
@@ -3,12 +3,12 @@
     Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 
-from .hyp_model import HypModel
+from .np_model import NPModel
 from .pdfs import *
 from .transforms import *
 
 
-class ModelLoader(object):
+class NPModelLoader(object):
     @staticmethod
     def get_object():
         obj_dict = {
@@ -18,6 +18,7 @@ def get_object():
             "GMM": GMM,
             "FRPLDA": FRPLDA,
             "SPLDA": SPLDA,
+            "PLDA": PLDA,
             "CentWhiten": CentWhiten,
             "LNorm": LNorm,
             "PCA": PCA,
@@ -31,6 +32,6 @@
 
     @staticmethod
     def load(file_path):
-        class_name = HypModel.load_config(file_path)["class_name"]
-        class_obj = ModelLoader.get_object()[class_name]
+        class_name = NPModel.load_config(file_path)["class_name"]
+        class_obj = NPModelLoader.get_object()[class_name]
         return class_obj.load(file_path)
diff --git a/hyperion/pdfs/__init__.py b/hyperion/np/pdfs/__init__.py
similarity index 100%
rename from hyperion/pdfs/__init__.py
rename to hyperion/np/pdfs/__init__.py
diff --git a/hyperion/pdfs/core/__init__.py b/hyperion/np/pdfs/core/__init__.py
similarity index 100%
rename from hyperion/pdfs/core/__init__.py
rename to hyperion/np/pdfs/core/__init__.py
diff --git a/hyperion/pdfs/core/exp_family.py b/hyperion/np/pdfs/core/exp_family.py
similarity index 100%
rename from hyperion/pdfs/core/exp_family.py
rename to hyperion/np/pdfs/core/exp_family.py
diff --git a/hyperion/pdfs/core/normal.py b/hyperion/np/pdfs/core/normal.py
similarity index 99%
rename from hyperion/pdfs/core/normal.py
rename to hyperion/np/pdfs/core/normal.py
index b1ff4224..ed60edb7 100644
--- a/hyperion/pdfs/core/normal.py
+++ b/hyperion/np/pdfs/core/normal.py
@@ -8,14 +8,14 @@
 import scipy.linalg as la
 from scipy.special import erf
 
-from ...hyp_defs import float_cpu
-from ...utils.plotting import (
+from ....hyp_defs import float_cpu
+from ....utils.plotting import (
     plot_gaussian_1D,
     plot_gaussian_ellipsoid_2D,
     plot_gaussian_ellipsoid_3D,
     plot_gaussian_3D,
 )
-from ...utils.math import (
+from ....utils.math import (
     invert_pdmat,
     invert_trimat,
     symmat2vec,
diff --git a/hyperion/pdfs/core/normal_diag_cov.py b/hyperion/np/pdfs/core/normal_diag_cov.py
similarity index 99%
rename from hyperion/pdfs/core/normal_diag_cov.py
rename to hyperion/np/pdfs/core/normal_diag_cov.py
index 562d3899..cb21f84c 100644
--- a/hyperion/pdfs/core/normal_diag_cov.py
+++ b/hyperion/np/pdfs/core/normal_diag_cov.py
@@ -10,8 +10,8 @@
 # import matplotlib.pyplot as plt
 # import matplotlib.mlab as mlab
 
-from ...hyp_defs import float_cpu
-from ...utils.plotting import (
+from ....hyp_defs import float_cpu
+from ....utils.plotting import (
     plot_gaussian_1D,
     plot_gaussian_ellipsoid_2D,
     plot_gaussian_ellipsoid_3D,
diff --git a/hyperion/pdfs/core/pdf.py b/hyperion/np/pdfs/core/pdf.py
similarity index 93%
rename from hyperion/pdfs/core/pdf.py
rename to hyperion/np/pdfs/core/pdf.py
index 2764780c..acd26105 100644
--- a/hyperion/pdfs/core/pdf.py
+++ b/hyperion/np/pdfs/core/pdf.py
@@ -6,10 +6,10 @@
 import numpy as np
 from abc import ABCMeta, abstractmethod
 
-from ...hyp_model import HypModel
+from ...np_model import NPModel
 
 
-class PDF(HypModel):
+class PDF(NPModel):
     __metaclass__ = ABCMeta
 
     def __init__(self, x_dim=1, **kwargs):
diff --git a/hyperion/pdfs/hmm/__init__.py b/hyperion/np/pdfs/hmm/__init__.py
similarity index 100%
rename from hyperion/pdfs/hmm/__init__.py
rename to hyperion/np/pdfs/hmm/__init__.py
diff --git a/hyperion/pdfs/hmm/hmm.py b/hyperion/np/pdfs/hmm/hmm.py
similarity index 99%
rename from hyperion/pdfs/hmm/hmm.py
rename to hyperion/np/pdfs/hmm/hmm.py
index aeec994a..b8497b38 100644
--- a/hyperion/pdfs/hmm/hmm.py
+++ b/hyperion/np/pdfs/hmm/hmm.py
@@ -5,8 +5,8 @@
 import numpy as np
 
-from ...hyp_defs import float_cpu
-from ...utils.math import softmax, logsumexp
+from ....hyp_defs import float_cpu
+from ....utils.math import softmax, logsumexp
 
 from ..core import PDF
 
diff --git a/hyperion/pdfs/jfa/__init__.py b/hyperion/np/pdfs/jfa/__init__.py
similarity index 100%
rename from hyperion/pdfs/jfa/__init__.py
rename to hyperion/np/pdfs/jfa/__init__.py
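Note: the np_model_loader.py hunks above rename HypModel/ModelLoader to NPModel/NPModelLoader and register the previously missing "PLDA" entry; the dispatch logic itself is unchanged. A hypothetical usage sketch (the model file path is made up, not from the patch):

    from hyperion.np.np_model_loader import NPModelLoader

    # load_config() reads "class_name" from the stored model file, get_object()
    # maps it to a class (e.g. SPLDA, or the newly registered PLDA), and that
    # class's own load() reconstructs the model
    model = NPModelLoader.load("backend/plda.h5")   # hypothetical path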
diff --git a/hyperion/pdfs/jfa/jfa_total.py b/hyperion/np/pdfs/jfa/jfa_total.py
similarity index 99%
rename from hyperion/pdfs/jfa/jfa_total.py
rename to hyperion/np/pdfs/jfa/jfa_total.py
index 74fe0f95..4a11b5cf 100644
--- a/hyperion/pdfs/jfa/jfa_total.py
+++ b/hyperion/np/pdfs/jfa/jfa_total.py
@@ -6,8 +6,8 @@
 import numpy as np
 from scipy import linalg as sla
 
-from ...hyp_defs import float_cpu
-from ...utils.math import (
+from ....hyp_defs import float_cpu
+from ....utils.math import (
     invert_pdmat,
     invert_trimat,
     logdet_pdmat,
diff --git a/hyperion/pdfs/mixtures/__init__.py b/hyperion/np/pdfs/mixtures/__init__.py
similarity index 100%
rename from hyperion/pdfs/mixtures/__init__.py
rename to hyperion/np/pdfs/mixtures/__init__.py
diff --git a/hyperion/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
similarity index 99%
rename from hyperion/pdfs/mixtures/exp_family_mixture.py
rename to hyperion/np/pdfs/mixtures/exp_family_mixture.py
index 113bb8fc..143d7df5 100644
--- a/hyperion/pdfs/mixtures/exp_family_mixture.py
+++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
@@ -7,9 +7,9 @@
 import logging
 from abc import ABCMeta, abstractmethod
 
-from ...hyp_defs import float_cpu
-from ...utils.math import softmax, logsumexp
-from ...utils.queues import GeneratorQueue
+from ....hyp_defs import float_cpu
+from ....utils.math import softmax, logsumexp
+from ....utils.queues import GeneratorQueue
 
 from ..core import PDF
 
diff --git a/hyperion/pdfs/mixtures/gmm.py b/hyperion/np/pdfs/mixtures/gmm.py
similarity index 99%
rename from hyperion/pdfs/mixtures/gmm.py
rename to hyperion/np/pdfs/mixtures/gmm.py
index b71f0a61..391c59ee 100644
--- a/hyperion/pdfs/mixtures/gmm.py
+++ b/hyperion/np/pdfs/mixtures/gmm.py
@@ -8,8 +8,8 @@
 
 from scipy.special import erf
 
-from ...hyp_defs import float_cpu
-from ...utils.math import (
+from ....hyp_defs import float_cpu
+from ....utils.math import (
     softmax,
     logsumexp,
     invert_pdmat,
@@ -19,7 +19,7 @@
     fullcov_varfloor,
     logdet_pdmat,
 )
-from ...utils.plotting import (
+from ....utils.plotting import (
     plot_gaussian_1D,
     plot_gaussian_ellipsoid_2D,
     plot_gaussian_ellipsoid_3D,
diff --git a/hyperion/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
similarity index 99%
rename from hyperion/pdfs/mixtures/gmm_diag_cov.py
rename to hyperion/np/pdfs/mixtures/gmm_diag_cov.py
index b586a900..46a30f81 100644
--- a/hyperion/pdfs/mixtures/gmm_diag_cov.py
+++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
@@ -7,9 +7,9 @@
 import h5py
 from scipy.special import erf
 
-from ...hyp_defs import float_cpu
-from ...utils.math import softmax, logsumexp
-from ...utils.plotting import (
+from ....hyp_defs import float_cpu
+from ....utils.math import softmax, logsumexp
+from ....utils.plotting import (
     plot_gaussian_1D,
     plot_gaussian_ellipsoid_2D,
     plot_gaussian_ellipsoid_3D,
diff --git a/hyperion/pdfs/mixtures/gmm_tied_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py
similarity index 98%
rename from hyperion/pdfs/mixtures/gmm_tied_diag_cov.py
rename to hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py
index a3e7f93e..87043cc4 100644
--- a/hyperion/pdfs/mixtures/gmm_tied_diag_cov.py
+++ b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py
@@ -6,9 +6,9 @@
 import h5py
 from scipy.special import erf
 
-from ...hyp_defs import float_cpu
-from ...utils.math import softmax, logsumexp
-from ...utils.plotting import (
+from ....hyp_defs import float_cpu
+from ....utils.math import softmax, logsumexp
+from ....utils.plotting import (
     plot_gaussian_1D,
     plot_gaussian_ellipsoid_2D,
     plot_gaussian_ellipsoid_3D,
diff --git a/hyperion/pdfs/plda/__init__.py b/hyperion/np/pdfs/plda/__init__.py
similarity index 100%
rename from hyperion/pdfs/plda/__init__.py
rename to hyperion/np/pdfs/plda/__init__.py
diff --git a/hyperion/pdfs/plda/frplda.py b/hyperion/np/pdfs/plda/frplda.py
similarity index 99%
rename from hyperion/pdfs/plda/frplda.py
rename to hyperion/np/pdfs/plda/frplda.py
index 5ea628fe..137276c7 100644
--- a/hyperion/pdfs/plda/frplda.py
+++ b/hyperion/np/pdfs/plda/frplda.py
@@ -6,8 +6,8 @@
 import numpy as np
 from scipy import linalg as sla
 
-from ...hyp_defs import float_cpu
-from ...utils.math import invert_pdmat, invert_trimat, logdet_pdmat
+from ....hyp_defs import float_cpu
+from ....utils.math import invert_pdmat, invert_trimat, logdet_pdmat
 
 from .plda_base import PLDABase
 
diff --git a/hyperion/pdfs/plda/plda.py b/hyperion/np/pdfs/plda/plda.py
similarity index 99%
rename from hyperion/pdfs/plda/plda.py
rename to hyperion/np/pdfs/plda/plda.py
index 16dee5ea..30c21361 100644
--- a/hyperion/pdfs/plda/plda.py
+++ b/hyperion/np/pdfs/plda/plda.py
@@ -6,8 +6,8 @@
 import numpy as np
 from scipy import linalg as sla
 
-from ...hyp_defs import float_cpu
-from ...utils.math import invert_pdmat, invert_trimat, logdet_pdmat
+from ....hyp_defs import float_cpu
+from ....utils.math import invert_pdmat, invert_trimat, logdet_pdmat
 
 from .plda_base import PLDABase
 
diff --git a/hyperion/pdfs/plda/plda_base.py b/hyperion/np/pdfs/plda/plda_base.py
similarity index 99%
rename from hyperion/pdfs/plda/plda_base.py
rename to hyperion/np/pdfs/plda/plda_base.py
index 8a83543d..1d5d758c 100644
--- a/hyperion/pdfs/plda/plda_base.py
+++ b/hyperion/np/pdfs/plda/plda_base.py
@@ -7,7 +7,7 @@
 
 from abc import ABCMeta, abstractmethod
 
-from ...hyp_defs import float_cpu
+from ....hyp_defs import float_cpu
 from ..core.pdf import PDF
 from ...transforms import LNorm
 
diff --git a/hyperion/pdfs/plda/splda.py b/hyperion/np/pdfs/plda/splda.py
similarity index 99%
rename from hyperion/pdfs/plda/splda.py
rename to hyperion/np/pdfs/plda/splda.py
index 1ffaaa1c..f10759cf 100644
--- a/hyperion/pdfs/plda/splda.py
+++ b/hyperion/np/pdfs/plda/splda.py
@@ -5,8 +5,8 @@
 import numpy as np
 from scipy import linalg as sla
 
-from ...hyp_defs import float_cpu
-from ...utils.math import invert_pdmat, invert_trimat, logdet_pdmat
+from ....hyp_defs import float_cpu
+from ....utils.math import invert_pdmat, invert_trimat, logdet_pdmat
 
 from .plda_base import PLDABase
 
diff --git a/hyperion/score_norm/__init__.py b/hyperion/np/score_norm/__init__.py
similarity index 100%
rename from hyperion/score_norm/__init__.py
rename to hyperion/np/score_norm/__init__.py
diff --git a/hyperion/score_norm/adapt_s_norm.py b/hyperion/np/score_norm/adapt_s_norm.py
similarity index 100%
rename from hyperion/score_norm/adapt_s_norm.py
rename to hyperion/np/score_norm/adapt_s_norm.py
diff --git a/hyperion/score_norm/s_norm.py b/hyperion/np/score_norm/s_norm.py
similarity index 100%
rename from hyperion/score_norm/s_norm.py
rename to hyperion/np/score_norm/s_norm.py
diff --git a/hyperion/score_norm/score_norm.py b/hyperion/np/score_norm/score_norm.py
similarity index 85%
rename from hyperion/score_norm/score_norm.py
rename to hyperion/np/score_norm/score_norm.py
index f20a0b98..45df0323 100644
--- a/hyperion/score_norm/score_norm.py
+++ b/hyperion/np/score_norm/score_norm.py
@@ -5,10 +5,10 @@
 
 import numpy as np
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 
 
-class ScoreNorm(HypModel):
+class ScoreNorm(NPModel):
     """
     Base class for score normalization
     """
diff --git a/hyperion/score_norm/t_norm.py b/hyperion/np/score_norm/t_norm.py
similarity index 100%
rename from hyperion/score_norm/t_norm.py
rename to hyperion/np/score_norm/t_norm.py
diff --git a/hyperion/score_norm/tz_norm.py b/hyperion/np/score_norm/tz_norm.py
similarity index 100%
rename from hyperion/score_norm/tz_norm.py
rename to hyperion/np/score_norm/tz_norm.py
diff --git a/hyperion/score_norm/z_norm.py b/hyperion/np/score_norm/z_norm.py
similarity index 100%
rename from hyperion/score_norm/z_norm.py
rename to hyperion/np/score_norm/z_norm.py
diff --git a/hyperion/score_norm/zt_norm.py b/hyperion/np/score_norm/zt_norm.py
similarity index 100%
rename from hyperion/score_norm/zt_norm.py
rename to hyperion/np/score_norm/zt_norm.py
diff --git a/hyperion/transforms/__init__.py b/hyperion/np/transforms/__init__.py
similarity index 100%
rename from hyperion/transforms/__init__.py
rename to hyperion/np/transforms/__init__.py
diff --git a/hyperion/transforms/cent_whiten.py b/hyperion/np/transforms/cent_whiten.py
similarity index 98%
rename from hyperion/transforms/cent_whiten.py
rename to hyperion/np/transforms/cent_whiten.py
index 00a83cca..f1cdf227 100644
--- a/hyperion/transforms/cent_whiten.py
+++ b/hyperion/np/transforms/cent_whiten.py
@@ -8,11 +8,11 @@
 
 import scipy.linalg as la
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 from ..pdfs import Normal
 
 
-class CentWhiten(HypModel):
+class CentWhiten(NPModel):
     """Class to do centering and whitening of i-vectors."""
 
     def __init__(self, mu=None, T=None, update_mu=True, update_T=True, **kwargs):
diff --git a/hyperion/transforms/cent_whiten_up.py b/hyperion/np/transforms/cent_whiten_up.py
similarity index 96%
rename from hyperion/transforms/cent_whiten_up.py
rename to hyperion/np/transforms/cent_whiten_up.py
index f22488f4..f3793328 100644
--- a/hyperion/transforms/cent_whiten_up.py
+++ b/hyperion/np/transforms/cent_whiten_up.py
@@ -8,7 +8,7 @@
 
 import scipy.linalg as la
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 from ..pdfs import Normal
 from .cent_whiten import CentWhiten
 
diff --git a/hyperion/transforms/coral.py b/hyperion/np/transforms/coral.py
similarity index 98%
rename from hyperion/transforms/coral.py
rename to hyperion/np/transforms/coral.py
index 0c9dea85..9aee7579 100644
--- a/hyperion/transforms/coral.py
+++ b/hyperion/np/transforms/coral.py
@@ -8,10 +8,10 @@
 
 import scipy.linalg as la
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 
 
-class CORAL(HypModel):
+class CORAL(NPModel):
     """Class to do CORAL"""
 
     def __init__(
diff --git a/hyperion/transforms/gaussianizer.py b/hyperion/np/transforms/gaussianizer.py
similarity index 96%
rename from hyperion/transforms/gaussianizer.py
rename to hyperion/np/transforms/gaussianizer.py
index ea512ade..26294134 100644
--- a/hyperion/transforms/gaussianizer.py
+++ b/hyperion/np/transforms/gaussianizer.py
@@ -10,11 +10,11 @@
 import scipy.linalg as la
 from scipy.special import erfinv
 
-from ..hyp_defs import float_cpu
-from ..hyp_model import HypModel
+from ...hyp_defs import float_cpu
+from ..np_model import NPModel
 
 
-class Gaussianizer(HypModel):
+class Gaussianizer(NPModel):
     """Class to make i-vector distribution standard Normal."""
 
     def __init__(self, max_vectors=None, r=None, **kwargs):
diff --git a/hyperion/transforms/lda.py b/hyperion/np/transforms/lda.py
similarity index 98%
rename from hyperion/transforms/lda.py
rename to hyperion/np/transforms/lda.py
index 142ed2bd..13c74fe8 100644
--- a/hyperion/transforms/lda.py
+++ b/hyperion/np/transforms/lda.py
@@ -8,11 +8,11 @@
 
 import scipy.linalg as la
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 from .sb_sw import SbSw
 
 
-class LDA(HypModel):
+class LDA(NPModel):
     """Class to do linear discriminant analysis."""
 
     def __init__(
diff --git a/hyperion/transforms/lnorm.py b/hyperion/np/transforms/lnorm.py
similarity index 100%
rename from hyperion/transforms/lnorm.py
rename to hyperion/np/transforms/lnorm.py
diff --git a/hyperion/transforms/lnorm_up.py b/hyperion/np/transforms/lnorm_up.py
similarity index 99%
rename from hyperion/transforms/lnorm_up.py
rename to hyperion/np/transforms/lnorm_up.py
index ab7b1ec9..b6e211d5 100644
--- a/hyperion/transforms/lnorm_up.py
+++ b/hyperion/np/transforms/lnorm_up.py
@@ -3,7 +3,6 @@
     Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 
-
 import numpy as np
 import h5py
 
diff --git a/hyperion/transforms/mvn.py b/hyperion/np/transforms/mvn.py
similarity index 94%
rename from hyperion/transforms/mvn.py
rename to hyperion/np/transforms/mvn.py
index a3b77582..7f60206e 100644
--- a/hyperion/transforms/mvn.py
+++ b/hyperion/np/transforms/mvn.py
@@ -8,10 +8,10 @@
 
 import scipy.linalg as la
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 
 
-class MVN(HypModel):
+class MVN(NPModel):
     """Class to do global mean and variance normalization."""
 
     def __init__(self, mu=None, s=None, **kwargs):
diff --git a/hyperion/transforms/nap.py b/hyperion/np/transforms/nap.py
similarity index 97%
rename from hyperion/transforms/nap.py
rename to hyperion/np/transforms/nap.py
index 6917c6b4..ee13e7e0 100644
--- a/hyperion/transforms/nap.py
+++ b/hyperion/np/transforms/nap.py
@@ -8,10 +8,10 @@
 
 import scipy.linalg as la
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 
 
-class NAP(HypModel):
+class NAP(NPModel):
     """Class to do nussance attribute projection."""
 
     def __init__(self, U=None, **kwargs):
diff --git a/hyperion/transforms/nda.py b/hyperion/np/transforms/nda.py
similarity index 94%
rename from hyperion/transforms/nda.py
rename to hyperion/np/transforms/nda.py
index 4f9772fc..c84a4527 100644
--- a/hyperion/transforms/nda.py
+++ b/hyperion/np/transforms/nda.py
@@ -8,11 +8,11 @@
 
 import scipy.linalg as la
 
-from ..hyp_model import HypModel
-from ..hyp_defs import float_cpu
+from ..np_model import NPModel
+from ...hyp_defs import float_cpu
 
 
-class NDA(HypModel):
+class NDA(NPModel):
     """Class to do nearest-neighbors discriminant analysis"""
 
     def __init__(self, mu=None, T=None, **kwargs):
diff --git a/hyperion/transforms/pca.py b/hyperion/np/transforms/pca.py
similarity index 98%
rename from hyperion/transforms/pca.py
rename to hyperion/np/transforms/pca.py
index cd8d6973..23477c84 100644
--- a/hyperion/transforms/pca.py
+++ b/hyperion/np/transforms/pca.py
@@ -8,10 +8,10 @@
 from numpy.linalg import matrix_rank
 import scipy.linalg as la
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 
 
-class PCA(HypModel):
+class PCA(NPModel):
     """Class to do principal component analysis"""
 
     def __init__(
diff --git a/hyperion/transforms/sb_sw.py b/hyperion/np/transforms/sb_sw.py
similarity index 98%
rename from hyperion/transforms/sb_sw.py
rename to hyperion/np/transforms/sb_sw.py
index 83c8d185..92cba594 100644
--- a/hyperion/transforms/sb_sw.py
+++ b/hyperion/np/transforms/sb_sw.py
@@ -8,11 +8,11 @@
 import scipy.linalg as la
 from sklearn.neighbors import BallTree
 
-from ..hyp_model import HypModel
-from ..hyp_defs import float_cpu
+from ..np_model import NPModel
+from ...hyp_defs import float_cpu
 
 
-class SbSw(HypModel):
+class SbSw(NPModel):
     """Class to compute between and within class matrices"""
 
     def __init__(self, Sb=None, Sw=None, mu=None, num_classes=0, **kwargs):
diff --git a/hyperion/transforms/skl_tsne.py b/hyperion/np/transforms/skl_tsne.py
similarity index 99%
rename from hyperion/transforms/skl_tsne.py
rename to hyperion/np/transforms/skl_tsne.py
index 048be0c7..b5be0fac 100644
--- a/hyperion/transforms/skl_tsne.py
+++ b/hyperion/np/transforms/skl_tsne.py
@@ -7,10 +7,10 @@
 
 from sklearn.manifold import TSNE
 
-from ..hyp_model import HypModel
+from ..np_model import NPModel
 
 
-class SklTSNE(HypModel):
+class SklTSNE(NPModel):
     """Wrapper class for sklearn TSNE manifold learner
 
     Attributes:
diff --git a/hyperion/transforms/transform_list.py b/hyperion/np/transforms/transform_list.py
similarity index 100%
rename from hyperion/transforms/transform_list.py
rename to hyperion/np/transforms/transform_list.py
diff --git a/hyperion/pipeline/pipeline.py b/hyperion/pipeline/pipeline.py
deleted file mode 100644
index 6b8076f5..00000000
--- a/hyperion/pipeline/pipeline.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
- Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
- Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
-"""
-
-import logging
-import numpy as np
-import h5py
-
-from ..hyp_model import HypModel
-
-from ..transforms import *
-
-
-class Pipeline(HypModel):
-    """Class to process a series of models."""
-
-    def __init__(self, transforms, **kwargs):
-        super(Pipeline, self).__init__(**kwargs)
-        if not isinstance(transforms, list):
-            transforms = [transforms]
-        self.transforms = transforms
-        if transforms is not None:
-            self.update_names()
-
-    def append(self, t):
-        self.transforms.append(t)
-        if self.name is not None:
-            t.name = self.name + "/" + t.name
-
-    def predict(self, x):
-        for t in self.transforms:
-            x = t.predict(x)
-        return x
-
-    def update_names(self):
-        if self.name is not None:
-            for t in self.transforms:
-                t.name = self.name + "/" + t.name
-
-    def get_config(self):
-        config = super(Pipeline, self).get_config()
-        config_t = {}
-        for i in range(len(self.transforms)):
-            config_t[i] = self.transforms[i].get_config()
-        config["transforms"] = config_t
-        return config
-
-    def save_params(self, f):
-        for t in self.transforms:
-            t.save_params(f)
-
-    @classmethod
-    def load_params(cls, f, config):
-        config_ts = config["transforms"]
-        transforms = []
-        for i in range(len(config_ts)):
-            config_t = config_ts[str(i)]
-            logging.debug(config_t)
-            class_t = globals()[config_t["class_name"]]
-            t = class_t.load_params(f, config_t)
-            transforms.append(t)
-        return cls(transforms, name=config["name"])
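Note: the deleted Pipeline class above was a thin sequential container, and no replacement is introduced in this commit. Its core behavior amounted to folding each transform's predict() over the input, as in this sketch (hypothetical objects, for reference only):

    transforms = [lda, lnorm]   # hypothetical fitted transform objects
    x = vectors                 # hypothetical input array
    for t in transforms:        # what Pipeline.predict(x) did
        x = t.predict(x)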
diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py
index f0ab811d..1801f11a 100644
--- a/hyperion/torch/data/audio_dataset.py
+++ b/hyperion/torch/data/audio_dataset.py
@@ -16,7 +16,7 @@
 from ..torch_defs import floatstr_torch
 from ...io import RandomAccessAudioReader as AR
 from ...utils.utt2info import Utt2Info
-from ...augment import SpeechAugment
+from ...np.augment import SpeechAugment
 
 from torch.utils.data import Dataset
 import torch.distributed as dist
diff --git a/hyperion/torch/layers/audio_feats.py b/hyperion/torch/layers/audio_feats.py
index d435ebbd..718844f5 100644
--- a/hyperion/torch/layers/audio_feats.py
+++ b/hyperion/torch/layers/audio_feats.py
@@ -7,8 +7,6 @@
 import math
 import logging
 
-from ...utils.misc import str2bool
-
 import torch
 import torch.nn as nn
 import torch.cuda.amp as amp
@@ -24,7 +22,7 @@
 _pow_spectrogram = lambda x: x.pow(2).sum(-1)
 _spectrogram = lambda x: x.pow(2).sum(-1).sqrt()
 
-from ...feats.filter_banks import FilterBankFactory as FBF
+from ...np.feats.filter_banks import FilterBankFactory as FBF
 
 # window types
 HAMMING = "hamming"
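Note: the next two deletions drop hyperion/vb_pdfs/, presumably a stale variational-Bayes counterpart of the numpy pdfs kept under hyperion/np/pdfs/ (that package's exp_family.py and core/pdf.py were renamed above). The deleted ExpFamily followed the usual EM-style contract; schematically, using only names from the deleted code:

    # E-step: accumulate counts and sufficient statistics over the data,
    # M-step: update the natural parameters from them
    N, u_x = model.Estep(x=x, sample_weight=None, batch_size=None)  # model: hypothetical ExpFamily subclass
    model.Mstep(N, u_x)
    elbo = model.elbo(x, N=N, u_x=u_x)  # lower bound used to monitor the fit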
diff --git a/hyperion/vb_pdfs/core/exponential_family.py b/hyperion/vb_pdfs/core/exponential_family.py
deleted file mode 100644
index c3e59040..00000000
--- a/hyperion/vb_pdfs/core/exponential_family.py
+++ /dev/null
@@ -1,139 +0,0 @@
-"""
- Copyright 2017 Johns Hopkins University (Author: Jesus Villalba)
- Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
-"""
-
-import numpy as np
-
-from abc import ABCMeta, abstractmethod
-from .pdf import PDF
-
-
-class ExpFamily(PDF):
-    __metaclass__ = ABCMeta
-
-    def __init__(self, eta=None, **kwargs):
-        super(ExpFamily, self).__init__(**kwargs)
-        self.eta = eta
-        self.A = None
-
-    def fit(
-        self, x, sample_weight=None, x_val=None, sample_weight_val=None, batch_size=None
-    ):
-
-        N, u_x = self.Estep(x=x, sample_weight=sample_weight, batch_size=batch_size)
-        self.Mstep(N, u_x)
-        elbo = self.elbo(x, N=N, u_x=u_x)
-        elbo = [elbo, elbo / N]
-
-        if x_val is not None:
-            N, u_x = self.Estep(
-                x=x_val, sample_weight=sample_weight_val, batch_size=batch_size
-            )
-            elbo_val = self.elbo(x_val, N=N, u_x=u_x)
-            elbo += [elbo_val, elbo_val / N]
-        return elbo
-
-    def log_h(self, x):
-        return 0
-
-    def accum_logh(self, x, sample_weight=None):
-        if sample_weight is None:
-            return np.sum(self.logh(x))
-        return np.sum(sample_weight * self.logh(x))
-
-    def compute_suff_stats(self, x):
-        return x
-
-    def accum_suff_stats(self, x, u_x=None, sample_weight=None, batch_size=None):
-        if u_x is not None or batch_size is None:
-            return self._accum_suff_stats_1batch(x, u_x, sample_weight)
-        else:
-            return self._accum_suff_stats_nbatches(x, sample_weight, batch_size)
-
-    def _accum_suff_stats_1batch(self, x, u_x=None, sample_weight=None):
-        if u_x is None:
-            u_x = self.compute_suff_stats(x)
-        if sample_weight is None:
-            N = u_x.shape[0]
-        else:
-            u_x *= sample_weight[:, None]
-            N = np.sum(sample_weight)
-        acc_u_x = np.sum(u_x, axis=0)
-        return N, acc_u_x
-
-    def _accum_suff_stats_nbatches(self, x, sample_weight, batch_size):
-        sw_i = None
-        for i1 in range(0, x.shape[0], batch_size):
-            i2 = np.minimum(i1 + batch_size, x.shape[0])
-            x_i = x[i1:i2, :]
-            if sample_weight is not None:
-                sw_i = sample_weight[i1:i2]
-            N_i, u_x_i = self._accum_suff_stats_1batch(x_i, sample_weight=sw_i)
-            if i1 == 0:
-                N = N_i
-                u_x = u_x_i
-            else:
-                N += N_i
-                u_x += u_x_i
-        return N, u_x
-
-    def add_suff_stats(self, N, u_x):
-        assert len(N) == len(u_x)
-        acc_N = N[1]
-        acc_u_x = u_x[1]
-        for i in range(1, len(N)):
-            acc_N += N
-            acc_u_x += u[i]
-        return acc_N, acc_u_x
-
-    def Estep(self, x, u_x=None, sample_weight=None, batch_size=None):
-        return self.accum_suff_stats(x, u_x, sample_weight, batch_size)
-
-    @abstractmethod
-    def Mstep(self, stats):
-        pass
-
-    def elbo(self, x, u_x=None, N=1, logh=None, sample_weight=None, batch_size=None):
-        if u_x is None:
-            N, u_x = self.accum_suff_stats(
-                x, sample_weight=sample_weight, batch_size=batch_size
-            )
-        if logh is None:
-            logh = self.accum_logh(x, sample_weight=sample_weight)
-        return logh + np.inner(u_x, self.eta) - N * self.A
-
-    def eval_llk(self, x, u_x=None, mode="nat"):
-        if mode == "nat":
-            return self.eval_llk_nat(x, u_x)
-        else:
-            return self.eval_llk_std(x)
-
-    def eval_llk_nat(self, x, u_x=None):
-        if u_x is None:
-            u_x = self.compute_suff_stats(x)
-        return self.logh(x) + np.inner(u_x, self.eta) - self.A
-
-    @staticmethod
-    def compute_A_nat(eta):
-        raise NotImplementedError()
-
-    @staticmethod
-    def compute_A_std(params):
-        raise NotImplementedError()
-
-    @staticmethod
-    def compute_eta(param):
-        raise NotImplementedError()
-
-    @staticmethod
-    def compute_std(eta):
-        raise NotImplementedError()
-
-    @abstractmethod
-    def _compute_nat_params(self):
-        pass
-
-    @abstractmethod
-    def _compute_std_params(self):
-        pass
diff --git a/hyperion/vb_pdfs/core/pdf.py b/hyperion/vb_pdfs/core/pdf.py
deleted file mode 100644
index 012ff96c..00000000
--- a/hyperion/vb_pdfs/core/pdf.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
- Copyright 2017 Johns Hopkins University (Author: Jesus Villalba)
- Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
-"""
-
-import numpy as np
-
-from abc import ABCMeta, abstractmethod
-from ...hyp_model import HypModel
-
-
-class PDF(HypModel):
-    __metaclass__ = ABCMeta
-
-    def __init__(self, **kwargs):
-        super(PDF, self).__init__(**kwargs)
-
-    # def get_config(self):
-    #     config = {'x_dim': self.x_dim }
-    #     base_config = super(PDF, self).get_config()
-    #     return dict(list(base_config.items()) + list(config.items()))
-
-    @abstractmethod
-    def log_prob(self, x):
-        pass
-
-    def log_cdf(self, x):
-        raise NotImplementedError
-
-    @abstractmethod
-    def sample(self, num_samples):
-        pass
diff --git a/requirements.txt b/requirements.txt
index 7a1ae1b3..6f1c8bc1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,4 @@ librosa>=0.8.1
 black
 twine
 wheel
+transformers>=4.16.2

From 887bd3becaeba2ba621e19f54366fac674ef40b6 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Tue, 1 Mar 2022 09:44:50 -0500
Subject: [PATCH 002/154] In egs, change hyperion -> hyperion.np where needed

---
 egs/chime5_spkdet/v1/local/score_dcf.py | 2 +-
 egs/chime5_spkdet/v1/steps_be/eval-be-diar-v2.py | 2 +-
 egs/chime5_spkdet/v1/steps_be/eval-be-v1.py | 2 +-
 egs/chime5_spkdet/v1/steps_be/eval-calibration-v1.py | 4 ++--
 egs/chime5_spkdet/v1/steps_be/train-be-v1.py | 2 +-
 .../v1/steps_be/train-calibration-v1.py | 4 ++--
 egs/dihard2019/v1/steps_diar/eval-ahc-v1.py | 12 ++++++------
 egs/dihard2019/v1/steps_diar/train-plda-v1.py | 2 +-
 egs/sre18/v1.8k/local/score_dcf.py | 2 +-
 egs/sre18/v1.8k/steps_be/eval-calibration-v1.py | 4 ++--
 egs/sre18/v1.8k/steps_be/eval-tel-be-snorm-v1.py | 4 ++--
 egs/sre18/v1.8k/steps_be/eval-tel-be-v1.py | 2 +-
 .../v1.8k/steps_be/eval-vid-be-diar-snorm-v1.py | 4 ++--
 egs/sre18/v1.8k/steps_be/eval-vid-be-diar-v1.py | 2 +-
 egs/sre18/v1.8k/steps_be/eval-vid-be-snorm-v1.py | 4 ++--
 egs/sre18/v1.8k/steps_be/eval-vid-be-v1.py | 2 +-
 egs/sre18/v1.8k/steps_be/train-calibration-v1.py | 4 ++--
 egs/sre18/v1.8k/steps_be/train-tel-be-v1.py | 4 ++--
 egs/sre18/v1.8k/steps_be/train-vid-be-v1.py | 2 +-
 egs/sre19-av-v/v0.1/local/score_dcf.py | 2 +-
 egs/sre19-av-v/v0.1/steps_be/eval-calibration-v1.py | 4 ++--
 .../v0.1/steps_be/eval-face-vid-be-snorm-v1.py | 2 +-
 .../v0.1/steps_be/eval-face-vid-be-snorm-v2.py | 2 +-
 .../v0.1/steps_be/eval-face-vid-be-snorm-v3.py | 2 +-
 .../v0.1/steps_be/eval-face-vid-be-snorm-v4.py | 2 +-
 .../v0.1/steps_be/eval-face-vid-be-snorm-v5.py | 4 ++--
 .../v0.1/steps_be/eval-face-vid-be-snorm-v6.py | 2 +-
 .../v0.1/steps_be/eval-face-vid-be-snorm-v7.py | 2 +-
 .../v0.1/steps_be/eval-face-vid-be-snorm-v9.py | 2 +-
 egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-v8.py | 2 +-
 egs/sre19-av-v/v0.1/steps_be/eval-fusion-v1.py | 2 +-
 egs/sre19-av-v/v0.1/steps_be/face_be_utils.py | 4 ++--
 .../v0.1/steps_be/face_video_trial_data_reader.py | 2 +-
 egs/sre19-av-v/v0.1/steps_be/train-calibration-v1.py | 4 ++--
 egs/sre19-av-v/v0.1/steps_be/train-calibration-v2.py | 4 ++--
 egs/sre19-av-v/v0.1/steps_be/train-fusion-v1.py | 4
++-- egs/sre19-av-v/v0.1/steps_be/train-fusion-v2.py | 4 ++-- egs/sre19-cmn2/v1/local/error_analysis.py | 2 +- egs/sre19-cmn2/v1/local/score_dcf.py | 2 +- egs/sre19-cmn2/v1/steps_be/eval-calibration-v1.py | 4 ++-- egs/sre19-cmn2/v1/steps_be/eval-tel-be-snorm-v1.py | 4 ++-- egs/sre19-cmn2/v1/steps_be/eval-tel-be-v1.py | 2 +- .../v1/steps_be/eval-vid-be-diar-snorm-v1.py | 4 ++-- .../v1/steps_be/eval-vid-be-diar-snorm-v2.py | 4 ++-- egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v1.py | 2 +- egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v2.py | 2 +- egs/sre19-cmn2/v1/steps_be/eval-vid-be-snorm-v1.py | 4 ++-- egs/sre19-cmn2/v1/steps_be/eval-vid-be-v1.py | 2 +- egs/sre19-cmn2/v1/steps_be/train-calibration-v1.py | 4 ++-- egs/sre19-cmn2/v1/steps_be/train-tel-be-v1.py | 4 ++-- egs/sre19-cmn2/v1/steps_be/train-tel-be-v2.py | 4 ++-- egs/sre19-cmn2/v1/steps_be/train-tel-be-v3.py | 4 ++-- egs/sre19-cmn2/v1/steps_be/train-vid-be-v1.py | 2 +- egs/sre20-cts/v1/local/score_dcf.py | 2 +- egs/sre20-cts/v1/steps_be/apply-ahc-v1.py | 8 ++++---- egs/sre20-cts/v1/steps_be/eval-calibration-v1.py | 4 ++-- egs/sre20-cts/v1/steps_be/eval-calibration-v2.py | 4 ++-- egs/sre20-cts/v1/steps_be/eval-fusion-v1.py | 2 +- .../v1/steps_be/eval-tel-be-knn-snorm-v1.py | 4 ++-- egs/sre20-cts/v1/steps_be/eval-tel-be-knn-v1.py | 2 +- egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v1.py | 4 ++-- egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py | 6 +++--- egs/sre20-cts/v1/steps_be/eval-tel-be-v1.py | 2 +- egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py | 4 ++-- egs/sre20-cts/v1/steps_be/train-calibration-v1.py | 4 ++-- egs/sre20-cts/v1/steps_be/train-calibration-v2.py | 4 ++-- egs/sre20-cts/v1/steps_be/train-fusion-v1.py | 4 ++-- egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py | 4 ++-- egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py | 4 ++-- egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py | 4 ++-- egs/sre20-cts/v1/steps_be/train-tel-be-v1.py | 2 +- egs/sre20-cts/v1/steps_be/train-tel-be-v3.py | 2 +- egs/sre20-cts/v1/steps_be/train-tel-be-v4.py | 2 +- egs/sre21-av-a/v1.16k/local/plot-tsne-cts.py | 2 +- egs/sre21-av-a/v1.16k/local/score_sre16.py | 2 +- egs/sre21-av-a/v1.16k/local/score_sre21.py | 2 +- .../v1.16k/local/score_sre_cts_superset.py | 2 +- .../v1.16k/steps_be/eval-be-plda-snorm-v1.py | 4 ++-- .../v1.16k/steps_be/eval-be-plda-snorm-v2.py | 4 ++-- egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v1.py | 2 +- egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v2.py | 2 +- egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v3.py | 2 +- .../steps_be/eval-calibration-v1-sre-superset.py | 4 ++-- .../v1.16k/steps_be/eval-calibration-v1-sre16.py | 2 +- .../v1.16k/steps_be/eval-calibration-v1-sre21-dev.py | 4 ++-- .../steps_be/eval-calibration-v1-sre21-eval.py | 4 ++-- egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v1.py | 2 +- egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v2.py | 2 +- egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v1.py | 2 +- egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v2.py | 2 +- egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v3.py | 2 +- .../v1.16k/steps_be/train-calibration-v1.py | 4 ++-- egs/sre21-av-a/v1.16k/steps_be/train-fusion-v1.py | 4 ++-- egs/sre21-av-a/v1.16k/steps_be/train-fusion-v2.py | 6 +++--- egs/sre21-av-v/v0.1/local/score_dcf.py | 2 +- egs/sre21-av/v1/local/score_sre21.py | 2 +- egs/voices_challenge/v0/steps_be/eval-be-snorm-v1.py | 4 ++-- egs/voices_challenge/v0/steps_be/eval-be-v1.py | 2 +- .../v0/steps_be/eval-calibration-v1.py | 4 ++-- egs/voices_challenge/v0/steps_be/train-be-v1.py | 2 +- 
egs/voices_challenge/v0/steps_be/train-be-v2.py | 2 +- .../v0/steps_be/train-calibration-v1.py | 4 ++-- egs/voices_challenge/v1/steps_be/eval-be-snorm-v1.py | 4 ++-- egs/voices_challenge/v1/steps_be/eval-be-v1.py | 2 +- .../v1/steps_be/eval-calibration-v1.py | 4 ++-- egs/voices_challenge/v1/steps_be/train-be-v1.py | 2 +- egs/voices_challenge/v1/steps_be/train-be-v2.py | 2 +- .../v1/steps_be/train-calibration-v1.py | 4 ++-- egs/voxceleb/adv.v2/local/make_some_figs.py | 2 +- egs/voxceleb/adv.v2/local/score_dcf.py | 2 +- egs/voxceleb/adv.v2/steps_backend/eval-be-Nvs1-v1.py | 2 +- .../adv.v2/steps_backend/eval-be-cos-Nvs1.py | 4 ++-- egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py | 2 +- egs/voxceleb/adv.v2/steps_backend/eval-be-novelty.py | 2 +- .../eval-classif-perf-plda-unknown-attacks-noimp.py | 8 ++++---- .../eval-classif-perf-plda-unkown-attacks.py | 8 ++++---- .../eval-classif-perf-unknown-attacks.py | 4 ++-- .../adv.v2/steps_backend/eval-classif-perf.py | 4 ++-- egs/voxceleb/adv.v2/steps_backend/train-be-v1.py | 2 +- egs/voxceleb/adv.v2/steps_visual/proj-attack-lda.py | 2 +- egs/voxceleb/adv.v2/steps_visual/proj-attack-tsne.py | 2 +- egs/voxceleb/v1/local/attack_analysis.py | 2 +- egs/voxceleb/v1/local/make_some_figs.py | 2 +- egs/voxceleb/v1/local/score_dcf.py | 2 +- egs/voxceleb/v1/steps_be/eval-be-v1.py | 2 +- egs/voxceleb/v1/steps_be/eval-be-v2.py | 2 +- egs/voxceleb/v1/steps_be/eval-calibration-v1.py | 4 ++-- egs/voxceleb/v1/steps_be/train-be-v1.py | 2 +- egs/voxceleb/v1/steps_be/train-be-v2.py | 2 +- egs/voxceleb/v1/steps_be/train-calibration-v1.py | 4 ++-- 130 files changed, 201 insertions(+), 201 deletions(-) diff --git a/egs/chime5_spkdet/v1/local/score_dcf.py b/egs/chime5_spkdet/v1/local/score_dcf.py index 1137e049..cba16610 100755 --- a/egs/chime5_spkdet/v1/local/score_dcf.py +++ b/egs/chime5_spkdet/v1/local/score_dcf.py @@ -15,7 +15,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/chime5_spkdet/v1/steps_be/eval-be-diar-v2.py b/egs/chime5_spkdet/v1/steps_be/eval-be-diar-v2.py index b77d3595..9ef02a02 100755 --- a/egs/chime5_spkdet/v1/steps_be/eval-be-diar-v2.py +++ b/egs/chime5_spkdet/v1/steps_be/eval-be-diar-v2.py @@ -17,7 +17,7 @@ from hyperion.utils import TrialScores from hyperion.helpers import MultiTestTrialDataReaderV2 as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def combine_diar_scores(ndx, orig_seg, subseg_scores): diff --git a/egs/chime5_spkdet/v1/steps_be/eval-be-v1.py b/egs/chime5_spkdet/v1/steps_be/eval-be-v1.py index dc3e3f87..19d582e4 100755 --- a/egs/chime5_spkdet/v1/steps_be/eval-be-v1.py +++ b/egs/chime5_spkdet/v1/steps_be/eval-be-v1.py @@ -19,7 +19,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/chime5_spkdet/v1/steps_be/eval-calibration-v1.py b/egs/chime5_spkdet/v1/steps_be/eval-calibration-v1.py index fb5dd6f9..1cf80177 100755 --- a/egs/chime5_spkdet/v1/steps_be/eval-calibration-v1.py +++ 
b/egs/chime5_spkdet/v1/steps_be/eval-calibration-v1.py @@ -18,8 +18,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): diff --git a/egs/chime5_spkdet/v1/steps_be/train-be-v1.py b/egs/chime5_spkdet/v1/steps_be/train-be-v1.py index 55c412ac..6d1af604 100755 --- a/egs/chime5_spkdet/v1/steps_be/train-be-v1.py +++ b/egs/chime5_spkdet/v1/steps_be/train-be-v1.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F diff --git a/egs/chime5_spkdet/v1/steps_be/train-calibration-v1.py b/egs/chime5_spkdet/v1/steps_be/train-calibration-v1.py index fa1dfcf7..9eaa7187 100755 --- a/egs/chime5_spkdet/v1/steps_be/train-calibration-v1.py +++ b/egs/chime5_spkdet/v1/steps_be/train-calibration-v1.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, verbose): diff --git a/egs/dihard2019/v1/steps_diar/eval-ahc-v1.py b/egs/dihard2019/v1/steps_diar/eval-ahc-v1.py index 25282718..c45767b2 100755 --- a/egs/dihard2019/v1/steps_diar/eval-ahc-v1.py +++ b/egs/dihard2019/v1/steps_diar/eval-ahc-v1.py @@ -27,13 +27,13 @@ from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList, PCA, LNorm -from hyperion.clustering import AHC -from hyperion.pdfs import GMMTiedDiagCov as GMM -from hyperion.diarization import DiarAHCPLDA as Diar +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.clustering import AHC +from hyperion.np.pdfs import GMMTiedDiagCov as GMM +from hyperion.np.diarization import DiarAHCPLDA as Diar -# from hyperion.pdfs import GMMDiagCov as GMM2 -# from hyperion.pdfs import GMM as GMM3 +# from hyperion.np.pdfs import GMMDiagCov as GMM2 +# from hyperion.np.pdfs import GMM as GMM3 def make_timestamps(n, win_start, win_length, win_shift, win_shrink): diff --git a/egs/dihard2019/v1/steps_diar/train-plda-v1.py b/egs/dihard2019/v1/steps_diar/train-plda-v1.py index c7589c8a..713798af 100755 --- a/egs/dihard2019/v1/steps_diar/train-plda-v1.py +++ b/egs/dihard2019/v1/steps_diar/train-plda-v1.py @@ -22,7 +22,7 @@ from hyperion.utils import Utt2Info # from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList, LDA, LNorm, PCA +from hyperion.np.transforms import TransformList, LDA, LNorm, PCA from hyperion.helpers import PLDAFactory as F from hyperion.io import 
RandomAccessDataReaderFactory as DRF diff --git a/egs/sre18/v1.8k/local/score_dcf.py b/egs/sre18/v1.8k/local/score_dcf.py index 1137e049..cba16610 100755 --- a/egs/sre18/v1.8k/local/score_dcf.py +++ b/egs/sre18/v1.8k/local/score_dcf.py @@ -15,7 +15,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/sre18/v1.8k/steps_be/eval-calibration-v1.py b/egs/sre18/v1.8k/steps_be/eval-calibration-v1.py index fa16dfce..31b527f7 100755 --- a/egs/sre18/v1.8k/steps_be/eval-calibration-v1.py +++ b/egs/sre18/v1.8k/steps_be/eval-calibration-v1.py @@ -19,8 +19,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): diff --git a/egs/sre18/v1.8k/steps_be/eval-tel-be-snorm-v1.py b/egs/sre18/v1.8k/steps_be/eval-tel-be-snorm-v1.py index d3b35fba..954a8a4a 100755 --- a/egs/sre18/v1.8k/steps_be/eval-tel-be-snorm-v1.py +++ b/egs/sre18/v1.8k/steps_be/eval-tel-be-snorm-v1.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre18/v1.8k/steps_be/eval-tel-be-v1.py b/egs/sre18/v1.8k/steps_be/eval-tel-be-v1.py index d9668e1a..06b2bc87 100755 --- a/egs/sre18/v1.8k/steps_be/eval-tel-be-v1.py +++ b/egs/sre18/v1.8k/steps_be/eval-tel-be-v1.py @@ -19,7 +19,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/sre18/v1.8k/steps_be/eval-vid-be-diar-snorm-v1.py b/egs/sre18/v1.8k/steps_be/eval-vid-be-diar-snorm-v1.py index c37d450a..af8895b2 100755 --- a/egs/sre18/v1.8k/steps_be/eval-vid-be-diar-snorm-v1.py +++ b/egs/sre18/v1.8k/steps_be/eval-vid-be-diar-snorm-v1.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre18/v1.8k/steps_be/eval-vid-be-diar-v1.py b/egs/sre18/v1.8k/steps_be/eval-vid-be-diar-v1.py index c19dc074..433cbbff 100755 --- a/egs/sre18/v1.8k/steps_be/eval-vid-be-diar-v1.py +++ b/egs/sre18/v1.8k/steps_be/eval-vid-be-diar-v1.py @@ -21,7 +21,7 @@ from hyperion.utils.trial_scores import TrialScores from 
hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def combine_diar_scores(ndx, diar_ndx, diar2orig, diar_scores): diff --git a/egs/sre18/v1.8k/steps_be/eval-vid-be-snorm-v1.py b/egs/sre18/v1.8k/steps_be/eval-vid-be-snorm-v1.py index fc94c754..1f1ffc81 100755 --- a/egs/sre18/v1.8k/steps_be/eval-vid-be-snorm-v1.py +++ b/egs/sre18/v1.8k/steps_be/eval-vid-be-snorm-v1.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre18/v1.8k/steps_be/eval-vid-be-v1.py b/egs/sre18/v1.8k/steps_be/eval-vid-be-v1.py index f7d83d30..19ca8bdf 100755 --- a/egs/sre18/v1.8k/steps_be/eval-vid-be-v1.py +++ b/egs/sre18/v1.8k/steps_be/eval-vid-be-v1.py @@ -19,7 +19,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/sre18/v1.8k/steps_be/train-calibration-v1.py b/egs/sre18/v1.8k/steps_be/train-calibration-v1.py index fa1dfcf7..9eaa7187 100755 --- a/egs/sre18/v1.8k/steps_be/train-calibration-v1.py +++ b/egs/sre18/v1.8k/steps_be/train-calibration-v1.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, verbose): diff --git a/egs/sre18/v1.8k/steps_be/train-tel-be-v1.py b/egs/sre18/v1.8k/steps_be/train-tel-be-v1.py index c9f22d83..46710992 100755 --- a/egs/sre18/v1.8k/steps_be/train-tel-be-v1.py +++ b/egs/sre18/v1.8k/steps_be/train-tel-be-v1.py @@ -12,9 +12,9 @@ from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F -from hyperion.clustering import AHC +from hyperion.np.clustering import AHC from hyperion.utils.utt2info import Utt2Info diff --git a/egs/sre18/v1.8k/steps_be/train-vid-be-v1.py b/egs/sre18/v1.8k/steps_be/train-vid-be-v1.py index a1b0cad6..4724a24a 100755 --- a/egs/sre18/v1.8k/steps_be/train-vid-be-v1.py +++ b/egs/sre18/v1.8k/steps_be/train-vid-be-v1.py @@ -13,7 +13,7 @@ from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.scp_list import SCPList diff --git a/egs/sre19-av-v/v0.1/local/score_dcf.py b/egs/sre19-av-v/v0.1/local/score_dcf.py index 514ebf51..772d107a 100755 --- 
a/egs/sre19-av-v/v0.1/local/score_dcf.py +++ b/egs/sre19-av-v/v0.1/local/score_dcf.py @@ -19,7 +19,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-calibration-v1.py b/egs/sre19-av-v/v0.1/steps_be/eval-calibration-v1.py index 8087cac2..576ea3d5 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-calibration-v1.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-calibration-v1.py @@ -22,8 +22,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v1.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v1.py index 1527f514..9b490e72 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v1.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v1.py @@ -14,7 +14,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.score_norm import AdaptSNorm as SNorm from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v2.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v2.py index 5ac23484..40187aa4 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v2.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v2.py @@ -13,7 +13,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.score_norm import AdaptSNorm as SNorm from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v3.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v3.py index 2a7abe08..3d52788e 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v3.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v3.py @@ -13,7 +13,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.score_norm import AdaptSNorm as SNorm from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v4.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v4.py index 9c22cc1f..f18a53f7 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v4.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v4.py @@ -13,7 +13,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores -from hyperion.score_norm import AdaptSNorm as SNorm 
+from hyperion.np.score_norm import AdaptSNorm as SNorm from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v5.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v5.py index 660854e3..af75f526 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v5.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v5.py @@ -15,10 +15,10 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.score_norm import AdaptSNorm as SNorm # from hyperion.helpers import PLDAFactory as F -# from hyperion.transforms import TransformList +# from hyperion.np.transforms import TransformList from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v6.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v6.py index 86ca6a8f..e23e52a1 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v6.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v6.py @@ -15,7 +15,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.score_norm import AdaptSNorm as SNorm from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v7.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v7.py index 19f78a23..85bd8ee4 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v7.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v7.py @@ -13,7 +13,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.score_norm import AdaptSNorm as SNorm from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v9.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v9.py index a6774a68..d36b91ec 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v9.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-snorm-v9.py @@ -13,7 +13,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.score_norm import AdaptSNorm as SNorm from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-v8.py b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-v8.py index aa9539d4..a66794da 100755 --- a/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-v8.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-face-vid-be-v8.py @@ -18,7 +18,7 @@ from hyperion.utils.trial_scores import TrialScores # from hyperion.helpers import PLDAFactory as F -# from hyperion.transforms import TransformList +# from hyperion.np.transforms import TransformList from face_video_trial_data_reader import FaceVideoTrialDataReaderV1 as TDR from face_be_utils import * diff --git a/egs/sre19-av-v/v0.1/steps_be/eval-fusion-v1.py b/egs/sre19-av-v/v0.1/steps_be/eval-fusion-v1.py index 70b0c81b..fe24f947 100755 
--- a/egs/sre19-av-v/v0.1/steps_be/eval-fusion-v1.py +++ b/egs/sre19-av-v/v0.1/steps_be/eval-fusion-v1.py @@ -17,7 +17,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF def eval_fusion(in_score_files, ndx_file, model_file, out_score_file, fus_idx): diff --git a/egs/sre19-av-v/v0.1/steps_be/face_be_utils.py b/egs/sre19-av-v/v0.1/steps_be/face_be_utils.py index 025d11a3..14e3fc20 100644 --- a/egs/sre19-av-v/v0.1/steps_be/face_be_utils.py +++ b/egs/sre19-av-v/v0.1/steps_be/face_be_utils.py @@ -12,8 +12,8 @@ from hyperion.utils.utt2info import Utt2Info from hyperion.utils.math import softmax from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.transforms import LNorm -from hyperion.clustering import AHC +from hyperion.np.transforms import LNorm +from hyperion.np.clustering import AHC def lnorm(x): diff --git a/egs/sre19-av-v/v0.1/steps_be/face_video_trial_data_reader.py b/egs/sre19-av-v/v0.1/steps_be/face_video_trial_data_reader.py index 091a4ee1..11223607 100644 --- a/egs/sre19-av-v/v0.1/steps_be/face_video_trial_data_reader.py +++ b/egs/sre19-av-v/v0.1/steps_be/face_video_trial_data_reader.py @@ -18,7 +18,7 @@ from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.utils.utt2info import Utt2Info from hyperion.utils import TrialNdx, TrialKey -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList class FaceVideoTrialDataReaderV1(object): diff --git a/egs/sre19-av-v/v0.1/steps_be/train-calibration-v1.py b/egs/sre19-av-v/v0.1/steps_be/train-calibration-v1.py index 35c1a3bc..0d97a4fb 100755 --- a/egs/sre19-av-v/v0.1/steps_be/train-calibration-v1.py +++ b/egs/sre19-av-v/v0.1/steps_be/train-calibration-v1.py @@ -21,8 +21,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbose): diff --git a/egs/sre19-av-v/v0.1/steps_be/train-calibration-v2.py b/egs/sre19-av-v/v0.1/steps_be/train-calibration-v2.py index b247f264..f1f89bdd 100755 --- a/egs/sre19-av-v/v0.1/steps_be/train-calibration-v2.py +++ b/egs/sre19-av-v/v0.1/steps_be/train-calibration-v2.py @@ -21,8 +21,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbose): diff --git a/egs/sre19-av-v/v0.1/steps_be/train-fusion-v1.py b/egs/sre19-av-v/v0.1/steps_be/train-fusion-v1.py index e28bfffa..564fccaa 100755 --- a/egs/sre19-av-v/v0.1/steps_be/train-fusion-v1.py +++ b/egs/sre19-av-v/v0.1/steps_be/train-fusion-v1.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, 
config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF def train_fusion( diff --git a/egs/sre19-av-v/v0.1/steps_be/train-fusion-v2.py b/egs/sre19-av-v/v0.1/steps_be/train-fusion-v2.py index 0679eb7c..1f97d189 100755 --- a/egs/sre19-av-v/v0.1/steps_be/train-fusion-v2.py +++ b/egs/sre19-av-v/v0.1/steps_be/train-fusion-v2.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF def train_fusion( diff --git a/egs/sre19-cmn2/v1/local/error_analysis.py b/egs/sre19-cmn2/v1/local/error_analysis.py index c4dbba5a..bbdb893d 100755 --- a/egs/sre19-cmn2/v1/local/error_analysis.py +++ b/egs/sre19-cmn2/v1/local/error_analysis.py @@ -16,7 +16,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/sre19-cmn2/v1/local/score_dcf.py b/egs/sre19-cmn2/v1/local/score_dcf.py index deb39682..fd7a3149 100755 --- a/egs/sre19-cmn2/v1/local/score_dcf.py +++ b/egs/sre19-cmn2/v1/local/score_dcf.py @@ -16,7 +16,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/sre19-cmn2/v1/steps_be/eval-calibration-v1.py b/egs/sre19-cmn2/v1/steps_be/eval-calibration-v1.py index fa16dfce..31b527f7 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-calibration-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-calibration-v1.py @@ -19,8 +19,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): diff --git a/egs/sre19-cmn2/v1/steps_be/eval-tel-be-snorm-v1.py b/egs/sre19-cmn2/v1/steps_be/eval-tel-be-snorm-v1.py index d3b35fba..954a8a4a 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-tel-be-snorm-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-tel-be-snorm-v1.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from 
hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre19-cmn2/v1/steps_be/eval-tel-be-v1.py b/egs/sre19-cmn2/v1/steps_be/eval-tel-be-v1.py index 820c90db..5d77a896 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-tel-be-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-tel-be-v1.py @@ -20,7 +20,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-snorm-v1.py b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-snorm-v1.py index c6f62957..0d5c3000 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-snorm-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-snorm-v1.py @@ -22,8 +22,8 @@ MultiTestTrialDataReader as TDR, ) from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-snorm-v2.py b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-snorm-v2.py index ec4addef..e0b29fd4 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-snorm-v2.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-snorm-v2.py @@ -17,8 +17,8 @@ from hyperion.utils import TrialScores from hyperion.helpers import MultiTestTrialDataReaderV2 as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v1.py b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v1.py index 20e88a37..ebc77930 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v1.py @@ -21,7 +21,7 @@ MultiTestTrialDataReader as TDR, ) from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def combine_diar_scores(ndx, orig_seg, subseg_scores): diff --git a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v2.py b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v2.py index b77d3595..9ef02a02 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v2.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-diar-v2.py @@ -17,7 +17,7 @@ from hyperion.utils import TrialScores from hyperion.helpers import MultiTestTrialDataReaderV2 as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def combine_diar_scores(ndx, orig_seg, subseg_scores): diff --git a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-snorm-v1.py b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-snorm-v1.py index 0c5b31e0..76bf4bcd 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-snorm-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-snorm-v1.py @@ -19,8 +19,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import 
AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-v1.py b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-v1.py index f7d83d30..19ca8bdf 100755 --- a/egs/sre19-cmn2/v1/steps_be/eval-vid-be-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/eval-vid-be-v1.py @@ -19,7 +19,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/sre19-cmn2/v1/steps_be/train-calibration-v1.py b/egs/sre19-cmn2/v1/steps_be/train-calibration-v1.py index 779e62af..c57a1162 100755 --- a/egs/sre19-cmn2/v1/steps_be/train-calibration-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/train-calibration-v1.py @@ -18,8 +18,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbose): diff --git a/egs/sre19-cmn2/v1/steps_be/train-tel-be-v1.py b/egs/sre19-cmn2/v1/steps_be/train-tel-be-v1.py index c9f22d83..46710992 100755 --- a/egs/sre19-cmn2/v1/steps_be/train-tel-be-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/train-tel-be-v1.py @@ -12,9 +12,9 @@ from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F -from hyperion.clustering import AHC +from hyperion.np.clustering import AHC from hyperion.utils.utt2info import Utt2Info diff --git a/egs/sre19-cmn2/v1/steps_be/train-tel-be-v2.py b/egs/sre19-cmn2/v1/steps_be/train-tel-be-v2.py index d8d82405..df435852 100755 --- a/egs/sre19-cmn2/v1/steps_be/train-tel-be-v2.py +++ b/egs/sre19-cmn2/v1/steps_be/train-tel-be-v2.py @@ -13,9 +13,9 @@ from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F -from hyperion.clustering import AHC +from hyperion.np.clustering import AHC from hyperion.utils.utt2info import Utt2Info diff --git a/egs/sre19-cmn2/v1/steps_be/train-tel-be-v3.py b/egs/sre19-cmn2/v1/steps_be/train-tel-be-v3.py index 1b039c40..6532b9aa 100755 --- a/egs/sre19-cmn2/v1/steps_be/train-tel-be-v3.py +++ b/egs/sre19-cmn2/v1/steps_be/train-tel-be-v3.py @@ -12,9 +12,9 @@ from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm, CORAL +from hyperion.np.transforms import TransformList, LDA, LNorm, CORAL from hyperion.helpers import PLDAFactory as F -from hyperion.clustering import AHC +from hyperion.np.clustering import AHC from hyperion.utils.utt2info import Utt2Info diff --git a/egs/sre19-cmn2/v1/steps_be/train-vid-be-v1.py b/egs/sre19-cmn2/v1/steps_be/train-vid-be-v1.py 
index f825d59b..c1087bf4 100755 --- a/egs/sre19-cmn2/v1/steps_be/train-vid-be-v1.py +++ b/egs/sre19-cmn2/v1/steps_be/train-vid-be-v1.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.scp_list import SCPList diff --git a/egs/sre20-cts/v1/local/score_dcf.py b/egs/sre20-cts/v1/local/score_dcf.py index 1137e049..cba16610 100755 --- a/egs/sre20-cts/v1/local/score_dcf.py +++ b/egs/sre20-cts/v1/local/score_dcf.py @@ -15,7 +15,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/sre20-cts/v1/steps_be/apply-ahc-v1.py b/egs/sre20-cts/v1/steps_be/apply-ahc-v1.py index a5373bf4..bfa0c7c3 100755 --- a/egs/sre20-cts/v1/steps_be/apply-ahc-v1.py +++ b/egs/sre20-cts/v1/steps_be/apply-ahc-v1.py @@ -25,11 +25,11 @@ # from hyperion.utils.trial_scores import TrialScores # from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm -from hyperion.clustering import AHC +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm +from hyperion.np.clustering import AHC from hyperion.utils import Utt2Info -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR def apply_ahc( diff --git a/egs/sre20-cts/v1/steps_be/eval-calibration-v1.py b/egs/sre20-cts/v1/steps_be/eval-calibration-v1.py index fb5dd6f9..1cf80177 100755 --- a/egs/sre20-cts/v1/steps_be/eval-calibration-v1.py +++ b/egs/sre20-cts/v1/steps_be/eval-calibration-v1.py @@ -18,8 +18,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): diff --git a/egs/sre20-cts/v1/steps_be/eval-calibration-v2.py b/egs/sre20-cts/v1/steps_be/eval-calibration-v2.py index e3d1db91..92d2c2d0 100755 --- a/egs/sre20-cts/v1/steps_be/eval-calibration-v2.py +++ b/egs/sre20-cts/v1/steps_be/eval-calibration-v2.py @@ -18,8 +18,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.utils import Utt2Info diff --git a/egs/sre20-cts/v1/steps_be/eval-fusion-v1.py b/egs/sre20-cts/v1/steps_be/eval-fusion-v1.py index 0d67a741..f1d90241 100755 --- 
a/egs/sre20-cts/v1/steps_be/eval-fusion-v1.py +++ b/egs/sre20-cts/v1/steps_be/eval-fusion-v1.py @@ -17,7 +17,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF def eval_fusion(in_score_files, ndx_file, model_file, out_score_file, fus_idx): diff --git a/egs/sre20-cts/v1/steps_be/eval-tel-be-knn-snorm-v1.py b/egs/sre20-cts/v1/steps_be/eval-tel-be-knn-snorm-v1.py index 651a1b7f..7ab376c1 100755 --- a/egs/sre20-cts/v1/steps_be/eval-tel-be-knn-snorm-v1.py +++ b/egs/sre20-cts/v1/steps_be/eval-tel-be-knn-snorm-v1.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre20-cts/v1/steps_be/eval-tel-be-knn-v1.py b/egs/sre20-cts/v1/steps_be/eval-tel-be-knn-v1.py index 49ad3b42..50966aeb 100755 --- a/egs/sre20-cts/v1/steps_be/eval-tel-be-knn-v1.py +++ b/egs/sre20-cts/v1/steps_be/eval-tel-be-knn-v1.py @@ -20,7 +20,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda_e( diff --git a/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v1.py b/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v1.py index ac6710ad..e46f729b 100755 --- a/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v1.py +++ b/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v1.py @@ -19,8 +19,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py b/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py index 7430caf4..907509fd 100755 --- a/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py +++ b/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py @@ -17,9 +17,9 @@ from hyperion.utils import TrialNdx, TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.utils.math import cosine_scoring -from hyperion.pdfs import PLDA -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.pdfs import PLDA +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre20-cts/v1/steps_be/eval-tel-be-v1.py b/egs/sre20-cts/v1/steps_be/eval-tel-be-v1.py index fb2904b1..698c0f32 100755 --- a/egs/sre20-cts/v1/steps_be/eval-tel-be-v1.py +++ b/egs/sre20-cts/v1/steps_be/eval-tel-be-v1.py @@ -18,7 +18,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms 
import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py b/egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py index 9eaea8b5..b661cbde 100755 --- a/egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py +++ b/egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py @@ -16,10 +16,10 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialNdx, TrialScores from hyperion.utils.math import cosine_scoring -from hyperion.pdfs import PLDA +from hyperion.np.pdfs import PLDA from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/sre20-cts/v1/steps_be/train-calibration-v1.py b/egs/sre20-cts/v1/steps_be/train-calibration-v1.py index 779e62af..c57a1162 100755 --- a/egs/sre20-cts/v1/steps_be/train-calibration-v1.py +++ b/egs/sre20-cts/v1/steps_be/train-calibration-v1.py @@ -18,8 +18,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbose): diff --git a/egs/sre20-cts/v1/steps_be/train-calibration-v2.py b/egs/sre20-cts/v1/steps_be/train-calibration-v2.py index 16d09e3a..28597899 100755 --- a/egs/sre20-cts/v1/steps_be/train-calibration-v2.py +++ b/egs/sre20-cts/v1/steps_be/train-calibration-v2.py @@ -19,8 +19,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils import Utt2Info -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration_cond(cond, scr, key, model_file, prior, lambda_reg, verbose): diff --git a/egs/sre20-cts/v1/steps_be/train-fusion-v1.py b/egs/sre20-cts/v1/steps_be/train-fusion-v1.py index a76b2b6c..9c7f5315 100755 --- a/egs/sre20-cts/v1/steps_be/train-fusion-v1.py +++ b/egs/sre20-cts/v1/steps_be/train-fusion-v1.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF def train_fusion( diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py index a024281a..8e7715e0 100755 --- a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py @@ -13,8 +13,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.helpers import VectorClassReader as VCR -from hyperion.pdfs import PLDA -from hyperion.transforms import TransformList, PCA, LDA, LNorm +from hyperion.np.pdfs import 
PLDA +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info from hyperion.utils.math import cosine_scoring diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py index 568e7edf..12f1725b 100755 --- a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py @@ -13,8 +13,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.helpers import VectorClassReader as VCR -from hyperion.pdfs import PLDA -from hyperion.transforms import TransformList, PCA, LDA, LNorm +from hyperion.np.pdfs import PLDA +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info from hyperion.utils.math import cosine_scoring diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py index 7633cf17..234f966c 100755 --- a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py @@ -13,8 +13,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.helpers import VectorClassReader as VCR -from hyperion.pdfs import PLDA, SPLDA -from hyperion.transforms import TransformList, PCA, LDA, LNorm +from hyperion.np.pdfs import PLDA, SPLDA +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info from hyperion.utils.math import cosine_scoring diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-v1.py b/egs/sre20-cts/v1/steps_be/train-tel-be-v1.py index a388fb88..01d38b65 100755 --- a/egs/sre20-cts/v1/steps_be/train-tel-be-v1.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-v1.py @@ -11,7 +11,7 @@ import numpy as np from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList, PCA, LDA, LNorm +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-v3.py b/egs/sre20-cts/v1/steps_be/train-tel-be-v3.py index ac5bfa7e..e29da60b 100755 --- a/egs/sre20-cts/v1/steps_be/train-tel-be-v3.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-v3.py @@ -12,7 +12,7 @@ from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, PCA, LDA, LNorm +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils import Utt2Info diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-v4.py b/egs/sre20-cts/v1/steps_be/train-tel-be-v4.py index 7326d649..baef33f1 100755 --- a/egs/sre20-cts/v1/steps_be/train-tel-be-v4.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-v4.py @@ -12,7 +12,7 @@ from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, PCA, LDA, LNorm, CORAL +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm, CORAL from hyperion.helpers import PLDAFactory as F from hyperion.utils import Utt2Info from numpy.linalg import matrix_rank diff --git 
a/egs/sre21-av-a/v1.16k/local/plot-tsne-cts.py b/egs/sre21-av-a/v1.16k/local/plot-tsne-cts.py index 25cddea8..46769568 100755 --- a/egs/sre21-av-a/v1.16k/local/plot-tsne-cts.py +++ b/egs/sre21-av-a/v1.16k/local/plot-tsne-cts.py @@ -16,7 +16,7 @@ from hyperion.hyp_defs import config_logger from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.transforms import PCA, SklTSNE, LNorm +from hyperion.np.transforms import PCA, SklTSNE, LNorm colors = ["b", "g", "r", "c", "m", "y", "k"] markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"] diff --git a/egs/sre21-av-a/v1.16k/local/score_sre16.py b/egs/sre21-av-a/v1.16k/local/score_sre16.py index 4064b64f..af44fb53 100755 --- a/egs/sre21-av-a/v1.16k/local/score_sre16.py +++ b/egs/sre21-av-a/v1.16k/local/score_sre16.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_file): diff --git a/egs/sre21-av-a/v1.16k/local/score_sre21.py b/egs/sre21-av-a/v1.16k/local/score_sre21.py index 986aa3f6..72fc1a13 100755 --- a/egs/sre21-av-a/v1.16k/local/score_sre21.py +++ b/egs/sre21-av-a/v1.16k/local/score_sre21.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score(key_file, score_file, sre21_subset, output_file): diff --git a/egs/sre21-av-a/v1.16k/local/score_sre_cts_superset.py b/egs/sre21-av-a/v1.16k/local/score_sre_cts_superset.py index 3f2223a4..bb61ca18 100755 --- a/egs/sre21-av-a/v1.16k/local/score_sre_cts_superset.py +++ b/egs/sre21-av-a/v1.16k/local/score_sre_cts_superset.py @@ -14,7 +14,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score(key_file, score_file, output_file): diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-snorm-v1.py b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-snorm-v1.py index f265ca30..51d21312 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-snorm-v1.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-snorm-v1.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-snorm-v2.py b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-snorm-v2.py index 35b2d501..a9e7ee03 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-snorm-v2.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-snorm-v2.py @@ -20,8 +20,8 @@ from hyperion.utils import TrialNdx, TrialScores, Utt2Info from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F 
-from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.io import RandomAccessDataReaderFactory as DRF diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v1.py b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v1.py index d122d14c..1e45f560 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v1.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v1.py @@ -20,7 +20,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v2.py b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v2.py index 3051383b..2eda0f47 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v2.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v2.py @@ -19,7 +19,7 @@ from hyperion.utils import TrialNdx, TrialScores, Utt2Info from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList conds = [ "cts_eng", diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v3.py b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v3.py index ee0cb558..8cceb387 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v3.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-be-plda-v3.py @@ -19,7 +19,7 @@ from hyperion.utils import TrialNdx, TrialScores, Utt2Info from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList conds = [ "cts_eng", diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre-superset.py b/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre-superset.py index 0781f9f2..21d2337b 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre-superset.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre-superset.py @@ -18,8 +18,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialScores, TrialKey, TrialNdx, Utt2Info from hyperion.utils.list_utils import ismember -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def read_ndx_and_scores(ndx_file, score_file): diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre16.py b/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre16.py index 7880e358..6b2da927 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre16.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre16.py @@ -18,7 +18,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialScores, TrialKey, TrialNdx, Utt2Info from hyperion.utils.list_utils import ismember -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.classifiers import BinaryLogisticRegression as LR def read_ndx_and_scores(ndx_file, score_file): diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre21-dev.py b/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre21-dev.py index 
ce2be18c..240baf82 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre21-dev.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre21-dev.py @@ -18,8 +18,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialScores, TrialKey, TrialNdx, Utt2Info from hyperion.utils.list_utils import ismember -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def read_ndx_and_scores(ndx_file, score_file): diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre21-eval.py b/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre21-eval.py index 407d78dc..50ce6943 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre21-eval.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-calibration-v1-sre21-eval.py @@ -18,8 +18,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialScores, TrialKey, TrialNdx, Utt2Info from hyperion.utils.list_utils import ismember -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def read_ndx_and_scores(ndx_file, score_file): diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v1.py b/egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v1.py index 205a73d3..933f8864 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v1.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v1.py @@ -17,7 +17,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF def eval_fusion(in_score_files, ndx_file, model_file, out_score_file, fus_idx): diff --git a/egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v2.py b/egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v2.py index 0fa1ee59..081d8f23 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v2.py +++ b/egs/sre21-av-a/v1.16k/steps_be/eval-fusion-v2.py @@ -16,7 +16,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialScores, TrialKey, TrialNdx, Utt2Info from hyperion.utils.list_utils import ismember -from hyperion.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF def read_ndx(ndx_file): diff --git a/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v1.py b/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v1.py index 42d5d927..d7ba9129 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v1.py +++ b/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v1.py @@ -18,7 +18,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList, PCA, LDA, LNorm +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.scp_list import SCPList diff --git a/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v2.py b/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v2.py index 082f69a6..f38445c5 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v2.py +++ 
b/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v2.py @@ -18,7 +18,7 @@ import pandas as pd from hyperion.hyp_defs import float_cpu, config_logger -from hyperion.transforms import TransformList, PCA, LDA, LNorm +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF diff --git a/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v3.py b/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v3.py index 423ab265..febda665 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v3.py +++ b/egs/sre21-av-a/v1.16k/steps_be/train-be-plda-v3.py @@ -18,7 +18,7 @@ import pandas as pd from hyperion.hyp_defs import float_cpu, config_logger -from hyperion.transforms import TransformList, PCA, LDA, LNorm +from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF diff --git a/egs/sre21-av-a/v1.16k/steps_be/train-calibration-v1.py b/egs/sre21-av-a/v1.16k/steps_be/train-calibration-v1.py index 2c5fa488..01a26410 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/train-calibration-v1.py +++ b/egs/sre21-av-a/v1.16k/steps_be/train-calibration-v1.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialScores, TrialKey, Utt2Info from hyperion.utils.list_utils import ismember -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def read_key_and_scores(key_file, score_file): diff --git a/egs/sre21-av-a/v1.16k/steps_be/train-fusion-v1.py b/egs/sre21-av-a/v1.16k/steps_be/train-fusion-v1.py index 8935b431..65c78b41 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/train-fusion-v1.py +++ b/egs/sre21-av-a/v1.16k/steps_be/train-fusion-v1.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF def train_fusion( diff --git a/egs/sre21-av-a/v1.16k/steps_be/train-fusion-v2.py b/egs/sre21-av-a/v1.16k/steps_be/train-fusion-v2.py index 1c51111c..b447b81e 100755 --- a/egs/sre21-av-a/v1.16k/steps_be/train-fusion-v2.py +++ b/egs/sre21-av-a/v1.16k/steps_be/train-fusion-v2.py @@ -17,9 +17,9 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import GreedyFusionBinaryLR as GF -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import GreedyFusionBinaryLR as GF +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_fusion_condition( diff --git a/egs/sre21-av-v/v0.1/local/score_dcf.py b/egs/sre21-av-v/v0.1/local/score_dcf.py index 514ebf51..772d107a 100755 --- a/egs/sre21-av-v/v0.1/local/score_dcf.py +++ 
b/egs/sre21-av-v/v0.1/local/score_dcf.py @@ -19,7 +19,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/sre21-av/v1/local/score_sre21.py b/egs/sre21-av/v1/local/score_sre21.py index 986aa3f6..72fc1a13 100755 --- a/egs/sre21-av/v1/local/score_sre21.py +++ b/egs/sre21-av/v1/local/score_sre21.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score(key_file, score_file, sre21_subset, output_file): diff --git a/egs/voices_challenge/v0/steps_be/eval-be-snorm-v1.py b/egs/voices_challenge/v0/steps_be/eval-be-snorm-v1.py index 78231ba1..b280ab0e 100755 --- a/egs/voices_challenge/v0/steps_be/eval-be-snorm-v1.py +++ b/egs/voices_challenge/v0/steps_be/eval-be-snorm-v1.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/voices_challenge/v0/steps_be/eval-be-v1.py b/egs/voices_challenge/v0/steps_be/eval-be-v1.py index dc3e3f87..19d582e4 100755 --- a/egs/voices_challenge/v0/steps_be/eval-be-v1.py +++ b/egs/voices_challenge/v0/steps_be/eval-be-v1.py @@ -19,7 +19,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/voices_challenge/v0/steps_be/eval-calibration-v1.py b/egs/voices_challenge/v0/steps_be/eval-calibration-v1.py index fa16dfce..31b527f7 100755 --- a/egs/voices_challenge/v0/steps_be/eval-calibration-v1.py +++ b/egs/voices_challenge/v0/steps_be/eval-calibration-v1.py @@ -19,8 +19,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): diff --git a/egs/voices_challenge/v0/steps_be/train-be-v1.py b/egs/voices_challenge/v0/steps_be/train-be-v1.py index 44f93a57..ed1b5f09 100755 --- a/egs/voices_challenge/v0/steps_be/train-be-v1.py +++ b/egs/voices_challenge/v0/steps_be/train-be-v1.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F diff 
--git a/egs/voices_challenge/v0/steps_be/train-be-v2.py b/egs/voices_challenge/v0/steps_be/train-be-v2.py index cd4d4470..fbb961b2 100755 --- a/egs/voices_challenge/v0/steps_be/train-be-v2.py +++ b/egs/voices_challenge/v0/steps_be/train-be-v2.py @@ -18,7 +18,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info diff --git a/egs/voices_challenge/v0/steps_be/train-calibration-v1.py b/egs/voices_challenge/v0/steps_be/train-calibration-v1.py index fa1dfcf7..9eaa7187 100755 --- a/egs/voices_challenge/v0/steps_be/train-calibration-v1.py +++ b/egs/voices_challenge/v0/steps_be/train-calibration-v1.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, verbose): diff --git a/egs/voices_challenge/v1/steps_be/eval-be-snorm-v1.py b/egs/voices_challenge/v1/steps_be/eval-be-snorm-v1.py index 78231ba1..b280ab0e 100755 --- a/egs/voices_challenge/v1/steps_be/eval-be-snorm-v1.py +++ b/egs/voices_challenge/v1/steps_be/eval-be-snorm-v1.py @@ -20,8 +20,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList -from hyperion.score_norm import AdaptSNorm as SNorm +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm as SNorm from hyperion.helpers import VectorReader as VR diff --git a/egs/voices_challenge/v1/steps_be/eval-be-v1.py b/egs/voices_challenge/v1/steps_be/eval-be-v1.py index dc3e3f87..19d582e4 100755 --- a/egs/voices_challenge/v1/steps_be/eval-be-v1.py +++ b/egs/voices_challenge/v1/steps_be/eval-be-v1.py @@ -19,7 +19,7 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/voices_challenge/v1/steps_be/eval-calibration-v1.py b/egs/voices_challenge/v1/steps_be/eval-calibration-v1.py index fa16dfce..31b527f7 100755 --- a/egs/voices_challenge/v1/steps_be/eval-calibration-v1.py +++ b/egs/voices_challenge/v1/steps_be/eval-calibration-v1.py @@ -19,8 +19,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): diff --git a/egs/voices_challenge/v1/steps_be/train-be-v1.py b/egs/voices_challenge/v1/steps_be/train-be-v1.py index 
44f93a57..ed1b5f09 100755 --- a/egs/voices_challenge/v1/steps_be/train-be-v1.py +++ b/egs/voices_challenge/v1/steps_be/train-be-v1.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F diff --git a/egs/voices_challenge/v1/steps_be/train-be-v2.py b/egs/voices_challenge/v1/steps_be/train-be-v2.py index 36fbc341..fda28dc7 100755 --- a/egs/voices_challenge/v1/steps_be/train-be-v2.py +++ b/egs/voices_challenge/v1/steps_be/train-be-v2.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, LDA, LNorm +from hyperion.np.transforms import TransformList, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info diff --git a/egs/voices_challenge/v1/steps_be/train-calibration-v1.py b/egs/voices_challenge/v1/steps_be/train-calibration-v1.py index fa1dfcf7..9eaa7187 100755 --- a/egs/voices_challenge/v1/steps_be/train-calibration-v1.py +++ b/egs/voices_challenge/v1/steps_be/train-calibration-v1.py @@ -17,8 +17,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, verbose): diff --git a/egs/voxceleb/adv.v2/local/make_some_figs.py b/egs/voxceleb/adv.v2/local/make_some_figs.py index 0b2b672f..84c167a4 100755 --- a/egs/voxceleb/adv.v2/local/make_some_figs.py +++ b/egs/voxceleb/adv.v2/local/make_some_figs.py @@ -11,7 +11,7 @@ import pandas as pd from hyperion.hyp_defs import float_cpu, config_logger -from hyperion.metrics.verification_evaluator import ( +from hyperion.np.metrics.verification_evaluator import ( VerificationAdvAttackEvaluator as Eval, ) diff --git a/egs/voxceleb/adv.v2/local/score_dcf.py b/egs/voxceleb/adv.v2/local/score_dcf.py index 50babe69..1718ad4d 100755 --- a/egs/voxceleb/adv.v2/local/score_dcf.py +++ b/egs/voxceleb/adv.v2/local/score_dcf.py @@ -14,7 +14,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import SparseTrialScores, SparseTrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-be-Nvs1-v1.py b/egs/voxceleb/adv.v2/steps_backend/eval-be-Nvs1-v1.py index 4b017114..ea570f60 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-be-Nvs1-v1.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-be-Nvs1-v1.py @@ -18,7 +18,7 @@ from hyperion.utils import TrialNdx, TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-be-cos-Nvs1.py 
b/egs/voxceleb/adv.v2/steps_backend/eval-be-cos-Nvs1.py index 0b3c9125..85e82149 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-be-cos-Nvs1.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-be-cos-Nvs1.py @@ -16,10 +16,10 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialNdx, TrialScores from hyperion.utils.math import cosine_scoring -from hyperion.pdfs import PLDA +from hyperion.np.pdfs import PLDA from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList, LNorm +from hyperion.np.transforms import TransformList, LNorm def eval_plda( diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py b/egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py index 0438e373..d5cd6a55 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py @@ -22,7 +22,7 @@ from hyperion.utils.math import cosine_scoring from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-be-novelty.py b/egs/voxceleb/adv.v2/steps_backend/eval-be-novelty.py index 3ebac1f6..29b0a2c8 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-be-novelty.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-be-novelty.py @@ -19,7 +19,7 @@ from hyperion.utils import TrialNdx, TrialScores, Utt2Info from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList from hyperion.io import RandomAccessDataReaderFactory as DRF diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-plda-unknown-attacks-noimp.py b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-plda-unknown-attacks-noimp.py index 630bc244..0aeb2367 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-plda-unknown-attacks-noimp.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-plda-unknown-attacks-noimp.py @@ -19,13 +19,13 @@ from hyperion.hyp_defs import config_logger from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.metrics.acc import compute_accuracy -from hyperion.metrics.confusion_matrix import ( +from hyperion.np.metrics.acc import compute_accuracy +from hyperion.np.metrics.confusion_matrix import ( compute_confusion_matrix, print_confusion_matrix, ) -from hyperion.transforms import PCA, LNorm -from hyperion.pdfs import SPLDA +from hyperion.np.transforms import PCA, LNorm +from hyperion.np.pdfs import SPLDA from numpy.linalg import matrix_rank # colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-plda-unkown-attacks.py b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-plda-unkown-attacks.py index 5ad87f72..796422f8 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-plda-unkown-attacks.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-plda-unkown-attacks.py @@ -19,13 +19,13 @@ from hyperion.hyp_defs import config_logger from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.metrics.acc import compute_accuracy -from hyperion.metrics.confusion_matrix import ( +from hyperion.np.metrics.acc import compute_accuracy +from 
hyperion.np.metrics.confusion_matrix import ( compute_confusion_matrix, print_confusion_matrix, ) -from hyperion.transforms import PCA, LNorm -from hyperion.pdfs import SPLDA +from hyperion.np.transforms import PCA, LNorm +from hyperion.np.pdfs import SPLDA from numpy.linalg import matrix_rank # colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-unknown-attacks.py b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-unknown-attacks.py index e8dd6e00..cf20735f 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-unknown-attacks.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf-unknown-attacks.py @@ -19,8 +19,8 @@ from hyperion.hyp_defs import config_logger from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.metrics.acc import compute_accuracy -from hyperion.metrics.confusion_matrix import ( +from hyperion.np.metrics.acc import compute_accuracy +from hyperion.np.metrics.confusion_matrix import ( compute_xlabel_confusion_matrix, print_confusion_matrix, ) diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf.py b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf.py index 6b259a2f..c174cb3b 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf.py @@ -20,8 +20,8 @@ from hyperion.hyp_defs import config_logger from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.metrics.acc import compute_accuracy -from hyperion.metrics.confusion_matrix import ( +from hyperion.np.metrics.acc import compute_accuracy +from hyperion.np.metrics.confusion_matrix import ( compute_confusion_matrix, print_confusion_matrix, ) diff --git a/egs/voxceleb/adv.v2/steps_backend/train-be-v1.py b/egs/voxceleb/adv.v2/steps_backend/train-be-v1.py index b681b0ac..e2c8e928 100755 --- a/egs/voxceleb/adv.v2/steps_backend/train-be-v1.py +++ b/egs/voxceleb/adv.v2/steps_backend/train-be-v1.py @@ -13,7 +13,7 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList, LDA, LNorm, PCA +from hyperion.np.transforms import TransformList, LDA, LNorm, PCA from hyperion.helpers import PLDAFactory as F from numpy.linalg import matrix_rank diff --git a/egs/voxceleb/adv.v2/steps_visual/proj-attack-lda.py b/egs/voxceleb/adv.v2/steps_visual/proj-attack-lda.py index 03fa3325..b7725386 100755 --- a/egs/voxceleb/adv.v2/steps_visual/proj-attack-lda.py +++ b/egs/voxceleb/adv.v2/steps_visual/proj-attack-lda.py @@ -25,7 +25,7 @@ from hyperion.hyp_defs import config_logger from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.transforms import LDA +from hyperion.np.transforms import LDA colors = ["b", "g", "r", "c", "m", "y", "k"] markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"] diff --git a/egs/voxceleb/adv.v2/steps_visual/proj-attack-tsne.py b/egs/voxceleb/adv.v2/steps_visual/proj-attack-tsne.py index a76a6633..b02447e8 100755 --- a/egs/voxceleb/adv.v2/steps_visual/proj-attack-tsne.py +++ b/egs/voxceleb/adv.v2/steps_visual/proj-attack-tsne.py @@ -25,7 +25,7 @@ from hyperion.hyp_defs import config_logger from hyperion.utils import Utt2Info from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.transforms import PCA, SklTSNE, LNorm +from hyperion.np.transforms import PCA, SklTSNE, LNorm colors = 
["b", "g", "r", "c", "m", "y", "k"] markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"] diff --git a/egs/voxceleb/v1/local/attack_analysis.py b/egs/voxceleb/v1/local/attack_analysis.py index 8c74c6e9..2e0fdb42 100755 --- a/egs/voxceleb/v1/local/attack_analysis.py +++ b/egs/voxceleb/v1/local/attack_analysis.py @@ -15,7 +15,7 @@ import pandas as pd from hyperion.hyp_defs import float_cpu, config_logger -from hyperion.metrics.verification_evaluator import ( +from hyperion.np.metrics.verification_evaluator import ( VerificationAdvAttackEvaluator as Eval, ) diff --git a/egs/voxceleb/v1/local/make_some_figs.py b/egs/voxceleb/v1/local/make_some_figs.py index 207cab20..a4117aba 100755 --- a/egs/voxceleb/v1/local/make_some_figs.py +++ b/egs/voxceleb/v1/local/make_some_figs.py @@ -9,7 +9,7 @@ import pandas as pd from hyperion.hyp_defs import float_cpu, config_logger -from hyperion.metrics.verification_evaluator import ( +from hyperion.np.metrics.verification_evaluator import ( VerificationAdvAttackEvaluator as Eval, ) diff --git a/egs/voxceleb/v1/local/score_dcf.py b/egs/voxceleb/v1/local/score_dcf.py index 9858583d..3524d222 100755 --- a/egs/voxceleb/v1/local/score_dcf.py +++ b/egs/voxceleb/v1/local/score_dcf.py @@ -14,7 +14,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import SparseTrialScores, SparseTrialKey -from hyperion.metrics import fast_eval_dcf_eer as fast_eval +from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval def score_dcf(key_file, score_file, output_path): diff --git a/egs/voxceleb/v1/steps_be/eval-be-v1.py b/egs/voxceleb/v1/steps_be/eval-be-v1.py index c88b05fc..f7d26390 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-v1.py +++ b/egs/voxceleb/v1/steps_be/eval-be-v1.py @@ -23,7 +23,7 @@ from hyperion.utils import TrialNdx, TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/voxceleb/v1/steps_be/eval-be-v2.py b/egs/voxceleb/v1/steps_be/eval-be-v2.py index 0438e373..d5cd6a55 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-v2.py +++ b/egs/voxceleb/v1/steps_be/eval-be-v2.py @@ -22,7 +22,7 @@ from hyperion.utils.math import cosine_scoring from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F -from hyperion.transforms import TransformList +from hyperion.np.transforms import TransformList def eval_plda( diff --git a/egs/voxceleb/v1/steps_be/eval-calibration-v1.py b/egs/voxceleb/v1/steps_be/eval-calibration-v1.py index bf252f60..fdd5516f 100755 --- a/egs/voxceleb/v1/steps_be/eval-calibration-v1.py +++ b/egs/voxceleb/v1/steps_be/eval-calibration-v1.py @@ -23,8 +23,8 @@ from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey from hyperion.utils.trial_ndx import TrialNdx -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): diff --git a/egs/voxceleb/v1/steps_be/train-be-v1.py b/egs/voxceleb/v1/steps_be/train-be-v1.py index a1e6fa7e..ea8cf867 100755 --- a/egs/voxceleb/v1/steps_be/train-be-v1.py +++ b/egs/voxceleb/v1/steps_be/train-be-v1.py @@ -17,7 +17,7 @@ from hyperion.hyp_defs import 
config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.transforms import TransformList, LDA, LNorm, PCA +from hyperion.np.transforms import TransformList, LDA, LNorm, PCA from hyperion.helpers import PLDAFactory as F diff --git a/egs/voxceleb/v1/steps_be/train-be-v2.py b/egs/voxceleb/v1/steps_be/train-be-v2.py index 1d72df93..4e3d7542 100755 --- a/egs/voxceleb/v1/steps_be/train-be-v2.py +++ b/egs/voxceleb/v1/steps_be/train-be-v2.py @@ -18,7 +18,7 @@ from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR -from hyperion.transforms import TransformList, CentWhiten, PCA +from hyperion.np.transforms import TransformList, CentWhiten, PCA from numpy.linalg import matrix_rank diff --git a/egs/voxceleb/v1/steps_be/train-calibration-v1.py b/egs/voxceleb/v1/steps_be/train-calibration-v1.py index 7408fd1d..489ceed9 100755 --- a/egs/voxceleb/v1/steps_be/train-calibration-v1.py +++ b/egs/voxceleb/v1/steps_be/train-calibration-v1.py @@ -22,8 +22,8 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils.trial_scores import TrialScores from hyperion.utils.trial_key import TrialKey -from hyperion.metrics import compute_act_dcf, compute_min_dcf -from hyperion.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbose): From 569c9ba0eaa9b14cf2708ecf4aadbd6d5a301c7c Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 9 Mar 2022 13:47:18 -0500 Subject: [PATCH 003/154] added more docs and x_lengths support --- ...rch-extract-xvectors-from-wav-with-rttm.py | 26 -- .../bin/torch-extract-xvectors-vae-preproc.py | 17 +- hyperion/np/feats/mfcc.py | 2 +- hyperion/torch/layer_blocks/conformer_conv.py | 10 +- hyperion/torch/layer_blocks/dc1d_blocks.py | 57 ++- hyperion/torch/layer_blocks/dc2d_blocks.py | 52 +++ hyperion/torch/layer_blocks/etdnn_blocks.py | 29 +- hyperion/torch/layer_blocks/mbconv_blocks.py | 49 ++- .../torch/layer_blocks/resetdnn_blocks.py | 26 +- hyperion/torch/layer_blocks/se_blocks.py | 108 +++++- hyperion/torch/layer_blocks/tdnn_blocks.py | 25 ++ hyperion/torch/layers/activation_factory.py | 7 +- hyperion/torch/layers/audio_feats.py | 332 +++++++++++++++--- hyperion/torch/layers/audio_feats_factory.py | 62 +++- hyperion/torch/layers/calibrators.py | 16 + hyperion/torch/layers/dropout.py | 48 +++ hyperion/torch/layers/global_pool.py | 282 ++++++++++++--- hyperion/torch/layers/interpolate.py | 15 + hyperion/torch/layers/margin_losses.py | 74 ++++ hyperion/torch/layers/mvn.py | 28 +- hyperion/torch/layers/norm_layer_factory.py | 8 + hyperion/torch/layers/pdf_storage.py | 10 +- hyperion/torch/layers/pool_factory.py | 32 ++ hyperion/torch/layers/spec_augment.py | 48 ++- hyperion/torch/layers/subpixel_convs.py | 48 +++ hyperion/torch/layers/swish.py | 6 + hyperion/torch/layers/tensor2pdf.py | 121 ++++++- hyperion/torch/layers/vq.py | 218 +++++++++++- .../torch/models/wav2xvectors/__init__.py | 13 + .../hf_wav2vec2resnet1d_xvector.py | 40 +++ .../models/wav2xvectors/hf_wav2xvector.py | 26 ++ .../wav2xvectors/wav2resnet1d_xvector.py | 53 +++ .../models/wav2xvectors/wav2resnet_xvector.py | 53 +++ .../torch/models/wav2xvectors/wav2xvector.py | 128 +++++++ hyperion/torch/models/xvectors/__init__.py | 13 + .../torch/models/xvectors/resnet1d_xvector.py | 2 +- hyperion/torch/models/xvectors/xvector.py | 196 ++++------- 
hyperion/torch/narchs/audio_feats_mvn.py | 16 +- hyperion/torch/narchs/classif_head.py | 28 +- hyperion/torch/narchs/conformer_encoder_v1.py | 11 +- hyperion/torch/torch_model.py | 4 +- .../trainers/xvector_adv_trainer_from_wav.py | 11 +- hyperion/torch/trainers/xvector_finetuner.py | 117 ------ hyperion/torch/trainers/xvector_trainer.py | 2 +- .../trainers/xvector_trainer_deep_feat_reg.py | 51 +-- .../xvector_trainer_deep_feat_reg_from_wav.py | 22 +- .../trainers/xvector_trainer_from_wav.py | 4 +- hyperion/torch/utils/__init__.py | 3 + hyperion/torch/utils/collation.py | 92 +++++ hyperion/torch/utils/masking.py | 58 +++ hyperion/torch/utils/misc.py | 66 +++- hyperion/torch/utils/vad_utils.py | 59 ++++ 52 files changed, 2268 insertions(+), 556 deletions(-) create mode 100644 hyperion/torch/models/wav2xvectors/__init__.py create mode 100644 hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py create mode 100644 hyperion/torch/models/wav2xvectors/hf_wav2xvector.py create mode 100644 hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py create mode 100644 hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py create mode 100644 hyperion/torch/models/wav2xvectors/wav2xvector.py create mode 100644 hyperion/torch/models/xvectors/__init__.py delete mode 100644 hyperion/torch/trainers/xvector_finetuner.py create mode 100644 hyperion/torch/utils/collation.py create mode 100644 hyperion/torch/utils/masking.py create mode 100644 hyperion/torch/utils/vad_utils.py diff --git a/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py b/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py index c4f1ba9a..101d6a10 100755 --- a/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py +++ b/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py @@ -154,21 +154,6 @@ def extract_xvectors( t3 = time.time() key, x = augment(key0, x0, augmenter, aug_df, aug_id) - # if augmenter is None: - # x = x0 - # key = key0 - # else: - # x, aug_info = augmenter(x0) - # key = '%s-aug-%02d' % (key0, aug_id) - # aug_df_row = {'key_aug': key, 'key_orig': key0, - # 'noise_type': aug_info['noise']['noise_type'], - # 'snr': aug_info['noise']['snr'], - # 'rir_type': aug_info['reverb']['rir_type'], - # 'srr': aug_info['reverb']['srr'], - # 'sdr': aug_info['sdr']} - - # aug_df.append(pd.DataFrame(aug_df_row, index=[0])) - x_total = x max_samples = x.shape[0] y = np.zeros( @@ -219,17 +204,6 @@ def extract_xvectors( key, x, min_utt_length, max_utt_length, rng ) - # if random_utt_length: - # utt_length = rng.randint( - # low=min_utt_length, high=max_utt_length+1) - # if utt_length < x.shape[1]: - # first_frame = rng.randint( - # low=0, high=x.shape[1]-utt_length) - # x = x[:,first_frame:first_frame+utt_length] - # logging.info( - # 'extract-random-utt %s of length=%d first-frame=%d' % ( - # key, x.shape[1], first_frame)) - t6 = time.time() if x.shape[1] > 0: x = x.transpose(1, 2).contiguous() diff --git a/hyperion/bin/torch-extract-xvectors-vae-preproc.py b/hyperion/bin/torch-extract-xvectors-vae-preproc.py index 64f6359d..afa7a117 100755 --- a/hyperion/bin/torch-extract-xvectors-vae-preproc.py +++ b/hyperion/bin/torch-extract-xvectors-vae-preproc.py @@ -96,21 +96,6 @@ def extract_xvectors( keys = [] info = [] - # num_gpus = 1 if use_gpu else 0 - # logging.info('initializing devices num_gpus={}'.format(num_gpus)) - # device = open_device(num_gpus=num_gpus) - # logging.info('loading x-vector model {}'.format(xvec_model_path)) - # xvec_model = TML.load(xvec_model_path) - # xvec_model.to(device) - # 
xvec_model.eval()
-    # logging.info('x-vector={}'.format(xvec_model))
-
-    # logging.info('loading vae model {}'.format(vae_model_path))
-    # vae_model = TML.load(vae_model_path)
-    # vae_model.to(device)
-    # vae_model.eval()
-    # logging.info('vae={}'.format(vae_model))
-
     mse_loss = torch.nn.MSELoss()
 
     dr_args = DRF.filter_args(**kwargs)
@@ -151,7 +136,7 @@ def extract_xvectors(
             t4 = time.time()
             if x.shape[0] == 0:
-                y = np.zeros((model.embed_dim,), dtype=float_cpu())
+                y = np.zeros((xvec_model.embed_dim,), dtype=float_cpu())
             else:
                 xx = torch.tensor(x.T[None, :], dtype=torch.get_default_dtype())
                 with torch.no_grad():
diff --git a/hyperion/np/feats/mfcc.py b/hyperion/np/feats/mfcc.py
index cf517ee5..d6b8dd3f 100644
--- a/hyperion/np/feats/mfcc.py
+++ b/hyperion/np/feats/mfcc.py
@@ -64,7 +64,7 @@ class MFCC(object):
     """Compute MFCC features.
 
     Attributes:
-        sample_frequency: Waveform data sample frequency (must match the waveform file, if specified there) (default = 16000)
+      sample_frequency: Waveform data sample frequency (must match the waveform file, if specified there) (default = 16000)
       frame_length: Frame length in milliseconds (default = 25)
       frame_shift: Frame shift in milliseconds (default = 10)
       fft_length: Length of FFT (default = 512)
diff --git a/hyperion/torch/layer_blocks/conformer_conv.py b/hyperion/torch/layer_blocks/conformer_conv.py
index 7ed9a43a..0c42f34a 100644
--- a/hyperion/torch/layer_blocks/conformer_conv.py
+++ b/hyperion/torch/layer_blocks/conformer_conv.py
@@ -100,14 +100,16 @@ def __init__(
 
         self.context = stride * (kernel_size - 1) // 2
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
         """Forward function
 
         Args:
-          x: input size = (batch, num_channels, time)
+          x: input tensor with shape = (batch, num_channels, time)
+          x_mask: mask indicating the valid frames in the sequence with
+            shape = (batch, 1, time) or (batch, time)
 
         Returns
-          torch.Tensor size = (batch, num_channels, (time-1)//stride+1)
+          Tensor with shape = (batch, num_channels, (time-1)//stride+1)
         """
         residual = x
@@ -121,7 +123,7 @@ def forward(self, x):
         # depthwide conv phase
         x = self.act(self.norm_dw(self.conv_dw(x)))
         if self.has_se:
-            x = self.se_layer(x)
+            x = self.se_layer(x, x_mask=x_mask)
 
         # final projection
         x = self.conv_proj(x)
diff --git a/hyperion/torch/layer_blocks/dc1d_blocks.py b/hyperion/torch/layer_blocks/dc1d_blocks.py
index f5b794ef..da643c34 100644
--- a/hyperion/torch/layer_blocks/dc1d_blocks.py
+++ b/hyperion/torch/layer_blocks/dc1d_blocks.py
@@ -12,6 +12,22 @@
 
 class DC1dEncBlock(nn.Module):
+    """Build block for deep convolutional encoder 1d.
+
+    Args:
+      in_channels: input channels.
+      out_channels: output channels.
+      kernel_size: kernel size for the convolution.
+      stride: downsampling stride.
+      dilation: kernel dilation.
+      activation: non-linear activation function object, string or config dict.
+      dropout_rate: dropout rate.
+      use_norm: if True, it uses layer normalization.
+      norm_layer: Normalization Layer constructor; if None, it uses BatchNorm1d.
+      norm_before: if True, layer normalization is before the non-linearity, else
+        after the non-linearity.
+    """
+
     def __init__(
         self,
         in_channels,
@@ -62,14 +78,25 @@ def __init__(
         self.context = dilation * (kernel_size - 1) // 2
 
     def freeze(self):
+        """Freezes trainable parameters."""
         for param in self.parameters():
             param.requires_grad = False
 
     def unfreeze(self):
+        """Unfreezes trainable parameters."""
         for param in self.parameters():
             param.requires_grad = True
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
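
# A usage sketch (not part of this patch) of the new x_mask argument threaded
# through the block forward() methods above. The mask marks the valid frames,
# so padded frames can be excluded from the SE pooling. The block name and
# constructor arguments are assumed for illustration.
import torch

batch, channels, time = 4, 64, 200
x = torch.randn(batch, channels, time)
lengths = torch.tensor([200, 180, 150, 90])

# binary mask of valid frames with shape (batch, 1, time), as documented above
x_mask = (torch.arange(time)[None, :] < lengths[:, None]).unsqueeze(1).float()

# block = ConformerConvBlock(...)   # hypothetical instantiation
# y = block(x, x_mask=x_mask)       # masked frames are ignored by the SE layer
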
+ + Args: + x: input tensor with shape = (batch, in_channels, in_time). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_time). + """ x = self.conv1(x) if self.norm_before: @@ -88,6 +115,22 @@ def forward(self, x): class DC1dDecBlock(nn.Module): + """Build block for deep convolutional decoder 1d. + + Args: + in_channels: input channels. + out_channels: output channels. + kernel_size: kernels size for the convolution. + stride: upsampling stride. + dilation: kernel dilation. + activation: non-linear activation function object, string or config dict. + dropout_rate: dropout rate. + use_norm: if True, if uses layer normalization. + norm_layer: Normalization Layer constructor, if None it used BatchNorm1d. + norm_before: if True, layer normalization is before the non-linearity, else + after the non-linearity. + """ + def __init__( self, in_channels, @@ -149,15 +192,25 @@ def __init__( self.context = dilation * (kernel_size - 1) // 2 def freeze(self): + """Freezes trainable parameters.""" for param in self.parameters(): param.requires_grad = False def unfreeze(self): + """Unfreezes trainable parameters.""" for param in self.parameters(): param.requires_grad = True - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_time). + x_mask: unused. + Returns: + Tensor with shape = (batch, out_channels, out_time). + """ x = self.conv1(x) if self.norm_before: x = self.bn1(x) diff --git a/hyperion/torch/layer_blocks/dc2d_blocks.py b/hyperion/torch/layer_blocks/dc2d_blocks.py index 0d251528..bae8e203 100644 --- a/hyperion/torch/layer_blocks/dc2d_blocks.py +++ b/hyperion/torch/layer_blocks/dc2d_blocks.py @@ -11,6 +11,22 @@ class DC2dEncBlock(nn.Module): + """Build block for deep convolutional encoder 2d. + + Args: + in_channels: input channels. + out_channels: output channels. + kernel_size: kernels size for the convolution. + stride: downsampling stride. + dilation: kernel dilation. + activation: non-linear activation function object, string or config dict. + dropout_rate: dropout rate. + use_norm: if True, if uses layer normalization. + norm_layer: Normalization Layer constructor, if None it used BatchNorm2d. + norm_before: if True, layer normalization is before the non-linearity, else + after the non-linearity. + """ + def __init__( self, in_channels, @@ -61,15 +77,25 @@ def __init__( self.context = dilation * (kernel_size - 1) // 2 def freeze(self): + """Freezes trainable parameters.""" for param in self.parameters(): param.requires_grad = False def unfreeze(self): + """Unfreezes trainable parameters.""" for param in self.parameters(): param.requires_grad = True def forward(self, x): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ x = self.conv1(x) if self.norm_before: x = self.bn1(x) @@ -87,6 +113,22 @@ def forward(self, x): class DC2dDecBlock(nn.Module): + """Build block for deep convolutional decoder 2d. + + Args: + in_channels: input channels. + out_channels: output channels. + kernel_size: kernels size for the convolution. + stride: upsampling stride. + dilation: kernel dilation. + activation: non-linear activation function object, string or config dict. + dropout_rate: dropout rate. + use_norm: if True, if uses layer normalization. + norm_layer: Normalization Layer constructor, if None it used BatchNorm2d. 
+ norm_before: if True, layer normalization is before the non-linearity, else + after the non-linearity. + """ + def __init__( self, in_channels, @@ -148,15 +190,25 @@ def __init__( self.context = dilation * (kernel_size - 1) // 2 def freeze(self): + """Freezes trainable parameters.""" for param in self.parameters(): param.requires_grad = False def unfreeze(self): + """Unfreezes trainable parameters.""" for param in self.parameters(): param.requires_grad = True def forward(self, x): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ x = self.conv1(x) if self.norm_before: x = self.bn1(x) diff --git a/hyperion/torch/layer_blocks/etdnn_blocks.py b/hyperion/torch/layer_blocks/etdnn_blocks.py index 958c31ba..17f3f8ef 100644 --- a/hyperion/torch/layer_blocks/etdnn_blocks.py +++ b/hyperion/torch/layer_blocks/etdnn_blocks.py @@ -13,6 +13,21 @@ class ETDNNBlock(nn.Module): + """Building block for Extended-TDNN. + + Args: + in_channels: input channels. + out_channels: output channels. + kernel_size: kernels size for the convolution. + dilation: kernel dilation. + activation: non-linear activation function object, string or config dict. + dropout_rate: dropout rate. + use_norm: if True, if uses layer normalization. + norm_layer: Normalization Layer constructor, if None it used BatchNorm1d. + norm_before: if True, layer normalization is before the non-linearity, else + after the non-linearity. + """ + def __init__( self, in_channels, @@ -62,15 +77,21 @@ def __init__( ) self.conv2 = Conv1d(out_channels, out_channels, bias=bias, kernel_size=1) - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. - x = self.conv1(x) + Args: + x: input tensor with shape = (batch, in_channels, in_time). + x_mask: unused. + Returns: + Tensor with shape = (batch, out_channels, out_time). + """ + x = self.conv1(x) if self.norm_before: x = self.bn1(x) x = self.activation1(x) - if self.norm_after: x = self.bn1(x) @@ -78,12 +99,10 @@ def forward(self, x): x = self.dropout1(x) x = self.conv2(x) - if self.norm_before: x = self.bn2(x) x = self.activation2(x) - if self.norm_after: x = self.bn2(x) diff --git a/hyperion/torch/layer_blocks/mbconv_blocks.py b/hyperion/torch/layer_blocks/mbconv_blocks.py index 6d9a3141..89c746ea 100644 --- a/hyperion/torch/layer_blocks/mbconv_blocks.py +++ b/hyperion/torch/layer_blocks/mbconv_blocks.py @@ -42,6 +42,22 @@ def _make_downsample(in_channels, out_channels, stride, norm_layer): class MBConvBlock(nn.Module): + """MobileNet/EfficentNet Inverted bottleneck Block + + Attributes: + in_channels: input channels. + out_channels: output channels + expansion: expansion of channels for the inverted bottleneck. + kernel_size: kernel size of the convs. + stride: downsampling stride of the convs. + activation: Non-linear activation object, string of configuration dictionary. + drop_connect_rate: Drop-connect rate for stochastic number of layers. + norm_layer: Normalization layer constructor, if None BatchNorm2d is used. + se_r=None: Squeeze-excitation compression ratio. + time_se: If true, squeeze is done only in time dimension. + num_feats: Number of features in dimension 2, needed if time_se=True. 
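
# A sketch of the inverted-bottleneck arithmetic described above (values
# assumed): channels are expanded by `expansion`, filtered depthwise, then
# projected back, with a residual add when the shapes allow it.
in_channels, expansion, out_channels, stride = 24, 4, 24, 1
inner_channels = in_channels * expansion   # conv_exp: 24 -> 96 channels
# conv_dw runs depthwise at 96 channels (optionally followed by the SE layer);
# conv_proj maps 96 -> 24; the residual connection applies because stride == 1
# and in_channels == out_channels.
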
+ """ + def __init__( self, in_channels, @@ -113,8 +129,17 @@ def __init__( self.context = stride * (kernel_size - 1) // 2 self.downsample_factor = stride - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x if self.expansion > 1: x = self.act(self.bn_exp(self.conv_exp(x))) @@ -137,6 +162,19 @@ def forward(self, x): class MBConvInOutBlock(nn.Module): + """Convolutional block used as input/output + in MobileNet/EffcientNet + + Attributes: + in_channels: input channels. + out_channels: output channels + kernel_size: kernel size of the convs. + stride: downsampling stride of the convs. + activation: Non-linear activation object, string of configuration dictionary. + norm_layer: Normalization layer constructor, if None BatchNorm2d is used. + + """ + def __init__( self, in_channels, @@ -169,4 +207,13 @@ def __init__( self.downsample_factor = stride def forward(self, x): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ return self.act(self.bn(self.conv(x))) diff --git a/hyperion/torch/layer_blocks/resetdnn_blocks.py b/hyperion/torch/layer_blocks/resetdnn_blocks.py index 9d849719..775118d1 100644 --- a/hyperion/torch/layer_blocks/resetdnn_blocks.py +++ b/hyperion/torch/layer_blocks/resetdnn_blocks.py @@ -15,6 +15,21 @@ class ResETDNNBlock(ETDNNBlock): + """Building block for Residual Extended-TDNN. + + Args: + in_channels: input channels. + out_channels: output channels. + kernel_size: kernels size for the convolution. + dilation: kernel dilation. + activation: non-linear activation function object, string or config dict. + dropout_rate: dropout rate. + use_norm: if True, if uses layer normalization. + norm_layer: Normalization Layer constructor, if None it used BatchNorm1d. + norm_before: if True, layer normalization is before the non-linearity, else + after the non-linearity. + """ + def __init__( self, num_channels, @@ -39,7 +54,16 @@ def __init__( norm_before, ) - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_time). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_time). + """ residual = x x = self.conv1(x) diff --git a/hyperion/torch/layer_blocks/se_blocks.py b/hyperion/torch/layer_blocks/se_blocks.py index 3d33f7d4..c3ba8e20 100644 --- a/hyperion/torch/layer_blocks/se_blocks.py +++ b/hyperion/torch/layer_blocks/se_blocks.py @@ -11,7 +11,15 @@ class SEBlock2D(nn.Module): - """From https://arxiv.org/abs/1709.01507""" + """Squeeze-excitation block 2d + from https://arxiv.org/abs/1709.01507. + + Attributes: + num_channels: input/output channels. + r: Squeeze-excitation compression ratio. + activation: Non-linear activation object, string of configuration dictionary. 
+ + """ def __init__( self, num_channels, r=16, activation={"name": "relu", "inplace": True} @@ -26,8 +34,33 @@ def __init__( ) self.sigmoid = nn.Sigmoid() - def forward(self, x): - z = torch.mean(x, dim=(2, 3), keepdim=True) + def _standardize_mask(self, mask): + if mask.dim() == 2: + return mask.view(mask.size(0), 1, 1, mask.size(-1)) + + if mask.dim() == 3: + return mask.unsqueeze(1) + + return mask + + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, channels, heigh, width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, channels, heigh, width). + """ + if x_mask is None: + z = torch.mean(x, dim=(2, 3), keepdim=True) + else: + x_mask = self._standardize_mask(x_mask) + total = torch.mean(x_mask, dim=(2, 3), keepdim=True) + z = torch.mean(x * x_mask, dim=(2, 3), keepdim=True) / total + scale = self.sigmoid(self.conv2(self.act(self.conv1(z)))) y = scale * x return y @@ -35,7 +68,14 @@ def forward(self, x): class TSEBlock2D(nn.Module): """From https://arxiv.org/abs/1709.01507 - Modified to do pooling only in time dimension + Modified to do pooling only in time dimension. + + Attributes: + num_channels: input/output channels. + num_feats: Number of features in dimension 2. + r: Squeeze-excitation compression ratio. + activation: Non-linear activation object, string of configuration dictionary. + """ def __init__( @@ -62,10 +102,35 @@ def __init__( ) self.sigmoid = nn.Sigmoid() - def forward(self, x): + def _standardize_mask(self, mask): + if mask.dim() == 2: + return mask.view(mask.size(0), 1, 1, mask.size(-1)) + + if mask.dim() == 3: + return mask.unsqueeze(1) + + return mask + + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, channels, heigh, width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, channels, heigh, width). + """ num_feats = x.shape[2] num_channels = x.shape[1] - z = torch.mean(x, dim=-1, keepdim=True) + if x_mask is None: + z = torch.mean(x, dim=-1, keepdim=True) + else: + x_mask = self._standardize_mask(x_mask) + total = torch.mean(x_mask, dim=-1, keepdim=True) + z = torch.mean(x * x_mask, dim=-1, keepdim=True) / total + z = z.view(-1, self.num_channels_1d, 1, 1) scale = self.sigmoid(self.conv2(self.act(self.conv1(z)))) scale = scale.view(-1, num_channels, num_feats, 1) @@ -76,6 +141,11 @@ def forward(self, x): class SEBlock1d(nn.Module): """1d Squeeze Excitation version of https://arxiv.org/abs/1709.01507 + + Attributes: + num_channels: input/output channels. + r: Squeeze-excitation compression ratio. + activation: Non-linear activation object, string of configuration dictionary. """ def __init__( @@ -91,8 +161,30 @@ def __init__( ) self.sigmoid = nn.Sigmoid() - def forward(self, x): - z = torch.mean(x, dim=2, keepdim=True) + def _standardize_mask(self, mask): + if mask.dim() == 2: + return mask.unsqueeze(1) + + return mask + + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, channels, time). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time) + + Returns: + Tensor with shape = (batch, channels, time). 
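
# A numerical sketch of the masked squeeze used by the SE blocks above:
# dividing the masked mean by the mask mean recovers the average over the
# valid frames only, so padded frames cannot bias the channel statistics.
import torch

x = torch.ones(1, 2, 3)                     # (batch, channels, time)
x[:, :, 2] = 100.0                          # padded frame with garbage values
x_mask = torch.tensor([[[1.0, 1.0, 0.0]]])  # (batch, 1, time)

total = torch.mean(x_mask, dim=-1, keepdim=True)
z = torch.mean(x * x_mask, dim=-1, keepdim=True) / total
# z is 1.0 for both channels; an unmasked mean would give 34.0
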
+ """ + if x_mask is None: + z = torch.mean(x, dim=2, keepdim=True) + else: + x_mask = self._standardize_mask(x_mask) + total = torch.mean(x_mask, dim=-1, keepdim=True) + z = torch.mean(x * x_mask, dim=-1, keepdim=True) / total + scale = self.sigmoid(self.conv2(self.act(self.conv1(z)))) y = scale * x return y diff --git a/hyperion/torch/layer_blocks/tdnn_blocks.py b/hyperion/torch/layer_blocks/tdnn_blocks.py index 8fcbb056..e979b7db 100644 --- a/hyperion/torch/layer_blocks/tdnn_blocks.py +++ b/hyperion/torch/layer_blocks/tdnn_blocks.py @@ -11,6 +11,21 @@ class TDNNBlock(nn.Module): + """Building block for TDNN. + + Args: + in_channels: input channels. + out_channels: output channels. + kernel_size: kernels size for the convolution. + dilation: kernel dilation. + activation: non-linear activation function object, string or config dict. + dropout_rate: dropout rate. + use_norm: if True, if uses layer normalization. + norm_layer: Normalization Layer constructor, if None it used BatchNorm1d. + norm_before: if True, layer normalization is before the non-linearity, else + after the non-linearity. + """ + def __init__( self, in_channels, @@ -56,15 +71,25 @@ def __init__( ) def freeze(self): + """Freezes trainable parameters.""" for param in self.parameters(): param.requires_grad = False def unfreeze(self): + """Unreezes trainable parameters.""" for param in self.parameters(): param.requires_grad = True def forward(self, x): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_time). + x_mask: unused. + Returns: + Tensor with shape = (batch, out_channels, out_time). + """ x = self.conv1(x) if self.norm_before: diff --git a/hyperion/torch/layers/activation_factory.py b/hyperion/torch/layers/activation_factory.py index 7bc09827..1d3bdfd2 100644 --- a/hyperion/torch/layers/activation_factory.py +++ b/hyperion/torch/layers/activation_factory.py @@ -41,12 +41,13 @@ def create(activation, **kwargs): """Creates a non-linear activation object Args: - activation: str with activation type, - dictionary with name field indicating the activation type, and extra activation arguments + activation: String with activation type, + dictionary with name field indicating the activation type, + and extra activation arguments None, then it returns None, Activation constructor - **kwargs: extra arguments for activation constructor + **kwargs: Extra arguments for activation constructor Return: Non-linear activation object diff --git a/hyperion/torch/layers/audio_feats.py b/hyperion/torch/layers/audio_feats.py index 718844f5..34cb9aa3 100644 --- a/hyperion/torch/layers/audio_feats.py +++ b/hyperion/torch/layers/audio_feats.py @@ -66,21 +66,23 @@ def _get_feature_window_function(window_type, window_size, blackman_coeff=0.42): def _get_strided_batch(waveform, window_length, window_shift, snip_edges, center=False): - r"""Given a waveform (1D tensor of size ``num_samples``), it returns a 2D tensor (m, ``window_size``) - representing how the window is shifted along the waveform. Each row is a frame. + """Given a waveform (2D tensor of size (batch, num_samples), + it returns a 3D tensor (batch, m, window_size) + representing how the window is shifted along the waveform. Each row is a frame. Args: - waveform (torch.Tensor): Tensor of size ``num_samples`` - window_size (int): Frame length - window_shift (int): Frame shift - snip_edges (bool): If True, end effects will be handled by outputting only frames that completely fit - in the file, and the number of frames depends on the frame_length. 
If False, the number of frames
-            depends only on the frame_shift, and we reflect the data at the ends.
-        center (bool): If true, if puts the center of the frame at t*window_shift, starting at t=0,
-            If overwrides snip_edges and set it to False
+        waveform: Tensor of size (batch, num_samples).
+        window_size: Frame length in samples.
+        window_shift: Frame shift in samples.
+        snip_edges: If True, end effects will be handled by outputting only frames
+          that completely fit in the file, and the number of frames depends
+          on the frame_length. If False, the number of frames depends only
+          on the frame_shift, and we reflect the data at the ends.
+        center (bool): If true, it puts the center of the frame at t*window_shift,
+          starting at t=0; it overrides snip_edges, setting it to False.
 
     Returns:
-        torch.Tensor: 3D tensor of size (m, ``window_size``) where each row is a frame
+        3D tensor of size (batch, m, ``window_size``) where each row is a frame
     """
     assert waveform.dim() == 2
     batch_size = waveform.size(0)
@@ -121,7 +123,7 @@ def _get_strided_batch(waveform, window_length, window_shift, snip_edges, center=False):
 
 def _get_log_energy(x, energy_floor):
-    r"""Returns the log energy of size (m) for a strided_input (m,*)"""
+    r"""Returns the log energy of size (batch, m) for a strided_input (batch, m, *)"""
     log_energy = (x.pow(2).sum(-1) + 1e-15).log()  # size (m)
     if energy_floor > 0.0:
         log_energy = torch.max(
@@ -133,6 +135,13 @@
 
 class Wav2Win(nn.Module):
+    """Class that takes a batch of waveforms and returns windowed frames
+    with a given frame-shift and frame-length.
+
+    Attributes:
+
+    """
+
     def __init__(
         self,
         fs=16000,
@@ -235,7 +244,7 @@
 
         if self.return_log_energy and not self.raw_energy:
             signal_log_energy = _get_log_energy(
-                strided_input, self.energy_floor
+                x_strided, self.energy_floor
             )  # size (batch, m)
 
         # Pad columns with zero until we reach size (batch, num_frames, pad_length)
@@ -252,6 +261,37 @@
 
 class Wav2FFT(nn.Module):
+    """Computes FFT from waveforms.
+
+    Attributes:
+      fs: Waveform data sample frequency (must match the waveform
+          file, if specified there) (default = 16000)
+      frame_length: Frame length in milliseconds (default = 25)
+      frame_shift: Frame shift in milliseconds (default = 10)
+      fft_length: Length of FFT (default = 512)
+      remove_dc_offset: Subtract mean from waveform on each frame (default = True)
+      preemphasis_coeff: Coefficient for use in signal preemphasis (default = 0.97)
+      window_type: Type of window ["hamming"|"hanning"|"povey"|"rectangular"|
+          "blackmann"] (default = 'povey')
+      use_fft_mag: If false, it uses |X(f)|^2, if true, it uses |X(f)|,
+          (default = False)
+      dither: Dithering constant (0.0 means no dither) (default = 1)
+      snip_edges: If true, end effects will be handled by outputting only
+          frames that completely fit in the file, and the number of
+          frames depends on the frame-length.
+          If false, the number of frames depends only on the
+          frame-shift, and we reflect the data at the ends.
+ (default = True) + center: If true, if puts the center of the frame at t*window_shift, starting at t=0, + If overwrides snip_edges and set it to False + energy_floor: Floor on energy (absolute, not relative) in MFCC computation + (default = 0) + raw_energy: If true, compute energy before preemphasis and + windowing (default = True) + use_energy: Use energy (not C0) in MFCC computation (default = True) + + """ + def __init__( self, fs=16000, @@ -324,14 +364,19 @@ def dither(self): return self.wav2win.dither def forward(self, x): + """Computes the comples Fourier transform. + + Args: + x: waveform tensor with shape = (batch, num_samples). + Returns: + FFT tensor with shape = (batch, num_frames, fft_length//2+1) + """ x_strided = self.wav2win(x) if self.use_energy: x_strided, log_e = x_strided - # X = torch.rfft(x_strided, 1, normalized=False, onesided=True) X = _rfft(x_strided) - if self.use_energy: X[:, 0, :, 0] = log_e @@ -339,6 +384,37 @@ def forward(self, x): class Wav2Spec(Wav2FFT): + """Computes Spectrograms from waveforms. + + Attributes: + fs: Waveform data sample frequency (must match the waveform + file, if specified there) (default = 16000) + frame_length: Frame length in milliseconds (default = 25) + frame_shift: Frame shift in milliseconds (default = 10) + fft_length: Length of FFT (default = 512) + remove_dc_offset: Subtract mean from waveform on each frame (default = True) + preemphasis_coeff: Coefficient for use in signal preemphasis (default = 0.97) + window_type: Type of window ["hamming"|"hanning"|"povey"|"rectangular"| + "blackmann"] (default = 'povey') + use_fft_mag: If false, it uses |X(f)|^2, if true, it uses |X(f)|, + (default = False) + dither: Dithering constant (0.0 means no dither) (default = 1) + snip_edges: If true, end effects will be handled by outputting only + frames that completely fit in the file, and the number of + frames depends on the frame-length. + If false, the number of frames depends only on the + frame-shift, and we reflect the data at the ends. + (default = True) + center: If true, if puts the center of the frame at t*window_shift, starting at t=0, + If overwrides snip_edges and set it to False + energy_floor: Floor on energy (absolute, not relative) in MFCC computation + (default = 0) + raw_energy: If true, compute energy before preemphasis and + windowing (default = True) + use_energy: Use energy (not C0) in MFCC computation (default = True) + + """ + def __init__( self, fs=16000, @@ -380,18 +456,21 @@ def __init__( self._to_spec = _pow_spectrogram def forward(self, x): + """Computes the Spectrogram. + + Args: + x: waveform tensor with shape = (batch, num_samples). + + Returns: + Spectrogram tensor with shape = (batch, num_frames, fft_length//2+1) + """ x_strided = self.wav2win(x) if self.use_energy: x_strided, log_e = x_strided - # X = torch.rfft(x_strided, 1, normalized=False, onesided=True) X = _rfft(x_strided) pow_spec = self._to_spec(X) - # pow_spec = X.pow(2).sum(-1) - # if self.use_fft_mag: - # pow_spec = pow_spec.sqrt() - if self.use_energy: pow_spec[:, 0] = log_e @@ -399,6 +478,37 @@ def forward(self, x): class Wav2LogSpec(Wav2FFT): + """Computes log-spectrograms from waveforms. 
+ + Attributes: + fs: Waveform data sample frequency (must match the waveform + file, if specified there) (default = 16000) + frame_length: Frame length in milliseconds (default = 25) + frame_shift: Frame shift in milliseconds (default = 10) + fft_length: Length of FFT (default = 512) + remove_dc_offset: Subtract mean from waveform on each frame (default = True) + preemphasis_coeff: Coefficient for use in signal preemphasis (default = 0.97) + window_type: Type of window ["hamming"|"hanning"|"povey"|"rectangular"| + "blackmann"] (default = 'povey') + use_fft_mag: If false, it uses |X(f)|^2, if true, it uses |X(f)|, + (default = False) + dither: Dithering constant (0.0 means no dither) (default = 1) + snip_edges: If true, end effects will be handled by outputting only + frames that completely fit in the file, and the number of + frames depends on the frame-length. + If false, the number of frames depends only on the + frame-shift, and we reflect the data at the ends. + (default = True) + center: If true, if puts the center of the frame at t*window_shift, starting at t=0, + If overwrides snip_edges and set it to False + energy_floor: Floor on energy (absolute, not relative) in MFCC computation + (default = 0) + raw_energy: If true, compute energy before preemphasis and + windowing (default = True) + use_energy: Use energy (not C0) in MFCC computation (default = True) + + """ + def __init__( self, fs=16000, @@ -440,21 +550,21 @@ def __init__( self._to_spec = _pow_spectrogram def forward(self, x): + """Computes the log-spectrogram. + + Args: + x: waveform tensor with shape = (batch, num_samples). + Returns: + Spectrogram tensor with shape = (batch, num_frames, fft_length//2+1) + """ x_strided = self.wav2win(x) if self.use_energy: x_strided, log_e = x_strided - # X = torch.rfft(x_strided, 1, normalized=False, onesided=True) X = _rfft(x_strided) pow_spec = self._to_spec(X) - - # pow_spec = X.pow(2).sum(-1) - # if self.use_fft_mag: - # pow_spec = pow_spec.sqrt() - pow_spec = (pow_spec + 1e-15).log() - if self.use_energy: pow_spec[:, 0] = log_e @@ -462,6 +572,46 @@ def forward(self, x): class Wav2LogFilterBank(Wav2FFT): + """Computes log-filter-bank from waveforms. + + Attributes: + fs: Waveform data sample frequency (must match the waveform + file, if specified there) (default = 16000) + frame_length: Frame length in milliseconds (default = 25) + frame_shift: Frame shift in milliseconds (default = 10) + fft_length: Length of FFT (default = 512) + remove_dc_offset: Subtract mean from waveform on each frame (default = True) + preemphasis_coeff: Coefficient for use in signal preemphasis (default = 0.97) + window_type: Type of window ["hamming"|"hanning"|"povey"|"rectangular"| + "blackmann"] (default = 'povey') + use_fft_mag: If false, it uses |X(f)|^2, if true, it uses |X(f)|, + (default = False) + dither: Dithering constant (0.0 means no dither) (default = 1) + fb_type: Filter-bank type in ["mel_kaldi", "mel_etsi", + "mel_librosa", "mel_librosa_htk", "linear"] + (default = 'mel_kaldi') + low_freq: Low cutoff frequency for mel bins (default = 20) + high_freq: High cutoff frequency for mel bins, if < 0, + offset from Nyquist (default = 0) + num_filters: Number of triangular mel-frequency bins (default = 23) + norm_filters: Normalize filters coeff to sum up to 1, if librosa + it uses Stanley norm (default = False) + snip_edges: If true, end effects will be handled by outputting only + frames that completely fit in the file, and the number of + frames depends on the frame-length. 
+ If false, the number of frames depends only on the + frame-shift, and we reflect the data at the ends. + (default = True) + center: If true, if puts the center of the frame at t*window_shift, starting at t=0, + If overwrides snip_edges and set it to False + energy_floor: Floor on energy (absolute, not relative) in MFCC computation + (default = 0) + raw_energy: If true, compute energy before preemphasis and + windowing (default = True) + use_energy: Use energy (not C0) in MFCC computation (default = True) + + """ + def __init__( self, fs=16000, @@ -526,29 +676,24 @@ def __init__( self._to_spec = _pow_spectrogram def forward(self, x): + """Computes the log-filter-banks. + + Args: + x: waveform tensor with shape = (batch, num_samples). + Returns: + Filter-bank tensor with shape = (batch, num_frames, num_filters) + """ x_strided = self.wav2win(x) if self.use_energy: x_strided, log_e = x_strided - # X = torch.rfft(x_strided, 1, normalized=False, onesided=True) X = _rfft(x_strided) - # logging.info('X={} {}'.format(X, X.type())) - # logging.info('X={}'.format(X.type())) pow_spec = self._to_spec(X) - # pow_spec = X.pow(2).sum(-1) - # # logging.info('p={} {} nan={}'.format(pow_spec, pow_spec.type(), torch.sum(torch.isnan(pow_spec)))) - # # logging.info('p={}'.format(pow_spec.type())) - # if self.use_fft_mag: - # pow_spec = pow_spec.sqrt() - with amp.autocast(enabled=False): pow_spec = torch.matmul(pow_spec.float(), self._fb.float()) - # logging.info('fb={} {}'.format(pow_spec, pow_spec.type())) - # logging.info('fb={}'.format(pow_spec.type())) + pow_spec = (pow_spec + 1e-10).log() - # logging.info('lfb={} {}'.format(pow_spec, pow_spec.type())) - # logging.info('lfb={}'.format(pow_spec.type())) if self.use_energy: pow_spec = torch.cat((log_e.unsqueeze(-1), pow_spec), dim=-1) @@ -556,6 +701,49 @@ def forward(self, x): class Wav2MFCC(Wav2FFT): + """Computes MFCC from waveforms. + + Attributes: + fs: Waveform data sample frequency (must match the waveform + file, if specified there) (default = 16000) + frame_length: Frame length in milliseconds (default = 25) + frame_shift: Frame shift in milliseconds (default = 10) + fft_length: Length of FFT (default = 512) + remove_dc_offset: Subtract mean from waveform on each frame (default = True) + preemphasis_coeff: Coefficient for use in signal preemphasis (default = 0.97) + window_type: Type of window ["hamming"|"hanning"|"povey"|"rectangular"| + "blackmann"] (default = 'povey') + use_fft_mag: If false, it uses |X(f)|^2, if true, it uses |X(f)|, + (default = False) + dither: Dithering constant (0.0 means no dither) (default = 1) + fb_type: Filter-bank type in ["mel_kaldi", "mel_etsi", + "mel_librosa", "mel_librosa_htk", "linear"] + (default = 'mel_kaldi') + low_freq: Low cutoff frequency for mel bins (default = 20) + high_freq: High cutoff frequency for mel bins, if < 0, + offset from Nyquist (default = 0) + num_filters: Number of triangular mel-frequency bins (default = 23) + norm_filters: Normalize filters coeff to sum up to 1, if librosa + it uses Stanley norm (default = False) + num_ceps: Number of cepstra in MFCC computation (including C0) + (default = 13) + snip_edges: If true, end effects will be handled by outputting only + frames that completely fit in the file, and the number of + frames depends on the frame-length. + If false, the number of frames depends only on the + frame-shift, and we reflect the data at the ends. 
+ (default = True) + center: If true, if puts the center of the frame at t*window_shift, starting at t=0, + If overwrides snip_edges and set it to False + cepstral_lifter: Constant that controls scaling of MFCCs (default = 22) + energy_floor: Floor on energy (absolute, not relative) in MFCC computation + (default = 0) + raw_energy: If true, compute energy before preemphasis and + windowing (default = True) + use_energy: Use energy (not C0) in MFCC computation (default = True) + + """ + def __init__( self, fs=16000, @@ -648,6 +836,15 @@ def make_lifter(N, Q): @staticmethod def make_dct_matrix(num_ceps, num_filters): + """Calculates the DCT Matrix. + + Args: + num_ceps: Number of cepstral coeffs. + num_filters: Number of filters. + + Returns + DCT matrix (num_ceps, num_filters) + """ n = torch.arange(float(num_filters)).unsqueeze(1) k = torch.arange(float(num_ceps)) dct = torch.cos( @@ -658,23 +855,25 @@ def make_dct_matrix(num_ceps, num_filters): return dct def forward(self, x): + """Computes the MFCC. + + Args: + x: Waveform tensor with shape = (batch, num_samples). + + Returns: + MFCC tensor with shape = (batch, num_frames, num_ceps) + """ x_strided = self.wav2win(x) if self.use_energy: x_strided, log_e = x_strided - # X = torch.rfft(x_strided, 1, normalized=False, onesided=True) X = _rfft(x_strided) pow_spec = self._to_spec(X) - # pow_spec = X.pow(2).sum(-1) - # if self.use_fft_mag: - # pow_spec = pow_spec.sqrt() - with amp.autocast(enabled=False): pow_spec = torch.matmul(pow_spec.float(), self._fb.float()) pow_spec = (pow_spec + 1e-10).log() - mfcc = torch.matmul(pow_spec, self._dct) if self.cepstral_lifter > 0: mfcc *= self._lifter @@ -689,6 +888,31 @@ class Wav2KanBayashiLogFilterBank(Wav2LogFilterBank): """Class to replicate log-filter-banks used in Kan Bayashi's ParallelWaveGAN repository: https://github.com/kan-bayashi/ParallelWaveGAN + + Attributes: + fs: Waveform data sample frequency (must match the waveform + file, if specified there) (default = 16000) + frame_length: Frame length in milliseconds + frame_shift: Frame shift in milliseconds + fft_length: Length of FFT (default = 512) + remove_dc_offset: Subtract mean from waveform on each frame (default = True) + window_type: Type of window ["hamming"|"hanning"|"povey"|"rectangular"| + "blackmann"] (default = 'povey') + fb_type: Filter-bank type in ["mel_kaldi", "mel_etsi", + "mel_librosa", "mel_librosa_htk", "linear"] + (default = 'mel_kaldi') + low_freq: Low cutoff frequency for mel bins (default = 20) + high_freq: High cutoff frequency for mel bins, if < 0, + offset from Nyquist (default = 0) + num_filters: Number of triangular mel-frequency bins (default = 23) + snip_edges: If true, end effects will be handled by outputting only + frames that completely fit in the file, and the number of + frames depends on the frame-length. + If false, the number of frames depends only on the + frame-shift, and we reflect the data at the ends. + (default = True) + center: If true, if puts the center of the frame at t*window_shift, starting at t=0, + If overwrides snip_edges and set it to False """ def __init__( @@ -730,6 +954,14 @@ def __init__( self.scale = 1.0 / math.log(10) def forward(self, x): + """Computes the Log filter banks using Kan Bayashi configuration. + + Args: + x: Waveform tensor with shape = (batch, num_samples). 
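
# A sketch of the DCT-II matrix assembled by make_dct_matrix() above
# (normalization omitted). Its (num_filters, num_ceps) orientation matches
# the torch.matmul(pow_spec, self._dct) call in forward().
import math
import torch

num_filters, num_ceps = 23, 13
n = torch.arange(float(num_filters)).unsqueeze(1)        # filter index, rows
k = torch.arange(float(num_ceps))                        # cepstral index, columns
dct = torch.cos(math.pi / num_filters * (n + 0.5) * k)   # (23, 13)
mfcc = torch.rand(4, 98, num_filters).matmul(dct)        # (4, 98, 13)
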
+
+        Returns:
+          Filter-bank tensor with shape = (batch, num_frames, num_filters)
+        """
         return self.scale * super().forward(x)
 
 
@@ -768,6 +1000,14 @@ def __init__(
         )
 
     def forward(self, x):
+        """Computes the log filter banks from spectrograms.
+
+        Args:
+          x: Spectrogram tensor with shape = (batch, num_frames, fft_length//2+1).
+
+        Returns:
+          Filter-bank tensor with shape = (batch, num_frames, num_filters)
+        """
         with amp.autocast(enabled=False):
             pow_spec = torch.matmul(x.float(), self._fb.float())
             pow_spec = (pow_spec + 1e-10).log()
diff --git a/hyperion/torch/layers/audio_feats_factory.py b/hyperion/torch/layers/audio_feats_factory.py
index ac463f07..71c3a8e8 100644
--- a/hyperion/torch/layers/audio_feats_factory.py
+++ b/hyperion/torch/layers/audio_feats_factory.py
@@ -6,7 +6,7 @@
 import re
 
 from ...utils.misc import str2bool
-from ...feats.filter_banks import FilterBankFactory as FBF
+from ...np.feats.filter_banks import FilterBankFactory as FBF
 from .audio_feats import *
 
 FFT = "fft"
@@ -20,6 +20,10 @@
 
 class AudioFeatsFactory(object):
+    """Factory class to create acoustic feature layers like
+    FFT, Spectrogram, log-Spectrogram, log-filter-bank, MFCC.
+    """
+
     @staticmethod
     def create(
         audio_feat,
@@ -45,6 +49,53 @@
         raw_energy=True,
         use_energy=True,
     ):
+        """
+        Method that creates acoustic feature layers like
+        FFT, Spectrogram, log-Spectrogram, log-filter-bank, MFCC.
+
+        Args:
+          audio_feat: Type of feature extractor in ["fft", "spec", "log_spec",
+                      "logfb", "mfcc", "kanbayashi_logfb"]. "kanbayashi_logfb"
+                      should produce features compatible with the WaveGAN repository.
+          sample_frequency: Waveform data sample frequency (must match the waveform
+                            file, if specified there) (default = 16000)
+          frame_length: Frame length in milliseconds (default = 25)
+          frame_shift: Frame shift in milliseconds (default = 10)
+          fft_length: Length of FFT (default = 512)
+          remove_dc_offset: Subtract mean from waveform on each frame (default = True)
+          preemphasis_coeff: Coefficient for use in signal preemphasis (default = 0.97)
+          window_type: Type of window ["hamming"|"hanning"|"povey"|"rectangular"|
+                       "blackmann"] (default = 'povey')
+          use_fft_mag: If false, it uses |X(f)|^2, if true, it uses |X(f)|,
+                       (default = False)
+          dither: Dithering constant (0.0 means no dither) (default = 1)
+          fb_type: Filter-bank type in ["mel_kaldi", "mel_etsi",
+                   "mel_librosa", "mel_librosa_htk", "linear"]
+                   (default = 'mel_kaldi')
+          low_freq: Low cutoff frequency for mel bins (default = 20)
+          high_freq: High cutoff frequency for mel bins, if < 0,
+                     offset from Nyquist (default = 0)
+          num_filters: Number of triangular mel-frequency bins (default = 23)
+          norm_filters: Normalize filters coeff to sum up to 1, if librosa
+                        it uses Slaney norm (default = False)
+          num_ceps: Number of cepstra in MFCC computation (including C0)
+                    (default = 13)
+          snip_edges: If true, end effects will be handled by outputting only
+                      frames that completely fit in the file, and the number of
+                      frames depends on the frame-length.
+                      If false, the number of frames depends only on the
+                      frame-shift, and we reflect the data at the ends.
+                      (default = True)
+          center: If true, it puts the center of the frame at t*window_shift,
+                  starting at t=0; it overrides snip_edges, setting it to False.
+          cepstral_lifter: Constant that controls scaling of MFCCs (default = 22)
+          energy_floor: Floor on energy (absolute, not relative) in MFCC computation
+                        (default = 0)
+          raw_energy: If true, compute energy before preemphasis and
+                      windowing (default = True)
+          use_energy: Use energy (not C0) in MFCC computation (default = True)
+
+        """
 
         if audio_feat == FFT:
             return Wav2FFT(
@@ -163,13 +214,13 @@ def create(
 
     @staticmethod
     def filter_args(**kwargs):
-        """Filters MFCC args from arguments dictionary.
+        """Filters feature extractor args from arguments dictionary.
 
         Args:
           kwargs: Arguments dictionary.
 
         Returns:
-          Dictionary with MFCC options.
+          Dictionary with feature extractor options.
         """
         valid_args = (
             "sample_frequency",
             "frame_length",
             "frame_shift",
             "fft_length",
             "remove_dc_offset",
             "preemphasis_coeff",
             "window_type",
             "blackman_coeff",
             "use_fft_mag",
             "dither",
             "fb_type",
             "low_freq",
             "high_freq",
             "num_filters",
             "norm_filters",
             "num_ceps",
             "snip_edges",
-            "energy_floor",
+            "center",
+            "energy_floor",
             "raw_energy",
             "use_energy",
             "cepstral_lifter",
@@ -201,7 +252,7 @@
 
     @staticmethod
     def add_class_args(parser, prefix=None):
-        """Adds MFCC options to parser.
+        """Adds feature extractor options to parser.
 
         Args:
           parser: Arguments parser
@@ -337,6 +388,5 @@
 
     if prefix is not None:
         outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
-        # help='acoustic features options')
 
 add_argparse_args = add_class_args
diff --git a/hyperion/torch/layers/calibrators.py b/hyperion/torch/layers/calibrators.py
index 4b38a858..51d363b8 100644
--- a/hyperion/torch/layers/calibrators.py
+++ b/hyperion/torch/layers/calibrators.py
@@ -8,10 +8,26 @@
 
 class LinBinCalibrator(nn.Module):
+    """Linear score calibrator.
+    Applies a scale and bias to a tensor.
+
+    Attributes:
+      a: Scale
+      b: Bias
+    """
+
     def __init__(self, a, b):
         super().__init__()
         self.a = a
         self.b = b
 
     def forward(self, x):
+        """Applies scale and bias to a tensor.
+
+        Args:
+          x: Input tensor.
+
+        Returns:
+          Calibrated tensor.
+        """
         return self.a * x + self.b
diff --git a/hyperion/torch/layers/dropout.py b/hyperion/torch/layers/dropout.py
index 6765baa5..22bff733 100644
--- a/hyperion/torch/layers/dropout.py
+++ b/hyperion/torch/layers/dropout.py
@@ -10,7 +10,21 @@
 
 class Dropout1d(Dropout2d):
+    """Dropout for tensors with 1d spatial (time) dimension (3d tensors).
+
+    Attributes:
+      p: Drop probability.
+    """
+
     def forward(self, inputs):
+        """Applies dropout 1d.
+
+        Args:
+          inputs: Input tensor with shape = (batch, C, time).
+
+        Returns:
+          Tensor with shape = (batch, C, time).
+        """
         x = torch.unsqueeze(inputs, dim=-2)
         x = F.dropout2d(x, self.p, self.training, self.inplace)
         return torch.squeeze(x, dim=-2)
@@ -24,6 +38,15 @@ def __str__(self):
 
 class DropConnect2d(nn.Module):
+    """DropConnect for tensors with 2d spatial dimensions (4d tensors).
+    It drops the full feature map. It is used to create residual networks
+    with stochastic depth.
+
+    Attributes:
+      p: Probability of dropping the feature map.
+
+    """
+
     def __init__(self, p=0.2):
         super().__init__()
         self.p = p
@@ -36,6 +59,14 @@ def __str__(self):
         return s
 
     def forward(self, inputs):
+        """Applies drop-connect.
+
+        Args:
+          inputs: Input tensor with shape = (batch, C, H, W).
+
+        Returns:
+          Tensor with shape = (batch, C, H, W).
+        """
         if not self.training:
             return inputs
@@ -51,6 +82,15 @@
 
 class DropConnect1d(nn.Module):
+    """DropConnect for tensors with 1d spatial dimensions (3d tensors).
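
# A sketch of why Dropout1d above routes through F.dropout2d: unsqueezing a
# (batch, C, time) tensor to (batch, C, 1, time) makes dropout2d zero out
# whole channels, which is the intended 1d behavior.
import torch
import torch.nn.functional as F

x = torch.randn(2, 4, 10)                                           # (batch, C, time)
y = F.dropout2d(x.unsqueeze(-2), p=0.5, training=True).squeeze(-2)
# entire channels of y are zeroed and survivors are rescaled by 1/(1-p)
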
+    It drops the full feature map. It is used to create residual networks
+    with stochastic depth.
+
+    Attributes:
+      p: Probability of dropping the feature map.
+
+    """
+
     def __init__(self, p=0.2):
         super().__init__()
         self.p = p
@@ -63,6 +103,14 @@ def __str__(self):
         return s
 
     def forward(self, inputs):
+        """Applies drop-connect.
+
+        Args:
+          inputs: Input tensor with shape = (batch, C, time).
+
+        Returns:
+          Tensor with shape = (batch, C, time).
+        """
         if not self.training:
             return inputs
diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py
index 5a2e960c..467ea589 100644
--- a/hyperion/torch/layers/global_pool.py
+++ b/hyperion/torch/layers/global_pool.py
@@ -10,7 +10,12 @@
 import torch.nn as nn
 import torch.nn.functional as nnf
 
+from ..utils.masking import seq_lengths_to_mask
+
 SQRT_EPS = 1e-5
+N_EPS = 1e-6
 
 
 def _conv1(in_channels, out_channels, bias=False):
@@ -19,19 +24,34 @@
 
 class _GlobalPool1d(nn.Module):
+    """Abstract base class for global pooling in 1d.
+
+    Attributes:
+      dim: Pooling dimension
+      keepdim: If True, it keeps the same number of dimensions after pooling
+
+    """
+
     def __init__(self, dim=-1, keepdim=False):
         super().__init__()
         self.dim = dim
         self.keepdim = keepdim
         self.size_multiplier = 1
 
-    def _standarize_weights(self, weights, ndims):
+    def _standardize_weights(self, x, x_lengths=None, weights=None):
+        """standardizes the weights to have the proper shape to be
+        multiplied by the input data.
+        """
+        if weights is None:
+            return seq_lengths_to_mask(
+                x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=self.dim
+            )
 
-        if weights.dim() == ndims:
+        if weights.dim() == x.dim():
             return weights
 
         assert weights.dim() == 2
-        shape = ndims * [1]
+        shape = x.dim() * [1]
         shape[0] = weights.shape[0]
         shape[self.dim] = weights.shape[1]
         return weights.view(tuple(shape))
@@ -68,21 +88,30 @@
 
 class GlobalAvgPool1d(_GlobalPool1d):
     """Global average pooling in 1d
 
     Attributes:
-      dim: pooling dimension
-      keepdim: it True keeps the same number of dimensions after pooling
+      dim: Pooling dimension
+      keepdim: if True, it keeps the same number of dimensions after pooling
 
     """
 
     def __init__(self, dim=-1, keepdim=False):
         super().__init__(dim, keepdim)
 
-    def forward(self, x, weights=None):
+    def forward(self, x, x_lengths=None, weights=None):
+        """Applies pooling to the input.
+
+        Args:
+          x: Input tensor.
+          x_lengths: Lengths of the input sequences in the pooling dimension.
+            x_lengths is only used if weights is not given.
+          weights: Weights for weighted pooling with shape=(batch, max_length)
+            or (batch,..., max_length,...)
with shape matching the one + of the input tensor + """ + weights = self._standardize_weights(x, x_lengths, weights) if weights is None: y = torch.mean(x, dim=self.dim, keepdim=self.keepdim) return y - weights = self._standarize_weights(weights, x.dim()) - xbar = torch.mean(weights * x, dim=self.dim, keepdim=self.keepdim) wbar = torch.mean(weights, dim=self.dim, keepdim=self.keepdim) return xbar / wbar @@ -146,8 +175,8 @@ class GlobalMeanStdPool1d(_GlobalPool1d): """Global mean + standard deviation pooling in 1d Attributes: - dim: pooling dimension - keepdim: it True keeps the same number of dimensions after pooling + dim: Pooling dimension + keepdim: If True, it keeps the same number of dimensions after pooling """ @@ -155,7 +184,18 @@ def __init__(self, dim=-1, keepdim=False): super().__init__(dim, keepdim) self.size_multiplier = 2 - def forward(self, x, weights=None): + def forward(self, x, x_lengths=None, weights=None): + """Applies pooling to the input. + + Args: + x: Input tensor. + x_lengths: Lengths of the input sequences in the pooling dimension. + x_lengths is only used if weights is not given. + weights: Weights for weighted pooling with shape=(batch, max_length) + or (batch,..., max_length,...) with shape matching the one + of the input tensor + """ + weights = self._standardize_weights(x, x_lengths, weights) if weights is None: mu = torch.mean(x, dim=self.dim, keepdim=True) delta = x - mu @@ -173,7 +213,6 @@ def forward(self, x, weights=None): return mus - weights = self._standarize_weights(weights, x.dim()) xbar = torch.mean(weights * x, dim=self.dim, keepdim=True) wbar = torch.mean(weights, dim=self.dim, keepdim=True) mu = xbar / wbar @@ -342,8 +381,8 @@ class GlobalMeanLogVarPool1d(_GlobalPool1d): """Global mean + log-variance pooling in 1d Attributes: - dim: pooling dimension - keepdim: it True keeps the same number of dimensions after pooling + dim: Pooling dimension + keepdim: If True, it keeps the same number of dimensions after pooling """ @@ -351,15 +390,24 @@ def __init__(self, dim=-1, keepdim=False): super().__init__(dim, keepdim) self.size_multiplier = 2 - def forward(self, x, weights=None): + def forward(self, x, x_lengths=None, weights=None): + """Applies pooling to the input. + + Args: + x: Input tensor. + x_lengths: Lengths of the input sequences in the pooling dimension. + x_lengths is only used if weights is not given. + weights: Weights for weighted pooling with shape=(batch, max_length) + or (batch,..., max_length,...) with shape matching the one + of the input tensor + """ + weights = self._standardize_weights(x, x_lengths, weights) if weights is None: mu = torch.mean(x, dim=self.dim, keepdim=self.keepdim) x2bar = torch.mean(x ** 2, dim=self.dim, keepdim=self.keepdim) logvar = torch.log(x2bar - mu * mu + 1e-5) # for stability in case var=0 return torch.cat((mu, logvar), dim=-1) - weights = self._standarize_weights(weights, x.dim()) - xbar = torch.mean(weights * x, dim=self.dim, keepdim=self.keepdim) wbar = torch.mean(weights, dim=self.dim, keepdim=self.keepdim) mu = xbar / wbar @@ -371,15 +419,16 @@ def forward(self, x, weights=None): class LDEPool1d(_GlobalPool1d): - """Learnable dictionary encoder pooling in 1d + """Learnable dictionary encoder pooling in 1d. + It only works for 3d tensors. 
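
# A numerical sketch of the weighted statistics used by the pooling layers
# above: with binary weights, mean(w * x) / mean(w) equals the mean over the
# valid frames only.
import torch

x = torch.tensor([[[1.0, 3.0, 99.0]]])   # (batch, feat_dim, time); last frame is padding
w = torch.tensor([[[1.0, 1.0, 0.0]]])

mu = torch.mean(w * x, dim=-1, keepdim=True) / torch.mean(w, dim=-1, keepdim=True)
# mu == 2.0, the mean of the two valid frames
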
    Attributes:
-      in_feats: input feature dimension
-      num_comp: number of cluster components
-      dist_pow: power for distance metric
-      use_bias: use bias parameter when computing posterior responsibility
-      dim: pooling dimension
-      keepdim: it True keeps the same number of dimensions after pooling
+      in_feats: Input feature dimension.
+      num_comp: Number of cluster components.
+      dist_pow: Power for distance metric.
+      use_bias: Use bias parameter when computing posterior responsibility.
+      dim: Pooling dimension.
+      keepdim: if True, it keeps the same number of dimensions after pooling.
 
     """
@@ -426,29 +475,52 @@ def __str__(self):
         )
         return s
 
-    def forward(self, x, weights=None):
+    def _standardize_weights(self, x, x_lengths=None, weights=None):
+        """standardizes the weights to have shape (batch, max_length)."""
+        if weights is None:
+            return seq_lengths_to_mask(x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=1)
+
+        if weights.dim() == x.dim():
+            return weights.transpose(1, self.dim)
+
+        assert weights.dim() == 2
+        return weights
+
+    def forward(self, x, x_lengths=None, weights=None):
+        """Applies pooling to the input.
+
+        Args:
+          x: Input tensor of shape=(batch, time, feat_dim) or (batch, feat_dim, time).
+          x_lengths: Lengths of the input sequences in the pooling dimension.
+            x_lengths is only used if weights is not given.
+          weights: Weights for weighted pooling with shape=(batch, max_length)
+            or (batch,..., max_length,...) with shape matching the one
+            of the input tensor.
+        """
+        weights = self._standardize_weights(x, x_lengths, weights)
         if self.dim != 1 or self.dim != -2:
-            x = x.transpose(1, self.dim)
+            x = x.transpose(1, self.dim)  # (batch, time, feat_dim)
 
-        x = torch.unsqueeze(x, dim=2)
-        delta = x - self.mu
-        dist = self.dist_f(delta)
+        x = torch.unsqueeze(x, dim=2)  # (batch, time, 1, feat_dim)
+        delta = x - self.mu  # (batch, time, num_comp, feat_dim)
+        dist = self.dist_f(delta)  # (batch, time, num_comp)
         llk = -self.prec ** 2 * dist + self.bias
-        r = nnf.softmax(llk, dim=-1)
+        r = nnf.softmax(llk, dim=-1)  # (batch, time, num_comp)
         if weights is not None:
             r *= weights
-        r = torch.unsqueeze(r, dim=-1)
-        N = torch.sum(r, dim=1) + 1e-9
-        F = torch.sum(r * delta, dim=1)
-        pool = F / N
+        r = torch.unsqueeze(r, dim=-1)  # (batch, time, num_comp, 1)
+        N = torch.sum(r, dim=1) + N_EPS  # (batch, num_comp, 1)
+        F = torch.sum(r * delta, dim=1)  # (batch, num_comp, feat_dim)
+        pool = F / N  # (batch, num_comp, feat_dim)
         pool = pool.contiguous().view(-1, self.num_comp * self.in_feats)
+        # (batch, num_comp * feat_dim)
 
         if self.keepdim:
             if self.dim == 1 or self.dim == -2:
-                pool.unsqueeze_(1)
+                pool = pool.unsqueeze(1)
             else:
-                pool.unsqueeze_(-1)
+                pool = pool.unsqueeze(-1)
 
         return pool
@@ -466,6 +538,23 @@ def get_config(self):
 
 class ScaledDotProdAttV1Pool1d(_GlobalPool1d):
+    """Scaled dot product attention pooling in 1d.
+    The attention weights are obtained by scaled inner product
+    between the feature frames and learned parameters contained
+    inside the layer.
+    This class only works on 3d tensors.
+
+    Attributes:
+      in_feats: Input feature dimension.
+      num_heads: Number of attention heads.
+      d_k: Dimension of the keys.
+      d_v: Dimension of the values.
+      bin_attn: If True, use binary attention. Attention values are obtained by
+        applying sigmoid to the dot products instead of softmax.
+      dim: Pooling dimension.
+      keepdim: if True, it keeps the same number of dimensions after pooling.
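
# A shape sketch of the attention pooling described above (sizes assumed):
# a learned query attends over the time axis and the attention-weighted
# values form the pooled embedding.
import torch

batch, time, num_heads, d_k, d_v = 4, 200, 8, 32, 32
k = torch.randn(batch, num_heads, time, d_k)   # keys derived from the input
q = torch.randn(1, num_heads, 1, d_k)          # learned query parameter
v = torch.randn(batch, num_heads, time, d_v)   # values derived from the input

scores = torch.matmul(q, k.transpose(-2, -1)) / d_k ** 0.5   # (4, 8, 1, 200)
attn = torch.softmax(scores, dim=-1)
pooled = torch.matmul(attn, v).view(batch, num_heads * d_v)  # (4, 256)
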
+ """ + def __init__( self, in_feats, num_heads, d_k, d_v, bin_attn=False, dim=-1, keepdim=False ): @@ -505,9 +594,32 @@ def __str__(self): ) return s - def forward(self, x, weights=None): + def _standardize_weights(self, x, x_lengths=None, weights=None): + """standardizes the weights to have shape (batch, max_length).""" + if weights is None: + return seq_lengths_to_mask(x, x.size(self.dim), dtype=x.dtype, time_dim=1) + + if weights.dim() == x.dim(): + return weights.traspose(1, self.dim) + + assert weights.dim() == 2 + return weights + + def forward(self, x, x_lengths=None, weights=None): + """Applies pooling to the input. + + Args: + x: Input tensor of shape=(batch, time, feat_dim) or (batch, feat_dim, time). + x_lengths: Lengths of the input sequences in the pooling dimension. + x_lengths is only used if weights is not given. + weights: Weights for weighted pooling with shape=(batch, max_length) + or (batch,..., max_length,...) with shape matching the one + of the input tensor. In this implementation only binary weights + are allowed. + """ + weights = self._standardize_weights(x, x_lengths, weights) batch_size = x.size(0) - if self.dim != 1: + if self.dim == 2 or self.dim == -1: x = x.transpose(1, self.dim) k = self.linear_k(x).view(batch_size, -1, self.num_heads, self.d_k) @@ -519,16 +631,20 @@ def forward(self, x, weights=None): self.d_k ) # (batch, head, 1, time) if self.bin_attn: + # use binary attention. scores = torch.sigmoid(scores + self.bias) # scores = scores.squeeze(dim=-1) # (batch, head, time) if weights is not None: - mask = weights.view(batch_size, 1, 1, -1).eq(0) # (batch, 1, 1,time) + mask = weights.view(batch_size, 1, 1, -1).eq(0) # (batch, 1, 1, time) if self.bin_attn: scores = scores.masked_fill(mask, 0.0) self.attn = scores / (torch.sum(scores, dim=-1, keepdim=True) + 1e-9) else: - min_value = -1e200 + if scores.dtype == torch.half: + min_value = -65504 + else: + min_value = -1e200 scores = scores.masked_fill(mask, min_value) self.attn = torch.softmax(scores, dim=-1).masked_fill( mask, 0.0 @@ -541,7 +657,14 @@ def forward(self, x, weights=None): x = torch.matmul(self.attn, v) # (batch, head, 1, d_v) if self.keepdim: - x = x.view(batch_size, 1, self.num_heads * self.d_v) # (batch, 1, d_model) + if self.dim == 1 or self.dim == -2: + x = x.view( + batch_size, 1, self.num_heads * self.d_v + ) # (batch, 1, d_model) + else: + x = x.view( + batch_size, 1, self.num_heads * self.d_v + ) # (batch, d_model, 1) else: x = x.view(batch_size, self.num_heads * self.d_v) # (batch, d_model) return x @@ -560,7 +683,20 @@ def get_config(self): class GlobalChWiseAttMeanStdPool1d(_GlobalPool1d): - """Attentive mean + stddev pooling for each channel""" + """Attentive mean + stddev pooling for each channel. + This class only works on 3d tensors. + + Attributes: + in_feats: Input feature dimension. + inner_feats: Feature dimension in the hidden layer of the content based attention. + bin_attn: If True, use binary attention. Attention values are obtained by applying sigmoid to + the dot products instead of softmax. + use_global_context: If True, concat global stats pooling to the input features to + compute the attention. + norm_layer: Normalization layer object, if None, it used BatchNorm1d. + dim: Pooling dimension. + keepdim: it True, it keeps the same number of dimensions after pooling. 
+ """ def __init__( self, @@ -588,9 +724,9 @@ def __init__( self.norm_layer = norm_layer(inner_feats) self.activation = nn.Tanh() self.conv2 = _conv1(inner_feats, in_feats, bias=True) - self.stats_pool = GlobalMeanStdPool1d(dim=dim) + self.stats_pool = GlobalMeanStdPool1d(dim=-1) if self.bin_attn: - self.bias = nn.Parameter(torch.ones((1, in_feats, 1))) + self.bias = nn.Parameter(torch.zeros((1, in_feats, 1))) def __repr__(self): return self.__str__() @@ -607,23 +743,69 @@ def __str__(self): ) return s - def forward(self, x, weights=None): + def _standardize_weights(self, x, x_lengths=None, weights=None): + """standardizes the weights to have the proper shape to be + multiplied by the input data. + """ + if weights is None: + return seq_lengths_to_mask(x, x.size(self.dim), dtype=x.dtype, time_dim=-1) - x_inner = self.conv1(x) + if weights.dim() == x.dim(): + return weights.transpose(self.dim, -1) + + assert weights.dim() == 2 + shape = x.dim() * [1] + shape[0] = weights.shape[0] + shape[-1] = weights.shape[1] + return weights.view(tuple(shape)) + + def forward(self, x, x_lengths=None, weights=None): + """Applies pooling to the input. + + Args: + x: Input tensor of shape=(batch, time, feat_dim) or (batch, feat_dim, time). + x_lengths: Lengths of the input sequences in the pooling dimension. + x_lengths is only used if weights is not given. + weights: Weights for weighted pooling with shape=(batch, max_length) + or (batch,..., max_length,...) with shape matching the one + of the input tensor. + """ + assert x.dim() == 3, "Input should be a 3d tensor" + if self.dim == 1 or self.dim == -2: + x = x.transpose(1, self.dim) + + # x = (batch, feat_dim, time) + weights = self._standardize_weights(x, x_lengths, weights) # (batch, 1, time) + x_inner = self.conv1(x) # (batch, inner_dim, time) # logging.info('x_inner1={} {}'.format(torch.sum(torch.isnan(x_inner)), torch.sum(torch.isinf(x_inner)))) if self.use_global_context: - global_mus = self.stats_pool(x) + global_mus = self.stats_pool(x, weights=weights) x_inner = x_inner + self.lin_global(global_mus).unsqueeze(-1) # logging.info('x_inner2={} {}'.format(torch.sum(torch.isnan(x_inner)), torch.sum(torch.isinf(x_inner)))) - attn = self.conv2(self.activation(self.norm_layer(x_inner))) + attn = self.conv2( + self.activation(self.norm_layer(x_inner)) + ) # (batch, feat_dim, time) if self.bin_attn: - # attn = torch.sigmoid(attn+self.bias) - attn = torch.sigmoid(attn) + attn = torch.sigmoid(attn + self.bias).clamp(min=N_EPS) else: + if weights is not None: + if attn.dtype == torch.half: + min_value = -65504 + else: + min_value = -1e200 + mask = weights.eq(0) + attn = attn.masked_fill(mask, min_value) + attn = nnf.softmax(attn, dim=-1) + if weights is not None: + attn = attn * weights + mus = self.stats_pool(x, weights=attn) # logging.info('mus={} {}'.format(torch.sum(torch.isnan(mus)), torch.sum(torch.isinf(mus)))) + if self.keepdim: + mus = mus.unsqueeze(self.dim) + return mus def get_config(self): diff --git a/hyperion/torch/layers/interpolate.py b/hyperion/torch/layers/interpolate.py index fa76fd2a..94b3d2ed 100644 --- a/hyperion/torch/layers/interpolate.py +++ b/hyperion/torch/layers/interpolate.py @@ -9,6 +9,13 @@ class Interpolate(nn.Module): + """Interpolation class. + + Attributes: + scale_factor: upsampling scale factor. + mode: algorithm used for upsampling: 'nearest' | 'linear' | 'bilinear' | 'bicubic' | 'trilinear' | 'area'. 
+ """ + def __init__(self, scale_factor, mode="nearest"): super().__init__() self.interp = nnf.interpolate @@ -24,5 +31,13 @@ def __repr__(self): return s def forward(self, x): + """Interpolates the input. + + Args: + x: input tensor. + + Returns: + Interpolated tensor. + """ x = self.interp(x, scale_factor=self.scale_factor, mode=self.mode) return x diff --git a/hyperion/torch/layers/margin_losses.py b/hyperion/torch/layers/margin_losses.py index 36fd2a5f..5ae2b518 100644 --- a/hyperion/torch/layers/margin_losses.py +++ b/hyperion/torch/layers/margin_losses.py @@ -20,6 +20,17 @@ def _l2_norm(x, axis=-1): class ArcLossOutput(nn.Module): + """Additive angular margin softmax (ArcFace) output layer. + + Attributes: + in_feats: input feature dimension. + num_classes: number of output classes. + cos_scale: cosine scale. + margin: angular margin. + margin_warmup_epochs: number of epochs to warm up the margin from 0 to + its final value. + """ + def __init__( self, in_feats, num_classes, cos_scale=64, margin=0.3, margin_warmup_epochs=0 ): @@ -59,6 +70,11 @@ def _compute_aux(self): self.sin_m = math.sin(self.cur_margin) def update_margin(self, epoch): + """Updates the value of the margin. + + Args: + epoch: value of current epoch. + """ if self.margin_warmup_epochs == 0: return @@ -73,6 +89,16 @@ def update_margin(self, epoch): self._compute_aux() def forward(self, x, y=None): + """Computes penalized logits. + + Args: + x: input feature tensor with shape = (batch, in_feats). + y: ground truth classes. This is required to penalize the logit of + the true class at training time. + + Returns: + Logit tensor with shape = (batch, num_classes) + """ with amp.autocast(enabled=False): s = self.cos_scale batch_size = len(x) @@ -98,6 +124,17 @@ def forward(self, x, y=None): class CosLossOutput(nn.Module): + """Additive margin softmax (CosFace) output layer. + + Attributes: + in_feats: input feature dimension. + num_classes: number of output classes. + cos_scale: cosine scale. + margin: angular margin. + margin_warmup_epochs: number of epochs to warm up the margin from 0 to + its final value. + """ + def __init__( self, in_feats, num_classes, cos_scale=64, margin=0.3, margin_warmup_epochs=0 ): @@ -116,6 +153,11 @@ def __init__( self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5) def update_margin(self, epoch): + """Updates the value of the margin. + + Args: + epoch: value of current epoch. + """ if self.margin_warmup_epochs == 0: return @@ -130,6 +172,16 @@ def update_margin(self, epoch): return def forward(self, x, y=None): + """Computes penalized logits. + + Args: + x: input feature tensor with shape = (batch, in_feats). + y: ground truth classes. This is required to penalize the logit of + the true class at training time. + + Returns: + Logit tensor with shape = (batch, num_classes) + """ with amp.autocast(enabled=False): s = self.cos_scale x = _l2_norm(x.float()) @@ -152,6 +204,18 @@ def forward(self, x, y=None): class SubCenterArcLossOutput(ArcLossOutput): + """Sub-Center Additive angular margin softmax (ArcFace) output layer. + + Attributes: + in_feats: input feature dimension. + num_classes: number of output classes. + num_subcenters: number of subcenters. + cos_scale: cosine scale. + margin: angular margin. + margin_warmup_epochs: number of epochs to warm up the margin from 0 to + its final value. + """ + def __init__( self, in_feats, @@ -184,6 +248,16 @@ def __str__(self): return s def forward(self, x, y=None): + """Computes penalized logits. 
+
+        Args:
+          x: input feature tensor with shape = (batch, in_feats).
+          y: ground truth classes. This is required to penalize the logit of
+            the true class at training time.
+
+        Returns:
+          Logit tensor with shape = (batch, num_classes).
+        """
         with amp.autocast(enabled=False):
             s = self.cos_scale
             batch_size = len(x)
diff --git a/hyperion/torch/layers/mvn.py b/hyperion/torch/layers/mvn.py
index 3ee1e121..4f569089 100644
--- a/hyperion/torch/layers/mvn.py
+++ b/hyperion/torch/layers/mvn.py
@@ -9,11 +9,22 @@


 class MeanVarianceNorm(nn.Module):
+    """Class to apply short-time mean-variance normalization to features.
+
+    Attributes:
+      norm_mean: if True, it normalizes the mean.
+      norm_var: if True, it also normalizes the variance.
+      left_context: left context for the window that computes the normalization stats.
+      right_context: right context for the window that computes the normalization stats.
+      dim: normalization dimension (time dimension).
+
+    If left_context = right_context = 0, it computes the stats on the whole utterance.
+    """

     def __init__(
         self, norm_mean=True, norm_var=False, left_context=0, right_context=0, dim=1
     ):
-        super(MeanVarianceNorm, self).__init__()
+        super().__init__()
         self.norm_mean = norm_mean
         self.norm_var = norm_var
         self.left_context = left_context
@@ -35,6 +46,14 @@ def __str__(self):
         return s

     def forward(self, x):
+        """Short-time mean-var normalizes feature tensor.
+
+        Args:
+          x: feature tensor.
+
+        Returns:
+          Normalized feature tensor.
+        """
         T = x.shape[self.dim]

         if (self.left_context == 0 and self.right_context == 0) or (
@@ -45,6 +64,7 @@
         return self.normalize_cumsum(x)

     def normalize_global(self, x):
+        """Applies global mean-var normalization."""
         # Global mean/var norm.
         if self.norm_mean:
             m_x = torch.mean(x, dim=self.dim, keepdim=True)
@@ -57,7 +77,7 @@
         return x

     def normalize_cumsum(self, x):
-
+        """Applies short-time mean-var normalization using cumulative sums."""
         if self.norm_mean:
             # substract first global mean
             # it will help cumsum numerical stability
@@ -99,13 +119,13 @@

     @staticmethod
     def filter_args(**kwargs):
-        """Filters ST-CMVN args from arguments dictionary.
+        """Filters ST-MVN args from arguments dictionary.

         Args:
           kwargs: Arguments dictionary.

         Returns:
-          Dictionary with ST-CMVN options.
+          Dictionary with ST-MVN options.
         """

         valid_args = (
diff --git a/hyperion/torch/layers/norm_layer_factory.py b/hyperion/torch/layers/norm_layer_factory.py
index cd7e542f..8543b31b 100644
--- a/hyperion/torch/layers/norm_layer_factory.py
+++ b/hyperion/torch/layers/norm_layer_factory.py
@@ -7,6 +7,10 @@


 class NormLayer2dFactory(object):
+    """Factory class to create normalization layers for
+    tensors with 2D spatial dimension.
+    """
+
     @staticmethod
     def create(norm_name, num_groups=None, momentum=0.1, eps=1e-5):
         """Creates a layer-norm callabe constructor
@@ -54,6 +58,10 @@


 class NormLayer1dFactory(object):
+    """Factory class to create normalization layers for
+    tensors with 1D spatial (time) dimension.
+ """ + @staticmethod def create(norm_name, num_groups=None, momentum=0.1, eps=1e-5): """Creates a layer-norm callabe constructor diff --git a/hyperion/torch/layers/pdf_storage.py b/hyperion/torch/layers/pdf_storage.py index bac48d27..f3f34b37 100644 --- a/hyperion/torch/layers/pdf_storage.py +++ b/hyperion/torch/layers/pdf_storage.py @@ -10,18 +10,22 @@ class StdNormal(nn.Module): - """Storage for Standard Normal distribution""" + """Storage for Standard Normal distribution parameters + + Attributes: + shape: shape of the location/scale tensors. + """ def __init__(self, shape): super().__init__() self.register_buffer("loc", torch.zeros(shape)) self.register_buffer("scale", torch.ones(shape)) - # self.loc = nn.Parameter(torch.zeros(shape), requires_grad=False) - # self.scale = nn.Parameter(torch.ones(shape), requires_grad=False) @property def pdf(self): + """Probability density function for N(0,I).""" return pdf.normal.Normal(self.loc, self.scale) def forward(self): + """Probability density function for N(0,I).""" return self.pdf diff --git a/hyperion/torch/layers/pool_factory.py b/hyperion/torch/layers/pool_factory.py index 41cf2ac2..fa1032a8 100644 --- a/hyperion/torch/layers/pool_factory.py +++ b/hyperion/torch/layers/pool_factory.py @@ -9,6 +9,8 @@ class GlobalPool1dFactory(object): + """Factory class to create global pooling layers 1d.""" + @staticmethod def create( pool_type, @@ -27,6 +29,28 @@ def create( keepdim=False, **kwargs ): + """Creates a global pooling layer from arguments. + + Args: + pool_type: pooling type in ["avg", "mean+stddev", "mean+logvar", "lde", + "scaled-dot-prod-att-v1", "ch-wise-att-mean+stddev"] + in_feats: input feature dimension. + inner_feats: feature dimension in the hidden layer of the content based attention, + in channel-wise attention. + num_comp: number of LDE components. + dist_power: distance type in LDE in L1 or L2. + use_bias: use bias in LDE. + num_heads: number of attention heads. + d_k: dimension of the keys in scaled dot product attn. + d_v: dimension of the values in scaled dot product attn. + bin_attn: it True, use binary attention. Attention values are obtained by applying sigmoid to + the dot products instead of softmax. + use_global_context: if True, concat global stats pooling to the input features to + compute the attention in channel-wise attention. + norm_layer: normalization layer object, if None, it used BatchNorm1d. + dim: pooling dimension. + keepdim: it True keeps the same number of dimensions after pooling. + """ if pool_type == "avg": return GlobalAvgPool1d(dim=dim, keepdim=keepdim) @@ -71,6 +95,14 @@ def create( @staticmethod def filter_args(**kwargs): + """Filters the arguments corresponding to the creation of a pooling layer. + + Args: + kwargs: Arguments dictionary. + + Returns: + Dictionary with the pooling layer options. + """ if "wo_bias" in kwargs: kwargs["use_bias"] = not kwargs["wo_bias"] diff --git a/hyperion/torch/layers/spec_augment.py b/hyperion/torch/layers/spec_augment.py index ecb3609f..1366172b 100644 --- a/hyperion/torch/layers/spec_augment.py +++ b/hyperion/torch/layers/spec_augment.py @@ -17,8 +17,10 @@ class AxisMasker(nn.Module): Implementation based on espnet. Attributes: - mask_width_range: range for the width of the masks - mask_num_range: range for the number of masks + min_width: minimum width of the mask. + max_width: maximum width of the mask. + min_num_mask: minimum number of masks. + max_num_mask: maximum number of masks. 
       dim: axis where we apply the mask
       fill_value: masking value
     """
@@ -121,7 +123,9 @@ class SpecWarper(nn.Module):
     Implementation based on espnet.

     Attributes:
-      window: time warp parameter
+      window: time warp parameter.
+      mode: interpolation mode in ["nearest", "linear", "bilinear"].
+      dim: warping dimension.
     """

     def __init__(self, window=80, mode="bicubic", dim=-2):
@@ -136,14 +140,14 @@ def __repr__(self):
         )
         return s

-    def forward(self, x, lengths=None):
+    def forward(self, x, x_lengths=None):
         """warps x along time or freq dimension
         Args:
-          x: spectrogram (batch, *, time, freq)
-          lengths: length ratios
+          x: spectrogram shape=(batch, *, time, freq)
+          x_lengths: time lengths of the sequences.
         Returns:
-          warped spectrogram (batch, *, time, freq)
+          warped spectrogram shape=(batch, *, time, freq)
         """
         if not self.training:
             return x
@@ -166,10 +170,10 @@
         # the first n frames where n is the length of the
         # shortest utterance
         # the end of the utterance will not be warped
-        if dim == -1 or lengths is None:
+        if dim == -1 or x_lengths is None:
             warp_length = x.shape[-2]
         else:
-            warp_length = int(x.shape[-2] * torch.min(lengths))
+            warp_length = int(x.shape[-2] * torch.min(x_lengths))

         center = torch.randint(self.window, warp_length - self.window, (1,))[0]
         warped = torch.randint(center - self.window, center + self.window, (1,))[0] + 1
@@ -208,6 +212,20 @@ class SpecAugment(nn.Module):
     Augmentation Method for Automatic Speech Recognition"

     Attributes:
+      time_warp_prob: probability of applying time warping.
+      time_warp_window: time warp parameter.
+      time_warp_mode: interpolation mode in ["nearest", "linear", "bilinear"].
+      time_mask_prob: probability of applying masking in time.
+      time_min_width: minimum width of the time mask.
+      time_max_width: maximum width of the time mask.
+      time_min_num_mask: minimum number of time masks.
+      time_max_num_mask: maximum number of time masks.
+      freq_mask_prob: probability of applying frequency masking.
+      freq_min_width: minimum width of the frequency mask.
+      freq_max_width: maximum width of the frequency mask.
+      freq_min_num_mask: minimum number of frequency masks.
+      freq_max_num_mask: maximum number of frequency masks.
+      fill_value: masking value.
     """

     def __init__(
@@ -287,7 +305,14 @@ def __repr__(self):
         )
         return s

-    def forward(self, x, lengths=None):
+    def forward(self, x, x_lengths=None):
+        """Applies spec augment to the input.
+
+        Args:
+          x: spectrogram with shape = (batch, time, freq)
+          x_lengths: time lengths of the sequences.
+
+        Returns:
+          Augmented spectrogram with shape = (batch, time, freq)
+        """
         if not self.training:
             return x
         # global count
@@ -300,7 +325,7 @@
         # ax.imshow(x.cpu().numpy()[0].T)
         r = torch.rand((3,), device=x.device)
         if self.time_warp_prob > r[0]:
-            x = self.time_warper(x, lengths)
+            x = self.time_warper(x, x_lengths)

         # ax = plt.subplot(222)
         # ax.imshow(x.cpu().numpy()[0].T)
@@ -319,6 +344,7 @@
         # count += 1
         return x

+    @staticmethod
     def filter_args(**kwargs):
         """Filters SpecAugment args from arguments dictionary.
diff --git a/hyperion/torch/layers/subpixel_convs.py b/hyperion/torch/layers/subpixel_convs.py
index 6b529aff..19c0283f 100644
--- a/hyperion/torch/layers/subpixel_convs.py
+++ b/hyperion/torch/layers/subpixel_convs.py
@@ -9,6 +9,22 @@


 class SubPixelConv1d(nn.Module):
+    """Implements a SubPixel Convolution in 1d proposed in:
+    https://arxiv.org/abs/1609.05158
+
+    Attributes:
+      in_channels: Number of input channels.
+      out_channels: Number of output channels.
+      kernel_size: Kernel size.
+      stride: Downsampling stride.
+      padding: Int or Int Tuple with the number of left/right padding samples.
+      dilation: Kernel dilation.
+      groups: Number of groups in the convolution.
+      bias: If True, the convolution has bias.
+      padding_mode: Padding mode in ['zeros', 'reflect', 'replicate' or 'circular'].
+
+    """
+
     def __init__(
         self,
         in_channels,
@@ -38,6 +54,14 @@
         self.stride = stride

     def forward(self, x):
+        """Applies subpixel convolution 1d.
+
+        Args:
+          x: Input tensor with shape = (batch, in_channels, in_time)
+
+        Returns:
+          Output tensor with shape = (batch, out_channels, out_time)
+        """
         x = self.conv(x)
         if self.stride == 1:
             return x
@@ -51,6 +75,22 @@


 class SubPixelConv2d(nn.Module):
+    """Implements a SubPixel Convolution in 2d proposed in:
+    https://arxiv.org/abs/1609.05158
+
+    Attributes:
+      in_channels: Number of input channels.
+      out_channels: Number of output channels.
+      kernel_size: Kernel size.
+      stride: Downsampling stride.
+      padding: Int or Int Tuple with the number of left/right padding samples.
+      dilation: Kernel dilation.
+      groups: Number of groups in the convolution.
+      bias: If True, the convolution has bias.
+      padding_mode: Padding mode in ['zeros', 'reflect', 'replicate' or 'circular'].
+
+    """
+
     def __init__(
         self,
         in_channels,
@@ -81,6 +121,14 @@
         self.pixel_shuffle = nn.PixelShuffle(self.stride)

     def forward(self, x):
+        """Applies subpixel convolution 2d.
+
+        Args:
+          x: Input tensor with shape = (batch, in_channels, in_W, in_H)
+
+        Returns:
+          Output tensor with shape = (batch, out_channels, out_W, out_H)
+        """
         x = self.conv(x)
         if self.stride == 1:
             return x
diff --git a/hyperion/torch/layers/swish.py b/hyperion/torch/layers/swish.py
index 520a71fb..a313455e 100644
--- a/hyperion/torch/layers/swish.py
+++ b/hyperion/torch/layers/swish.py
@@ -7,6 +7,8 @@


 class SwishImplementation(torch.autograd.Function):
+    """Implementation for Swish activation function."""
+
     @staticmethod
     def forward(ctx, i):
         result = i * torch.sigmoid(i)
@@ -21,6 +23,10 @@ def backward(ctx, grad_output):


 class Swish(nn.Module):
+    """Swish activation class:
+    y = x * sigmoid(x)
+    """
+
     def forward(self, x):
         return SwishImplementation.apply(x)
diff --git a/hyperion/torch/layers/tensor2pdf.py b/hyperion/torch/layers/tensor2pdf.py
index e38b1bc7..55c890a3 100644
--- a/hyperion/torch/layers/tensor2pdf.py
+++ b/hyperion/torch/layers/tensor2pdf.py
@@ -13,6 +13,12 @@

 class Tensor2PDF(nn.Module):
     """Base class for layers that create a prob distribution from
     an input tensor
+
+    Attributes:
+      pdf_feats: Feature dimension of the probability distribution.
+      project: If True, it applies a projection to the input tensor.
+      in_feats: Feature dimension of the input tensor.
+      in_dim: Number of dimensions of the input tensor.
     """

     def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None):
@@ -44,7 +50,14 @@ def _make_proj(self, in_feats, out_feats, ndims):


 class Tensor2NormalICov(Tensor2PDF):
-    """Transforms a Tensor into Normal distribution with identitiy variance"""
+    """Transforms a Tensor into Normal distribution with identity variance.
+
+    Attributes:
+      pdf_feats: Feature dimension of the probability distribution.
+      project: If True, it applies a projection to the input tensor.
+      in_feats: Feature dimension of the input tensor.
+      in_dim: Number of dimensions of the input tensor.
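+
+    Example:
+        An illustrative usage sketch (values are assumptions) for a 2d input:
+
+            >>> t2pdf = Tensor2NormalICov(pdf_feats=64, in_feats=256, in_dim=2)
+            >>> pdf = t2pdf(torch.randn(8, 256))
+            >>> pdf.mean.shape
+            torch.Size([8, 64])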
+ """ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): super().__init__(pdf_feats, project=project, in_feats=in_feats, in_dim=in_dim) @@ -53,6 +66,16 @@ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): self._proj = self._make_proj(self.in_feats, self.pdf_feats, self.in_dim) def forward(self, inputs, prior=None, squeeze_dim=None): + """Creates a Normal distribution from input tensor. + + Args: + inputs: Input tensor. + prior: Not used. + squeeze_dim: Squeezes pdf parameters dimensions. + + Returns: + torch.distributions.normal.Normal object. + """ if self.project: inputs = self._proj(inputs) @@ -70,6 +93,12 @@ class Tensor2NormalGlobDiagCov(Tensor2PDF): Input tensor will be the mean of the distribution and the standard deviation is a global trainable parameter. + + Attributes: + pdf_feats: Feature dimension of the probability distribution. + project: If True, it applies a projection to the input tensor. + in_feats: Feature dimension of the input tensor. + in_dim: Number of dimensions of the input tensor. """ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): @@ -85,6 +114,18 @@ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): self.logvar = nn.Parameter(torch.zeros(pdf_shape)) def forward(self, inputs, prior=None, squeeze_dim=None): + """Creates a Normal distribution from input tensor. + + Args: + inputs: Input tensor. + Args: + inputs: Input tensor. + prior: prior pdf object. + squeeze_dim: Squeezes pdf parameters dimensions. + + Returns: + torch.distributions.normal.Normal object. + """ if self.project: inputs = self._proj(inputs) @@ -108,6 +149,12 @@ class Tensor2NormalDiagCov(Tensor2PDF): Applies two linear transformation to the tensors to obtain the mean and the log-variance. + + Attributes: + pdf_feats: Feature dimension of the probability distribution. + project: If True, it applies a projection to the input tensor. + in_feats: Feature dimension of the input tensor. + in_dim: Number of dimensions of the input tensor. """ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): @@ -117,6 +164,18 @@ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): self._proj = self._make_proj(self.in_feats, self.pdf_feats * 2, self.in_dim) def forward(self, inputs, prior=None, squeeze_dim=None): + """Creates a Normal distribution from input tensor. + + Args: + inputs: Input tensor. + Args: + inputs: Input tensor. + prior: prior pdf object. + squeeze_dim: Squeezes pdf parameters dimensions. + + Returns: + torch.distributions.normal.Normal object. + """ if self.project: inputs = self._proj(inputs) @@ -138,7 +197,13 @@ def forward(self, inputs, prior=None, squeeze_dim=None): class Tensor2BayNormalICovGivenNormalPrior(Tensor2PDF): """Transforms a Tensor into Normal distribution with identitiy variance - Uses Bayesian interpolation between Gaussian prior and Maximum Likelihood estimation + Uses Bayesian interpolation between Gaussian prior and Maximum Likelihood estimation. + + Attributes: + pdf_feats: Feature dimension of the probability distribution. + project: If True, it applies a projection to the input tensor. + in_feats: Feature dimension of the input tensor. + in_dim: Number of dimensions of the input tensor. 
""" def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): @@ -151,6 +216,18 @@ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): self._alpha = nn.Parameter(torch.zeros(1)) def forward(self, inputs, prior=None, squeeze_dim=None): + """Creates a Normal distribution from input tensor. + + Args: + inputs: Input tensor. + Args: + inputs: Input tensor. + prior: prior pdf object. + squeeze_dim: Squeezes pdf parameters dimensions. + + Returns: + torch.distributions.normal.Normal object. + """ if self.project: inputs = self._proj(inputs) @@ -173,7 +250,13 @@ class Tensor2BayNormalGlobDiagCovGivenNormalPrior(Tensor2PDF): Input tensor will be the ML mean of the distribution and the ML standard deviation is a global trainable parameter. - Uses Bayesian interpolation between Gaussian prior and Maximum Likelihood estimation + Uses Bayesian interpolation between Gaussian prior and Maximum Likelihood estimation. + + Attributes: + pdf_feats: Feature dimension of the probability distribution. + project: If True, it applies a projection to the input tensor. + in_feats: Feature dimension of the input tensor. + in_dim: Number of dimensions of the input tensor. """ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): @@ -193,6 +276,18 @@ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): self._beta = nn.Parameter(torch.zeros(1)) def forward(self, inputs, prior=None, squeeze_dim=None): + """Creates a Normal distribution from input tensor. + + Args: + inputs: Input tensor. + Args: + inputs: Input tensor. + prior: prior pdf object. + squeeze_dim: Squeezes pdf parameters dimensions. + + Returns: + torch.distributions.normal.Normal object. + """ if self.project: inputs = self._proj(inputs) @@ -231,7 +326,13 @@ class Tensor2BayNormalDiagCovGivenNormalPrior(Tensor2PDF): Applies two linear transformation to the tensors to obtain the maximum likelihood mean and the log-variance. - Uses Bayesian interpolation between Gaussian prior and Maximum Likelihood estimation + Uses Bayesian interpolation between Gaussian prior and Maximum Likelihood estimation. + + Attributes: + pdf_feats: Feature dimension of the probability distribution. + project: If True, it applies a projection to the input tensor. + in_feats: Feature dimension of the input tensor. + in_dim: Number of dimensions of the input tensor. """ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): @@ -245,6 +346,18 @@ def __init__(self, pdf_feats, project=True, in_feats=None, in_dim=None): self._beta = nn.Parameter(torch.zeros(1)) def forward(self, inputs, prior=None, squeeze_dim=None): + """Creates a Normal distribution from input tensor. + + Args: + inputs: Input tensor. + Args: + inputs: Input tensor. + prior: prior pdf object. + squeeze_dim: Squeezes pdf parameters dimensions. + + Returns: + torch.distributions.normal.Normal object. + """ if self.project: inputs = self._proj(inputs) diff --git a/hyperion/torch/layers/vq.py b/hyperion/torch/layers/vq.py index 98307438..c56b58f6 100644 --- a/hyperion/torch/layers/vq.py +++ b/hyperion/torch/layers/vq.py @@ -9,8 +9,20 @@ import torch.nn.functional as F import torch.distributed as dist +from ..utils import seq_lengths_to_mask + class VectorQuantizer(nn.Module): + """Abstract base class for vector quantization layers. + + Attributes: + num_embed: codebook size. + embed_feats: feature dimension of the codebook vectors. + project: if True, it projects the input features to the embed_feats dim. 
+      in_feats: input feature dimension, needed when project=True.
+      in_dim: number of dimensions of the input tensor in [2,5], needed when project=True.
+    """
+
     def __init__(
         self, num_embed, embed_feats, project=True, in_feats=None, in_dim=None
     ):
@@ -43,6 +55,7 @@ def __repr__(self):
         return self.__str__()

     def _make_proj(self, in_feats, out_feats, ndims):
+        """Creates the feature projection layer."""
         if ndims == 2:
             return nn.Linear(in_feats, out_feats)
         elif ndims == 3:
@@ -56,6 +69,18 @@


 class KMeansVectorQuantizer(VectorQuantizer):
+    """Class for K-Means vector quantization layers,
+    where codebook vectors are trained by gradient descent losses.
+
+    Attributes:
+      num_embed: codebook size.
+      embed_feats: feature dimension of the codebook vectors.
+      commitment_cost: weight for loss that makes input features close to the codebook vectors.
+      project: if True, it projects the input features to the embed_feats dim.
+      in_feats: input feature dimension, needed when project=True.
+      in_dim: number of dimensions of the input tensor in [2,5], needed when project=True.
+    """
+
     def __init__(
         self,
         num_embed,
         embed_feats,
@@ -95,11 +120,33 @@ def __str__(self):
         )
         return s

-    def forward(self, inputs, return_r=False):
+    def forward(self, inputs, lengths=None, mask=None, return_r=False):
+        """Quantizes the input tensor.
+
+        Args:
+          inputs: input tensor 2d - 5d dimension with shape (batch, channels, ...)
+          lengths: when inputs is 3d, it is the length of each sequence in the batch.
+            Not used if mask is given.
+          mask: indicates which elements are valid to quantize. The elements with zero
+            mask are set to 0. The mask tensor should have the same shape as the
+            input tensor with the channel dimension removed, shape=(batch, ...).
+          return_r: if True, it returns the responsibilities.
+
+        Returns:
+          Dictionary containing quantized vectors, vq_loss, KL(q(z)||p(z)), where q(z) is
+          the distribution of posterior responsibilities and p(z) is a uniform categorical
+          distribution, and the log_perplexity of the responsibilities. If return_r is True,
+          it also returns the responsibilities.
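+
+        Example:
+            An illustrative usage sketch (argument values are assumptions)
+            with a 3d input of shape=(batch, channels, time):
+
+                >>> vq = KMeansVectorQuantizer(num_embed=512, embed_feats=64, in_feats=256, in_dim=3)
+                >>> out = vq(torch.randn(8, 256, 100))
+                >>> out["z_q"].shape  # quantized tensor (batch, embed_feats, time)
+                torch.Size([8, 64, 100])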
+ """ # inputs -> z_e in paper if self.project: inputs = self._proj(inputs) + if mask is None and lengths is not None: + mask = seq_lengths_to_mask( + lengths, inputs.size(-1), time_dim=1, dtype=inputs.dtype + ) + # convert inputs from BCHW -> BHWC inputs = inputs.transpose(1, -1).contiguous() input_shape = inputs.shape @@ -112,26 +159,37 @@ def forward(self, inputs, return_r=False): torch.sum(flat_inputs ** 2, dim=1, keepdim=True) + torch.sum(self.embed ** 2, dim=1) - 2 * torch.matmul(flat_inputs, self.embed.t()) - ) + ) # (batch x time, num_embeds) # Encoding # quantization integer indexes - q_idx = torch.argmin(d2, dim=1).unsqueeze(1) + q_idx = torch.argmin(d2, dim=1).unsqueeze(1) # (batch x time, 1) # 1 hot responsibilities r = torch.zeros(q_idx.shape[0], self.num_embed, device=inputs.device) - r.scatter_(1, q_idx, 1) - z_q = torch.matmul(r, self.embed).view(input_shape) + r.scatter_(1, q_idx, 1) # (batch x time, num_embeds) + z_q = torch.matmul(r, self.embed).view(input_shape) # (batch, time, embed_dim) + + if mask is not None: + z_q = z_q * mask + inputs = inputs * mask # Loss - vq_loss = F.mse_loss(z_q, inputs.detach()) - commitment_loss = F.mse_loss(z_q.detach(), inputs) + vq_loss = F.mse_loss(z_q, inputs.detach()) # || z_q - sg(z) ||_2 + commitment_loss = F.mse_loss(z_q.detach(), inputs) # || z - sg (z_q) ||_2 + loss = vq_loss + self.commitment_cost * commitment_loss + if mask is not None: + loss /= torch.mean(mask) # this allows to backprogate the gradients as if the output were equal to z_e z_q = inputs + (z_q - inputs).detach() # compute the perplexity - probs = torch.mean(r, dim=0) + if mask is None: + probs = torch.mean(r, dim=0) + else: + probs = torch.mean(r[mask.flatten()], dim=0) + log_perplexity = -torch.sum(probs * torch.log(probs + 1e-10)) # compute KL divergence between r and uniform categorical prior @@ -147,7 +205,7 @@ def forward(self, inputs, return_r=False): ) # convert quantized from BHWC -> BCHW - z_q = z_q.transpose(1, -1).contiguous() + z_q = z_q.transpose(1, -1).contiguous() # (batch, embed_dim, time) output = { "z_q": z_q, "loss": loss, @@ -162,6 +220,20 @@ def forward(self, inputs, return_r=False): class MultiKMeansVectorQuantizer(VectorQuantizer): + """Class for Mulit-group K-Means vector quantization layers, + where codebook vectors are trained by gradient descend losses. + The input tensors are divided into groups and quantized separately. + + Attributes: + num_groups: number of codebooks. + num_embed: codebook size. + embed_feats: feature dimension of the codebook vectors. + commitment_cost: weight for loss that makes input features close to the codebook vectors. + project: if True, it projects the input features to the embed_feats dim. + in_feats: input feature dimension, needed when project=True. + in_dim: number of dimensions of the input tensor in [2,5], needed when project=True + """ + def __init__( self, num_groups, @@ -212,15 +284,37 @@ def __str__(self): ) return s - def forward(self, inputs, return_r=False): + def forward(self, inputs, lengths=None, mask=None, return_r=False): + """Quantizes the input tensor. + + Args: + input: input tensor 2d - 5d dimension with shape (batch, channels, ...) + lengths: when inputs is 3d, it the length of each sequence in the batch. + Not used if mask is given. + mask: indicates which elements are valid, to quantize. The elements with zero + mask are set to 0. The mask tensor should have the same shape as the + input tensor with the channel dimension removed, shape=(batch, ...). 
+          return_r: if True, it returns the responsibilities.
+
+        Returns:
+          Dictionary containing quantized vectors, vq_loss, KL(q(z)||p(z)), where q(z) is
+          the distribution of posterior responsibilities and p(z) is a uniform categorical
+          distribution, and the log_perplexity of the responsibilities. If return_r is True,
+          it also returns the responsibilities.
+        """
         if self.project:
             inputs = self._proj(inputs)

+        if mask is None and lengths is not None:
+            mask = seq_lengths_to_mask(
+                lengths, inputs.size(-1), time_dim=1, dtype=inputs.dtype
+            )
+
         inputs = inputs.chunk(self.num_groups, dim=1)
         z_q = []
         r = []
         for i in range(self.num_groups):
-            output_i = self.vq_layers[i](inputs[i], return_r=return_r)
+            output_i = self.vq_layers[i](inputs[i], mask=mask, return_r=return_r)
             z_qi = output_i["z_q"]
             loss_i = output_i["loss"]
             kldiv_ri = output_i["kldiv_qrpr"]
@@ -255,6 +349,19 @@


 class EMAKMeansVectorQuantizer(VectorQuantizer):
+    """Class for exponential moving average vector quantization layers.
+
+    Attributes:
+      num_embed: codebook size.
+      embed_feats: feature dimension of the codebook vectors.
+      commitment_cost: weight for loss that makes input features close to the codebook vectors.
+      gamma: exponential average coefficient.
+      eps: epsilon for Laplace smoothing of the counts.
+      project: if True, it projects the input features to the embed_feats dim.
+      in_feats: input feature dimension, needed when project=True.
+      in_dim: number of dimensions of the input tensor in [2,5], needed when project=True.
+    """
+
     def __init__(
         self,
         num_embed,
@@ -302,11 +409,34 @@ def __str__(self):
         )
         return s

-    def forward(self, inputs, return_r=False):
+    def forward(self, inputs, lengths=None, mask=None, return_r=False):
+        """Quantizes the input tensor. In training phase, it also
+        updates the codebooks by EMA.
+
+        Args:
+          inputs: input tensor 2d - 5d dimension with shape (batch, channels, ...)
+          lengths: when inputs is 3d, it is the length of each sequence in the batch.
+            Not used if mask is given.
+          mask: indicates which elements are valid to quantize. The elements with zero
+            mask are set to 0. The mask tensor should have the same shape as the
+            input tensor with the channel dimension removed, shape=(batch, ...).
+          return_r: if True, it returns the responsibilities.
+
+        Returns:
+          Dictionary containing quantized vectors, vq_loss, KL(q(z)||p(z)), where q(z) is
+          the distribution of posterior responsibilities and p(z) is a uniform categorical
+          distribution, and the log_perplexity of the responsibilities. If return_r is True,
+          it also returns the responsibilities.
+ """ # inputs -> z_e in paper if self.project: inputs = self._proj(inputs) + if mask is None and lengths is not None: + mask = seq_lengths_to_mask( + lengths, inputs.size(-1), time_dim=1, dtype=inputs.dtype + ) + # convert inputs from BCHW -> BHWC inputs = inputs.transpose(1, -1).contiguous() input_shape = inputs.shape @@ -331,9 +461,15 @@ def forward(self, inputs, return_r=False): # Use Exponetial Moving Average (EMA) to update the embedding vectors if self.training: + if mask is not None: + flat_mask = mask.flatten() + r = r[flat_mask] + flat_inputs = flat_inputs[flat_mask] + N = torch.sum(r, dim=0) # required to sync gpus in DDP - dist.all_reduce(N, op=dist.ReduceOp.SUM) + if dist.is_initialized(): + dist.all_reduce(N, op=dist.ReduceOp.SUM) ema_N = self._ema_N * self.gamma + (1 - self.gamma) * N @@ -345,21 +481,31 @@ def forward(self, inputs, return_r=False): z_acc = torch.matmul(r.t(), flat_inputs) # required to sync gpus in DDP - dist.all_reduce(z_acc, op=dist.ReduceOp.SUM) + if dist.is_initialized(): + dist.all_reduce(z_acc, op=dist.ReduceOp.SUM) self._ema_z_acc = ( self.gamma * self._ema_z_acc + (1 - self.gamma) * z_acc ).detach() self.embed = (self._ema_z_acc / self._ema_N.unsqueeze(1)).detach() + if mask is not None: + z_q = z_q * mask + inputs = inputs * mask # Loss commitment_loss = F.mse_loss(z_q.detach(), inputs) loss = self.commitment_cost * commitment_loss + if mask is not None: + loss /= torch.mean(mask) # this allows to backprogate the gradients as if the output were equal to z_e z_q = inputs + (z_q - inputs).detach() # compute the perplexity - probs = torch.mean(r, dim=0) + if mask is None: + probs = torch.mean(r, dim=0) + else: + probs = torch.mean(r[mask.flatten()], dim=0) + log_perplexity = -torch.sum(probs * torch.log(probs + 1e-10)) # compute KL divergence between r and uniform categorical prior @@ -390,6 +536,22 @@ def forward(self, inputs, return_r=False): class MultiEMAKMeansVectorQuantizer(VectorQuantizer): + """Class for Mulit-group exponential moving average vector quantization layers, + where codebook vectors are trained by gradient descend losses. + The input tensors are divided into groups and quantized separately. + + Attributes: + num_groups: number of codebooks. + num_embed: codebook size. + embed_feats: feature dimension of the codebook vectors. + commitment_cost: weight for loss that makes input features close to the codebook vectors. + gamma: exponential average coefficient. + eps: epsilon for Laplace smoothing of the counts. + project: if True, it projects the input features to the embed_feats dim. + in_feats: input feature dimension, needed when project=True. + in_dim: number of dimensions of the input tensor in [2,5], needed when project=True + """ + def __init__( self, num_groups, @@ -452,15 +614,37 @@ def __str__(self): ) return s - def forward(self, inputs, return_r=False): + def forward(self, inputs, lengths=None, mask=None, return_r=False): + """Quantizes the input tensor. + + Args: + input: input tensor 2d - 5d dimension with shape=(batch, channels, ...) + lengths: when inputs is 3d, it the length of each sequence in the batch. + Not used if mask is given. + mask: indicates which elements are valid, to quantize. The elements with zero + mask are set to 0. The mask tensor should have the same shape as the + input tensor with the channel dimension removed, shape=(batch, ...). + return_r: it True, it returns the responsibilities. 
+
+        Returns:
+          Dictionary containing quantized vectors, vq_loss, KL(q(z)||p(z)), where q(z) is
+          the distribution of posterior responsibilities and p(z) is a uniform categorical
+          distribution, and the log_perplexity of the responsibilities. If return_r is True,
+          it also returns the responsibilities.
+        """
         if self.project:
             inputs = self._proj(inputs)

+        if mask is None and lengths is not None:
+            mask = seq_lengths_to_mask(
+                lengths, inputs.size(-1), time_dim=1, dtype=inputs.dtype
+            )
+
         inputs = inputs.chunk(self.num_groups, dim=1)
         z_q = []
         r = []
         for i in range(self.num_groups):
-            output_i = self.vq_layers[i](inputs[i])
+            output_i = self.vq_layers[i](inputs[i], mask=mask)
             z_qi = output_i["z_q"]
             loss_i = output_i["loss"]
             kldiv_ri = output_i["kldiv_qrpr"]
diff --git a/hyperion/torch/models/wav2xvectors/__init__.py b/hyperion/torch/models/wav2xvectors/__init__.py
new file mode 100644
index 00000000..d1e65dd0
--- /dev/null
+++ b/hyperion/torch/models/wav2xvectors/__init__.py
@@ -0,0 +1,13 @@
+"""
+ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+
+"""
+
+# from .wav2tdnn_xvector import Wav2TDNNXVector
+from .wav2resnet_xvector import Wav2ResNetXVector
+
+# from .wav2efficient_net_xvector import Wav2EfficientNetXVector
+# from .wav2transformer_xvector_v1 import Wav2TransformerXVectorV1
+# from .wav2spinenet_xvector import Wav2SpineNetXVector
+from .wav2resnet1d_xvector import Wav2ResNet1dXVector
diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py
new file mode 100644
index 00000000..78724174
--- /dev/null
+++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py
@@ -0,0 +1,40 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+from jsonargparse import ArgumentParser, ActionParser
+
+import torch
+import torch.nn as nn
+
+from ..xvectors import ResNet1dXVector
+from ...tpm import HFWav2Vec
+from .hf_wav2xvector import HFWav2XVector
+
+
+class HFWav2Vec2ResNet1dXVector(HFWav2XVector):
+    """Class extracting ResNet1d x-vectors from waveform.
+    It contains acoustic feature extraction, feature normalization and
+    ResNet1dXVector extractor.
+
+    Attributes:
+      hf_feats: HFWav2Vec configuration dictionary or object.
+        This is a wrapper over the Hugging Face Wav2Vec model.
+      xvector: ResNet1dXVector configuration dictionary or object.
+ """ + + def __init__(self, hf_feats, xvector): + + if isinstance(hf_feats, dict): + hf_feats = HFWav2Vec(**hf_feats) + else: + assert isinstance(hf_feats, HFWav2Vec) + + if isinstance(xvector, dict): + xvector = ResNet1dXVector(**xvector) + else: + assert isinstance(xvector, ResNet1dXVector) + + super().__init__(hf_feats, xvector) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py new file mode 100644 index 00000000..a471343c --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -0,0 +1,26 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from jsonargparse import ArgumentParser, ActionParser + +import torch +import torch.nn as nn + + +from ...torch_model import TorchModel + + +class HFWav2XVector(TorchModel): + """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. + + Attributes: + hf_feats: hugging face model wrapper object. + xvector: x-vector model object. + """ + + def __init__(self, hf_feats, xvector): + + self.hf_feats = hf_feats + self.xvector = xvector diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py new file mode 100644 index 00000000..983fbac2 --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py @@ -0,0 +1,53 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +from jsonargparse import ArgumentParser, ActionParser + +import torch +import torch.nn as nn + +from .wav2xvector import Wav2XVector +from ..xvectors import ResNet1dXVector + + +class Wav2ResNet1dXVector(Wav2XVector): + """Class extracting ResNet1d x-vectors from waveform. + It contains acoustic feature extraction, feature normalization and + ResNet1dXVector extractor. + + Attributes: + Attributes: + feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. + xvector: ResNet1dXVector configuration dictionary or object. + """ + + def __init__(self, feats, xvector): + + if isinstance(xvector, dict): + xvector = ResNet1dXVector.filter_args(**xvector) + xvector = ResNet1dXVector(**xvector) + else: + assert isinstance(xvector, ResNet1dXVector) + + super().__init__(feats, xvector) + + @staticmethod + def add_class_args(parser, prefix=None): + """Adds Wav2ResNet1dXVector options to parser. + + Args: + parser: Arguments parser + prefix: Options prefix. 
+ """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + Wav2XVector.add_class_args(parser) + ResNet1dXVector.add_class_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py new file mode 100644 index 00000000..dea2e442 --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py @@ -0,0 +1,53 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +from jsonargparse import ArgumentParser, ActionParser + +import torch +import torch.nn as nn + +from .wav2xvector import Wav2XVector +from ..xvectors import ResNetXVector + + +class Wav2ResNetXVector(Wav2XVector): + """Class extracting ResNet x-vectors from waveform. + It contains acoustic feature extraction, feature normalization and + ResNetXVector extractor. + + Attributes: + Attributes: + feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. + xvector: ResNetXVector configuration dictionary or object. + """ + + def __init__(self, feats, xvector): + + if isinstance(xvector, dict): + xvector = ResNetXVector.filter_args(**xvector) + xvector = ResNetXVector(**xvector) + else: + assert isinstance(xvector, ResNetXVector) + + super().__init__(feats, xvector) + + @staticmethod + def add_class_args(parser, prefix=None): + """Adds Wav2ResNet1dXVector options to parser. + + Args: + parser: Arguments parser + prefix: Options prefix. + """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + Wav2XVector.add_class_args(parser) + ResNetXVector.add_class_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py new file mode 100644 index 00000000..0c5a1698 --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -0,0 +1,128 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from jsonargparse import ArgumentParser, ActionParser + +import torch +import torch.nn as nn + +from ...torch_model import TorchModel +from ...narchs import AudioFeatsMVN +from ...utils import remove_silence + + +class Wav2XVector(TorchModel): + """Base class for models that integrate the acoustic feature extractor and and x-vector model that takes acoustic features as input. + + Attributes: + feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. + xvector: x-vector model object. 
+ """ + + def __init__(self, feats, xvector): + + super().__init__() + + if isinstance(feats, dict): + feats = AudioFeatsMVN.filter_args(**feats) + feats["trans"] = True + feats = AudioFeatsMVN(**feats) + else: + assert isinstance(feats, AudioFeatsMVN) + + self.feats = feats + self.xvector = xvector + + def forward( + self, + x, + x_lengths=None, + y=None, + vad_samples=None, + vad_feats=None, + enc_layers=None, + classif_layers=None, + return_output=True, + ): + + if vad_samples is not None: + x, x_lengths = remove_silence(x, x_lengths) + feats, feat_lengths = self.feats(x, x_lengths) + if vad_feats is not None: + feats, feat_lengths = remove_silence(feats, feat_lengths) + + # feat_lengths = torch.div(x_lengths * feats.size(-1), x.size(-1)) + return self.xvector( + feats, feat_lengths, y, enc_layers, classif_layers, return_output + ) + + def extract_embed( + self, + x, + x_lengths=None, + vad_samples=None, + vad_feats=None, + chunk_length=0, + embed_layer=None, + detach_chunks=False, + ): + + if vad_samples is not None: + x, x_lengths = remove_silence(x, x_lengths) + feats, feat_lengths = self.feats(x, x_lengths) + if vad_feats is not None: + feats, feat_lengths = remove_silence(feats, feat_lengths) + + return self.xvector.extract_embed( + feats, feat_lengths, chunk_length, embed_layer, detach_chunks + ) + + def train_mode(self, mode="ft-embed-affine"): + self.xvector.train_mode(mode) + + def get_config(self): + feat_cfg = self.feats.get_config() + xvector_cfg = self.xvector.get_config() + config = { + "feats": feat_cfg, + "xvector": xvector_cfg, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @staticmethod + def filter_args(*kwargs): + """Filters Wav2XVector class arguments from arguments dictionary. + + Args: + kwargs: Arguments dictionary. + + Returns: + Dictionary with SpecAugment options. + """ + valid_args = ( + "feats", + "xvector", + ) + + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + """Adds Wav2XVector options common to all child classes to parser. + + Args: + parser: Arguments parser + prefix: Options prefix. 
+ """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + AudioFeatsMVN.add_class_args(parser, prefix="feats") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/__init__.py b/hyperion/torch/models/xvectors/__init__.py new file mode 100644 index 00000000..408de716 --- /dev/null +++ b/hyperion/torch/models/xvectors/__init__.py @@ -0,0 +1,13 @@ +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +""" + +from .xvector import XVector +from .tdnn_xvector import TDNNXVector +from .resnet_xvector import ResNetXVector +from .efficient_net_xvector import EfficientNetXVector +from .transformer_xvector_v1 import TransformerXVectorV1 +from .spinenet_xvector import SpineNetXVector +from .resnet1d_xvector import ResNet1dXVector diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py index 8db9a073..295824f3 100644 --- a/hyperion/torch/models/xvectors/resnet1d_xvector.py +++ b/hyperion/torch/models/xvectors/resnet1d_xvector.py @@ -138,7 +138,7 @@ def load(cls, file_path=None, cfg=None, state_dict=None): del cfg["in_feats"] except: pass - print(cfg, flush=True) + model = cls(**cfg) if state_dict is not None: model.load_state_dict(state_dict) diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 21932491..685ead4a 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -12,7 +12,7 @@ from ...layer_blocks import TDNNBlock from ...narchs import ClassifHead, TorchNALoader from ...torch_model import TorchModel -from ...utils import eval_nnet_by_chunks +from ...utils import eval_nnet_by_chunks, scale_lengths class XVector(TorchModel): @@ -201,40 +201,38 @@ def _pre_enc(self, x): x = x.view(x.size(0), 1, x.size(1), x.size(2)) return x - def _post_enc(self, x): + def _post_enc(self, x, in_lengths=None, max_in_length=None): if self.encoder_net.out_dim() == 4: x = x.view(x.size(0), -1, x.size(-1)) if self.proj is not None: x = self.proj(x) - return x + if in_lengths is not None: + out_lengths = scale_lengths(in_lengths, x.size(-1), max_in_length) + else: + out_lengths = None + + return x, out_lengths def forward( - self, x, y=None, enc_layers=None, classif_layers=None, return_output=True + self, + x, + x_lengths=None, + y=None, + return_enc_layers=None, + return_classif_layers=None, + return_logits=True, ): - if enc_layers is None and classif_layers is None: - return self.forward_output(x, y) + if return_enc_layers is None and return_classif_layers is None: + return self.forward_logits(x, x_lengths, y) - h = self.forward_hid_feats(x, y, enc_layers, classif_layers, return_output) - output = {} - if enc_layers is not None: - if classif_layers is None: - output["h_enc"] = h - else: - output["h_enc"] = h[0] - else: - output["h_enc"] = [] - if classif_layers is not None: - output["h_classif"] = h[1] - else: - output["h_classif"] = [] - if return_output: - output["output"] = h[2] - return output + return self.forward_hid_feats( + x, x_lengths, y, return_enc_layers, return_classif_layers, return_logits + ) - def forward_output(self, x, y=None): + def forward_logits(self, x, x_lengths=None, y=None): """Forward function Args: @@ -242,59 +240,57 @@ def forward_output(self, x, y=None): y: target classes torch.long tensor with shape=(batch,) Returns: - 
class posteriors tensor with shape=(batch, num_classes)
+          class logits tensor with shape=(batch, num_classes)
         """
-        if self.encoder_net.in_dim() == 4 and x.dim() == 3:
-            x = x.view(x.size(0), 1, x.size(1), x.size(2))
-
+        max_in_length = x.size(-1)
+        x = self._pre_enc(x)
         x = self.encoder_net(x)
-
-        if self.encoder_net.out_dim() == 4:
-            x = x.view(x.size(0), -1, x.size(-1))
-
-        if self.proj is not None:
-            x = self.proj(x)
-
-        p = self.pool_net(x)
+        x, x_lengths = self._post_enc(x, x_lengths, max_in_length)
+        p = self.pool_net(x, x_lengths=x_lengths)
         y = self.classif_net(p, y)
         return y

     def forward_hid_feats(
         self,
         x,
         x_lengths=None,
         y=None,
         return_enc_layers=None,
         return_classif_layers=None,
         return_logits=False,
     ):
         """forwards hidden representations in the x-vector network"""
-
-        if self.encoder_net.in_dim() == 4 and x.dim() == 3:
-            x = x.view(x.size(0), 1, x.size(1), x.size(2))
-
-        h_enc, x = self.encoder_net.forward_hid_feats(x, enc_layers, return_output=True)
-
-        if not return_output and classif_layers is None:
-            return h_enc
-
-        if self.encoder_net.out_dim() == 4:
-            x = x.view(x.size(0), -1, x.size(-1))
-
-        if self.proj is not None:
-            x = self.proj(x)
-
-        p = self.pool_net(x)
-        h_classif = self.classif_net.forward_hid_feats(
-            p, y, classif_layers, return_output=return_output
+        max_in_length = x.size(-1)
+        x = self._pre_enc(x)
+        h_enc, x = self.encoder_net.forward_hid_feats(
+            x, return_enc_layers, return_logits=True
+        )
+        output = {"h_enc": h_enc}
+        if not return_logits and return_classif_layers is None:
+            return output
+
+        x, x_lengths = self._post_enc(x, x_lengths, max_in_length)
+        p = self.pool_net(x, x_lengths=x_lengths)
+        h_classif, y_pred = self.classif_net.forward_hid_feats(
+            p, y, return_classif_layers, return_logits=return_logits
         )
-        if return_output:
-            h_classif, y = h_classif
-            return h_enc, h_classif, y
+        if return_logits:
+            output["h_classif"] = h_classif
+            output["logits"] = y_pred
+            return output

-        return h_enc, h_classif
+        output["h_classif"] = h_classif
+        return output

-    def extract_embed(self, x, chunk_length=0, embed_layer=None, detach_chunks=False):
+    def extract_embed(
+        self, x, x_lengths=None, chunk_length=0, embed_layer=None, detach_chunks=False
+    ):
         if embed_layer is None:
             embed_layer = self.embed_layer

+        max_in_length = x.size(-1)
         x = self._pre_enc(x)
-        # if self.encoder_net.in_dim() == 4 and x.dim() == 3:
-        #     x = x.view(x.size(0), 1, x.size(1), x.size(2))
         x = eval_nnet_by_chunks(
             x, self.encoder_net, chunk_length, detach_chunks=detach_chunks
         )
@@ -302,15 +298,8 @@
         if x.device != self.device:
             x = x.to(self.device)

-        x = self._post_enc(x)
-
-        # if self.encoder_net.out_dim() == 4:
-        #     x = x.view(x.size(0), -1, x.size(-1))
-
-        # if self.proj is not None:
-        #     x = self.proj(x)
-
-        p = self.pool_net(x)
+        x, x_lengths = self._post_enc(x, x_lengths, max_in_length)
+        p = self.pool_net(x, x_lengths=x_lengths)
         y = self.classif_net.extract_embed(p, embed_layer)
         return y

@@ -344,7 +333,7 @@ def extract_embed_slidwin(
             embed_layer = self.embed_layer

         in_time = x.size(-1)
         x = self._pre_enc(x)
         x = eval_nnet_by_chunks(
             x, self.encoder_net, chunk_length, detach_chunks=detach_chunks
         )
@@ -501,7 +490,7 @@ def rebuild_output_layer(
         # if we change the number of classes or the loss-type
         # we need to reinitiate the last layer
self.classif_net.rebuild_output_layer( - num_classes, loss_type, s, margin, margin_warmup_epochs + num_classes, loss_type, cos_scale, margin, margin_warmup_epochs ) return @@ -538,11 +527,6 @@ def train_mode(self, mode="ft-embed-affine"): @staticmethod def filter_args(**kwargs): - # # get boolean args that are negated - # if 'pool_wo_bias' in kwargs: - # kwargs['pool_use_bias'] = not kwargs['pool_wo_bias'] - # del kwargs['pool_wo_bias'] - if "wo_norm" in kwargs: kwargs["use_norm"] = not kwargs["wo_norm"] del kwargs["wo_norm"] @@ -553,19 +537,6 @@ def filter_args(**kwargs): # get arguments for pooling pool_args = PF.filter_args(**kwargs["pool_net"]) - # pool_valid_args = ( - # 'pool_type', 'pool_num_comp', 'pool_use_bias', - # 'pool_dist_pow', 'pool_d_k', 'pool_d_v', 'pool_num_heads', - # 'pool_bin_attn', 'pool_inner_feats') - # pool_args = dict((k, kwargs[k]) - # for k in pool_valid_args if k in kwargs) - - # # remove pooling prefix from arg name - # for k in pool_valid_args[1:]: - # if k in pool_args: - # k2 = k.replace('pool_','') - # pool_args[k2] = pool_args[k] - # del pool_args[k] valid_args = ( "num_classes", @@ -573,7 +544,7 @@ def filter_args(**kwargs): "num_embed_layers", "hid_act", "loss_type", - "s", + "cos_scale", "margin", "margin_warmup_epochs", "num_subcenters", @@ -600,49 +571,6 @@ def add_class_args(parser, prefix=None, skip=set()): parser, prefix="pool_net", skip=["dim", "in_feats", "keepdim"] ) - # parser.add_argument('--pool-type', type=str.lower, - # default='mean+stddev', - # choices=['avg','mean+stddev', 'mean+logvar', - # 'lde', 'scaled-dot-prod-att-v1', 'ch-wise-att-mean-stddev'], - # help=('Pooling methods: Avg, Mean+Std, Mean+logVar, LDE, ' - # 'scaled-dot-product-attention-v1')) - - # parser.add_argument('--pool-num-comp', - # default=64, type=int, - # help=('number of components for LDE pooling')) - - # parser.add_argument('--pool-dist-pow', - # default=2, type=int, - # help=('Distace power for LDE pooling')) - - # parser.add_argument('--pool-wo-bias', - # default=False, action='store_true', - # help=('Don\' use bias in LDE')) - - # parser.add_argument( - # '--pool-num-heads', default=8, type=int, - # help=('number of attention heads')) - - # parser.add_argument( - # '--pool-d-k', default=256, type=int, - # help=('key dimension for attention')) - - # parser.add_argument( - # '--pool-d-v', default=256, type=int, - # help=('value dimension for attention')) - - # parser.add_argument( - # '--pool-bin-attn', default=False, action='store_true', - # help=('Use binary attention, i.e. 
sigmoid instead of softmax')) - - # parser.add_argument( - # '--pool-inner-feats', default=128, type=int, - # help=('inner feature size for attentive pooling')) - - # parser.add_argument('--num-classes', - # required=True, type=int, - # help=('number of classes')) - parser.add_argument( "--embed-dim", default=256, type=int, help=("x-vector dimension") ) diff --git a/hyperion/torch/narchs/audio_feats_mvn.py b/hyperion/torch/narchs/audio_feats_mvn.py index 1d5cb0a3..9092e9d8 100644 --- a/hyperion/torch/narchs/audio_feats_mvn.py +++ b/hyperion/torch/narchs/audio_feats_mvn.py @@ -4,6 +4,7 @@ """ from jsonargparse import ArgumentParser, ActionParser +import torch import torch.nn as nn from ..layers import AudioFeatsFactory as AFF @@ -56,16 +57,24 @@ def frame_length(self): def frame_shift(self): return self.audio_feats.frame_shift - def forward(self, x, lengths=None): + @staticmethod + def _compute_feat_lengths(x_lengths, max_samples, max_frames): + if x_lengths is None: + return None + + return torch.div(x_lengths * max_frames, max_samples, rounding_mode="floor") + + def forward(self, x, x_lengths=None): f = self.audio_feats(x) + f_lengths = self._compute_feat_lengths(x_lengths, x.size(-1), f.size(1)) if self.spec_augment is not None and not self.aug_after_mvn: - f = self.spec_augment(f, lengths) + f = self.spec_augment(f, f_lengths) if self.mvn is not None: f = self.mvn(f) if self.spec_augment is not None and self.aug_after_mvn: - f = self.spec_augment(f, lengths) + f = self.spec_augment(f, f_lengths) if self.trans: f = f.transpose(1, 2).contiguous() @@ -105,4 +114,3 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='feature extraction options') diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index e3af9f2d..5824cb1b 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -8,6 +8,7 @@ import torch.nn as nn from torch.nn import Linear +from ..layers import ActivationFactory as AF from ..layers import CosLossOutput, ArcLossOutput, SubCenterArcLossOutput from ..layers import NormLayer1dFactory as NLF from ..layer_blocks import FCBlock @@ -143,7 +144,13 @@ def __init__( ) def rebuild_output_layer( - self, num_classes, loss_type, s, margin, margin_warmup_epochs, num_subcenters=2 + self, + num_classes, + loss_type, + cos_scale, + margin, + margin_warmup_epochs, + num_subcenters=2, ): embed_dim = self.embed_dim @@ -228,16 +235,16 @@ def forward(self, x, y=None): return y - def forward_hid_feats(self, x, y=None, layers=None, return_output=False): + def forward_hid_feats(self, x, y=None, return_layers=None, return_logits=False): - assert layers is not None or return_output - if layers is None: - layers = [] + assert return_layers is not None or return_logits + if return_layers is None: + return_layers = [] h = [] for l in range(self.num_embed_layers): x = self.fc_blocks[l](x) - if l in layers: + if l in return_layers: h.append(x) if self.loss_type == "softmax": @@ -245,16 +252,19 @@ def forward_hid_feats(self, x, y=None, layers=None, return_output=False): else: y = self.output(x, y) - if return_output: + if return_logits: return h, y - return h + return h, None def extract_embed(self, x, embed_layer=0): for l in range(embed_layer): x = self.fc_blocks[l](x) - y = self.fc_blocks[embed_layer].forward_linear(x) + if self.loss_type == "softmax" or embed_layer < self.num_embed_layers: + y = 
self.fc_blocks[embed_layer].forward_linear(x) + else: + y = self.fc_blocks[l](x) return y def get_config(self): diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index 69f9300c..4fabe8d2 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -232,25 +232,28 @@ def _make_in_layer(self): nn.Embedding(in_feats, d_model, padding_idx=self.padding_idx), pos_enc ) elif isinstance(self.in_layer_type, nn.Module): - self.in_layer = nn.Sequential(in_layer_type, pos_enc) + self.in_layer = nn.Sequential(self.in_layer_type, pos_enc) elif self.in_layer_type is None: self.in_layer = pos_enc else: raise ValueError("unknown in_layer_type: " + self.in_layer_type) - def forward(self, x, mask=None, target_shape=None): + def forward(self, x, x_lengths=None, x_mask=None, target_shape=None): """Forward pass function Args: x: input tensor with size=(batch, time, num_feats) - mask: mask to indicate valid time steps for x (batch, time) + x_lengths: lengths of the input sequences. + x_mask: mask to indicate valid time steps for x (batch, time). + It overwrites the mask of x_lengths. Returns: Tensor with output features Tensor with mask """ + if isinstance(self.in_layer, Conv2dSubsampler): - x, mask = self.in_layer(x, mask) + x, mask = self.in_layer(x, x_mask) else: if self.in_time_dim != 1: x = x.transpose(1, self.in_time_dim).contiguous() diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index 66c4d028..dc5de737 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -2,7 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - +import os from copy import deepcopy import torch @@ -39,7 +39,7 @@ def unfreeze(self): @staticmethod def _load_cfg_state_dict(file_path=None, cfg=None, state_dict=None): model_data = None - if cfg is None: + if cfg is None or state_dict is None: assert file_path is not None model_data = torch.load(file_path) if cfg is None: diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 75c3ece8..fef0b3b5 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -11,7 +11,7 @@ import torch import torch.nn as nn -from ..utils import MetricAcc # , TorchDataParallel +from ..utils import MetricAcc from .xvector_trainer_from_wav import XVectorTrainerFromWav @@ -128,11 +128,6 @@ def __init__( % (p_attack, 1.0 / self.grad_acc_steps) ) - # if data_parallel: - # # change model in attack by the data parallel version - # self.attack.model = TorchDataParallel(self.attack.model) - # # make loss function in attack data parallel - # self.attack.make_data_parallel() def train_epoch(self, data_loader): @@ -167,7 +162,7 @@ def train_epoch(self, data_loader): feats = self.feat_extractor(data) with self.amp_autocast(): - output = self.model(feats, target) + output = self.model(feats, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: @@ -263,4 +258,4 @@ def add_class_args(parser, prefix=None, skip=[]): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='trainer options') + diff --git a/hyperion/torch/trainers/xvector_finetuner.py b/hyperion/torch/trainers/xvector_finetuner.py deleted file mode 100644 index cf833257..00000000 --- 
a/hyperion/torch/trainers/xvector_finetuner.py +++ /dev/null @@ -1,117 +0,0 @@ -""" - Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" -import os -from collections import OrderedDict as ODict - -import time -import logging - -import torch -import torch.nn as nn - -from ..utils import MetricAcc -from .xvector_trainer import XVectorTrainer - - -class XVectorFinetuner(XVectorTrainer): - def __init__( - self, - model, - optimizer, - epochs, - exp_path, - cur_epoch=0, - grad_acc_steps=1, - device=None, - metrics=None, - lr_scheduler=None, - loggers=None, - data_parallel=False, - loss=None, - finetune_mode="ft-embed-affine", - ): - - super(XVectorFinetuner, self).__init__( - model, - optimizer, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - device=device, - metrics=metrics, - lr_scheduler=lr_scheduler, - loggers=loggers, - data_parallel=data_parallel, - loss=loss, - ) - - self.finetune_mode = finetune_mode - - def train_epoch(self, data_loader): - # epoch_batches = len(data_loader.dataset) - # total_batches = self.cur_epoch * epoch_batches - - self.model.update_loss_margin(self.cur_epoch) - - metric_acc = MetricAcc() - batch_metrics = ODict() - # self.model.train_mode(self.finetune_mode) - self.model.eval() - for batch, (data, target) in enumerate(data_loader): - self.loggers.on_batch_begin(batch) - - if batch % self.grad_acc_steps == 0: - self.optimizer.zero_grad() - - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] - - output = self.model(data, target) - loss = self.loss(output, target).mean() / self.grad_acc_steps - loss.backward() - - if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None: - self.lr_scheduler.on_opt_step() - self.optimizer.step() - - batch_metrics["loss"] = loss.item() * self.grad_acc_steps - for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) - - # logging.info('batch={} shape={} loss={} acc={}'.format(batch,data.shape, batch_metrics['loss'], batch_metrics['acc'])) - - # if batch > 63: - # logging.info(str(self.model.classif_net.fc_blocks[0].linear.weight)) - # logging.info(str(self.model.classif_net.fc_blocks[0].linear.weight.grad)) - # if batch > 63 : - # t=torch.nn.functional.cross_entropy(output, target, reduction='none') - # logging.info(str(t)) - # if batch == 65: - # #torch.set_printoptions(profile="full") - # #logging.info(str(data[1])) - # #logging.info(str(target[1])) - # #logging.info(str(output[1])) - - # #logging.info(str(data[33])) - # #logging.info(str(target[33])) - # logging.info(str(output[33, target[33]])) - # #time.sleep(1000) - # #torch.set_printoptions(profile="default") - - # #logging.info(str(torch.sum(torch.isnan(data)))) - # #logging.info(str(torch.sum(torch.isnan(target)))) - # #logging.info(str(torch.sum(torch.isnan(output)))) - - metric_acc.update(batch_metrics, batch_size) - logs = metric_acc.metrics - logs["lr"] = self._get_lr() - self.loggers.on_batch_end(logs=logs, batch_size=batch_size) - # total_batches +=1 - - logs = metric_acc.metrics - logs["lr"] = self._get_lr() - return logs diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 190b2a30..2e032a49 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -127,7 +127,7 @@ def train_epoch(self, data_loader): batch_size = data.shape[0] with self.amp_autocast(): - output = 
self.model(data, target, **self.amp_args) + output = self.model(data, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 7b7cb21c..47801c29 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -3,6 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import os +from jsonargparse import ArgumentParser, ActionParser from collections import OrderedDict as ODict import logging @@ -10,30 +11,9 @@ import torch import torch.nn as nn -from ..utils import MetricAcc # , TorchDataParallel +from ..utils import MetricAcc from .xvector_trainer import XVectorTrainer -# class DFRModelWrapper(nn.Module): -# """Wrapper class for the xvector model, which -# replace the forward method by the forward_hid_feats method - -# This is need because nn.DataParallel only support multi-gpu when colling the -# forward method, but not the other methods in the nn.Module classes. -# """ -# def __init__(self, model): -# super().__init__() -# self.model = model - -# def forward(self, x, y=None, enc_layers=None, classif_layers=None, -# return_output=False, use_amp=False): -# if use_amp: -# with torch.cuda.amp.autocast(): -# return self.model.forward_hid_feats( -# x, y, enc_layers, classif_layers, return_output) - -# return self.model.forward_hid_feats( -# x, y, enc_layers, classif_layers, return_output) - class XVectorTrainerDeepFeatReg(XVectorTrainer): """Trainer to train x-vector style models. @@ -149,19 +129,6 @@ def __init__( if device is not None: self.prior_model.to(device) - # self.model_wrapper = DFRModelWrapper(self.model) - # self.prior_model_wrapper = DFRModelWrapper(self.prior_model) - - # if device is not None: - # self.model_wrapper.to(device) - # self.prior_model_wrapper.to(device) - # self.reg_loss.to(device) - - # if data_parallel: - # self.model_wrapper = TorchDataParallel(self.model_wrapper) - # self.prior_model_wrapper = TorchDataParallel(self.prior_model_wrapper) - # self.reg_loss = TorchDataParallel(self.reg_loss) - def train_epoch(self, data_loader): """Training epoch loop @@ -184,14 +151,11 @@ def train_epoch(self, data_loader): batch_size = data.shape[0] with self.amp_autocast(): - # h_enc, h_classif, output = self.model_wrapper( - # data, target, self.reg_layers_enc, self.reg_layers_classif, - # return_output=True, **self.amp_args) outputs = self.model( data, - target, - self.reg_layers_enc, - self.reg_layers_classif, + y=target, + return_enc_layers=self.reg_layers_enc, + return_classif_layers=self.reg_layers_classif, return_output=True, ) h_enc, h_classif, output = ( @@ -207,9 +171,8 @@ def train_epoch(self, data_loader): prior_outputs = self.prior_model( data, - target, - self.reg_layers_enc, - self.reg_layers_classif, + return_enc_layers=self.reg_layers_enc, + return_classif_layers=self.reg_layers_classif, return_output=False, ) prior_h_enc, prior_h_classif = ( diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 29964322..6763b035 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -10,7 +10,7 @@ import torch import torch.nn as nn -from ..utils import MetricAcc # , TorchDataParallel +from ..utils import MetricAcc from .torch_trainer 
import TorchTrainer from .xvector_trainer_deep_feat_reg import XVectorTrainerDeepFeatReg @@ -126,9 +126,6 @@ def __init__( if device is not None: self.feat_extractor.to(device) - # if data_parallel: - # self.feat_extractor = TorchDataParallel(self.feat_extractor) - def train_epoch(self, data_loader): """Training epoch loop @@ -154,14 +151,11 @@ def train_epoch(self, data_loader): feats = self.feat_extractor(data) with self.amp_autocast(): - # h_enc, h_classif, output = self.model_wrapper( - # feats, target, self.reg_layers_enc, self.reg_layers_classif, - # return_output=True, **self.amp_args) outputs = self.model( feats, - target, - self.reg_layers_enc, - self.reg_layers_classif, + y=target, + return_enc_layers=self.reg_layers_enc, + return_classif_layers=self.reg_layers_classif, return_output=True, ) h_enc, h_classif, output = ( @@ -175,14 +169,10 @@ def train_epoch(self, data_loader): ).mean() # you need to take the mean here because of the multi-gpu training batch_metrics["loss-classif"] = loss.item() - # prior_h_enc, prior_h_classif = self.prior_model_wrapper( - # feats, target, self.reg_layers_enc, self.reg_layers_classif, - # return_output=False, **self.amp_args) prior_outputs = self.prior_model( feats, - target, - self.reg_layers_enc, - self.reg_layers_classif, + return_enc_layers=self.reg_layers_enc, + return_classif_layers=self.reg_layers_classif, return_output=False, ) prior_h_enc, prior_h_classif = ( diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 06086d32..a00016e6 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -135,7 +135,7 @@ def train_epoch(self, data_loader): feats = self.feat_extractor(data) with self.amp_autocast(): - output = self.model(feats, target) + output = self.model(feats, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: @@ -184,7 +184,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): feats = self.feat_extractor(data) with self.amp_autocast(): - output = self.model(feats, **self.amp_args) + output = self.model(feats) loss = self.loss(output, target) batch_metrics["loss"] = loss.mean().item() diff --git a/hyperion/torch/utils/__init__.py b/hyperion/torch/utils/__init__.py index 6db39ef3..22af492c 100644 --- a/hyperion/torch/utils/__init__.py +++ b/hyperion/torch/utils/__init__.py @@ -5,6 +5,9 @@ from .devices import open_device from .metric_acc import MetricAcc +from .masking import seq_lengths_to_mask, scale_lengths +from .collation import collate_seq_1d, collate_seq_2d, collate_seq_nd from .eval_utils import eval_nnet_by_chunks, eval_nnet_overlap_add +from .vad_utils import remove_silence from .data_parallel import TorchDataParallel from .ddp import TorchDDP, FairShardedDDP, FairFullyShardedDDP diff --git a/hyperion/torch/utils/collation.py b/hyperion/torch/utils/collation.py new file mode 100644 index 00000000..25b3790b --- /dev/null +++ b/hyperion/torch/utils/collation.py @@ -0,0 +1,92 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba, Nanxin Chen) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import torch +import torch.nn as nn + + +def collate_seq_1d(x, pad_value=0): + """Combines a list/tuple of vectors with different lengths + into a single tensor. + + Args: + x: input lits/tuple of vectors. + + Returns: + 2D tensor with shape (num_vectors, max_vector_length). 
+        1D long tensor containing the vector lengths.
+    """
+    max_length = max([x_i.size(0) for x_i in x])
+    y = pad_value * torch.ones(len(x), max_length, dtype=x[0].dtype, device=x[0].device)
+    y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device)
+    for i, x_i in enumerate(x):
+        y[i, : x_i.size(0)] = x_i
+        y_lengths[i] = x_i.size(0)
+
+    return y, y_lengths
+
+
+def collate_seq_2d(x, pad_value=0, pad_dim=-1):
+    """Combines a list/tuple of matrices with different sizes in one of
+    the dimensions into a single 3d tensor.
+    Padding is applied to the dimension that is not constant.
+
+    Args:
+      x: input list/tuple of matrices.
+      pad_dim: padding dimension.
+
+    Returns:
+      3D tensor with shape (num_vectors, max_length, feat_dim) or (num_vectors, feat_dim, max_length).
+      1D long tensor containing the dimensions lengths.
+    """
+    max_length = max([x_i.size(pad_dim) for x_i in x])
+    y_size = list(x[0].size())
+    y_size[pad_dim] = max_length
+    y = pad_value * torch.ones(len(x), *y_size, dtype=x[0].dtype, device=x[0].device)
+    y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device)
+    if pad_dim == -1 or pad_dim == 1:
+        for i, x_i in enumerate(x):
+            y[i, :, : x_i.size(pad_dim)] = x_i
+            y_lengths[i] = x_i.size(pad_dim)
+    else:
+        for i, x_i in enumerate(x):
+            y[i, : x_i.size(pad_dim)] = x_i
+            y_lengths[i] = x_i.size(pad_dim)
+
+    return y, y_lengths
+
+
+def collate_seq_nd(x, pad_value=0, pad_dim=-1):
+    """Combines a list/tuple of N-d tensors with different sizes in one of
+    the dimensions into a single (N+1)-d tensor.
+    Padding is applied to the dimension that is not constant.
+
+    Args:
+      x: input list/tuple of tensors.
+      pad_dim: padding dimension.
+
+    Returns:
+      (N+1)-D combined tensor.
+      1D long tensor containing the dimensions lengths.
+    """
+    if x[0].dim() == 1:
+        return collate_seq_1d(x, pad_value)
+
+    if x[0].dim() == 2:
+        return collate_seq_2d(x, pad_value, pad_dim)
+
+    # here the general case: move the padded dim to the front, pad, and
+    # transpose back at the end.
+    max_length = max([x_i.size(pad_dim) for x_i in x])
+    y_trans_size = list(x[0].transpose(0, pad_dim).size())
+    y_trans_size[0] = max_length
+    y = pad_value * torch.ones(len(x), *y_trans_size, dtype=x[0].dtype, device=x[0].device)
+    y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device)
+    for i, x_i in enumerate(x):
+        y[i, : x_i.size(pad_dim)] = x_i.transpose(0, pad_dim)
+        y_lengths[i] = x_i.size(pad_dim)
+
+    if pad_dim > 0:
+        pad_dim = pad_dim + 1
+    y = y.transpose(1, pad_dim).contiguous()
+    return y, y_lengths
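A minimal usage sketch for the collation helpers above (illustrative, not part of the patch); the import path follows the utils __init__.py exports added in this commit:

import torch
from hyperion.torch.utils import collate_seq_nd

# three spectrograms with feat_dim=40 and different numbers of frames
feats = [torch.randn(40, 100), torch.randn(40, 73), torch.randn(40, 121)]
x, x_lengths = collate_seq_nd(feats, pad_dim=-1)
# x: (3, 40, 121) zero-padded batch; x_lengths: tensor([100, 73, 121])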
diff --git a/hyperion/torch/utils/masking.py b/hyperion/torch/utils/masking.py
new file mode 100644
index 00000000..b6ccd5ef
--- /dev/null
+++ b/hyperion/torch/utils/masking.py
@@ -0,0 +1,58 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba, Nanxin Chen)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import torch
+import torch.nn as nn
+
+
+def scale_lengths(lengths, max_out_length, max_in_length=None):
+    if lengths is None:
+        return None
+
+    if max_in_length is None:
+        max_in_length = lengths.max()
+
+    return torch.div(lengths * max_out_length, max_in_length, rounding_mode="floor")
+
+
+def seq_lengths_to_mask(lengths, max_length=None, dtype=None, time_dim=1):
+    """Creates a binary mask indicating the valid values in a sequence.
+
+    Args:
+      lengths: sequence lengths with shape=(batch,). If None, it returns None.
+      max_length: maximum length of the sequence.
+      dtype: dtype for the mask.
+      time_dim: dimension corresponding to time in the mask. This will
+                return a view of the mask which will adapt to the shape
+                of the tensor where we want to apply the mask.
+                This has to be a positive integer.
+
+    Returns:
+      Binary mask with shape=(batch,...,max_length) or None.
+    """
+    if lengths is None:
+        return None
+
+    assert lengths.dim() == 1
+
+    if max_length is None:
+        max_length = lengths.max()
+    idx = torch.arange(max_length, dtype=lengths.dtype, device=lengths.device)
+
+    # compute mask shape=(batch, max_length)
+    mask = idx.unsqueeze(0) < lengths.unsqueeze(1)
+
+    # view to match the tensor where we want to apply the mask
+    if time_dim > 1:
+        shape = [1] * (time_dim + 1)
+        shape[0] = lengths.size(0)
+        shape[time_dim] = -1
+        mask = mask.view(*shape)
+
+    # change dtype if needed
+    if dtype is not None:
+        mask = mask.to(dtype)
+
+    return mask
diff --git a/hyperion/torch/utils/misc.py b/hyperion/torch/utils/misc.py
index 2b4f6034..69d209eb 100644
--- a/hyperion/torch/utils/misc.py
+++ b/hyperion/torch/utils/misc.py
@@ -1,26 +1,69 @@
 """
- Copyright 2020 Johns Hopkins University (Author: Jesus Villalba, Nanxin Chen)
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba, Nanxin Chen)
  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 import torch
+import torch.nn as nn
 import torch.cuda.amp as amp
 
 
-def l2_norm(x, axis=-1):
+def l2_norm(x, dim=1, axis=None):
+    """Applies length normalization to vectors.
+
+    Args:
+      x: input tensor.
+      dim: dimension along which to normalize the vectors.
+      axis: same as dim (deprecated).
+
+    Returns:
+      Normalized tensor.
+    """
+    if axis is not None:
+        dim = axis
+
     with amp.autocast(enabled=False):
-        norm = torch.norm(x.float(), 2, axis, True) + 1e-10
+        norm = torch.norm(x.float(), 2, dim, True) + 1e-10
         y = torch.div(x, norm)
     return y
 
 
-def compute_snr(x, n, axis=-1):
-    P_x = 10 * torch.log10(torch.mean(x ** 2, dim=axis))
-    P_n = 10 * torch.log10(torch.mean(n ** 2, dim=axis))
+def compute_snr(x, n, dim=1, axis=None):
+    """Computes SNR (dB).
+
+    Args:
+      x: tensor with the clean signal.
+      n: tensor with the noise.
+      dim: dimension along which to compute the power.
+      axis: same as dim (deprecated).
+
+    Returns:
+      Tensor with SNR (dB).
+    """
+    if axis is not None:
+        dim = axis
+    P_x = 10 * torch.log10(torch.mean(x ** 2, dim=dim))
+    P_n = 10 * torch.log10(torch.mean(n ** 2, dim=dim))
     return P_x - P_n
 
 
 def compute_stats_adv_attack(x, x_adv):
+    """Computes statistics of adversarial attack samples.
+
+    Args:
+      x: benign signal tensor.
+      x_adv: adversarial signal tensor.
+
+    Returns:
+      SNR (dB).
+      Power of x.
+      Power of n.
+      L2 norm of x.
+      Linf norm of x.
+      L0 norm of n.
+      L2 norm of n.
+      Linf norm of n.
+    """
     if x.dim() > 2:
         x = torch.flatten(x, start_dim=1)
@@ -42,6 +85,17 @@ def compute_stats_adv_attack(x, x_adv):
 
 
 def get_selfsim_tarnon(y, return_mask=False):
+    """Computes the ground truth self-similarity matrix given
+    integer class labels.
+
+    Args:
+      y: integer tensor with class labels of shape (batch,).
+      return_mask: If True, it returns an upper triangular mask with zero diagonal.
+
+    Returns:
+      Self-similarity binary matrix with shape=(batch, batch).
+      Upper triangular mask.
+    """
     y_bin = y.unsqueeze(-1) - y.unsqueeze(0) + 1
     y_bin[y_bin != 1] = 0
     y_bin = y_bin.float()
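As an illustration (not part of the patch) of how the two masking helpers combine:

import torch
from hyperion.torch.utils import seq_lengths_to_mask, scale_lengths

x_lengths = torch.tensor([300, 270, 250])
mask = seq_lengths_to_mask(x_lengths)                # (3, 300) bool, True on valid frames
mask3d = seq_lengths_to_mask(x_lengths, time_dim=2)  # view of shape (3, 1, 300)
# lengths after a layer that subsamples time from 300 to 75 frames
sub_lengths = scale_lengths(x_lengths, max_out_length=75, max_in_length=300)
# -> tensor([75, 67, 62])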
diff --git a/hyperion/torch/utils/vad_utils.py b/hyperion/torch/utils/vad_utils.py
new file mode 100644
index 00000000..a47b92ef
--- /dev/null
+++ b/hyperion/torch/utils/vad_utils.py
@@ -0,0 +1,59 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba, Nanxin Chen)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import torch
+import torch.nn as nn
+
+from .collation import collate_seq_nd
+
+
+def remove_silence(x, vad, x_lengths=None, time_dim=1, tol=0):
+    """Removes silence samples/frames.
+
+    Args:
+      x: input signal/spectrogram of shape=(batch,...,time,...).
+      vad: binary voice activity detection mask of shape=(batch, time).
+      x_lengths: lengths of each sequence in x.
+      time_dim: which dimension in x is time.
+      tol: tolerance for the difference between time dimensions in x and vad.
+
+    Returns:
+      x without silence samples/frames.
+      1D long tensor with the lengths of the sequences without silence.
+    """
+
+    # we make x and vad time dimensions of the same size.
+    assert x.size(0) == vad.size(0), "batch-size is different for x and vad"
+    x_max_length = x.size(time_dim)
+    vad_max_length = vad.size(-1)
+    length_err = x_max_length - vad_max_length
+    assert abs(length_err) <= tol, (
+        f"Difference between x_length({x_max_length}) and "
+        f"vad_length({vad_max_length}) > tol ({tol})"
+    )
+    if length_err > 0:
+        vad = nn.functional.pad(vad, (0, length_err), mode="constant", value=0)
+    elif length_err < 0:
+        vad = vad[:, :x_max_length]
+
+    # if x_lengths is passed, we make sure that vad is 0 for time steps larger
+    # than x_length
+    if x_lengths is not None:
+        for i in range(x.size(0)):
+            vad[i, x_lengths[i] :] = 0
+
+    trans = False
+    if time_dim != 1 and time_dim != 1 - x.dim():
+        x = x.transpose(1, time_dim)
+        trans = True
+
+    y = []
+    for i in range(x.size(0)):
+        y.append(x[i, vad[i]])
+
+    y, y_lengths = collate_seq_nd(y, pad_dim=0)
+    if trans:
+        y = y.transpose(1, time_dim).contiguous()
+
+    return y, y_lengths

From 01ada9f64621f923321fc3e2102ec200a91c1ef3 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Wed, 9 Mar 2022 18:23:45 -0500
Subject: [PATCH 004/154] more docs and x_lengths support

---
 .../torch/layer_blocks/res2net1d_blocks.py    |  71 ++++-
 .../torch/layer_blocks/res2net2d_blocks.py    |  72 ++++-
 hyperion/torch/layer_blocks/res2net_blocks.py |  71 ++++-
 .../torch/layer_blocks/resnet1d_blocks.py     | 287 ++++++++++++++++--
 .../torch/layer_blocks/resnet2d_blocks.py     | 235 +++++++++++++-
 hyperion/torch/layer_blocks/resnet_blocks.py  |  76 ++++-
 .../torch/layer_blocks/seresnet_blocks.py     |  55 +++-
 hyperion/torch/layer_blocks/spine_blocks.py   |  30 ++
 .../transformer_conv2d_subsampler.py          |  14 +-
 .../layer_blocks/transformer_feedforward.py   |   4 +-
 hyperion/torch/layers/interpolate.py          |   7 +-
 11 files changed, 855 insertions(+), 67 deletions(-)

diff --git a/hyperion/torch/layer_blocks/res2net1d_blocks.py b/hyperion/torch/layer_blocks/res2net1d_blocks.py
index 6f66557b..59706f61 100644
--- a/hyperion/torch/layer_blocks/res2net1d_blocks.py
+++ b/hyperion/torch/layer_blocks/res2net1d_blocks.py
@@ -46,6 +46,28 @@ def _make_downsample(in_channels, out_channels, stride, norm_layer, norm_before)
 
 
 class Res2Net1dBasicBlock(nn.Module):
+    """Res2Net basic Block. This is a modified Res2Net block with
+    two 3x3 convolutions, instead of the standard bottleneck block.
+
+    Attributes:
+      in_channels: input channels.
+      channels: output channels.
+      kernel_size: kernel size.
+      activation: Non-linear activation object, string of configuration dictionary.
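A usage sketch for the remove_silence helper defined above (illustrative, not part of the patch):

import torch
from hyperion.torch.utils import remove_silence

x = torch.randn(2, 16000)                      # (batch, time) waveforms
vad = torch.zeros(2, 16000, dtype=torch.bool)  # voice activity masks
vad[0, 2000:12000] = True
vad[1, :8000] = True
y, y_lengths = remove_silence(x, vad)
# y: (2, 10000) with silence removed and zero padding; y_lengths: tensor([10000, 8000])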
+ stride: downsampling stride of the convs. + dropout_rate: dropout rate. + drop_connect_rate: drop-connect rate for stochastic number of layers. + width_factor: multiplication factor for the number of channels in the first layer + or the block. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + se_r: squeeze-excitation compression ratio. + """ + expansion = 1 def __init__( @@ -160,7 +182,17 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time). + + Returns: + Tensor with shape = (batch, out_channels, time). + """ residual = x split_size = [self.width_in for i in range(self.scale - 1)] split_size.append(self.in_channels % self.width_in + self.width_in) @@ -194,7 +226,7 @@ def forward(self, x): x = self.bn2(x) if self.se_layer: - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) if self.drop_connect_rate > 0: x = self.drop_connect(x) @@ -215,6 +247,26 @@ def forward(self, x): class Res2Net1dBNBlock(nn.Module): + """Res2Net bottleneck Block. + + Attributes: + in_channels: input channels. + channels: channels in bottleneck layer when width_factor=1. + kernel_size: kernel size in bottleneck layers. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + drop_connect_rate: drop-connect rate for stochastic number of layers. + width_factor: multiplication factor for the number of channels in the bottleneck. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + se_r: squeeze-excitation compression ratio. + """ + def __init__( self, in_channels, @@ -232,7 +284,6 @@ def __init__( norm_layer=None, norm_before=True, se_r=None, - num_feats=None, ): super().__init__() @@ -322,7 +373,17 @@ def out_channels(self): def expansion(self): return self.channels / self.width / self.scale - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time). + + Returns: + Tensor with shape = (batch, out_channels, time). 
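For illustration (not part of the patch), a sketch of how the new x_mask argument might be fed to one of these blocks; `block` is assumed to be any of the 1d blocks above built with in_channels=64:

import torch
from hyperion.torch.utils import seq_lengths_to_mask

x = torch.randn(4, 64, 200)                          # (batch, channels, time)
x_lengths = torch.tensor([200, 180, 150, 120])
x_mask = seq_lengths_to_mask(x_lengths, time_dim=2)  # (batch, 1, time)
y = block(x, x_mask=x_mask)  # SE pooling inside the block ignores padded frames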
+ """ residual = x x = self.conv1(x) @@ -360,7 +421,7 @@ def forward(self, x): x = self.bn3(x) if self.se_layer: - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) if self.drop_connect_rate > 0: x = self.drop_connect(x) diff --git a/hyperion/torch/layer_blocks/res2net2d_blocks.py b/hyperion/torch/layer_blocks/res2net2d_blocks.py index 37bbd966..e426d809 100644 --- a/hyperion/torch/layer_blocks/res2net2d_blocks.py +++ b/hyperion/torch/layer_blocks/res2net2d_blocks.py @@ -45,6 +45,29 @@ def _make_downsample(in_channels, out_channels, stride, norm_layer, norm_before) class Res2Net2dBasicBlock(nn.Module): + """Res2Net basic Block. This is a modified Res2Net block with + two 3x3 convolutions, instead of the standard bottleneck block. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + width_factor: multiplication factor for the number of channels in the first layer + or the block. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm2d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + se_r=None: squeeze-excitation compression ratio. + time_se: If true, squeeze is done only in time dimension. + num_feats: Number of features in dimension 2, needed if time_se=True. + """ + expansion = 1 def __init__( @@ -159,7 +182,17 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x split_size = [self.width_in for i in range(self.scale - 1)] split_size.append(self.in_channels % self.width_in + self.width_in) @@ -196,7 +229,7 @@ def forward(self, x): residual = self.downsample(residual) if self.se_layer: - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) x += residual x = self.act2(x) @@ -211,6 +244,27 @@ def forward(self, x): class Res2Net2dBNBlock(nn.Module): + """Res2Net bottleneck Block. + + Attributes: + in_channels: input channels. + channels: channels in bottleneck layer when width_factor=1. + kernel_size: kernel size in bottleneck layers. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + width_factor: multiplication factor for the number of channels in the bottleneck. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm2d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + se_r=None: squeeze-excitation compression ratio. + time_se: If true, squeeze is done only in time dimension. 
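For intuition, a concept sketch of the hierarchical channel split that Res2Net blocks implement (illustrative only; this is not the blocks' actual code, and the per-group convolutions are assumed):

import torch
import torch.nn as nn

scale, width = 4, 16                        # 4 channel groups of width 16
convs = nn.ModuleList(
    nn.Conv2d(width, width, kernel_size=3, padding=1) for _ in range(scale - 1)
)
x = torch.randn(2, scale * width, 50, 40)   # (batch, channels, height, width)
splits = torch.split(x, width, dim=1)
outs, y = [splits[0]], None                 # first group passes through
for i in range(1, scale):
    z = splits[i] if y is None else splits[i] + y  # feed previous group's output
    y = convs[i - 1](z)
    outs.append(y)
x_out = torch.cat(outs, dim=1)              # same shape as x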
+ num_feats: Number of features in dimension 2, needed if time_se=True. + """ + def __init__( self, in_channels, @@ -316,7 +370,17 @@ def out_channels(self): def expansion(self): return self.channels / self.width / self.scale - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -357,7 +421,7 @@ def forward(self, x): residual = self.downsample(residual) if self.se_layer: - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) x += residual x = self.act3(x) diff --git a/hyperion/torch/layer_blocks/res2net_blocks.py b/hyperion/torch/layer_blocks/res2net_blocks.py index 56804307..daf391be 100644 --- a/hyperion/torch/layer_blocks/res2net_blocks.py +++ b/hyperion/torch/layer_blocks/res2net_blocks.py @@ -42,6 +42,28 @@ def _make_downsample(in_channels, out_channels, stride, norm_layer, norm_before) class Res2NetBasicBlock(nn.Module): + """Res2Net basic Block. This is a modified Res2Net block with + two 3x3 convolutions, instead of the standard bottleneck block. + + Attributes: + in_channels: input channels. + channels: output channels. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + + dropout_rate: dropout rate. + width_factor: multiplication factor for the number of channels in the first layer + or the block. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + norm_layer: normalization layer constructor, if None BatchNorm2d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + se_r: squeeze-excitation compression ratio. + time_se: If true, squeeze is done only in time dimension. + num_feats: Number of features in dimension 2, needed if time_se=True. + """ + expansion = 1 def __init__( @@ -136,12 +158,22 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x split_size = [self.width_in for i in range(self.scale - 1)] split_size.append(self.in_channels % self.width_in + self.width_in) split_x = torch.split(x, split_size, 1) - # split_x = torch.split(x, self.width_in, 1) + x = [] for i in range(self.num_3x3): if i == 0 or self.stride > 1: @@ -173,7 +205,7 @@ def forward(self, x): residual = self.downsample(residual) if self.se_layer: - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) x += residual x = self.act2(x) @@ -188,6 +220,25 @@ def forward(self, x): class Res2NetBNBlock(nn.Module): + """Res2Net bottleneck Block. + + Attributes: + in_channels: input channels. + channels: channels in bottleneck layer when width_factor=1. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. 
+ width_factor: multiplication factor for the number of channels in the bottleneck. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + norm_layer: normalization layer constructor, if None BatchNorm2d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + se_r: squeeze-excitation compression ratio. + time_se: If true, squeeze is done only in time dimension. + num_feats: Number of features in dimension 2, needed if time_se=True. + """ + expansion = 4 def __init__( @@ -279,7 +330,17 @@ def __init__( def out_channels(self): return self.channels * self.expansion - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -320,7 +381,7 @@ def forward(self, x): residual = self.downsample(residual) if self.se_layer: - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) x += residual x = self.act3(x) diff --git a/hyperion/torch/layer_blocks/resnet1d_blocks.py b/hyperion/torch/layer_blocks/resnet1d_blocks.py index d1965708..01fd1087 100644 --- a/hyperion/torch/layer_blocks/resnet1d_blocks.py +++ b/hyperion/torch/layer_blocks/resnet1d_blocks.py @@ -113,6 +113,23 @@ def _make_upsample( class ResNet1dBasicBlock(nn.Module): + """ResNet 1d basic Block. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + drop_connect_rate: drop-connect rate for stochastic number of layers. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ + expansion = 1 # __constants__ = ['downsample'] @@ -182,7 +199,16 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -218,6 +244,22 @@ def forward(self, x): class ResNet1dBasicDecBlock(nn.Module): + """ResNet 1d basic Block for decoders. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: upsampling stride of the convs. + dropout_rate: dropout rate. + drop_connect_rate: drop-connect rate for stochastic number of layers. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. 
+ """ expansion = 1 # __constants__ = ['downsample'] @@ -288,7 +330,16 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -324,6 +375,25 @@ def forward(self, x): class ResNet1dBNBlock(nn.Module): + """ResNet 1d bottleneck Block. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + width_factor: multiplication factor for the number of channels in the bottleneck. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + expansion: expansion factor of the bottlneck channels to output channels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ + def __init__( self, in_channels, @@ -401,7 +471,17 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ + residual = x x = self.conv1(x) @@ -443,6 +523,24 @@ def forward(self, x): class ResNet1dBNDecBlock(nn.Module): + """ResNet 1d bottleneck Block for decoders. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: upsampling stride of the convs. + dropout_rate: dropout rate. + width_factor: multiplication factor for the number of channels in the bottleneck. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + expansion: expansion factor of the bottlneck channels to output channels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ def __init__( self, in_channels, @@ -514,7 +612,16 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -556,6 +663,23 @@ def forward(self, x): class SEResNet1dBasicBlock(ResNet1dBasicBlock): + """Squeeze-excitation ResNet 1d basic Block. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. 
+ drop_connect_rate: drop-connect rate for stochastic number of layers. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + se_r: squeeze-excitation compression ratio. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ expansion = 1 def __init__( @@ -592,7 +716,17 @@ def __init__( self.se_layer = SEBlock1d(channels, se_r, activation) - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -609,7 +743,7 @@ def forward(self, x): if self.norm_before: x = self.bn2(x) - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) if self.drop_connect_rate > 0: x = self.drop_connect(x) @@ -629,6 +763,23 @@ def forward(self, x): class SEResNet1dBasicDecBlock(ResNet1dBasicDecBlock): + """Squeeze-excitation ResNet 1d basic Block for decoders. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: upsampling stride of the convs. + dropout_rate: dropout rate. + drop_connect_rate: drop-connect rate for stochastic number of layers. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + se_r: squeeze-excitation compression ratio. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ expansion = 1 def __init__( @@ -669,7 +820,17 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -686,7 +847,7 @@ def forward(self, x): if self.norm_before: x = self.bn2(x) - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) if self.drop_connect_rate > 0: x = self.drop_connect(x) @@ -706,6 +867,26 @@ def forward(self, x): class SEResNet1dBNBlock(ResNet1dBNBlock): + """Squeeze-excitation ResNet 1d bottleneck Block. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + width_factor: multiplication factor for the number of channels in the bottleneck. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + expansion: expansion factor of the bottlneck channels to output channels. + se_r: squeeze-excitation compression ratio. 
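To make the se_r compression ratio concrete, here is a toy sketch of a masked squeeze-excitation step (illustrative only; this is not the SEBlock1d implementation):

import torch
import torch.nn as nn

class ToySE1d(nn.Module):
    """Masked mean over time, bottleneck of width channels // se_r."""
    def __init__(self, channels, se_r=16):
        super().__init__()
        self.fc1 = nn.Conv1d(channels, channels // se_r, kernel_size=1)
        self.fc2 = nn.Conv1d(channels // se_r, channels, kernel_size=1)

    def forward(self, x, x_mask=None):  # x: (batch, channels, time)
        if x_mask is None:
            s = x.mean(dim=-1, keepdim=True)
        else:
            m = x_mask.to(x.dtype)      # (batch, 1, time), zeros on padding
            s = (x * m).sum(dim=-1, keepdim=True) / m.sum(dim=-1, keepdim=True)
        scale = torch.sigmoid(self.fc2(torch.relu(self.fc1(s))))
        return x * scale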
+ use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ + def __init__( self, in_channels, @@ -742,7 +923,17 @@ def __init__( self.se_layer = SEBlock1d(channels, se_r, activation) - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -765,7 +956,7 @@ def forward(self, x): if self.norm_before: x = self.bn3(x) - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) if self.drop_connect_rate > 0: x = self.drop_connect(x) @@ -785,6 +976,25 @@ def forward(self, x): class SEResNet1dBNDecBlock(ResNet1dBNDecBlock): + """Squeeze-excitation ResNet 1d bottleneck Block for decoders. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + width_factor: multiplication factor for the number of channels in the bottleneck. + scale: scale parameter of the Res2Net. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + expansion: expansion factor of the bottlneck channels to output channels. + se_r: squeeze-excitation compression ratio. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ def __init__( self, in_channels, @@ -821,7 +1031,17 @@ def __init__( self.se_layer = SEBlock1d(channels, se_r, activation) - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -844,7 +1064,7 @@ def forward(self, x): if self.norm_before: x = self.bn3(x) - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) if self.drop_connect_rate > 0: x = self.drop_connect(x) @@ -864,6 +1084,24 @@ def forward(self, x): class ResNet1dEndpoint(nn.Module): + """ Class that connects the ouputs of the ResNet1d to the rest of the network + when using multilevel feature aggregation. + + It converts the features of all the levels that we are going to aggregate + to the same temporal scale. + + Attributes: + in_channels: input channels. + channels: output channels. + in_scale: resolution scale of the input feature maps. + scale: resolution scale of the output feature maps. + upsampling_mode: algorithm used for upsampling: 'nearest' | 'linear' | 'bilinear' + activation: Non-linear activation object, string of configuration dictionary. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm1d is used. 
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+
+    """
     def __init__(
         self,
         in_channels,
         channels,
         in_scale,
         scale,
         upsampling_mode="nearest",
         activation={"name": "relu6", "inplace": True},
+        use_norm=True,
         norm_layer=None,
         norm_before=True,
     ):
-        """
-        Class that connects the ouputs of the ResNet1d to the rest of the network
-        when using multilevel feature aggregation
-
-        It converts the features of all the levels that we are going to aggregate
-        to the same temporal scale
-        """
+        super().__init__()
         if norm_layer is None:
             norm_layer = nn.BatchNorm1d
         self.in_channels = in_channels
         self.channels = channels
+        self.use_norm = use_norm
         self.norm_before = norm_before
         self.rel_scale = in_scale / scale
         if scale >= in_scale:
@@ -906,12 +1140,21 @@ def __init__(
             )
 
         self.act = AF.create(activation)
-        if not self.norm_before:
+        if use_norm and not self.norm_before:
             self.bn = norm_layer(channels)
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_time).
+          x_mask: unused.
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_time).
+        """
         x = self.resample(x)
         x = self.act(x)
-        if not self.norm_before:
+        if self.use_norm and not self.norm_before:
             x = self.bn(x)
         return x
diff --git a/hyperion/torch/layer_blocks/resnet2d_blocks.py b/hyperion/torch/layer_blocks/resnet2d_blocks.py
index 6149319c..7fbb8327 100644
--- a/hyperion/torch/layer_blocks/resnet2d_blocks.py
+++ b/hyperion/torch/layer_blocks/resnet2d_blocks.py
@@ -79,6 +79,23 @@ def _make_upsample(in_channels, out_channels, stride, norm_layer, norm_before):
 
 
 class ResNet2dBasicBlock(nn.Module):
+    """ResNet 2d basic Block.
+
+    Attributes:
+      in_channels: input channels.
+      channels: output channels.
+      kernel_size: kernel size.
+      activation: Non-linear activation object, string of configuration dictionary.
+      stride: downsampling stride of the convs.
+      dropout_rate: dropout rate.
+      groups: number of groups in the convolutions.
+      dilation: dilation factor of the conv. kernels.
+      use_norm: if True, it uses normalization layers, otherwise it does not.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
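As a worked example of the endpoint's resampling logic (values illustrative, assuming in_scale and scale denote the input and target downsampling strides):

in_scale, scale = 8, 4
rel_scale = in_scale / scale   # 2.0 -> upsample time by 2 using `upsampling_mode`
in_scale, scale = 2, 4
rel_scale = in_scale / scale   # 0.5 -> downsample time by 2 (scale >= in_scale branch)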
+ + """ + expansion = 1 def __init__( @@ -235,7 +278,16 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -268,6 +320,23 @@ def forward(self, x): class ResNet2dBNBlock(nn.Module): + """ResNet 2d bottleneck Block. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size in bottleneck. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + expansion: expansion factor of the bottlneck channels to output channels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm2d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ + def __init__( self, in_channels, @@ -339,7 +408,16 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -378,6 +456,22 @@ def forward(self, x): class ResNet2dBNDecBlock(nn.Module): + """ResNet 2d bottleneck Block decoder. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size in bottleneck. + activation: Non-linear activation object, string of configuration dictionary. + stride: upsampling stride of the convs. + dropout_rate: dropout rate. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + expansion: expansion factor of the bottlneck channels to output channels. + use_norm: if True, it uses normalization layers, otherwise it does not. + norm_layer: normalization layer constructor, if None BatchNorm2d is used. + norm_before: if True, normalization layer is before the activation, after otherwise. + """ def __init__( self, in_channels, @@ -443,7 +537,16 @@ def __init__( def out_channels(self): return self.channels - def forward(self, x): + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: unused. + + Returns: + Tensor with shape = (batch, out_channels, out_heigh, out_width). + """ residual = x x = self.conv1(x) @@ -482,6 +585,23 @@ def forward(self, x): class SEResNet2dBasicBlock(ResNet2dBasicBlock): + """Squeeze-excitation ResNet 2d basic Block. + + Attributes: + in_channels: input channels. + channels: output channels. + kernel_size: kernel size. + activation: Non-linear activation object, string of configuration dictionary. + stride: downsampling stride of the convs. + dropout_rate: dropout rate. + drop_connect_rate: drop-connect rate for stochastic number of layers. + groups: number of groups in the convolutions. + dilation: dilation factor of the conv. kernels. + se_r: squeeze-excitation compression ratio. + use_norm: if True, it uses normalization layers, otherwise it does not. 
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+    """
     expansion = 1
 
     def __init__(
@@ -516,7 +636,17 @@ def __init__(
 
         self.se_layer = SEBlock2d(channels, se_r, activation)
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: Binary mask indicating which spatial dimensions are valid, with
+            shape=(batch, time), (batch, 1, time), or (batch, height, width).
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         residual = x
 
         x = self.conv1(x)
@@ -536,7 +666,7 @@ def forward(self, x):
         if self.downsample is not None:
             residual = self.downsample(residual)
 
-        x = self.se_layer(x)
+        x = self.se_layer(x, x_mask=x_mask)
         x += residual
         x = self.act2(x)
 
@@ -550,6 +680,23 @@ def forward(self, x):
 
 
 class SEResNet2dBasicDecBlock(ResNet2dBasicDecBlock):
+    """Squeeze-excitation ResNet 2d basic Block for decoders.
+
+    Attributes:
+      in_channels: input channels.
+      channels: output channels.
+      kernel_size: kernel size.
+      activation: Non-linear activation object, string, or configuration dictionary.
+      stride: upsampling stride of the convs.
+      dropout_rate: dropout rate.
+      drop_connect_rate: drop-connect rate for stochastic number of layers.
+      groups: number of groups in the convolutions.
+      dilation: dilation factor of the conv. kernels.
+      se_r: squeeze-excitation compression ratio.
+      use_norm: if True, it uses normalization layers, otherwise it does not.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+    """
     expansion = 1
 
     def __init__(
@@ -588,7 +735,17 @@ def __init__(
     def out_channels(self):
         return self.channels
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: Binary mask indicating which spatial dimensions are valid, with
+            shape=(batch, time), (batch, 1, time), or (batch, height, width).
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         residual = x
 
         x = self.conv1(x)
@@ -608,7 +765,7 @@ def forward(self, x):
         if self.upsample is not None:
             residual = self.upsample(residual)
 
-        x = self.se_layer(x)
+        x = self.se_layer(x, x_mask=x_mask)
         x += residual
         x = self.act2(x)
 
@@ -622,6 +779,23 @@ def forward(self, x):
 
 
 class SEResNet2dBNBlock(ResNet2dBNBlock):
+    """Squeeze-excitation ResNet 2d bottleneck Block.
+
+    Attributes:
+      in_channels: input channels.
+      channels: output channels.
+      kernel_size: kernel size.
+      activation: Non-linear activation object, string, or configuration dictionary.
+      stride: downsampling stride of the convs.
+      dropout_rate: dropout rate.
+      groups: number of groups in the convolutions.
+      dilation: dilation factor of the conv. kernels.
+      expansion: expansion factor of the bottleneck channels to output channels.
+      se_r: squeeze-excitation compression ratio.
+      use_norm: if True, it uses normalization layers, otherwise it does not.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+    """
     def __init__(
         self,
         in_channels,
@@ -656,7 +830,17 @@ def __init__(
 
         self.se_layer = SEBlock2d(channels, se_r, activation)
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: Binary mask indicating which spatial dimensions are valid, with
+            shape=(batch, time), (batch, 1, time), or (batch, height, width).
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         residual = x
 
         x = self.conv1(x)
@@ -682,7 +866,7 @@ def forward(self, x):
         if self.downsample is not None:
             residual = self.downsample(residual)
 
-        x = self.se_layer(x)
+        x = self.se_layer(x, x_mask=x_mask)
         x += residual
         x = self.act3(x)
 
@@ -696,6 +880,23 @@ def forward(self, x):
 
 
 class SEResNet2dBNDecBlock(ResNet2dBNDecBlock):
+    """Squeeze-excitation ResNet 2d bottleneck Block for decoders.
+
+    Attributes:
+      in_channels: input channels.
+      channels: output channels.
+      kernel_size: kernel size.
+      activation: Non-linear activation object, string, or configuration dictionary.
+      stride: upsampling stride of the convs.
+      dropout_rate: dropout rate.
+      groups: number of groups in the convolutions.
+      dilation: dilation factor of the conv. kernels.
+      expansion: expansion factor of the bottleneck channels to output channels.
+      se_r: squeeze-excitation compression ratio.
+      use_norm: if True, it uses normalization layers, otherwise it does not.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+    """
     def __init__(
         self,
         in_channels,
@@ -730,7 +931,17 @@ def __init__(
 
         self.se_layer = SEBlock2d(channels, se_r, activation)
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: Binary mask indicating which spatial dimensions are valid, with
+            shape=(batch, time), (batch, 1, time), or (batch, height, width).
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         residual = x
 
         x = self.conv1(x)
@@ -756,7 +967,7 @@ def forward(self, x):
         if self.upsample is not None:
             residual = self.upsample(residual)
 
-        x = self.se_layer(x)
+        x = self.se_layer(x, x_mask=x_mask)
         x += residual
         x = self.act3(x)
 
diff --git a/hyperion/torch/layer_blocks/resnet_blocks.py b/hyperion/torch/layer_blocks/resnet_blocks.py
index 439a440a..83e6d174 100644
--- a/hyperion/torch/layer_blocks/resnet_blocks.py
+++ b/hyperion/torch/layer_blocks/resnet_blocks.py
@@ -112,9 +112,21 @@ def forward(self, x):
 
 
 class ResNetBasicBlock(nn.Module):
-    expansion = 1
+    """ResNet basic Block.
+
+    Attributes:
+      in_channels: input channels.
+      channels: output channels.
+      activation: Non-linear activation object, string, or configuration dictionary.
+      stride: downsampling stride of the convs.
+      dropout_rate: dropout rate.
+      groups: number of groups in the convolutions.
+      dilation: dilation factor of the conv. kernels.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+    """
 
-    # __constants__ = ['downsample']
+    expansion = 1
 
     def __init__(
         self,
@@ -166,7 +178,16 @@ def __init__(
     def out_channels(self):
         return self.channels
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: unused.
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         residual = x
 
         x = self.conv1(x)
@@ -199,6 +220,20 @@ def forward(self, x):
 
 
 class ResNetBNBlock(nn.Module):
+    """ResNet bottleneck Block.
+
+    Attributes:
+      in_channels: input channels.
+      channels: channels in bottleneck layer when width_factor=1.
+      activation: Non-linear activation object, string, or configuration dictionary.
+      stride: downsampling stride of the convs.
+      dropout_rate: dropout rate.
+      groups: number of groups in the convolutions.
+      dilation: dilation factor of the conv. kernels.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+    """
+
     expansion = 4
 
     # __constants__ = ['downsample']
@@ -256,7 +291,16 @@ def __init__(
     def out_channels(self):
         return self.channels * self.expansion
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: unused.
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         residual = x
 
         x = self.conv1(x)
@@ -305,6 +349,18 @@ def forward(self, x):
 
 
 class ResNetEndpointBlock(nn.Module):
+    """ResNet endpoint basic block. This is used as the output block when
+    the output combines feature maps from different resolution levels.
+
+    Attributes:
+      in_channels: input channels.
+      out_channels: output channels.
+      scale: interpolation factor.
+      activation: Non-linear activation object, string, or configuration dictionary.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+    """
+
     def __init__(
         self,
         in_channels,
@@ -334,7 +390,16 @@ def __init__(
         if self.scale > 1:
             self.upsample = Interpolate(scale_factor=scale, mode="nearest")
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: unused.
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         if self.in_channels != self.out_channels:
             x = self.conv(x)
 
diff --git a/hyperion/torch/layer_blocks/seresnet_blocks.py b/hyperion/torch/layer_blocks/seresnet_blocks.py
index a5a7fecd..7f8d0bae 100644
--- a/hyperion/torch/layer_blocks/seresnet_blocks.py
+++ b/hyperion/torch/layer_blocks/seresnet_blocks.py
@@ -13,6 +13,23 @@
 
 
 class SEResNetBasicBlock(ResNetBasicBlock):
+    """Squeeze-excitation ResNet basic Block.
+
+    Attributes:
+      in_channels: input channels.
+      channels: output channels.
+      activation: Non-linear activation object, string, or configuration dictionary.
+      stride: downsampling stride of the convs.
+      dropout_rate: dropout rate.
+      groups: number of groups in the convolutions.
+      dilation: dilation factor of the conv. kernels.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+      se_r: squeeze-excitation compression ratio.
+      time_se: if True, squeeze is done only in the time dimension.
+      num_feats: number of features in dimension 2, needed if time_se=True.
+    """
+
     def __init__(
         self,
         in_channels,
@@ -46,7 +63,16 @@ def __init__(
         else:
             self.se_layer = SEBlock2D(channels, se_r, activation)
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: unused.
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         residual = x
 
         x = self.conv1(x)
@@ -80,6 +106,23 @@ def forward(self, x):
 
 
 class SEResNetBNBlock(ResNetBNBlock):
+    """Squeeze-excitation ResNet bottleneck Block.
+
+    Attributes:
+      in_channels: input channels.
+      channels: channels in bottleneck layer when width_factor=1.
+      activation: Non-linear activation object, string, or configuration dictionary.
+      stride: downsampling stride of the convs.
+      dropout_rate: dropout rate.
+      groups: number of groups in the convolutions.
+      dilation: dilation factor of the conv. kernels.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+      se_r: squeeze-excitation compression ratio.
+      time_se: if True, squeeze is done only in the time dimension.
+      num_feats: number of features in dimension 2, needed if time_se=True.
+    """
+
     def __init__(
         self,
         in_channels,
@@ -115,7 +158,16 @@ def __init__(
         else:
             self.se_layer = SEBlock2D(channels * self.expansion, se_r, activation)
 
-    def forward(self, x):
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+          x_mask: unused.
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         residual = x
 
         x = self.conv1(x)
diff --git a/hyperion/torch/layer_blocks/spine_blocks.py b/hyperion/torch/layer_blocks/spine_blocks.py
index 21978192..c97cb027 100644
--- a/hyperion/torch/layer_blocks/spine_blocks.py
+++ b/hyperion/torch/layer_blocks/spine_blocks.py
@@ -132,6 +132,14 @@ def __init__(
         self.act1 = AF.create(activation)
 
     def forward(self, x):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         x = self.conv1(x)
         if self.norm_before:
             x = self.bn1(x)
@@ -200,6 +208,14 @@ def __init__(
         )
 
     def forward(self, x):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         if self.do_endpoint_conv and self.in_channels != self.channels:
             x = self.conv1(x)
             if self.norm_before:
@@ -254,6 +270,14 @@ def __init__(
         self.bn2 = norm_layer(out_channels)
 
     def forward(self, x):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, in_channels, in_height, in_width).
+
+        Returns:
+          Tensor with shape = (batch, out_channels, out_height, out_width).
+        """
         x = self.conv1(x)
         if self.norm_before:
             x = self.bn1(x)
diff --git a/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py b/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py
index c841a056..bdd9b707 100644
--- a/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py
+++ b/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py
@@ -31,16 +31,16 @@ def __init__(self, in_feats, out_feats, hid_act, pos_enc, time_dim=1):
             nn.Linear(out_feats * (((in_feats - 1) // 2 - 1) // 2), out_feats), pos_enc
         )
 
-    def forward(self, x, mask):
+    def forward(self, x, x_mask=None):
         """Forward function.
 
         Args:
-            x: input tensor with size=(batch, time, num_feats)
-            mask: mask to indicate valid time steps for x (batch, time1, time2)
+            x: input tensor with size=(batch, time, in_feats)
+            x_mask: mask to indicate valid time steps for x (batch, time1, time2)
 
         Returns:
-           Tensor with output features
-           Tensor with subsampled mask
+           Tensor with output features with shape = (batch, time//4, out_feats)
+           Tensor with the mask subsampled by a factor of 4 in time.
         """
         if self.time_dim == 1:
             x = x.transpose(1, 2)
@@ -49,6 +49,6 @@ def forward(self, x, mask):
         x = self.conv(x)
         b, c, f, t = x.size()
         x = self.out(x.contiguous().view(b, c * f, t).transpose(1, 2))
-        if mask is None:
+        if x_mask is None:
             return x, None
-        return x, mask[:, :, :-2:2][:, :, :-2:2]
+        return x, x_mask[:, :, :-2:2][:, :, :-2:2]
diff --git a/hyperion/torch/layer_blocks/transformer_feedforward.py b/hyperion/torch/layer_blocks/transformer_feedforward.py
index 900500ff..93cc6b66 100644
--- a/hyperion/torch/layer_blocks/transformer_feedforward.py
+++ b/hyperion/torch/layer_blocks/transformer_feedforward.py
@@ -40,7 +40,7 @@ def forward(self, x):
            x: input size=(batch, time, num_feats)
 
         Returns:
-           tensor size=(batch, time, num_feats)
+           Tensor size=(batch, time, num_feats)
         """
         if self.time_dim != 1:
             x = x.transpose(1, time_dim)
@@ -157,7 +157,7 @@ def forward(self, x):
            x: input tensors with size=(batch, time, num_channels) or size=(batch, num_channels, time).
 
         Returns:
-           output tensor same size as input
+           Output tensor same size as input
         """
         if self.time_dim != -1:
             x.transpose(-1, self.time_dim)
diff --git a/hyperion/torch/layers/interpolate.py b/hyperion/torch/layers/interpolate.py
index 94b3d2ed..335433fe 100644
--- a/hyperion/torch/layers/interpolate.py
+++ b/hyperion/torch/layers/interpolate.py
@@ -12,8 +12,9 @@ class Interpolate(nn.Module):
     """Interpolation class.
 
     Attributes:
-      scale_factor: upsampling scale factor.
-      mode: algorithm used for upsampling: 'nearest' | 'linear' | 'bilinear' | 'bicubic' | 'trilinear' | 'area'.
+      scale_factor: Upsampling scale factor.
+      mode: Algorithm used for upsampling:
+        'nearest' | 'linear' | 'bilinear' | 'bicubic' | 'trilinear' | 'area'.
     """
 
     def __init__(self, scale_factor, mode="nearest"):
@@ -34,7 +35,7 @@ def forward(self, x):
         """Interpolates the input.
 
         Args:
-          x: input tensor.
+          x: Input tensor.
 
         Returns:
           Interpolated tensor.
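The hunks above converge on a single convention: layer blocks take forward(self, x, x_mask=None), where x_mask flags the valid time steps of padded sequences, and only the squeeze-excitation blocks actually consume the mask. A minimal sketch of the intended call pattern follows; the helper body here is an illustrative re-implementation written for this note, not the library's own code, and it only assumes the call signature seq_lengths_to_mask(x_lengths, max_length, time_dim) that appears in the encoder patches below:

    import torch

    def seq_lengths_to_mask(lengths, max_length, time_dim=2):
        # True marks valid steps; singleton dims are inserted so the time axis
        # ends up at position time_dim.
        idx = torch.arange(max_length, device=lengths.device)
        mask = idx.unsqueeze(0) < lengths.unsqueeze(1)  # (batch, max_length)
        for _ in range(time_dim - 1):
            mask = mask.unsqueeze(1)
        return mask

    x = torch.randn(4, 64, 200)  # (batch, channels, time)
    x_lengths = torch.tensor([200, 180, 150, 90])
    x_mask = seq_lengths_to_mask(x_lengths, x.size(-1), time_dim=2)
    # Non-SE blocks accept the mask and ignore it, so an encoder can pass it
    # uniformly to every block: x = block(x, x_mask=x_mask)

Because the mask argument is accepted everywhere, encoders can thread one mask through heterogeneous stacks of blocks without special-casing which block types use it.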
From 89c697ca288d76a1eb1ba407b6000999979460fb Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 10 Mar 2022 10:02:01 -0500 Subject: [PATCH 005/154] more minor bugs fixed --- hyperion/torch/narchs/dc1d_decoder.py | 4 +- hyperion/torch/narchs/dc1d_encoder.py | 4 +- hyperion/torch/narchs/dc2d_decoder.py | 6 +- hyperion/torch/narchs/dc2d_encoder.py | 4 +- hyperion/torch/narchs/efficient_net.py | 2 +- hyperion/torch/narchs/resnet.py | 24 +-- hyperion/torch/narchs/resnet1d_decoder.py | 6 +- hyperion/torch/narchs/resnet1d_encoder.py | 33 +++- hyperion/torch/narchs/resnet2d_decoder.py | 6 +- hyperion/torch/narchs/resnet2d_encoder.py | 44 +++++- hyperion/torch/narchs/torch_na_loader.py | 2 +- .../torch/narchs/transformer_encoder_v1.py | 2 +- hyperion/torch/narchs/xvector_classif.py | 145 ------------------ 13 files changed, 101 insertions(+), 181 deletions(-) delete mode 100644 hyperion/torch/narchs/xvector_classif.py diff --git a/hyperion/torch/narchs/dc1d_decoder.py b/hyperion/torch/narchs/dc1d_decoder.py index c35d7720..82ac5a8a 100644 --- a/hyperion/torch/narchs/dc1d_decoder.py +++ b/hyperion/torch/narchs/dc1d_decoder.py @@ -66,7 +66,7 @@ def __init__( self.norm_layer = norm_layer norm_groups = None if norm_layer == "group-norm": - norm_groups = min(np.min(self.conv_channels) // 2, 32) + norm_groups = min(min(self.conv_channels) // 2, 32) self._norm_layer = NLF.create(norm_layer, norm_groups) # stem block @@ -196,7 +196,7 @@ def _standarize_convblocks_param(p, num_blocks, p_name): return p def _compute_out_size(self, in_size): - out_size = in_size * in_stride + out_size = in_size * self.in_stride for stride in self.conv_strides: out_size *= stride diff --git a/hyperion/torch/narchs/dc1d_encoder.py b/hyperion/torch/narchs/dc1d_encoder.py index 091629f4..c2fb3d02 100644 --- a/hyperion/torch/narchs/dc1d_encoder.py +++ b/hyperion/torch/narchs/dc1d_encoder.py @@ -64,7 +64,7 @@ def __init__( self.norm_layer = norm_layer norm_groups = None if norm_layer == "group-norm": - norm_groups = min(np.min(self.conv_channels) // 2, 32) + norm_groups = min(min(self.conv_channels) // 2, 32) self._norm_layer = NLF.create(norm_layer, norm_groups) # stem block @@ -209,7 +209,7 @@ def out_shape(self, in_shape=None): else: T = self._compute_out_size(in_shape[2]) - return (in_shape[0], out_chanels, T) + return (in_shape[0], out_channels, T) def forward(self, x): diff --git a/hyperion/torch/narchs/dc2d_decoder.py b/hyperion/torch/narchs/dc2d_decoder.py index 6ad7c4c9..e21d615a 100644 --- a/hyperion/torch/narchs/dc2d_decoder.py +++ b/hyperion/torch/narchs/dc2d_decoder.py @@ -66,7 +66,7 @@ def __init__( self.norm_layer = norm_layer norm_groups = None if norm_layer == "group-norm": - norm_groups = min(np.min(self.conv_channels) // 2, 32) + norm_groups = min(min(self.conv_channels) // 2, 32) self._norm_layer = NLF.create(norm_layer, norm_groups) # stem block @@ -210,7 +210,7 @@ def _standarize_convblocks_param(p, num_blocks, p_name): return p def _compute_out_size(self, in_size): - out_size = in_size * in_stride + out_size = in_size * self.in_stride for stride in self.conv_strides: out_size *= stride @@ -243,7 +243,7 @@ def out_shape(self, in_shape=None): else: W = self._compute_out_size(in_shape[3]) - return (in_shape[0], out_chanels, H, W) + return (in_shape[0], out_channels, H, W) def _match_shape(self, x, target_shape): x_dim = x.dim() diff --git a/hyperion/torch/narchs/dc2d_encoder.py b/hyperion/torch/narchs/dc2d_encoder.py index c6857ff6..4102c4f7 100644 --- a/hyperion/torch/narchs/dc2d_encoder.py +++ 
b/hyperion/torch/narchs/dc2d_encoder.py
@@ -65,7 +65,7 @@ def __init__(
         self.norm_layer = norm_layer
         norm_groups = None
         if norm_layer == "group-norm":
-            norm_groups = min(np.min(self.conv_channels) // 2, 32)
+            norm_groups = min(min(self.conv_channels) // 2, 32)
         self._norm_layer = NLF.create(norm_layer, norm_groups)
 
         # stem block
@@ -215,7 +215,7 @@ def out_shape(self, in_shape=None):
         else:
             W = self._compute_out_size(in_shape[3])
 
-        return (in_shape[0], out_chanels, H, W)
+        return (in_shape[0], out_channels, H, W)
 
     def forward(self, x):
 
diff --git a/hyperion/torch/narchs/efficient_net.py b/hyperion/torch/narchs/efficient_net.py
index ab60b8e2..8a71d6f4 100644
--- a/hyperion/torch/narchs/efficient_net.py
+++ b/hyperion/torch/narchs/efficient_net.py
@@ -132,7 +132,7 @@ def __init__(
         # set depth/width scales from net name
         self.cfg_width_scale = width_scale
         self.cfg_depth_scale = depth_scale
-        if width_scale is None or dept_scale is None:
+        if width_scale is None or depth_scale is None:
             width_scale, depth_scale = self.efficientnet_params(effnet_type)[:2]
         self.width_scale = width_scale
         self.depth_scale = depth_scale
diff --git a/hyperion/torch/narchs/resnet.py b/hyperion/torch/narchs/resnet.py
index ca972713..9185964c 100644
--- a/hyperion/torch/narchs/resnet.py
+++ b/hyperion/torch/narchs/resnet.py
@@ -29,9 +29,12 @@ class ResNet(NetArch):
     """ResNet2D base class
 
     Attributes:
-      block: resnet basic block type in ['basic', 'bn', 'sebasic', 'sebn'], meaning
+      block: resnet basic block type in
+             ['basic', 'bn', 'sebasic', 'sebn', 'res2basic',
+              'res2bn', 'seres2basic', 'seres2bn'], meaning
        basic resnet block, bottleneck resnet block, basic block with squeeze-excitation,
-       and bottleneck block with squeeze-excitation
+       bottleneck block with squeeze-excitation, Res2Net basic and bottleneck, and
+       squeeze-excitation Res2Net basic and bottleneck.
       num_layers: list with the number of layers in each of the 4 layer blocks that we find in
                   resnets, after each layer block feature maps are downsmapled times 2 in each dimension
@@ -46,6 +49,8 @@ class ResNet(NetArch):
       out_act: output activation
       zero_init_residual: initializes batchnorm weights to zero so each residual block behaves as identitiy
           at the beggining. We observed worse results when using this option in x-vectors
+      multilevel: if True, the output is the combination of the feature maps at different resolution levels.
+      endpoint_channels: number of output channels when multilevel is True.
       groups: number of groups in convolutions
       replace_stride_with_dilation: use dialted conv nets instead of downsammpling, we never tested this.
       dropout_rate: dropout rate
@@ -57,7 +62,8 @@ class ResNet(NetArch):
          instead of time-freq dimension or HxW dimensions
       in_feats: input feature size (number of components in dimension of 2 of input tensor), this is
                 only required when time_se=True to calculcate the size of the squeeze excitation matrices.
-
+      res2net_scale: Res2Net scale parameter
+      res2net_width_factor: Res2Net multiplier for the width of the bottleneck layers.
""" def __init__( @@ -395,20 +401,14 @@ def out_shape(self, in_shape=None): return (in_shape[0], self.layer4[-1].out_channels, H, W) - def forward(self, x, use_amp=False): - if use_amp: - with torch.cuda.amp.autocast(): - return self._forward(x) - - return self._forward(x) - - def _forward(self, x): + def forward(self, x, x_lengths=None): """forward function Args: x: input tensor of size=(batch, Cin, Hin, Win) for image or size=(batch, C, freq, time) for audio - + x_lengths: when x are sequences with time in Win dimension, it + contains the lengths of the sequences. Returns: Tensor with output logits of size=(batch, out_units) if out_units>0, otherwise, it returns tensor of represeantions of size=(batch, Cout, Hout, Wout) diff --git a/hyperion/torch/narchs/resnet1d_decoder.py b/hyperion/torch/narchs/resnet1d_decoder.py index e3290c71..f24887fe 100644 --- a/hyperion/torch/narchs/resnet1d_decoder.py +++ b/hyperion/torch/narchs/resnet1d_decoder.py @@ -85,7 +85,7 @@ def __init__( self.norm_layer = norm_layer norm_groups = None if norm_layer == "group-norm": - norm_groups = min(np.min(resb_channels) // 2, 32) + norm_groups = min(min(resb_channels) // 2, 32) norm_groups = max(norm_groups, resb_groups) self._norm_layer = NLF.create(norm_layer, norm_groups) @@ -236,7 +236,7 @@ def _standarize_resblocks_param(p, num_blocks, p_name): return p def _compute_out_size(self, in_size): - out_size = in_size * in_stride + out_size = in_size * self.in_stride for stride in self.conv_strides: out_size *= stride @@ -264,7 +264,7 @@ def out_shape(self, in_shape=None): else: T = self._compute_out_size(in_shape[2]) - return (in_shape[0], out_chanels, T) + return (in_shape[0], out_channels, T) def _match_shape(self, x, target_shape): t = x.size(-1) diff --git a/hyperion/torch/narchs/resnet1d_encoder.py b/hyperion/torch/narchs/resnet1d_encoder.py index 78ceeac6..2044f528 100644 --- a/hyperion/torch/narchs/resnet1d_encoder.py +++ b/hyperion/torch/narchs/resnet1d_encoder.py @@ -11,6 +11,7 @@ import torch import torch.nn as nn +from ..utils import seq_lengths_to_mask from ..layers import ActivationFactory as AF from ..layers import NormLayer1dFactory as NLF from ..layer_blocks import ( @@ -371,13 +372,36 @@ def _match_lens(endpoints): return endpoints - def forward(self, x): + @staticmethod + def _update_mask(x, x_lengths, x_mask=None): + if x_lengths is None: + return None - x = self.in_block(x) + if x_mask is not None and x.size(-1) == x_mask.size(-1): + return x_mask + + return seq_lengths_to_mask(x_lengths, x.size(-1), time_dim=2) + + def forward(self, x, x_lengths=None): + """forward function + + Args: + x: input tensor of size=(batch, C, time) + x_lengths: it contains the lengths of the sequences. 
+        Returns:
+          Tensor with output logits of size=(batch, out_units) if out_units>0,
+          otherwise, it returns tensor of representations of size=(batch, Cout, out_time)
+
+        """
+
+        x_mask = self._update_mask(x, x_lengths)
+        x = self.in_block(x, x_mask=x_mask)
         endpoints = []
+
         for i, superblock in enumerate(self.blocks):
             for j, block in enumerate(superblock):
-                x = block(x)
+                x_mask = self._update_mask(x, x_lengths, x_mask)
+                x = block(x, x_mask=x_mask)
 
             if self.multilayer and self.is_endpoint[i]:
                 endpoint_i = x
@@ -401,11 +425,12 @@ def forward(self, x):
             x = torch.mean(torch.stack(endpoints), 0)
 
         if self.head_channels > 0:
+            x_mask = self._update_mask(x, x_lengths, x_mask)
             x = self.head_block(x)
 
         return x
 
-    def forward_hid_feats(self, x, layers=None, return_output=False):
+    def forward_hid_feats(self, x, x_lengths=None, layers=None, return_output=False):
 
         assert layers is not None or return_output
         if layers is None:
diff --git a/hyperion/torch/narchs/resnet2d_decoder.py b/hyperion/torch/narchs/resnet2d_decoder.py
index f5becf76..6457ada1 100644
--- a/hyperion/torch/narchs/resnet2d_decoder.py
+++ b/hyperion/torch/narchs/resnet2d_decoder.py
@@ -86,7 +86,7 @@ def __init__(
         self.norm_layer = norm_layer
         norm_groups = None
         if norm_layer == "group-norm":
-            norm_groups = min(np.min(resb_channels) // 2, 32)
+            norm_groups = min(min(resb_channels) // 2, 32)
             norm_groups = max(norm_groups, resb_groups)
         self._norm_layer = NLF.create(norm_layer, norm_groups)
 
@@ -237,7 +237,7 @@ def _standarize_resblocks_param(p, num_blocks, p_name):
         return p
 
     def _compute_out_size(self, in_size):
-        out_size = in_size * in_stride
+        out_size = in_size * self.in_stride
 
         for stride in self.conv_strides:
             out_size *= stride
@@ -270,7 +270,7 @@ def out_shape(self, in_shape=None):
         else:
             W = self._compute_out_size(in_shape[3])
 
-        return (in_shape[0], out_chanels, H, W)
+        return (in_shape[0], out_channels, H, W)
 
     def _match_shape(self, x, target_shape):
         x_dim = x.dim()
diff --git a/hyperion/torch/narchs/resnet2d_encoder.py b/hyperion/torch/narchs/resnet2d_encoder.py
index 22fc7fdd..a72cabac 100644
--- a/hyperion/torch/narchs/resnet2d_encoder.py
+++ b/hyperion/torch/narchs/resnet2d_encoder.py
@@ -9,6 +9,7 @@
 import torch
 import torch.nn as nn
 
+from ..utils import seq_lengths_to_mask
 from ..layers import ActivationFactory as AF
 from ..layers import NormLayer2dFactory as NLF
 from ..layer_blocks import ResNet2dBasicBlock, ResNet2dBNBlock, DC2dEncBlock
@@ -18,6 +19,35 @@
 
 
 class ResNet2dEncoder(NetArch):
+    """ResNet 2d Encoder.
+    This is similar to the ResNet class, but it offers more configuration possibilities.
+
+    Attributes:
+      in_channels: input channels.
+      in_conv_channels: output channels of the input convolution block.
+      in_kernel_size: kernel size of the input convolution block.
+      in_stride: stride of the input convolution block.
+      resb_type: residual block type in ['basic', 'bn', 'sebasic', 'sebn'].
+      resb_repeats: number of residual blocks in each residual superblock.
+      resb_channels: output channels of each residual superblock.
+      resb_kernel_sizes: kernel sizes of the residual blocks.
+      resb_strides: strides of the residual superblocks.
+      resb_dilations: dilation factors of the residual blocks.
+      resb_groups: number of groups in the convolutions of the residual blocks.
+      head_channels: channels of the output head block, if 0 there is no head block.
+      hid_act: hidden activation.
+      head_act: activation of the head block.
+      dropout_rate: dropout rate.
+      se_r: squeeze-excitation compression ratio.
+      time_se: if True, squeeze is done only in the time dimension.
+      in_feats: input feature size, needed when time_se=True.
+      res2net_width_factor: Res2Net multiplier for the width of the bottleneck layers.
+      res2net_scale: Res2Net scale parameter.
+      use_norm: if True, it uses normalization layers, otherwise it does not.
+      norm_layer: normalization layer constructor, if None BatchNorm2d is used.
+      norm_before: if True, normalization layer is before the activation, after otherwise.
+    """
+
     def __init__(
         self,
         in_channels=1,
@@ -104,7 +134,7 @@ def __init__(
         self.norm_layer = norm_layer
         norm_groups = None
         if norm_layer == "group-norm":
-            norm_groups = min(np.min(resb_channels) // 2, 32)
+            norm_groups = min(min(resb_channels) // 2, 32)
             norm_groups = max(norm_groups, resb_groups)
         self._norm_layer = NLF.create(norm_layer, norm_groups)
 
@@ -266,7 +296,17 @@ def out_shape(self, in_shape=None):
         else:
             W = self._compute_out_size(in_shape[3])
 
-        return (in_shape[0], out_chanels, H, W)
+        return (in_shape[0], out_channels, H, W)
+
+    @staticmethod
+    def _update_mask(x, x_lengths, x_mask=None):
+        if x_lengths is None:
+            return None
+
+        if x_mask is not None and x.size(-1) == x_mask.size(-1):
+            return x_mask
+
+        return seq_lengths_to_mask(x_lengths, x.size(-1), time_dim=3)
 
     def forward(self, x):
 
diff --git a/hyperion/torch/narchs/torch_na_loader.py b/hyperion/torch/narchs/torch_na_loader.py
index 542742fa..97bf5fb9 100644
--- a/hyperion/torch/narchs/torch_na_loader.py
+++ b/hyperion/torch/narchs/torch_na_loader.py
@@ -37,7 +37,7 @@ class TorchNALoader(object):
     @staticmethod
     def load(file_path, extra_objs={}):
-        model_data = torch.load(model_path)
+        model_data = torch.load(file_path)
         cfg = model_data["model_cfg"]
         class_name = cfg["class_name"]
         del cfg["class_name"]
diff --git a/hyperion/torch/narchs/transformer_encoder_v1.py b/hyperion/torch/narchs/transformer_encoder_v1.py
index 8d479f24..f1a5b26c 100644
--- a/hyperion/torch/narchs/transformer_encoder_v1.py
+++ b/hyperion/torch/narchs/transformer_encoder_v1.py
@@ -157,7 +157,7 @@ def _make_in_layer(self):
                 nn.Embedding(in_feats, d_model, padding_idx=self.padding_idx), pos_enc
             )
         elif isinstance(self.in_layer_type, nn.Module):
-            self.in_layer = nn.Sequential(in_layer_type, pos_enc)
+            self.in_layer = nn.Sequential(self.in_layer_type, pos_enc)
         elif self.in_layer_type is None:
             self.in_layer = pos_enc
         else:
diff --git a/hyperion/torch/narchs/xvector_classif.py b/hyperion/torch/narchs/xvector_classif.py
deleted file mode 100644
index e87c3db1..00000000
--- a/hyperion/torch/narchs/xvector_classif.py
+++ /dev/null
@@ -1,145 +0,0 @@
-"""
- Copyright 2019 Johns Hopkins University (Author: Jesus Villalba)
- Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
-"""
-
-import numpy as np
-
-import torch.nn as nn
-from torch.nn import Linear, BatchNorm1d, Dropout
-
-from ..layers import ActivationFactory as AF
-from .net_arch import NetArch
-
-
-class XVectorClassifV1(NetArch):
-    def __init__(
-        self,
-        input_units,
-        num_classes,
-        embed_dim=512,
-        num_hid_layers=2,
-        hid_act="relu",
-        outputs="logits",
-        use_batchnorm=True,
-        dropout_rate=0,
-    ):
-
-        super(XVectorClassifV1, self).__init__()
-        assert num_hid_layers >= 1, "num_hid_layers (%d < 1)" % num_hid_layers
-
-        self.num_hid_layers = num_hid_layers
-        self.input_units = input_units
-        self.embed_dim = embed_dim
-        self.num_classes = num_classes
-        self.use_batchnorm = use_batchnorm
-        self.dropout_rate = dropout_rate
-        self.outputs = outputs
-
-        if 
isinstance(hid_units, list): - assert num_hid_layers == len(embed_dim) - else: - embed_dim = [embed_dim for i in range(num_hid_layers)] - - units = [input_units] + embed_dim - - # fully connected layers - fc_layers = [] - for i in range(1, num_hid_layers + 1): - fc_layers.append(Linear(units[i - 1], units[i])) - - self.fc_layers = nn.ModuleList(fc_layers) - - # hidden activations - self.hid_acts = None - if hid_act is not None: - hid_acts = [] - for i in range(num_hid_layers): - hid_act = AF.create(hid_act) - hid_acts.append(hid_act) - self.hid_acts = nn.ModuleList(hid_acts) - - # batch normalization - self.batchnorm_layers = None - if use_batchnorm: - batchnorm_layers = [] - for i in range(num_hid_layers): - batchnorm_layers.append(BatchNorm1d(units[i])) - self.batchnorm_layers = nn.ModuleList(batchnorm_layers) - - # dropout - self.dropout_layers = None - if dropout_rate > 0: - dropout_layers = [] - for i in range(num_hid_layers): - dropout_layers.append(Dropout(dropout_rate)) - self.dropout_layers = nn.ModuleList(dropout_layers) - - # output layers - self.logits_layer = Linear(units[-1], num_classes) - - def forward(self, x): - - for l in range(self.num_hid_layers): - if self.use_batchnorm: - x = self.batchnorm_layers[l](x) - - x = self.fc_layers[l](x) - if self.hid_acts is not None: - x = self.hid_acts[l](x) - - if self.dropout_rate > 0: - x = self.dropout_layers[l](x) - - y = self.logits_layer(x) - - return y - - def extract_embed(self, x, embed_layers=0): - - if isinstance(embed_layers, int): - embed_layers = [embed_layers] - - last_embed_layer = np.max(embed_layers) - embed_layers = set(embed_layers) - - embed_list = [] - for l in range(self.num_hid_layers): - if self.use_batchnorm: - x = self.batchnorm_layers[l](x) - - x = self.fc_layers[l](x) - if l in embed_layers: - embed_list.append(x) - - if l == last_embed_layer: - break - - if self.hid_acts is not None: - x = self.hid_acts[l](x) - - if self.dropout_rate > 0: - x = self.dropout_layers[l](x) - - y = torch.cat((embed_list), dim=-1) - return y - - def get_config(self): - - if self.hid_acts is None: - hid_act = None - else: - hid_act = AF.get_config(self.hid_acts[0]) - - config = { - "num_hid_layers": self.num_hid_layers, - "num_classes": self.num_classes, - "embed_dim": self.embed_dim, - "input_units": self.input_units, - "use_batchnorm": self.use_batchnorm, - "dropout_rate": self.dropout_rate, - "hid_act": hid_act, - } - - base_config = super(XVectorClassifV1, self).get_config() - return dict(list(base_config.items()) + list(config.items())) From c82231f2749565544bd073607fefcc9850d67fa3 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 17 Mar 2022 09:43:11 -0400 Subject: [PATCH 006/154] *** --- hyperion/torch/models/xvectors/spinenet_xvector.py | 2 +- hyperion/torch/torch_model_loader.py | 2 +- hyperion/torch/trainers/xvector_trainer_from_wav.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hyperion/torch/models/xvectors/spinenet_xvector.py b/hyperion/torch/models/xvectors/spinenet_xvector.py index 1b7401a4..d3a22bce 100644 --- a/hyperion/torch/models/xvectors/spinenet_xvector.py +++ b/hyperion/torch/models/xvectors/spinenet_xvector.py @@ -3,7 +3,7 @@ Copyright 2020 Magdalena Rybicka Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - +from jsonargparse import ArgumentParser, ActionParser import logging import torch diff --git a/hyperion/torch/torch_model_loader.py b/hyperion/torch/torch_model_loader.py index 142656d1..92e0beb4 100644 --- a/hyperion/torch/torch_model_loader.py +++ 
b/hyperion/torch/torch_model_loader.py @@ -55,7 +55,7 @@ def load(file_path, extra_objs={}, map_location=None): if "n_averaged" in state_dict: del state_dict["n_averaged"] - cfg = self._fix_compatibilty(class_obj, cfg) + cfg = TorchModelLoader._fix_compatibilty(class_obj, cfg) p = re.compile("^module\.") num_tries = 3 diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index a00016e6..3519b6d6 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -122,6 +122,7 @@ def train_epoch(self, data_loader): metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() + self.feat_extractor.train() self.set_train_mode() for batch, (data, target) in enumerate(data_loader): @@ -170,6 +171,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): """ metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() + self.feat_extractor.eval() with torch.no_grad(): if swa_update_bn: log_tag = "train_" From 80cfc3c14478e9f4812d77becd71621a47833654 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 25 Mar 2022 15:37:00 -0400 Subject: [PATCH 007/154] find bug not initializing augmentation --- .../v1/conf/fbank40_nomn_16k.pyconf | 17 -- .../conf/fbank40_nope_hammw_stmn_16k.pyconf | 22 -- .../v1/conf/fbank40_stmn_16k.pyconf | 18 -- .../v1/conf/fbank40_stmvn_16k.pyconf | 19 -- egs/sre19-cmn2/v1/conf/fbank64_8k.pyconf | 14 - egs/sre19-cmn2/v1/conf/fbank64_mvn_8k.pyconf | 18 -- egs/sre19-cmn2/v1/conf/fbank64_stmn_8k.pyconf | 18 -- egs/sre19-cmn2/v1/conf/fbank80_16k.pyconf | 14 - egs/sre19-cmn2/v1/conf/fbank80_mvn_16k.pyconf | 18 -- .../v1/conf/fbank80_stmn_16k.pyconf | 18 -- .../v1/conf/linfbank40_stmn_16k.pyconf | 20 -- egs/sre19-cmn2/v1/conf/vad_16k.pyconf | 16 - ..._resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | 24 +- egs/voxceleb/v1.1/run_011_train_xvector.sh | 60 +--- hyperion/__init__.py | 13 +- .../bin/torch-train-spinenet-xvec-from-wav.py | 2 +- hyperion/bin/torch-train-xvec-from-wav.py | 215 -------------- hyperion/bin/torch_train_xvec_from_wav.py | 275 ++++++++++++++++++ hyperion/io/data_reader.py | 2 +- hyperion/io/data_rw_factory.py | 1 - hyperion/io/int32_writer.py | 2 +- hyperion/io/kaldi_data_reader.py | 2 +- hyperion/np/__init__.py | 1 + hyperion/np/augment/reverb_augment.py | 2 +- hyperion/np/augment/speed_augment.py | 2 +- hyperion/np/metrics/confusion_matrix.py | 2 +- hyperion/np/transforms/transform_list.py | 6 +- hyperion/torch/__init__.py | 15 - hyperion/torch/data/audio_dataset.py | 31 +- hyperion/torch/data/weighted_embed_sampler.py | 4 + hyperion/torch/data/weighted_seq_sampler.py | 23 +- .../torch/layer_blocks/resnet1d_blocks.py | 16 +- .../torch/layer_blocks/resnet2d_blocks.py | 11 +- .../layer_blocks/transformer_feedforward.py | 20 +- hyperion/torch/layers/global_pool.py | 14 +- hyperion/torch/layers/margin_losses.py | 4 +- hyperion/torch/lr_schedulers/cos_lr.py | 8 +- hyperion/torch/lr_schedulers/lr_scheduler.py | 5 +- hyperion/torch/models/xvectors/xvector.py | 2 +- hyperion/torch/torch_model_loader.py | 2 +- hyperion/torch/trainers/ae_trainer.py | 2 + hyperion/torch/trainers/dvae_trainer.py | 2 + hyperion/torch/trainers/plda_trainer.py | 2 + hyperion/torch/trainers/torch_trainer.py | 50 +++- hyperion/torch/trainers/vae_trainer.py | 2 + hyperion/torch/trainers/vq_dvae_trainer.py | 2 + hyperion/torch/trainers/vq_vae_trainer.py | 2 + .../torch/trainers/xvector_adv_trainer.py | 3 + .../trainers/xvector_adv_trainer_from_wav.py | 7 
+- hyperion/torch/trainers/xvector_trainer.py | 2 + .../trainers/xvector_trainer_deep_feat_reg.py | 2 + .../xvector_trainer_deep_feat_reg_from_wav.py | 2 + .../trainers/xvector_trainer_from_wav.py | 2 + 53 files changed, 485 insertions(+), 571 deletions(-) delete mode 100644 egs/sre19-cmn2/v1/conf/fbank40_nomn_16k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank40_nope_hammw_stmn_16k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank40_stmn_16k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank40_stmvn_16k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank64_8k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank64_mvn_8k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank64_stmn_8k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank80_16k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank80_mvn_16k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/fbank80_stmn_16k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/linfbank40_stmn_16k.pyconf delete mode 100644 egs/sre19-cmn2/v1/conf/vad_16k.pyconf delete mode 100755 hyperion/bin/torch-train-xvec-from-wav.py create mode 100755 hyperion/bin/torch_train_xvec_from_wav.py diff --git a/egs/sre19-cmn2/v1/conf/fbank40_nomn_16k.pyconf b/egs/sre19-cmn2/v1/conf/fbank40_nomn_16k.pyconf deleted file mode 100644 index d04eb2ec..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank40_nomn_16k.pyconf +++ /dev/null @@ -1,17 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -16000 ---feats-frame-length -25 ---feats-low-freq -20 ---feats-high-freq -7600 ---feats-num-filters -40 ---feats-snip-edges -false ---feats-use-energy -false ---mvn-no-norm-mean diff --git a/egs/sre19-cmn2/v1/conf/fbank40_nope_hammw_stmn_16k.pyconf b/egs/sre19-cmn2/v1/conf/fbank40_nope_hammw_stmn_16k.pyconf deleted file mode 100644 index da766d41..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank40_nope_hammw_stmn_16k.pyconf +++ /dev/null @@ -1,22 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -16000 ---feats-frame-length -25 ---feats-preemphasis-coeff -0 ---feats-window-type -hamming ---feats-low-freq -20 ---feats-high-freq -7600 ---feats-num-filters -40 ---feats-snip-edges -false ---feats-use-energy -false ---mvn-context -150 diff --git a/egs/sre19-cmn2/v1/conf/fbank40_stmn_16k.pyconf b/egs/sre19-cmn2/v1/conf/fbank40_stmn_16k.pyconf deleted file mode 100644 index 919efdec..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank40_stmn_16k.pyconf +++ /dev/null @@ -1,18 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -16000 ---feats-frame-length -25 ---feats-low-freq -20 ---feats-high-freq -7600 ---feats-num-filters -40 ---feats-snip-edges -false ---feats-use-energy -false ---mvn-context -150 diff --git a/egs/sre19-cmn2/v1/conf/fbank40_stmvn_16k.pyconf b/egs/sre19-cmn2/v1/conf/fbank40_stmvn_16k.pyconf deleted file mode 100644 index b81e9283..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank40_stmvn_16k.pyconf +++ /dev/null @@ -1,19 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -16000 ---feats-frame-length -25 ---feats-low-freq -20 ---feats-high-freq -7600 ---feats-num-filters -40 ---feats-snip-edges -false ---feats-use-energy -false ---mvn-context -150 ---mvn-norm-var diff --git a/egs/sre19-cmn2/v1/conf/fbank64_8k.pyconf b/egs/sre19-cmn2/v1/conf/fbank64_8k.pyconf deleted file mode 100644 index f5a57052..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank64_8k.pyconf +++ /dev/null @@ -1,14 +0,0 @@ ---sample-frequency -8000 ---frame-length -25 ---low-freq -20 ---high-freq -3700 ---num-filters -64 ---snip-edges -false ---use-energy -false diff --git 
a/egs/sre19-cmn2/v1/conf/fbank64_mvn_8k.pyconf b/egs/sre19-cmn2/v1/conf/fbank64_mvn_8k.pyconf deleted file mode 100644 index 29ce58a9..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank64_mvn_8k.pyconf +++ /dev/null @@ -1,18 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -8000 ---feats-frame-length -25 ---feats-low-freq -20 ---feats-high-freq -3700 ---feats-num-filters -64 ---feats-snip-edges -false ---feats-use-energy -false ---mvn-context -150 diff --git a/egs/sre19-cmn2/v1/conf/fbank64_stmn_8k.pyconf b/egs/sre19-cmn2/v1/conf/fbank64_stmn_8k.pyconf deleted file mode 100644 index 29ce58a9..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank64_stmn_8k.pyconf +++ /dev/null @@ -1,18 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -8000 ---feats-frame-length -25 ---feats-low-freq -20 ---feats-high-freq -3700 ---feats-num-filters -64 ---feats-snip-edges -false ---feats-use-energy -false ---mvn-context -150 diff --git a/egs/sre19-cmn2/v1/conf/fbank80_16k.pyconf b/egs/sre19-cmn2/v1/conf/fbank80_16k.pyconf deleted file mode 100644 index 3e65fe32..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank80_16k.pyconf +++ /dev/null @@ -1,14 +0,0 @@ ---sample-frequency -16000 ---frame-length -25 ---low-freq -20 ---high-freq -7600 ---num-filters -80 ---snip-edges -false ---use-energy -false diff --git a/egs/sre19-cmn2/v1/conf/fbank80_mvn_16k.pyconf b/egs/sre19-cmn2/v1/conf/fbank80_mvn_16k.pyconf deleted file mode 100644 index ffdbf165..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank80_mvn_16k.pyconf +++ /dev/null @@ -1,18 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -16000 ---feats-frame-length -25 ---feats-low-freq -20 ---feats-high-freq -7600 ---feats-num-filters -80 ---feats-snip-edges -false ---feats-use-energy -false ---mvn-context -150 diff --git a/egs/sre19-cmn2/v1/conf/fbank80_stmn_16k.pyconf b/egs/sre19-cmn2/v1/conf/fbank80_stmn_16k.pyconf deleted file mode 100644 index ffdbf165..00000000 --- a/egs/sre19-cmn2/v1/conf/fbank80_stmn_16k.pyconf +++ /dev/null @@ -1,18 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -16000 ---feats-frame-length -25 ---feats-low-freq -20 ---feats-high-freq -7600 ---feats-num-filters -80 ---feats-snip-edges -false ---feats-use-energy -false ---mvn-context -150 diff --git a/egs/sre19-cmn2/v1/conf/linfbank40_stmn_16k.pyconf b/egs/sre19-cmn2/v1/conf/linfbank40_stmn_16k.pyconf deleted file mode 100644 index f80faad2..00000000 --- a/egs/sre19-cmn2/v1/conf/linfbank40_stmn_16k.pyconf +++ /dev/null @@ -1,20 +0,0 @@ ---feats-audio-feat -logfb ---feats-sample-frequency -16000 ---feats-frame-length -25 ---feats-low-freq -64 ---feats-high-freq -8000 ---feats-num-filters -40 ---feats-snip-edges -false ---feats-use-energy -false ---feats-fb-type -linear ---mvn-context -150 diff --git a/egs/sre19-cmn2/v1/conf/vad_16k.pyconf b/egs/sre19-cmn2/v1/conf/vad_16k.pyconf deleted file mode 100644 index b52af74b..00000000 --- a/egs/sre19-cmn2/v1/conf/vad_16k.pyconf +++ /dev/null @@ -1,16 +0,0 @@ ---sample-frequency -16000 ---frame-shift -10 ---frame-length -25 ---snip-edges -false ---vad-energy-threshold -5.5 ---vad-energy-mean-scale -0.5 ---vad-proportion-threshold -0.12 ---vad-frames-context -2 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh index fc62c86b..63fecf32 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ 
b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -9,31 +9,27 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg + +nnet_type=resnet + +resnet_type=resnet34 batch_size_1gpu=32 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=resnet34 dropout=0 embed_dim=256 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/v1.1/run_011_train_xvector.sh b/egs/voxceleb/v1.1/run_011_train_xvector.sh index 0b0e4d50..9b30369e 100755 --- a/egs/voxceleb/v1.1/run_011_train_xvector.sh +++ b/egs/voxceleb/v1.1/run_011_train_xvector.sh @@ -10,9 +10,8 @@ set -e stage=1 ngpu=4 config_file=default_config.sh -resume=false interactive=false -num_workers=8 +num_workers="" use_tb=false use_wandb=false @@ -20,20 +19,17 @@ use_wandb=false . $config_file . 
datapath.sh -batch_size=$(($batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) list_dir=data/${nnet_data}_proc_audio_no_sil -args="" -if [ "$resume" == "true" ];then - args="--resume" +#add extra args from the command line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" fi if [ "$use_tb" == "true" ];then - args="$args --use-tensorboard" + extra_args="$extra_args --trainer.use-tensorboard" fi if [ "$use_wandb" == "true" ];then - args="$args --use-wandb --wandb.project voxceleb-v1.1 --wandb.name $nnet_name.$(date -Iminutes)" + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)" fi if [ "$interactive" == "true" ];then @@ -43,47 +39,21 @@ fi # Network Training if [ $stage -le 1 ]; then - if [[ ${nnet_type} =~ resnet1d ]]; then - train_exec=torch-train-resnet1d-xvec-from-wav.py - elif [[ ${nnet_type} =~ resnet ]] || [[ ${nnet_type} =~ resnext ]] || [[ ${nnet_type} =~ res2net ]] || [[ ${nnet_type} =~ res2next ]]; then - train_exec=torch-train-resnet-xvec-from-wav.py - elif [[ ${nnet_type} =~ efficientnet ]]; then - train_exec=torch-train-efficientnet-xvec-from-wav.py - elif [[ ${nnet_type} =~ tdnn ]]; then - train_exec=torch-train-tdnn-xvec-from-wav.py - elif [[ ${nnet_type} =~ transformer ]]; then - train_exec=torch-train-transformer-xvec-v1-from-wav.py - elif [[ ${nnet_type} =~ spinenet ]] || [[ ${nnet_type} =~ spine2net ]] || [[ ${nnet_type} =~ r0_sp53 ]]; then - train_exec=torch-train-spinenet-xvec-from-wav.py - else - echo "$nnet_type not supported" - exit 1 - fi mkdir -p $nnet_dir/log $cuda_cmd \ --gpu $ngpu $nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - $train_exec --feats $feat_config $aug_opt \ - --audio-path $list_dir/wav.scp \ - --time-durs-file $list_dir/utt2dur \ - --train-list $list_dir/lists_xvec/train.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --class-file $list_dir/lists_xvec/class2int \ - --min-chunk-length $min_chunk --max-chunk-length $max_chunk \ - --iters-per-epoch $ipe \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --embed-dim $embed_dim $nnet_opt $opt_opt $lrs_opt \ - --epochs $nnet_num_epochs \ - --cos-scale $s --margin $margin --margin-warmup-epochs $margin_warmup \ - --dropout-rate $dropout \ + torch_train_xvec_from_wav.py $nnet_type --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ + --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --trainer.exp-path $nnet_dir $args \ --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $nnet_dir $args fi - -exit diff --git a/hyperion/__init__.py b/hyperion/__init__.py index 6e59062b..055441cd 100644 --- a/hyperion/__init__.py +++ b/hyperion/__init__.py @@ -5,17 +5,10 @@ from . import utils -from . import metrics -from . import pdfs -from . import transforms +from . import np from . import io -from . import feats -from . import calibration -from . import score_norm - -# from . import keras +from . 
import torch from . import helpers -# from . import generators -__version__ = "0.3.1" +__version__ = "0.4.0a" diff --git a/hyperion/bin/torch-train-spinenet-xvec-from-wav.py b/hyperion/bin/torch-train-spinenet-xvec-from-wav.py index 91aa17b1..7bac503c 100755 --- a/hyperion/bin/torch-train-spinenet-xvec-from-wav.py +++ b/hyperion/bin/torch-train-spinenet-xvec-from-wav.py @@ -15,6 +15,7 @@ import time import logging import multiprocessing +from pathlib import Path import numpy as np @@ -22,7 +23,6 @@ import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device from hyperion.torch.utils import ddp from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.models import SpineNetXVector as XVec diff --git a/hyperion/bin/torch-train-xvec-from-wav.py b/hyperion/bin/torch-train-xvec-from-wav.py deleted file mode 100755 index 8dcd0482..00000000 --- a/hyperion/bin/torch-train-xvec-from-wav.py +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env python -""" - Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time -import logging -import multiprocessing - -import numpy as np - -import torch -import torch.nn as nn - -from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.data import AudioDataset as AD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler -from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch.narchs import AudioFeatsMVN as AF -from hyperion.torch.models import ResNetXVector as RXVec -from hyperion.torch.models import EfficientNetXVector as EXVec -from hyperion.torch.models import TDNNXVector as TDXVec -from hyperion.torch.models import TransformerXVectorV1 as TFXVec - -xvec_dict = { - "resnet": RXVec, - "efficientnet": EXVec, - "tdnn": TDXVec, - "transformer": TFXVec, -} - - -def init_data( - audio_path, - train_list, - val_list, - train_aug_cfg, - val_aug_cfg, - num_workers, - num_gpus, - rank, - **kwargs -): - - ad_args = AD.filter_args(**kwargs) - sampler_args = Sampler.filter_args(**kwargs) - if rank == 0: - logging.info("audio dataset args={}".format(ad_args)) - logging.info("sampler args={}".format(sampler_args)) - logging.info("init datasets") - - train_data = AD(audio_path, train_list, aug_cfg=train_aug_cfg, **ad_args) - val_data = AD(audio_path, val_list, aug_cfg=val_aug_cfg, is_val=True, **ad_args) - - if rank == 0: - logging.info("init samplers") - train_sampler = Sampler(train_data, **sampler_args) - val_sampler = Sampler(val_data, **sampler_args) - - num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) - largs = ( - {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} - ) - - train_loader = torch.utils.data.DataLoader( - train_data, batch_sampler=train_sampler, **largs - ) - - test_loader = torch.utils.data.DataLoader( - val_data, batch_sampler=val_sampler, **largs - ) - - return train_loader, test_loader - - -def init_feats(rank, **kwargs): - feat_args = AF.filter_args(**kwargs["feats"]) - if rank == 0: - logging.info("feat args={}".format(feat_args)) - logging.info("initializing feature 
extractor") - feat_extractor = AF(trans=True, **feat_args) - if rank == 0: - logging.info("feat-extractor={}".format(feat_extractor)) - return feat_extractor - - -def init_xvector(num_classes, rank, xvec_class, **kwargs): - - xvec_args = xvec_class.filter_args(**kwargs) - if rank == 0: - logging.info("xvector network args={}".format(xvec_args)) - xvec_args["num_classes"] = num_classes - model = xvec_class(**xvec_args) - if rank == 0: - logging.info("x-vector-model={}".format(model)) - return model - - -def train_xvec(gpu_id, args): - - config_logger(args.verbose) - del args.verbose - logging.debug(args) - - kwargs = namespace_to_dict(args) - torch.manual_seed(args.seed) - set_float_cpu("float32") - - ddp_args = ddp.filter_ddp_args(**kwargs) - device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) - kwargs["rank"] = rank - - train_loader, test_loader = init_data(**kwargs) - feat_extractor = init_feats(**kwargs) - model = init_xvector(train_loader.dataset.num_classes, **kwargs) - - trn_args = Trainer.filter_args(**kwargs) - if rank == 0: - logging.info("trainer args={}".format(trn_args)) - metrics = {"acc": CategoricalAccuracy()} - trainer = Trainer( - model, - feat_extractor, - device=device, - metrics=metrics, - ddp=world_size > 1, - **trn_args - ) - if args.resume: - trainer.load_last_checkpoint() - trainer.fit(train_loader, test_loader) - - ddp.ddp_cleanup() - - -def make_parser(xvec_class): - parser = ArgumentParser() - - parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--audio-path", required=True) - parser.add_argument("--train-list", required=True) - parser.add_argument("--val-list", required=True) - - AD.add_class_args(parser) - Sampler.add_class_args(parser) - - parser.add_argument("--train-aug-cfg", default=None) - parser.add_argument("--val-aug-cfg", default=None) - - parser.add_argument( - "--num-workers", type=int, default=5, help="num_workers of data loader" - ) - - AF.add_class_args(parser, prefix="feats") - xvec_class.add_class_args(parser) - Trainer.add_class_args(parser) - ddp.add_ddp_args(parser) - parser.add_argument("--seed", type=int, default=1123581321, help="random seed") - parser.add_argument( - "--resume", - action="store_true", - default=False, - help="resume training from checkpoint", - ) - parser.add_argument( - "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int - ) - - return parser - - -if __name__ == "__main__": - - parser = ArgumentParser(description="Train XVector from audio files") - - parser.add_argument("--cfg", action=ActionConfigFile) - - subcommands = parser.add_subcommands() - - for k, v in xvec_dict.items(): - parser_k = make_parser(v) - subcommands.add_subcommand(k, parser_k) - - args = parser.parse_args() - try: - gpu_id = int(os.environ["LOCAL_RANK"]) - except: - gpu_id = 0 - - xvec_type = args.subcommand - args_sc = vars(args)[xvec_type] - - if gpu_id == 0: - try: - config_file = Path(args_sc.exp_path) / "config.yaml" - parser.save(args, str(config_file), format="yaml", overwrite=True) - except: - pass - - args_sc.xvec_class = xvec_dict[xvec_type] - # torch docs recommend using forkserver - multiprocessing.set_start_method("forkserver") - train_xvec(gpu_id, args_sc) diff --git a/hyperion/bin/torch_train_xvec_from_wav.py b/hyperion/bin/torch_train_xvec_from_wav.py new file mode 100755 index 00000000..df948b87 --- /dev/null +++ b/hyperion/bin/torch_train_xvec_from_wav.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + 
Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from pathlib import Path +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging +import multiprocessing + +import numpy as np + +import torch +import torch.nn as nn + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.utils import open_device +from hyperion.torch.utils import ddp +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.models import ResNetXVector as RXVec +from hyperion.torch.models import ResNet1dXVector as R1dXVec +from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import TDNNXVector as TDXVec +from hyperion.torch.models import TransformerXVectorV1 as TFXVec +from hyperion.torch.models import SpineNetXVector as SpineXVec + +xvec_dict = { + "resnet": RXVec, + "resnet1d": R1dXVec, + "efficientnet": EXVec, + "tdnn": TDXVec, + "transformer": TFXVec, + "spinenet": SpineXVec, +} + + +def init_data(partition, rank, num_gpus, **kwargs): + + kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**kwargs["dataset"]) + sampler_args = Sampler.filter_args(**kwargs["sampler"]) + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + ad_args["is_val"] = partition == "val" + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = Sampler(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +# def init_data( +# audio_path, +# train_list, +# val_list, +# train_aug_cfg, +# val_aug_cfg, +# num_workers, +# num_gpus, +# rank, +# **kwargs +# ): + +# ad_args = AD.filter_args(**kwargs) +# sampler_args = Sampler.filter_args(**kwargs) +# if rank == 0: +# logging.info("audio dataset args={}".format(ad_args)) +# logging.info("sampler args={}".format(sampler_args)) +# logging.info("init datasets") + +# train_data = AD(audio_path, train_list, aug_cfg=train_aug_cfg, **ad_args) +# val_data = AD(audio_path, val_list, aug_cfg=val_aug_cfg, is_val=True, **ad_args) + +# if rank == 0: +# logging.info("init samplers") +# train_sampler = Sampler(train_data, **sampler_args) +# val_sampler = Sampler(val_data, **sampler_args) + +# num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) +# largs = ( +# {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} +# ) + +# train_loader = torch.utils.data.DataLoader( +# train_data, batch_sampler=train_sampler, **largs +# ) + +# test_loader = torch.utils.data.DataLoader( +# val_data, batch_sampler=val_sampler, **largs +# ) + +# return train_loader, test_loader + + +def init_feats(rank, **kwargs): + feat_args = AF.filter_args(**kwargs["feats"]) + if rank == 0: + logging.info("feat 
args={}".format(feat_args)) + logging.info("initializing feature extractor") + feat_extractor = AF(trans=True, **feat_args) + if rank == 0: + logging.info("feat-extractor={}".format(feat_extractor)) + return feat_extractor + + +def init_xvector(num_classes, rank, xvec_class, **kwargs): + + xvec_args = xvec_class.filter_args(**kwargs["model"]) + if rank == 0: + logging.info("xvector network args={}".format(xvec_args)) + xvec_args["num_classes"] = num_classes + model = xvec_class(**xvec_args) + if rank == 0: + logging.info("x-vector-model={}".format(model)) + return model + + +def train_xvec(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + feat_extractor = init_feats(**kwargs) + model = init_xvector(train_loader.dataset.num_classes, **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, + feat_extractor, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(xvec_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + + train_parser = ArgumentParser(prog="") + # parser.add_argument("--audio-path", required=True) + # parser.add_argument("--train-list", required=True) + # parser.add_argument("--val-list", required=True) + + AD.add_class_args(train_parser, prefix="dataset", skip={}) + Sampler.add_class_args(train_parser, prefix="sampler") + # parser.add_argument("--train-aug-cfg", default=None) + # parser.add_argument("--val-aug-cfg", default=None) + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + Sampler.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + "data.train.dataset.class_file", "data.val.dataset.class_file" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + parser.link_arguments( + "data.train.sampler.batch_size", "data.val.sampler.batch_size" + ) + + AF.add_class_args(parser, prefix="feats") + xvec_class.add_class_args(parser, prefix="model") + Trainer.add_class_args(parser, prefix="trainer") + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + # parser.add_argument( + # "--resume", + # action="store_true", + # default=False, + # help="resume training from checkpoint", + # ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + 
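The make_parser above nests separate train and val parsers under a single --data namespace and relies on jsonargparse's link_arguments so the options that must agree across partitions (class list, workers, batch size) are only typed once. A minimal, self-contained sketch of the same pattern; the option names and the lists/classes.csv path are illustrative, not taken from the recipes:

from jsonargparse import ArgumentParser, ActionParser

train_parser = ArgumentParser(prog="")
train_parser.add_argument("--dataset.class_file", default=None)
train_parser.add_argument("--data_loader.num_workers", type=int, default=5)

val_parser = ArgumentParser(prog="")
val_parser.add_argument("--dataset.class_file", default=None)
val_parser.add_argument("--data_loader.num_workers", type=int, default=5)

data_parser = ArgumentParser(prog="")
data_parser.add_argument("--train", action=ActionParser(parser=train_parser))
data_parser.add_argument("--val", action=ActionParser(parser=val_parser))

parser = ArgumentParser()
parser.add_argument("--data", action=ActionParser(parser=data_parser))
# val options are derived from the train ones, so the two cannot diverge
parser.link_arguments("data.train.dataset.class_file", "data.val.dataset.class_file")
parser.link_arguments("data.train.data_loader.num_workers", "data.val.data_loader.num_workers")

args = parser.parse_args(["--data.train.dataset.class_file", "lists/classes.csv"])
print(args.data.val.dataset.class_file)  # lists/classes.csv, propagated from train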
+ +if __name__ == "__main__": + + parser = ArgumentParser(description="Train XVector from audio files") + + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + + for k, v in xvec_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + xvec_type = args.subcommand + args_sc = vars(args)[xvec_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.xvec_class = xvec_dict[xvec_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_xvec(gpu_id, args_sc) diff --git a/hyperion/io/data_reader.py b/hyperion/io/data_reader.py index f0c61d3a..da0279e1 100644 --- a/hyperion/io/data_reader.py +++ b/hyperion/io/data_reader.py @@ -10,7 +10,7 @@ from ..hyp_defs import float_cpu from ..utils.scp_list import SCPList -from ..transforms import TransformList +from ..np.transforms import TransformList class DataReader(object): diff --git a/hyperion/io/data_rw_factory.py b/hyperion/io/data_rw_factory.py index ed408156..0c49cd9f 100644 --- a/hyperion/io/data_rw_factory.py +++ b/hyperion/io/data_rw_factory.py @@ -13,7 +13,6 @@ from .ark_data_reader import SequentialArkFileDataReader as SAFDR from .ark_data_reader import SequentialArkScriptDataReader as SASDR from .ark_data_reader import RandomAccessArkDataReader as RADR -from .h5_data_writer import H5DataWriter as H5DW from .h5_data_reader import SequentialH5FileDataReader as SH5FDR from .h5_data_reader import SequentialH5ScriptDataReader as SH5SDR from .h5_data_reader import RandomAccessH5FileDataReader as RH5FDR diff --git a/hyperion/io/int32_writer.py b/hyperion/io/int32_writer.py index c823dc0e..d881fb16 100644 --- a/hyperion/io/int32_writer.py +++ b/hyperion/io/int32_writer.py @@ -12,4 +12,4 @@ class Int32Writer(DataWriter): """Class to write data to int32 files.""" def __init__(self, wspecifier): - super(Int32Writer, self).__init__(wspecifier) + super().__init__(wspecifier) diff --git a/hyperion/io/kaldi_data_reader.py b/hyperion/io/kaldi_data_reader.py index 712941ec..6313cb29 100644 --- a/hyperion/io/kaldi_data_reader.py +++ b/hyperion/io/kaldi_data_reader.py @@ -133,7 +133,7 @@ def _read_ascii_matrix(f): while 1: line = f.readline() if len(line) == 0: - raise BadInputFormat # eof, should not happen! + raise ValueError() # eof, should not happen! 
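The __main__ block above follows the torchrun launch convention: each worker process finds its GPU in the LOCAL_RANK environment variable (falling back to 0 for single-process runs) and switches multiprocessing to forkserver before any DataLoader workers are spawned, as the torch docs recommend. A stripped-down sketch of that entry point, where main() is a placeholder for the real training function:

import os
import multiprocessing

import torch

def main(gpu_id):
    # placeholder for train_xvec(gpu_id, args)
    device = torch.device(f"cuda:{gpu_id}" if torch.cuda.is_available() else "cpu")
    print("worker running on", device)

if __name__ == "__main__":
    # torchrun exports LOCAL_RANK per process; default to 0 when launched directly
    gpu_id = int(os.environ.get("LOCAL_RANK", 0))
    multiprocessing.set_start_method("forkserver")
    main(gpu_id)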
if len(line.strip()) == 0: continue # skip empty line arr = line.strip().split() diff --git a/hyperion/np/__init__.py b/hyperion/np/__init__.py index d2774314..86fff349 100644 --- a/hyperion/np/__init__.py +++ b/hyperion/np/__init__.py @@ -3,5 +3,6 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + from .np_model import NPModel from .np_model_loader import NPModelLoader diff --git a/hyperion/np/augment/reverb_augment.py b/hyperion/np/augment/reverb_augment.py index fe225e53..ef5293d6 100644 --- a/hyperion/np/augment/reverb_augment.py +++ b/hyperion/np/augment/reverb_augment.py @@ -15,7 +15,7 @@ from scipy import signal from ...hyp_defs import float_cpu -from ..io import RandomAccessDataReaderFactory as DRF +from ...io import RandomAccessDataReaderFactory as DRF class RIRNormType(Enum): diff --git a/hyperion/np/augment/speed_augment.py b/hyperion/np/augment/speed_augment.py index 7fdaab3c..2f353ebe 100644 --- a/hyperion/np/augment/speed_augment.py +++ b/hyperion/np/augment/speed_augment.py @@ -5,7 +5,7 @@ import logging from copy import deepcopy -import multiprocessing +import yaml import numpy as np from librosa.effects import time_stretch diff --git a/hyperion/np/metrics/confusion_matrix.py b/hyperion/np/metrics/confusion_matrix.py index 2efdd9e4..084aa7a9 100644 --- a/hyperion/np/metrics/confusion_matrix.py +++ b/hyperion/np/metrics/confusion_matrix.py @@ -8,7 +8,7 @@ import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix -from ..utils.list_utils import list2ndarray +from ...utils.list_utils import list2ndarray def compute_confusion_matrix( diff --git a/hyperion/np/transforms/transform_list.py b/hyperion/np/transforms/transform_list.py index 3e89966a..62bc802e 100644 --- a/hyperion/np/transforms/transform_list.py +++ b/hyperion/np/transforms/transform_list.py @@ -8,7 +8,7 @@ import numpy as np import h5py -from ..hyp_model import HypModel +from ..np_model import NPModel from .cent_whiten import CentWhiten from .cent_whiten_up import CentWhitenUP @@ -22,11 +22,11 @@ from .gaussianizer import Gaussianizer -class TransformList(HypModel): +class TransformList(NPModel): """Class to perform a list of transformations""" def __init__(self, transforms, **kwargs): - super(TransformList, self).__init__(**kwargs) + super().__init__(**kwargs) if not isinstance(transforms, list): transforms = [transforms] self.transforms = transforms diff --git a/hyperion/torch/__init__.py b/hyperion/torch/__init__.py index 8fade929..41745d38 100644 --- a/hyperion/torch/__init__.py +++ b/hyperion/torch/__init__.py @@ -3,21 +3,6 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -# - -# from . import utils -# from . import loggers -# from . import metrics -# from . import lr_schedulers -# from . import data -# from . import layers -# from . import layer_blocks -# from . import narchs -# from . import trainers -# from . import transforms -# from . import adv_attacks -# from . import helpers -# from . 
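The import churn above is all fallout from the move into the np subpackage: modules that used to sit at hyperion/<pkg> now live one level deeper at hyperion/np/<pkg>, so every relative import that reaches a sibling top-level package needs one extra dot. Schematically, using the reverb_augment hunk as the example:

# old location: hyperion/augment/reverb_augment.py
from ..io import RandomAccessDataReaderFactory as DRF    # ..  -> hyperion

# new location: hyperion/np/augment/reverb_augment.py
from ...io import RandomAccessDataReaderFactory as DRF   # ... -> hyperion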
import seq_embed from .torch_model import TorchModel from .torch_model_loader import TorchModelLoader diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 1801f11a..38da8eb9 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -25,7 +25,7 @@ class AudioDataset(Dataset): def __init__( self, - audio_path, + audio_file, key_file, class_file=None, time_durs_file=None, @@ -51,8 +51,8 @@ def __init__( self.world_size = world_size if rank == 0: - logging.info("opening dataset %s" % audio_path) - self.r = AR(audio_path, wav_scale=wav_scale) + logging.info("opening dataset %s", audio_file) + self.r = AR(audio_file, wav_scale=wav_scale) if rank == 0: logging.info("loading utt2info file %s" % key_file) self.u2c = Utt2Info.load(key_file, sep=" ") @@ -62,7 +62,6 @@ def __init__( self.is_val = is_val self._read_time_durs_file(time_durs_file) - # self._seq_lengths = self.r.read_time_duration(self.u2c.key) self._prune_short_seqs(min_chunk_length) self.short_seq_exist = self._seq_shorter_than_max_length_exists( @@ -366,6 +365,9 @@ def filter_args(**kwargs): ar_args = AR.filter_args(**kwargs) valid_args = ( + "audio_file", + "key_file", + "aug_cfg", "path_prefix", "class_file", "time_durs_file", @@ -380,7 +382,7 @@ def filter_args(**kwargs): return args @staticmethod - def add_class_args(parser, prefix=None): + def add_class_args(parser, prefix=None, skip={"audio_file", "key_file"}): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") @@ -388,6 +390,19 @@ def add_class_args(parser, prefix=None): # parser.add_argument('--path-prefix', # default='', # help=('path prefix for rspecifier scp file')) + if "audio_file" not in skip: + parser.add_argument( + "--audio-file", + required=True, + help=("audio manifest file"), + ) + + if "key_file" not in skip: + parser.add_argument( + "--key-file", + required=True, + help=("key manifest file"), + ) parser.add_argument( "--class-file", @@ -399,6 +414,12 @@ def add_class_args(parser, prefix=None): "--time-durs-file", default=None, help=("utt to duration in secs file") ) + parser.add_argument( + "--aug-cfg", + default=None, + help=("augmentation configuration file."), + ) + parser.add_argument( "--min-chunk-length", type=float, diff --git a/hyperion/torch/data/weighted_embed_sampler.py b/hyperion/torch/data/weighted_embed_sampler.py index 61e4a0ad..2c381365 100644 --- a/hyperion/torch/data/weighted_embed_sampler.py +++ b/hyperion/torch/data/weighted_embed_sampler.py @@ -36,6 +36,10 @@ def __iter__(self): self.batch = 0 return self + @property + def avg_batch_size(self): + return self.batch_size + def _remove_duplicate_idx(self, utt_idx): utt_idx_uniq = torch.unique(utt_idx) c = 0 diff --git a/hyperion/torch/data/weighted_seq_sampler.py b/hyperion/torch/data/weighted_seq_sampler.py index 9d128bb8..34c3fcbc 100644 --- a/hyperion/torch/data/weighted_seq_sampler.py +++ b/hyperion/torch/data/weighted_seq_sampler.py @@ -35,7 +35,7 @@ def __init__( world_size = 1 self.dataset = dataset - self.batch_size = int(math.ceil(batch_size / world_size)) + self.batch_size = batch_size self.num_egs_per_class = num_egs_per_class self.num_egs_per_utt = num_egs_per_utt self.var_batch_size = var_batch_size @@ -65,13 +65,22 @@ def __init__( self.iters_per_epoch * dataset.num_seqs / avg_batch_size / world_size ) ) - - logging.info("num batches per epoch: %d" % self._len) + print( + "num_batches", + self.iters_per_epoch, + dataset.num_seqs, + avg_batch_size, + world_size, + self._len, + 
flush=True, + ) + self.avg_batch_size = avg_batch_size + logging.info("num batches per epoch: %d", self._len) self._num_classes_per_batch = int( - math.ceil(batch_size / num_egs_per_class / num_egs_per_utt) + math.ceil(avg_batch_size / num_egs_per_class / num_egs_per_utt) ) - logging.info("num classes per batch: %d" % self._num_classes_per_batch) + logging.info("num classes per batch: %d", self._num_classes_per_batch) # self.weights = torch.as_tensor(dataset.class_weights, dtype=torch.double) @@ -228,7 +237,9 @@ def add_class_args(parser, prefix=None): outer_parser = parser parser = ArgumentParser(prog="") - parser.add_argument("--batch-size", default=128, type=int, help=("batch size")) + parser.add_argument( + "--batch-size", default=128, type=int, help=("batch size per gpu") + ) parser.add_argument( "--var-batch-size", diff --git a/hyperion/torch/layer_blocks/resnet1d_blocks.py b/hyperion/torch/layer_blocks/resnet1d_blocks.py index 01fd1087..ca99bb3d 100644 --- a/hyperion/torch/layer_blocks/resnet1d_blocks.py +++ b/hyperion/torch/layer_blocks/resnet1d_blocks.py @@ -260,6 +260,7 @@ class ResNet1dBasicDecBlock(nn.Module): norm_layer: normalization layer constructor, if None BatchNorm1d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + expansion = 1 # __constants__ = ['downsample'] @@ -541,6 +542,7 @@ class ResNet1dBNDecBlock(nn.Module): norm_layer: normalization layer constructor, if None BatchNorm1d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + def __init__( self, in_channels, @@ -680,6 +682,7 @@ class SEResNet1dBasicBlock(ResNet1dBasicBlock): norm_layer: normalization layer constructor, if None BatchNorm1d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + expansion = 1 def __init__( @@ -780,6 +783,7 @@ class SEResNet1dBasicDecBlock(ResNet1dBasicDecBlock): norm_layer: normalization layer constructor, if None BatchNorm1d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + expansion = 1 def __init__( @@ -886,7 +890,7 @@ class SEResNet1dBNBlock(ResNet1dBNBlock): norm_layer: normalization layer constructor, if None BatchNorm1d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ - + def __init__( self, in_channels, @@ -976,7 +980,7 @@ def forward(self, x, x_mask=None): class SEResNet1dBNDecBlock(ResNet1dBNDecBlock): - """Squeeze-excitation ResNet 1d bottleneck Block for decoders. + """Squeeze-excitation ResNet 1d bottleneck Block for decoders. Attributes: in_channels: input channels. @@ -995,6 +999,7 @@ class SEResNet1dBNDecBlock(ResNet1dBNDecBlock): norm_layer: normalization layer constructor, if None BatchNorm1d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + def __init__( self, in_channels, @@ -1084,7 +1089,7 @@ def forward(self, x, x_mask=None): class ResNet1dEndpoint(nn.Module): - """ Class that connects the ouputs of the ResNet1d to the rest of the network + """Class that connects the ouputs of the ResNet1d to the rest of the network when using multilevel feature aggregation. It converts the features of all the levels that we are going to aggregate @@ -1102,6 +1107,7 @@ class ResNet1dEndpoint(nn.Module): norm_before: if True, normalization layer is before the activation, after otherwise. 
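With the sampler change above, --batch-size is interpreted per GPU and is no longer divided by world_size; the epoch length and the number of classes drawn per batch then follow from avg_batch_size. The arithmetic is easy to check by hand (all numbers below are made up for illustration):

import math

num_seqs = 100_000       # utterances in the train list
iters_per_epoch = 6
batch_size = 32          # per GPU, after this change
num_egs_per_class = 1
num_egs_per_utt = 1
world_size = 4

avg_batch_size = batch_size  # var_batch_size=False, so batches are fixed size
batches_per_epoch = int(math.ceil(iters_per_epoch * num_seqs / avg_batch_size / world_size))
classes_per_batch = int(math.ceil(avg_batch_size / num_egs_per_class / num_egs_per_utt))
print(batches_per_epoch, classes_per_batch)  # 4688 32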
""" + def __init__( self, in_channels, @@ -1114,7 +1120,7 @@ def __init__( norm_layer=None, norm_before=True, ): - + super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm1d @@ -1155,6 +1161,6 @@ def forward(self, x, x_mask=None): """ x = self.resample(x) x = self.act(x) - if self.use_norm not self.norm_before: + if self.use_norm and not self.norm_before: x = self.bn(x) return x diff --git a/hyperion/torch/layer_blocks/resnet2d_blocks.py b/hyperion/torch/layer_blocks/resnet2d_blocks.py index 7fbb8327..65761526 100644 --- a/hyperion/torch/layer_blocks/resnet2d_blocks.py +++ b/hyperion/torch/layer_blocks/resnet2d_blocks.py @@ -93,7 +93,7 @@ class ResNet2dBasicBlock(nn.Module): use_norm: if True, it uses normalization layers, otherwise it does not. norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. - + """ expansion = 1 @@ -213,7 +213,7 @@ class ResNet2dBasicDecBlock(nn.Module): use_norm: if True, it uses normalization layers, otherwise it does not. norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. - + """ expansion = 1 @@ -456,7 +456,7 @@ def forward(self, x, x_mask=None): class ResNet2dBNDecBlock(nn.Module): - """ResNet 2d bottleneck Block decoder. + """ResNet 2d bottleneck Block decoder. Attributes: in_channels: input channels. @@ -472,6 +472,7 @@ class ResNet2dBNDecBlock(nn.Module): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + def __init__( self, in_channels, @@ -602,6 +603,7 @@ class SEResNet2dBasicBlock(ResNet2dBasicBlock): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + expansion = 1 def __init__( @@ -697,6 +699,7 @@ class SEResNet2dBasicDecBlock(ResNet2dBasicDecBlock): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + expansion = 1 def __init__( @@ -796,6 +799,7 @@ class SEResNet2dBNBlock(ResNet2dBNBlock): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. """ + def __init__( self, in_channels, @@ -897,6 +901,7 @@ class SEResNet2dBNDecBlock(ResNet2dBNDecBlock): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. 
""" + def __init__( self, in_channels, diff --git a/hyperion/torch/layer_blocks/transformer_feedforward.py b/hyperion/torch/layer_blocks/transformer_feedforward.py index 93cc6b66..7d2e8c1b 100644 --- a/hyperion/torch/layer_blocks/transformer_feedforward.py +++ b/hyperion/torch/layer_blocks/transformer_feedforward.py @@ -43,7 +43,7 @@ def forward(self, x): Tensor size=(batch, time, num_feats) """ if self.time_dim != 1: - x = x.transpose(1, time_dim) + x = x.transpose(1, self.time_dim) x = self.activation(self.w_1(x)) if self.dropout_rate > 0: @@ -51,7 +51,7 @@ def forward(self, x): x = self.w_2(x) if self.time_dim != 1: - x = x.transpose(1, time_dim) + x = x.transpose(1, self.time_dim) return x @@ -73,7 +73,13 @@ class Conv1dx2(nn.Module): """ def __init__( - self, num_channels, hid_channels, kernel_size, dropout_rate=0, time_dim=-1 + self, + num_channels, + hid_channels, + kernel_size, + activation="relu6", + dropout_rate=0, + time_dim=-1, ): super().__init__() @@ -133,7 +139,13 @@ class Conv1dLinear(nn.Module): """ def __init__( - self, num_channels, hid_channels, kernel_size, dropout_rate=0, time_dim=-1 + self, + num_channels, + hid_channels, + kernel_size, + activation="relu6", + dropout_rate=0, + time_dim=-1, ): super().__init__() self.w_1 = nn.Conv1d( diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index 467ea589..b6b3569e 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -10,9 +10,7 @@ import torch.nn as nn import torch.nn.functional as nnf -from hyperion.torch.utils.masking import seq_lengths_to_mask - -from ..utils import seq_le +from ..utils import seq_lengths_to_mask SQRT_EPS = 1e-5 N_EPS = 1e-6 @@ -44,7 +42,7 @@ def _standardize_weights(self, x, x_lengths=None, weights=None): """ if weights is None: return seq_lengths_to_mask( - x, x.size(self.dim), dtype=x.dtype, time_dim=self.dim + x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=self.dim ) if weights.dim() == x.dim(): @@ -478,7 +476,9 @@ def __str__(self): def _standardize_weights(self, x, x_lengths=None, weights=None): """standardizes the weights to have shape (batch, max_length).""" if weights is None: - return seq_lengths_to_mask(x, x.size(self.dim), dtype=x.dtype, time_dim=1) + return seq_lengths_to_mask( + x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=1 + ) if weights.dim() == x.dim(): return weights.traspose(1, self.dim) @@ -597,7 +597,9 @@ def __str__(self): def _standardize_weights(self, x, x_lengths=None, weights=None): """standardizes the weights to have shape (batch, max_length).""" if weights is None: - return seq_lengths_to_mask(x, x.size(self.dim), dtype=x.dtype, time_dim=1) + return seq_lengths_to_mask( + x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=1 + ) if weights.dim() == x.dim(): return weights.traspose(1, self.dim) diff --git a/hyperion/torch/layers/margin_losses.py b/hyperion/torch/layers/margin_losses.py index 5ae2b518..63da2493 100644 --- a/hyperion/torch/layers/margin_losses.py +++ b/hyperion/torch/layers/margin_losses.py @@ -251,8 +251,8 @@ def forward(self, x, y=None): """Computes penalized logits. Args: - x: input feature tensor with shape = (batch, in_feats). - y: ground truth classes. This is required to penalize the logit of + x: Input feature tensor with shape = (batch, in_feats). + y: Ground truth classes. This is required to penalize the logit of the true class at training time. 
Returns: diff --git a/hyperion/torch/lr_schedulers/cos_lr.py b/hyperion/torch/lr_schedulers/cos_lr.py index 6e36cf2a..83b9206f 100644 --- a/hyperion/torch/lr_schedulers/cos_lr.py +++ b/hyperion/torch/lr_schedulers/cos_lr.py @@ -64,7 +64,7 @@ def __init__( self.gamma = gamma def on_epoch_begin(self, epoch=None, epoch_updates=1, **kwargs): - super(CosineLR, self).on_epoch_begin(epoch) + super().on_epoch_begin(epoch) if self.update_lr_on_opt_step: # T has to correspond to an integer number of epochs T = int(math.ceil(self.T / epoch_updates) * epoch_updates) @@ -122,7 +122,7 @@ def __init__( step=-1, update_lr_on_opt_step=False, ): - super(AdamCosineLR, super).__init__( + super().__init__( optimizer, T, T_mul, @@ -143,12 +143,12 @@ def get_lr(self, step): if self.warm_restarts: self.last_restart = step x = 0 - self.T *= T_mul + self.T *= self.T_mul self.num_restarts += 1 else: return self.min_lrs - alpha = gamma ** self.num_restarts + alpha = self.gamma ** self.num_restarts r = math.pi / self.T return [ diff --git a/hyperion/torch/lr_schedulers/lr_scheduler.py b/hyperion/torch/lr_schedulers/lr_scheduler.py index 319ea7a2..2ad1740e 100644 --- a/hyperion/torch/lr_schedulers/lr_scheduler.py +++ b/hyperion/torch/lr_schedulers/lr_scheduler.py @@ -56,7 +56,7 @@ def __init__( @property def in_warmup(self): - return self.step <= self.warmup_steps + return self.step < self.warmup_steps def state_dict(self): """Returns the state of the scheduler as a :class:`dict`. @@ -104,9 +104,6 @@ def on_epoch_end(self, metrics=None): def on_opt_step(self): - # self.update_lr_on_opt_step=True - # print('exp-lr', self.last_step, self.hold_steps, self.decay_rate, self.decay_steps) - if self.in_warmup: for param_group, lr in zip( self.optimizer.param_groups, self.get_warmup_lr() diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 685ead4a..3e9e9fcd 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -699,7 +699,7 @@ def add_class_args(parser, prefix=None, skip=set()): @staticmethod def filter_finetune_args(**kwargs): - valid_args = ("loss_type", "s", "margin", "margin_warmup_epochs") + valid_args = ("loss_type", "cos_scale", "margin", "margin_warmup_epochs") args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) return args diff --git a/hyperion/torch/torch_model_loader.py b/hyperion/torch/torch_model_loader.py index 92e0beb4..c173cd50 100644 --- a/hyperion/torch/torch_model_loader.py +++ b/hyperion/torch/torch_model_loader.py @@ -55,7 +55,7 @@ def load(file_path, extra_objs={}, map_location=None): if "n_averaged" in state_dict: del state_dict["n_averaged"] - cfg = TorchModelLoader._fix_compatibilty(class_obj, cfg) + cfg = TorchModelLoader._fix_compatibility(class_obj, cfg) p = re.compile("^module\.") num_tries = 3 diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 8646c79f..4bd6790a 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -56,6 +56,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -86,6 +87,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index 0d9b1de3..5649cc01 100644 --- 
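The cos_lr.py hunks above fix real bugs: super(AdamCosineLR, super).__init__ could never have run, and T_mul and gamma were read as bare names instead of attributes. The schedule being repaired is SGDR-style cosine annealing with warm restarts, where each restart stretches the period by T_mul and damps the peak by gamma. The sketch below is an assumption-level reading of that rule for illustration, not a copy of the hyperion class:

import math

def cosine_restarts_lr(step, max_lr, min_lr, T=1000, T_mul=2, gamma=0.5):
    """LR at a global step, restarting the cosine whenever a period ends."""
    num_restarts = 0
    while step >= T:
        step -= T
        T *= T_mul                 # each cycle lasts T_mul times longer
        num_restarts += 1
    alpha = gamma ** num_restarts  # each cycle peaks lower
    return min_lr + 0.5 * alpha * (max_lr - min_lr) * (1 + math.cos(math.pi * step / T))

for s in (0, 500, 1000, 2000):
    print(s, cosine_restarts_lr(s, 0.05, 1e-5))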
a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -54,6 +54,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -82,6 +83,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/plda_trainer.py b/hyperion/torch/trainers/plda_trainer.py index 4365ed56..dd797996 100644 --- a/hyperion/torch/trainers/plda_trainer.py +++ b/hyperion/torch/trainers/plda_trainer.py @@ -58,6 +58,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -91,6 +92,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 72f6d164..ecdb4dd8 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -4,6 +4,7 @@ """ import os +import math import contextlib from collections import OrderedDict as ODict from enum import Enum @@ -76,6 +77,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -102,6 +104,7 @@ def __init__( self.epochs = epochs self.cur_epoch = cur_epoch self.grad_acc_steps = grad_acc_steps + self.eff_batch_size = eff_batch_size self.exp_path = Path(exp_path) if loggers is None: @@ -113,8 +116,6 @@ def __init__( else: self.loggers = loggers - # self.lr_scheduler = lr_scheduler - self.metrics = metrics self.device = device self.train_mode = train_mode @@ -211,8 +212,7 @@ def fit(self, train_data, val_data=None): val_data: PyTorch data loader for the validation loop """ self.exp_path.mkdir(parents=True, exist_ok=True) - # if not os.path.exists(self.exp_path): - # os.makedirs(self.exp_path) + self._compute_grad_acc_steps(train_data) if self.do_swa and self.cur_epoch >= self.swa_start: self.in_swa = True @@ -435,6 +435,40 @@ def _get_lr(self): for param_group in self.optimizer.param_groups: return param_group["lr"] + def _compute_grad_acc_steps(self, data_loader): + if self.eff_batch_size is None: + return + + if data_loader.batch_sampler is not None: + try: + batch_size = data_loader.batch_sampler.avg_batch_size + except: + logging.warn( + "batch sampler doesn't have avg_batch_size property, " + "we cannot estimate grad_acc_steps, using grad_acc_steps=%d", + self.grad_acc_steps, + ) + return + + self.grad_acc_steps = int( + math.ceil(self.eff_batch_size / batch_size / self.world_size) + ) + logging.info( + "Setting grad_acc_steps=%d for" + "eff_batch_size=%d, avg_batch_size=%d, world_size=%d", + self.grad_acc_steps, + self.eff_batch_size, + batch_size, + self.world_size, + ) + return + + logging.warn( + "We cannot determine the batch_size, " + "we cannot estimate grad_acc_steps, using grad_acc_steps=%d", + self.grad_acc_steps, + ) + def checkpoint(self, logs=None): """Creates a checkpoint of the training, to save and posterior recovery @@ -566,6 +600,7 @@ def load_last_checkpoint(self): def filter_args(**kwargs): valid_args = ( "grad_acc_steps", + "eff_batch_size", "epochs", "log_interval", "use_amp", @@ -604,6 +639,12 @@ def add_class_args(parser, prefix=None, skip=[]): default=1, help="gradient accumulation batches before weigth update", 
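The new --eff-batch-size option lets a recipe state the total batch size it was tuned for and have the trainer derive grad_acc_steps from whatever per-GPU batch the sampler actually delivers, which is what _compute_grad_acc_steps above does. Worked through with illustrative numbers:

import math

eff_batch_size = 512  # target effective batch (the recipe-level setting)
avg_batch_size = 32   # per-GPU batch reported by the sampler
world_size = 4        # number of GPUs

grad_acc_steps = int(math.ceil(eff_batch_size / avg_batch_size / world_size))
print(grad_acc_steps)  # 4 -> 4 accumulated batches * 32 per GPU * 4 GPUs = 512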
) + parser.add_argument( + "--eff-batch-size", + type=int, + default=None, + help="effective total batch size, if given, it overrides grad_acc_steps", + ) parser.add_argument("--epochs", type=int, default=200, help="number of epochs") parser.add_argument( "--log-interval", @@ -680,6 +721,5 @@ def add_class_args(parser, prefix=None, skip=[]): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='trainer options') add_argparse_args = add_class_args diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index 53486c7b..bc72bbe2 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -54,6 +54,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -82,6 +83,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index a2da616c..ac87ba5a 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -54,6 +54,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -81,6 +82,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index d187af79..1b13bac1 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -54,6 +54,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -81,6 +82,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 0784a2ea..7dee1303 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -7,6 +7,7 @@ import time import logging +from jsonargparse import ArgumentParser, ActionParser import torch import torch.nn as nn @@ -58,6 +59,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, p_attack=0.8, p_val_attack=0, device=None, @@ -88,6 +90,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index fef0b3b5..0719f350 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -7,11 +7,12 @@ import time import logging +from jsonargparse import ArgumentParser, ActionParser import torch import torch.nn as nn -from ..utils import MetricAcc +from ..utils import MetricAcc from .xvector_trainer_from_wav import XVectorTrainerFromWav @@ -60,6 +61,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, p_attack=0.8, p_val_attack=0, device=None, @@ -91,6 
+93,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, @@ -128,7 +131,6 @@ def __init__( % (p_attack, 1.0 / self.grad_acc_steps) ) - def train_epoch(self, data_loader): self.model.update_loss_margin(self.cur_epoch) @@ -258,4 +260,3 @@ def add_class_args(parser, prefix=None, skip=[]): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 2e032a49..3e704bd5 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -54,6 +54,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -85,6 +86,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 47801c29..e6014750 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -61,6 +61,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, reg_layers_enc=None, reg_layers_classif=None, reg_weight_enc=0.1, @@ -94,6 +95,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 6763b035..dafeb0c5 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -62,6 +62,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, reg_layers_enc=None, reg_layers_classif=None, reg_weight_enc=0.1, @@ -96,6 +97,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, reg_layers_enc=reg_layers_enc, reg_layers_classif=reg_layers_classif, reg_weight_enc=reg_weight_enc, diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 3519b6d6..a8f9da99 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -55,6 +55,7 @@ def __init__( exp_path="./train", cur_epoch=0, grad_acc_steps=1, + eff_batch_size=None, device=None, metrics=None, lrsched=None, @@ -83,6 +84,7 @@ def __init__( exp_path, cur_epoch=cur_epoch, grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, device=device, metrics=metrics, lrsched=lrsched, From 3a0eeff1baa3ac6ba38be9a9db429878c67ae0a6 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 1 Apr 2022 12:02:17 -0400 Subject: [PATCH 008/154] voxceleb/v1.1 refactorized --- egs/sre19-cmn2/v1/conf/ecapatdnn_small.yaml | 34 +++++++++++++ egs/sre19-cmn2/v1/conf/efficientnet_b4.yaml | 20 ++++++++ egs/sre19-cmn2/v1/conf/efficientnet_b7.yaml | 22 ++++++++ .../v1/conf/lrsched_exp_default.yaml | 7 +++ .../v1/conf/optim_adam_default.yaml | 6 +++ egs/sre19-cmn2/v1/conf/res2net50.yaml | 13 +++++ egs/sre19-cmn2/v1/conf/resnet34.yaml | 11 ++++ 
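This second patch replaces the conf symlink in voxceleb/v1.1 with per-recipe YAML files that compose: train_*_default.yaml points at data, feats, model, and trainer YAMLs, and trainer_default.yaml in turn names the optimizer and LR-scheduler files. That nesting works because jsonargparse lets the value of an ActionParser argument be a path to a sub-config file. A small self-contained sketch of the mechanism; treat it as an assumption-level illustration, with file name and keys standing in for the conf files added below:

from pathlib import Path
from jsonargparse import ArgumentParser, ActionParser

trainer_parser = ArgumentParser(prog="")
trainer_parser.add_argument("--optim.lr", type=float, default=0.05)
trainer_parser.add_argument("--eff_batch_size", type=int, default=None)

parser = ArgumentParser()
parser.add_argument("--trainer", action=ActionParser(parser=trainer_parser))

# stand-in for conf/trainer_default.yaml
Path("trainer_default.yaml").write_text("optim:\n  lr: 0.01\neff_batch_size: 512\n")
args = parser.parse_args(["--trainer", "trainer_default.yaml"])
print(args.trainer.optim.lr, args.trainer.eff_batch_size)  # 0.01 512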
egs/sre19-cmn2/v1/conf/spinenet49.yaml | 11 ++++ .../v1/conf/train_data_default.yaml | 10 ++++ .../v1/conf/train_ecapatdnn_xvec_default.yaml | 7 +++ .../v1/conf/train_effnetb4_xvec_default.yaml | 7 +++ .../v1/conf/train_res2net50_xvec_default.yaml | 7 +++ .../v1/conf/train_resnet34_xvec_default.yaml | 7 +++ .../conf/train_spinenet49_xvec_default.yaml | 7 +++ egs/sre19-cmn2/v1/conf/trainer_default.yaml | 6 +++ .../v1/conf/trainer_swa_default.yaml | 9 ++++ egs/sre19-cmn2/v1/conf/val_data_default.yaml | 10 ++++ egs/voxceleb/v1.1/conf | 1 - egs/voxceleb/v1.1/conf/clsp.conf | 11 ++++ egs/voxceleb/v1.1/conf/coe_gpu_bigmem.conf | 11 ++++ egs/voxceleb/v1.1/conf/coe_gpu_long.conf | 13 +++++ egs/voxceleb/v1.1/conf/coe_gpu_rtx.conf | 11 ++++ egs/voxceleb/v1.1/conf/coe_gpu_short.conf | 11 ++++ egs/voxceleb/v1.1/conf/coe_gpu_v100.conf | 11 ++++ egs/voxceleb/v1.1/conf/ecapatdnn_small.yaml | 34 +++++++++++++ egs/voxceleb/v1.1/conf/efficientnet_b4.yaml | 20 ++++++++ egs/voxceleb/v1.1/conf/efficientnet_b7.yaml | 22 ++++++++ egs/voxceleb/v1.1/conf/fbank64_8k.yaml | 7 +++ egs/voxceleb/v1.1/conf/fbank64_stmn_8k.yaml | 12 +++++ egs/voxceleb/v1.1/conf/fbank80_16k.yaml | 7 +++ egs/voxceleb/v1.1/conf/fbank80_stmn_16k.yaml | 12 +++++ .../v1.1/conf/lrsched_exp_default.yaml | 7 +++ egs/voxceleb/v1.1/conf/noise_aug.yaml | 19 +++++++ egs/voxceleb/v1.1/conf/online_pitch.conf | 1 + .../v1.1/conf/optim_adam_default.yaml | 6 +++ egs/voxceleb/v1.1/conf/res2net50.yaml | 13 +++++ egs/voxceleb/v1.1/conf/resnet34.yaml | 11 ++++ egs/voxceleb/v1.1/conf/reverb_noise_aug.yaml | 35 +++++++++++++ egs/voxceleb/v1.1/conf/spinenet49.yaml | 11 ++++ .../v1.1/conf/train_data_default.yaml | 10 ++++ .../conf/train_ecapatdnn_xvec_default.yaml | 7 +++ .../conf/train_effnetb4_xvec_default.yaml | 7 +++ .../conf/train_res2net50_xvec_default.yaml | 7 +++ .../conf/train_resnet34_xvec_default.yaml | 7 +++ .../conf/train_spinenet49_xvec_default.yaml | 7 +++ egs/voxceleb/v1.1/conf/trainer_default.yaml | 6 +++ .../v1.1/conf/trainer_swa_default.yaml | 9 ++++ egs/voxceleb/v1.1/conf/vad_16k.yaml | 8 +++ egs/voxceleb/v1.1/conf/vad_8k.yaml | 8 +++ egs/voxceleb/v1.1/conf/val_data_default.yaml | 10 ++++ ...statsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh | 50 ++++--------------- ...fnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | 29 +++++------ ..._eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | 28 +++++------ ...net34_345_arcs30m0.3_adam_lr0.05_amp.v1.sh | 32 ++++++------ ...lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | 26 ++++------ ...pinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | 27 ++++------ ...et34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 33 ++++++------ ...et34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 35 ++++++------- ...et50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | 33 ++++++------ ...et50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 34 ++++++------- ...et50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | 33 ++++++------ ...w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | 35 ++++++------- ..._resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | 3 +- ...net34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | 27 +++++----- ...4_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh | 27 ++++------ ..._resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | 34 ++++++------- ...et50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 36 ++++++------- ...ine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | 29 +++++------ ...ne2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | 29 +++++------ ...pinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | 26 ++++------ ...inenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | 29 +++++------ ...et50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 35 
++++++------- ...eresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | 23 ++++----- ...ine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | 30 +++++------ ...ne2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | 31 +++++------- hyp_utils/conda_env.sh | 37 +++++++------- hyp_utils/feats/make_evad.sh | 2 +- .../xvectors/extract_xvectors_from_wav.sh | 4 +- .../make_babble_noise_for_nnet_train.sh | 2 +- .../xvectors/pack_rirs_for_nnet_train.sh | 2 +- .../preprocess_audios_for_nnet_train.sh | 2 +- ...te-energy-vad.py => compute_energy_vad.py} | 0 ...om-wav.py => extract_xvectors_from_wav.py} | 0 ...es.py => make_babble_noise_audio_files.py} | 0 .../{pack-wav-rirs.py => pack_wav_rirs.py} | 0 ...dio-files.py => preprocess_audio_files.py} | 0 hyperion/bin/torch_train_xvec_from_wav.py | 3 +- hyperion/torch/layer_blocks/res2net_blocks.py | 4 +- hyperion/torch/layers/global_pool.py | 4 +- hyperion/torch/layers/pool_factory.py | 2 + hyperion/torch/models/xvectors/xvector.py | 1 + hyperion/torch/trainers/torch_trainer.py | 6 ++- 92 files changed, 915 insertions(+), 474 deletions(-) create mode 100644 egs/sre19-cmn2/v1/conf/ecapatdnn_small.yaml create mode 100644 egs/sre19-cmn2/v1/conf/efficientnet_b4.yaml create mode 100644 egs/sre19-cmn2/v1/conf/efficientnet_b7.yaml create mode 100644 egs/sre19-cmn2/v1/conf/lrsched_exp_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/optim_adam_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/res2net50.yaml create mode 100644 egs/sre19-cmn2/v1/conf/resnet34.yaml create mode 100644 egs/sre19-cmn2/v1/conf/spinenet49.yaml create mode 100644 egs/sre19-cmn2/v1/conf/train_data_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/train_ecapatdnn_xvec_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/train_effnetb4_xvec_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/train_res2net50_xvec_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/train_resnet34_xvec_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/train_spinenet49_xvec_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/trainer_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/trainer_swa_default.yaml create mode 100644 egs/sre19-cmn2/v1/conf/val_data_default.yaml delete mode 120000 egs/voxceleb/v1.1/conf create mode 100644 egs/voxceleb/v1.1/conf/clsp.conf create mode 100644 egs/voxceleb/v1.1/conf/coe_gpu_bigmem.conf create mode 100644 egs/voxceleb/v1.1/conf/coe_gpu_long.conf create mode 100644 egs/voxceleb/v1.1/conf/coe_gpu_rtx.conf create mode 100644 egs/voxceleb/v1.1/conf/coe_gpu_short.conf create mode 100644 egs/voxceleb/v1.1/conf/coe_gpu_v100.conf create mode 100644 egs/voxceleb/v1.1/conf/ecapatdnn_small.yaml create mode 100644 egs/voxceleb/v1.1/conf/efficientnet_b4.yaml create mode 100644 egs/voxceleb/v1.1/conf/efficientnet_b7.yaml create mode 100644 egs/voxceleb/v1.1/conf/fbank64_8k.yaml create mode 100644 egs/voxceleb/v1.1/conf/fbank64_stmn_8k.yaml create mode 100644 egs/voxceleb/v1.1/conf/fbank80_16k.yaml create mode 100644 egs/voxceleb/v1.1/conf/fbank80_stmn_16k.yaml create mode 100644 egs/voxceleb/v1.1/conf/lrsched_exp_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/noise_aug.yaml create mode 100644 egs/voxceleb/v1.1/conf/online_pitch.conf create mode 100644 egs/voxceleb/v1.1/conf/optim_adam_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/res2net50.yaml create mode 100644 egs/voxceleb/v1.1/conf/resnet34.yaml create mode 100644 egs/voxceleb/v1.1/conf/reverb_noise_aug.yaml create mode 100644 egs/voxceleb/v1.1/conf/spinenet49.yaml create mode 100644 
egs/voxceleb/v1.1/conf/train_data_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn_xvec_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_effnetb4_xvec_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_res2net50_xvec_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_resnet34_xvec_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_spinenet49_xvec_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/trainer_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/trainer_swa_default.yaml create mode 100644 egs/voxceleb/v1.1/conf/vad_16k.yaml create mode 100644 egs/voxceleb/v1.1/conf/vad_8k.yaml create mode 100644 egs/voxceleb/v1.1/conf/val_data_default.yaml rename hyperion/bin/{compute-energy-vad.py => compute_energy_vad.py} (100%) rename hyperion/bin/{torch-extract-xvectors-from-wav.py => extract_xvectors_from_wav.py} (100%) rename hyperion/bin/{make-babble-noise-audio-files.py => make_babble_noise_audio_files.py} (100%) rename hyperion/bin/{pack-wav-rirs.py => pack_wav_rirs.py} (100%) rename hyperion/bin/{preprocess-audio-files.py => preprocess_audio_files.py} (100%) diff --git a/egs/sre19-cmn2/v1/conf/ecapatdnn_small.yaml b/egs/sre19-cmn2/v1/conf/ecapatdnn_small.yaml new file mode 100644 index 00000000..fd386500 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/ecapatdnn_small.yaml @@ -0,0 +1,34 @@ +resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 +pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/sre19-cmn2/v1/conf/efficientnet_b4.yaml b/egs/sre19-cmn2/v1/conf/efficientnet_b4.yaml new file mode 100644 index 00000000..f87c1e02 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/efficientnet_b4.yaml @@ -0,0 +1,20 @@ +effnet_type: efficientnet-b4 +in_feats: 80 +in_channels: 1 +in_kernel_size: 3 +in_stride: 1 +se_r: 4 +fix_stem_head: true +mbconv_strides: +- 1 +- 1 +- 2 +- 2 +- 1 +- 2 +- 1 +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/sre19-cmn2/v1/conf/efficientnet_b7.yaml b/egs/sre19-cmn2/v1/conf/efficientnet_b7.yaml new file mode 100644 index 00000000..bae5c7cb --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/efficientnet_b7.yaml @@ -0,0 +1,22 @@ +effnet_type: efficientnet-b7 +in_feats: 80 +in_channels: 1 +in_kernel_size: 3 +in_stride: 1 +se_r: 4 +fix_stem_head: true +mbconv_strides: +- 1 +- 1 +- 2 +- 2 +- 1 +- 2 +- 1 +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 +norm_layer: instance-norm-affine +head_norm_layer: layer-norm diff --git a/egs/sre19-cmn2/v1/conf/lrsched_exp_default.yaml b/egs/sre19-cmn2/v1/conf/lrsched_exp_default.yaml new file mode 100644 index 00000000..fe08b704 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/lrsched_exp_default.yaml @@ -0,0 +1,7 @@ +lrsch_type: exp_lr +decay_rate: 0.5 +decay_steps: 8000 +hold_steps: 40000 +min_lr: 1.0e-05 +update_lr_on_opt_step: true +warmup_steps: 1000 diff --git a/egs/sre19-cmn2/v1/conf/optim_adam_default.yaml b/egs/sre19-cmn2/v1/conf/optim_adam_default.yaml new file mode 100644 index 00000000..b6620069 --- /dev/null +++ 
b/egs/sre19-cmn2/v1/conf/optim_adam_default.yaml @@ -0,0 +1,6 @@ +opt_type: adam +lr: 0.05 +amsgrad: true +beta1: 0.9 +beta2: 0.95 +weight_decay: 1.0e-05 diff --git a/egs/sre19-cmn2/v1/conf/res2net50.yaml b/egs/sre19-cmn2/v1/conf/res2net50.yaml new file mode 100644 index 00000000..48067a3d --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/res2net50.yaml @@ -0,0 +1,13 @@ +resnet_type: res2net50 +in_channels: 1 +in_feats: 80 +in_kernel_size: 3 +in_stride: 1 +no_maxpool: true +res2net_width_factor: 3.25 +res2net_scale: 8 +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/sre19-cmn2/v1/conf/resnet34.yaml b/egs/sre19-cmn2/v1/conf/resnet34.yaml new file mode 100644 index 00000000..98695823 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/resnet34.yaml @@ -0,0 +1,11 @@ +resnet_type: resnet34 +in_channels: 1 +in_feats: 80 +in_kernel_size: 3 +in_stride: 1 +no_maxpool: true +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/sre19-cmn2/v1/conf/spinenet49.yaml b/egs/sre19-cmn2/v1/conf/spinenet49.yaml new file mode 100644 index 00000000..66b8d517 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/spinenet49.yaml @@ -0,0 +1,11 @@ +spinenet_type: spinenet49 +in_channels: 1 +in_feats: 80 +in_kernel_size: 3 +in_stride: 1 +no_maxpool: true +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/sre19-cmn2/v1/conf/train_data_default.yaml b/egs/sre19-cmn2/v1/conf/train_data_default.yaml new file mode 100644 index 00000000..451ffa35 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/train_data_default.yaml @@ -0,0 +1,10 @@ +dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml +sampler: + batch_size: 32 + iters_per_epoch: 6 +data_loader: + num_workers: 8 + \ No newline at end of file diff --git a/egs/sre19-cmn2/v1/conf/train_ecapatdnn_xvec_default.yaml b/egs/sre19-cmn2/v1/conf/train_ecapatdnn_xvec_default.yaml new file mode 100644 index 00000000..46298946 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/train_ecapatdnn_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: ecapatdnn_small.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/sre19-cmn2/v1/conf/train_effnetb4_xvec_default.yaml b/egs/sre19-cmn2/v1/conf/train_effnetb4_xvec_default.yaml new file mode 100644 index 00000000..1bc74de6 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/train_effnetb4_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: efficientnet_b4.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/sre19-cmn2/v1/conf/train_res2net50_xvec_default.yaml b/egs/sre19-cmn2/v1/conf/train_res2net50_xvec_default.yaml new file mode 100644 index 00000000..1d387790 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/train_res2net50_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: resnet34.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/sre19-cmn2/v1/conf/train_resnet34_xvec_default.yaml b/egs/sre19-cmn2/v1/conf/train_resnet34_xvec_default.yaml new file mode 100644 index 00000000..1d387790 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/train_resnet34_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: 
val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: resnet34.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/sre19-cmn2/v1/conf/train_spinenet49_xvec_default.yaml b/egs/sre19-cmn2/v1/conf/train_spinenet49_xvec_default.yaml new file mode 100644 index 00000000..07167987 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/train_spinenet49_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: spinenet49.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/sre19-cmn2/v1/conf/trainer_default.yaml b/egs/sre19-cmn2/v1/conf/trainer_default.yaml new file mode 100644 index 00000000..86dcc2e4 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/trainer_default.yaml @@ -0,0 +1,6 @@ +optim: optim_adam_default.yaml +lrsched: lrsched_exp_default.yaml +use_amp: true +log_interval: 1000 +epochs: 70 +eff_batch_size: 512 diff --git a/egs/sre19-cmn2/v1/conf/trainer_swa_default.yaml b/egs/sre19-cmn2/v1/conf/trainer_swa_default.yaml new file mode 100644 index 00000000..0cafad01 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/trainer_swa_default.yaml @@ -0,0 +1,9 @@ +optim: optim_adam_default.yaml +lrsched: lrsched_exp_default.yaml +use_amp: true +log_interval: 1000 +epochs: 80 +eff_batch_size: 512 +swa_start: 60 +swa_lr: 1e-3 +swa_anneal_epochs: 5 diff --git a/egs/sre19-cmn2/v1/conf/val_data_default.yaml b/egs/sre19-cmn2/v1/conf/val_data_default.yaml new file mode 100644 index 00000000..451ffa35 --- /dev/null +++ b/egs/sre19-cmn2/v1/conf/val_data_default.yaml @@ -0,0 +1,10 @@ +dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml +sampler: + batch_size: 32 + iters_per_epoch: 6 +data_loader: + num_workers: 8 + \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf b/egs/voxceleb/v1.1/conf deleted file mode 120000 index 25a735e3..00000000 --- a/egs/voxceleb/v1.1/conf +++ /dev/null @@ -1 +0,0 @@ -../v1/conf \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf/clsp.conf b/egs/voxceleb/v1.1/conf/clsp.conf new file mode 100644 index 00000000..4ed38246 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/voxceleb/v1.1/conf/coe_gpu_bigmem.conf b/egs/voxceleb/v1.1/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/voxceleb/v1.1/conf/coe_gpu_long.conf b/egs/voxceleb/v1.1/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ 
b/egs/voxceleb/v1.1/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/voxceleb/v1.1/conf/coe_gpu_rtx.conf b/egs/voxceleb/v1.1/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/voxceleb/v1.1/conf/coe_gpu_short.conf b/egs/voxceleb/v1.1/conf/coe_gpu_short.conf new file mode 100644 index 00000000..81de5cb7 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/voxceleb/v1.1/conf/coe_gpu_v100.conf b/egs/voxceleb/v1.1/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/voxceleb/v1.1/conf/ecapatdnn_small.yaml b/egs/voxceleb/v1.1/conf/ecapatdnn_small.yaml new file mode 100644 index 00000000..fd386500 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/ecapatdnn_small.yaml @@ -0,0 +1,34 @@ +resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 +pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/voxceleb/v1.1/conf/efficientnet_b4.yaml b/egs/voxceleb/v1.1/conf/efficientnet_b4.yaml new file mode 100644 index 00000000..f87c1e02 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/efficientnet_b4.yaml @@ -0,0 +1,20 @@ +effnet_type: efficientnet-b4 +in_feats: 80 +in_channels: 1 +in_kernel_size: 3 +in_stride: 1 +se_r: 4 +fix_stem_head: true 
+mbconv_strides: +- 1 +- 1 +- 2 +- 2 +- 1 +- 2 +- 1 +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/voxceleb/v1.1/conf/efficientnet_b7.yaml b/egs/voxceleb/v1.1/conf/efficientnet_b7.yaml new file mode 100644 index 00000000..bae5c7cb --- /dev/null +++ b/egs/voxceleb/v1.1/conf/efficientnet_b7.yaml @@ -0,0 +1,22 @@ +effnet_type: efficientnet-b7 +in_feats: 80 +in_channels: 1 +in_kernel_size: 3 +in_stride: 1 +se_r: 4 +fix_stem_head: true +mbconv_strides: +- 1 +- 1 +- 2 +- 2 +- 1 +- 2 +- 1 +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 +norm_layer: instance-norm-affine +head_norm_layer: layer-norm diff --git a/egs/voxceleb/v1.1/conf/fbank64_8k.yaml b/egs/voxceleb/v1.1/conf/fbank64_8k.yaml new file mode 100644 index 00000000..a77eb899 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/fbank64_8k.yaml @@ -0,0 +1,7 @@ +sample_frequency: 8000 +frame_length: 25 +low_freq: 20 +high_freq: 3700 +num_filters: 64 +snip_edges: false +use_energy: false diff --git a/egs/voxceleb/v1.1/conf/fbank64_stmn_8k.yaml b/egs/voxceleb/v1.1/conf/fbank64_stmn_8k.yaml new file mode 100644 index 00000000..dfd0d3e5 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/fbank64_stmn_8k.yaml @@ -0,0 +1,12 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 8000 + frame_length: 25 + low_freq: 20 + high_freq: 3700 + num_filters: 64 + snip_edges: false + use_energy: false +mvn: + context: 150 + norm_var: false diff --git a/egs/voxceleb/v1.1/conf/fbank80_16k.yaml b/egs/voxceleb/v1.1/conf/fbank80_16k.yaml new file mode 100644 index 00000000..88bae69e --- /dev/null +++ b/egs/voxceleb/v1.1/conf/fbank80_16k.yaml @@ -0,0 +1,7 @@ +sample_frequency: 16000 +frame_length: 25 +low_freq: 20 +high_freq: 7600 +num_filters: 80 +snip_edges: false +use_energy: false diff --git a/egs/voxceleb/v1.1/conf/fbank80_stmn_16k.yaml b/egs/voxceleb/v1.1/conf/fbank80_stmn_16k.yaml new file mode 100644 index 00000000..f4091f5d --- /dev/null +++ b/egs/voxceleb/v1.1/conf/fbank80_stmn_16k.yaml @@ -0,0 +1,12 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 16000 + frame_length: 25 + low_freq: 20 + high_freq: 7600 + num_filters: 80 + snip_edges: false + use_energy: false +mvn: + context: 150 + norm_var: false diff --git a/egs/voxceleb/v1.1/conf/lrsched_exp_default.yaml b/egs/voxceleb/v1.1/conf/lrsched_exp_default.yaml new file mode 100644 index 00000000..fe08b704 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/lrsched_exp_default.yaml @@ -0,0 +1,7 @@ +lrsch_type: exp_lr +decay_rate: 0.5 +decay_steps: 8000 +hold_steps: 40000 +min_lr: 1.0e-05 +update_lr_on_opt_step: true +warmup_steps: 1000 diff --git a/egs/voxceleb/v1.1/conf/noise_aug.yaml b/egs/voxceleb/v1.1/conf/noise_aug.yaml new file mode 100644 index 00000000..7e575faf --- /dev/null +++ b/egs/voxceleb/v1.1/conf/noise_aug.yaml @@ -0,0 +1,19 @@ +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/voxceleb/v1.1/conf/online_pitch.conf b/egs/voxceleb/v1.1/conf/online_pitch.conf new file mode 100644 index 00000000..926bcfca --- /dev/null +++ b/egs/voxceleb/v1.1/conf/online_pitch.conf @@ -0,0 +1 @@ +--sample-frequency=8000 diff --git a/egs/voxceleb/v1.1/conf/optim_adam_default.yaml 
b/egs/voxceleb/v1.1/conf/optim_adam_default.yaml new file mode 100644 index 00000000..b6620069 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/optim_adam_default.yaml @@ -0,0 +1,6 @@ +opt_type: adam +lr: 0.05 +amsgrad: true +beta1: 0.9 +beta2: 0.95 +weight_decay: 1.0e-05 diff --git a/egs/voxceleb/v1.1/conf/res2net50.yaml b/egs/voxceleb/v1.1/conf/res2net50.yaml new file mode 100644 index 00000000..48067a3d --- /dev/null +++ b/egs/voxceleb/v1.1/conf/res2net50.yaml @@ -0,0 +1,13 @@ +resnet_type: res2net50 +in_channels: 1 +in_feats: 80 +in_kernel_size: 3 +in_stride: 1 +no_maxpool: true +res2net_width_factor: 3.25 +res2net_scale: 8 +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/voxceleb/v1.1/conf/resnet34.yaml b/egs/voxceleb/v1.1/conf/resnet34.yaml new file mode 100644 index 00000000..98695823 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/resnet34.yaml @@ -0,0 +1,11 @@ +resnet_type: resnet34 +in_channels: 1 +in_feats: 80 +in_kernel_size: 3 +in_stride: 1 +no_maxpool: true +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/voxceleb/v1.1/conf/reverb_noise_aug.yaml b/egs/voxceleb/v1.1/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/voxceleb/v1.1/conf/spinenet49.yaml b/egs/voxceleb/v1.1/conf/spinenet49.yaml new file mode 100644 index 00000000..66b8d517 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/spinenet49.yaml @@ -0,0 +1,11 @@ +spinenet_type: spinenet49 +in_channels: 1 +in_feats: 80 +in_kernel_size: 3 +in_stride: 1 +no_maxpool: true +embed_dim: 256 +cos_scale: 30.0 +margin: 0.3 +margin_warmup_epochs: 20.0 +dropout_rate: 0.0 diff --git a/egs/voxceleb/v1.1/conf/train_data_default.yaml b/egs/voxceleb/v1.1/conf/train_data_default.yaml new file mode 100644 index 00000000..451ffa35 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_data_default.yaml @@ -0,0 +1,10 @@ +dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml +sampler: + batch_size: 32 + iters_per_epoch: 6 +data_loader: + num_workers: 8 + \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn_xvec_default.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn_xvec_default.yaml new file mode 100644 index 00000000..46298946 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: ecapatdnn_small.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf/train_effnetb4_xvec_default.yaml b/egs/voxceleb/v1.1/conf/train_effnetb4_xvec_default.yaml new file mode 100644 index 00000000..1bc74de6 --- /dev/null +++ 
b/egs/voxceleb/v1.1/conf/train_effnetb4_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: efficientnet_b4.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf/train_res2net50_xvec_default.yaml b/egs/voxceleb/v1.1/conf/train_res2net50_xvec_default.yaml new file mode 100644 index 00000000..1d387790 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_res2net50_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: resnet34.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf/train_resnet34_xvec_default.yaml b/egs/voxceleb/v1.1/conf/train_resnet34_xvec_default.yaml new file mode 100644 index 00000000..1d387790 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_resnet34_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: resnet34.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf/train_spinenet49_xvec_default.yaml b/egs/voxceleb/v1.1/conf/train_spinenet49_xvec_default.yaml new file mode 100644 index 00000000..07167987 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_spinenet49_xvec_default.yaml @@ -0,0 +1,7 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +feats: fbank80_stmn_16k.yaml +model: spinenet49.yaml +trainer: trainer_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf/trainer_default.yaml b/egs/voxceleb/v1.1/conf/trainer_default.yaml new file mode 100644 index 00000000..86dcc2e4 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/trainer_default.yaml @@ -0,0 +1,6 @@ +optim: optim_adam_default.yaml +lrsched: lrsched_exp_default.yaml +use_amp: true +log_interval: 1000 +epochs: 70 +eff_batch_size: 512 diff --git a/egs/voxceleb/v1.1/conf/trainer_swa_default.yaml b/egs/voxceleb/v1.1/conf/trainer_swa_default.yaml new file mode 100644 index 00000000..0cafad01 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/trainer_swa_default.yaml @@ -0,0 +1,9 @@ +optim: optim_adam_default.yaml +lrsched: lrsched_exp_default.yaml +use_amp: true +log_interval: 1000 +epochs: 80 +eff_batch_size: 512 +swa_start: 60 +swa_lr: 1e-3 +swa_anneal_epochs: 5 diff --git a/egs/voxceleb/v1.1/conf/vad_16k.yaml b/egs/voxceleb/v1.1/conf/vad_16k.yaml new file mode 100644 index 00000000..5fb0111c --- /dev/null +++ b/egs/voxceleb/v1.1/conf/vad_16k.yaml @@ -0,0 +1,8 @@ +sample_frequency: 16000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: 5.5 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 diff --git a/egs/voxceleb/v1.1/conf/vad_8k.yaml b/egs/voxceleb/v1.1/conf/vad_8k.yaml new file mode 100644 index 00000000..7592c9d1 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/vad_8k.yaml @@ -0,0 +1,8 @@ +sample_frequency: 8000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: 5.5 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 diff --git a/egs/voxceleb/v1.1/conf/val_data_default.yaml b/egs/voxceleb/v1.1/conf/val_data_default.yaml new file mode 100644 index 00000000..451ffa35 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/val_data_default.yaml @@ -0,0 +1,10 @@ +dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml +sampler: + batch_size: 32 + 
iters_per_epoch: 6 +data_loader: + num_workers: 8 + \ No newline at end of file diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh index 2b4f07a7..3cd4b108 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -1,4 +1,4 @@ -# Time SE Res2Net50 w26s4 x-vector with mixed precision training +# ECAPA-TDNN small # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -9,56 +9,26 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg nnet_type=resnet1d -block_type=seres2bn # squeeze-excitation res2net bottleneck -channels=512 -ep_channels=1536 -width_factor=1 -scale=8 -se_r=4 -dropout=0 -attstats_inner=128 +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size +dropout=0 embed_dim=256 +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--resnet_enc.in-feats 80 \ - --resnet_enc.in-conv-channels $channels \ - --resnet_enc.in-kernel-size 5 \ - --resnet_enc.in-stride 1 \ - --resnet_enc.resb-type $block_type \ - --resnet_enc.resb-repeats 1 1 1 \ - --resnet_enc.resb-channels $channels \ - --resnet_enc.resb-kernel-sizes 3 \ - --resnet_enc.resb-dilations 2 3 4 \ - --resnet_enc.resb-strides 1 \ - --resnet_enc.res2net-width-factor $width_factor \ - --resnet_enc.res2net-scale $scale \ - --resnet_enc.se-r $se_r \ - --resnet_enc.multilayer \ - --resnet_enc.multilayer-concat \ - --resnet_enc.endpoint-channels $ep_channels \ - --pool_net.pool-type ch-wise-att-mean+stddev \ - --pool_net.inner-feats $attstats_inner \ - --embed-dim $embed_dim" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +xvec_train_base_cfg=conf/train_ecapatdnn_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu" nnet_name=${feat_type}_ecapatdnn512x3_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 + nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh index 0765b60d..2806a422 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh @@ -9,32 +9,28 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=16 -eff_batch_size=512 # effective batch size 
-ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.01 +# x-vector cfg +nnet_type=efficientnet -nnet_type=efficientnet-b4 +effnet_type=efficientnet-b4 dropout=0 embed_dim=256 -se_r=4 - s=30 margin_warmup=20 margin=0.3 +se_r=4 -nnet_opt="--effnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --se-r $se_r --fix-stem-head --mbconv-strides 1 1 2 2 1 2 1" +batch_size_1gpu=16 +eff_batch_size=512 # effective batch size +lr=0.01 +nnet_num_epochs=70 -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +xvec_train_base_cfg=conf/train_effnetb4_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" + +nnet_name=${feat_type}_${effnet_type}_is1_mbs1122121_ser${se_r}_fixsh_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_is1_mbs1122121_ser${se_r}_fixsh_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -51,4 +47,3 @@ plda_type=splda lda_dim=200 plda_y_dim=150 plda_z_dim=200 - diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh index 7d1fd1dc..d83ca483 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh @@ -9,32 +9,28 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=2 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.01 +# x-vector cfg +nnet_type=efficientnet -nnet_type=efficientnet-b7 +effnet_type=efficientnet-b7 dropout=0 embed_dim=256 -se_r=4 - s=30 margin_warmup=20 margin=0.3 +se_r=4 -nnet_opt="--effnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --se-r $se_r --fix-stem-head --mbconv-strides 1 1 2 2 1 2 1 --norm-layer instance-norm-affine --head-norm-layer layer-norm" +batch_size_1gpu=2 +eff_batch_size=512 # effective batch size +lr=0.01 +nnet_num_epochs=70 -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +xvec_train_base_cfg=conf/train_effnetb4_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model $PWD/conf/efficientnet_b7.yaml --trainer.optim.lr $lr" + +nnet_name=${feat_type}_${effnet_type}_is1_mbs1122121_ser${se_r}_fixsh_e${embed_dim}_eina_hln_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 
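# A note on the layered config plumbing these scripts now rely on (a sketch only; the
# training entry point itself is outside this patch, so the command name below is
# hypothetical): the base YAML named by xvec_train_base_cfg wires the data/feats/
# model/trainer sub-configs together, and xvec_train_args can override either a whole
# section by pointing its key at another file, as --model $PWD/conf/efficientnet_b7.yaml
# does above, or a single nested key via a dotted flag such as --trainer.optim.lr $lr:
#
#   train_xvec --cfg $xvec_train_base_cfg $xvec_train_args   # hypothetical launcher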
-nnet_name=${feat_type}_${nnet_type}_is1_mbs1122121_ser${se_r}_fixsh_e${embed_dim}_eina_hln_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lresnet34_345_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lresnet34_345_arcs30m0.3_adam_lr0.05_amp.v1.sh index dbab12ae..9bfb7bb7 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lresnet34_345_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lresnet34_345_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -1,38 +1,35 @@ -# LResNet34_345 (multi-level feature) x-vector with mixed precision training +# LResNet34_345 x-vector with mixed precision training # acoustic features feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +#vad +vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=64 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg + +nnet_type=resnet -nnet_type=lresnet34_345 +resnet_type=lresnet34_345 +batch_size_1gpu=128 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 - -loss_type=arc-softmax +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -50,3 +47,4 @@ lda_dim=200 plda_y_dim=150 plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh index c243020f..4aabd592 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -7,34 +7,29 @@ feat_type=fbank80_stmn #vad vad_config=conf/vad_16k.yaml - # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg + +nnet_type=resnet + +resnet_type=lresnet34 batch_size_1gpu=128 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=lresnet34 #light resnet dropout=0 embed_dim=256 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 
--in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -52,3 +47,4 @@ lda_dim=200 plda_y_dim=150 plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lspinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lspinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh index 9ba45ab5..2afe35ef 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lspinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_lspinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -4,35 +4,31 @@ feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +#vad +vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg +nnet_type=spinenet + +spinenet_type=lspinenet49 batch_size_1gpu=64 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=lspinenet49 dropout=0 embed_dim=256 - -loss_type=arc-softmax +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--spinenet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${spinenet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -49,4 +45,3 @@ plda_type=splda lda_dim=200 plda_y_dim=150 plda_z_dim=200 - diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index 7cfe8894..f995fc0f 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -9,38 +9,33 @@ vad_config=conf/vad_16k.yaml # x-vector training 
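# How eff_batch_size relates to batch_size_1gpu (the accumulation rule below is an
# assumption; only the two variables themselves appear in these scripts):
#
#   eff_batch_size = num_gpus * batch_size_1gpu * grad_acc_steps
#
# e.g. with batch_size_1gpu=64 on 4 GPUs, reaching eff_batch_size=512 takes
# 512 / (4 * 64) = 2 gradient-accumulation steps.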
nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=64 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg -nnet_type=res2net34 +nnet_type=resnet + +resnet_type=res2net34 +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 -width_factor=1 -scale=4 -ws_tag=w16s4 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +width_factor=1 +scale=4 +ws_tag=w16s4 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" +xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index 75f3bbbd..a2e8cdba 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -1,4 +1,4 @@ -# Res2Net34 w26 s4 x-vector with mixed precision training +# Res2Net34 w26s4 x-vector with mixed precision training # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -9,38 +9,33 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=64 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg -nnet_type=res2net34 +nnet_type=resnet + +resnet_type=res2net34 +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 -width_factor=1.625 -scale=4 -ws_tag=w26s4 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +width_factor=1.625 +scale=4 +ws_tag=w26s4 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" +xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad 
--use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh index cbd13a22..6ddb9e2c 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -9,38 +9,33 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg -nnet_type=res2net50 +nnet_type=resnet + +resnet_type=res2net50 +batch_size_1gpu=16 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 -width_factor=1.625 -scale=8 -ws_tag=w13s8 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +width_factor=1.625 +scale=8 +ws_tag=w13s8 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" +xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index 4c7e6fc5..cfec2b09 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -9,38 +9,33 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg 
conf/reverb_noise_aug.yaml" -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg -nnet_type=res2net50 +nnet_type=resnet + +resnet_type=res2net50 +batch_size_1gpu=16 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 -width_factor=1.625 -scale=4 -ws_tag=w26s4 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +width_factor=1.625 +scale=4 +ws_tag=w26s4 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" +xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 @@ -54,3 +49,4 @@ lda_dim=200 plda_y_dim=150 plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh index db3bfea8..3cf18fcf 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -9,38 +9,33 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=24 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg -nnet_type=res2net50 +nnet_type=resnet + +resnet_type=res2net50 +batch_size_1gpu=16 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 -width_factor=3.25 -scale=8 -ws_tag=w26s8 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +width_factor=3.25 +scale=8 +ws_tag=w26s8 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" +xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" 
+nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh index c2191649..a5767e50 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh @@ -1,4 +1,4 @@ -# Res2Net50 w26s8 x-vector with mixed precision training +# Res2Net50 w26s8 x-vector with mixed precision training and SWA # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -9,38 +9,33 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=24 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg -nnet_type=res2net50 +nnet_type=resnet + +resnet_type=res2net50 +batch_size_1gpu=16 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 -width_factor=3.25 -scale=8 -ws_tag=w26s8 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +width_factor=3.25 +scale=8 +ws_tag=w26s8 +nnet_num_epochs=90 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" +xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --trainer.epochs $nnet_num_epochs --trainer.swa-start 70 --trainer.swa-lr 1e-3 --trainer.swa-anneal-epochs 5" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 70 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 -nnet_num_epochs=90 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/swa_model_ep0091.pth - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh index 63fecf32..b10e5e86 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -11,7 +11,6 @@ vad_config=conf/vad_16k.yaml 
nnet_data=voxceleb2cat_train # x-vector cfg - nnet_type=resnet resnet_type=resnet34 @@ -25,7 +24,7 @@ margin_warmup=20 margin=0.3 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_xvec_default.yaml +xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu" nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh index d5f9e623..2666b93e 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh @@ -1,4 +1,4 @@ -# ResNet34 x-vector with mixed precision training +# ResNet34 x-vector with mixed precision training and SWA # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -9,31 +9,27 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg + +nnet_type=resnet + +resnet_type=resnet34 batch_size_1gpu=32 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=resnet34 dropout=0 embed_dim=256 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer $PWD/conf/trainer_swa_default.yaml" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 60 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 -nnet_num_epochs=80 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/swa_model_ep0081.pth @@ -51,3 +47,4 @@ lda_dim=200 plda_y_dim=150 plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh index b172ad91..0ec34ef1 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh @@ -1,4 +1,4 @@ -# ResNet34 x-vector with mixed precision training +# ResNet34 x-vector with mixed precision training and sharded distrib. 
data parallel # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -9,33 +9,28 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg + +nnet_type=resnet + +resnet_type=resnet34 batch_size_1gpu=32 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.01 - -nnet_type=resnet34 dropout=0 embed_dim=256 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.ddp-type oss_sharded_ddp" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --ddp-type oss_sharded_ddp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_sharded_ddp_amp.v1 -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_sharded_ddp_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/swa_model_ep0071.pth nnet=$nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh index f9b8c038..ced8b8d6 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -1,4 +1,5 @@ -# LResNet34 x-vector with mixed precision training +# ResNet50 x-vector with mixed precision training + # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -8,32 +9,28 @@ feat_type=fbank80_stmn vad_config=conf/vad_16k.yaml # x-vector training -nnet_data=voxceleb2cat -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=resnet +resnet_type=resnet50 batch_size_1gpu=32 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=resnet50 dropout=0 embed_dim=256 - +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 
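# --trainer.ddp-type oss_sharded_ddp carries over the removed --ddp-type flag: the
# name suggests fairscale-style OSS sharded data parallel (an inference, not stated
# in this patch), which shards optimizer state across ranks to cut per-GPU memory
# relative to plain DDP while targeting the same model update.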
-nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -42,12 +39,13 @@ nnet=$nnet_dir/model_ep0070.pth plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat + plda_data=voxceleb2cat_train else - plda_data=voxceleb2cat_augx${plda_num_augs} + plda_data=voxceleb2cat_train_augx${plda_num_augs} fi plda_type=splda lda_dim=200 plda_y_dim=150 plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index 8ee1b484..08669114 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -1,4 +1,4 @@ -# SE ResNet34 x-vector with mixed precision training +# Squeeze-Excitation Res2Net50 w26s4 x-vector with mixed precision training # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -9,39 +9,34 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg -nnet_type=seres2net50 +nnet_type=resnet + +resnet_type=seres2net50 +batch_size_1gpu=24 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 +lr=0.05 +s=30 +margin_warmup=20 +margin=0.3 width_factor=1.625 scale=4 ws_tag=w26s4 +nnet_num_epochs=70 se_r=16 -s=30 -margin_warmup=20 -margin=0.3 - -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale --se-r $se_r" +xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 @@ -55,3 +50,4 @@ lda_dim=200 plda_y_dim=150 plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh index 5ea146b6..f3a5ef5a 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ 
b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -4,38 +4,34 @@ feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +#vad +vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg +nnet_type=spinenet + +spinenet_type=spine2net49 batch_size_1gpu=16 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=spine2net49 dropout=0 embed_dim=256 - -loss_type=arc-softmax +lr=0.05 s=30 margin_warmup=20 margin=0.3 - width_factor=1.625 scale=4 ws_tag=w26s4 +nnet_num_epochs=70 -nnet_opt="--spinenet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" + +nnet_name=${feat_type}_${spinenet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -52,4 +48,3 @@ plda_type=splda lda_dim=200 plda_y_dim=150 plda_z_dim=200 - diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh index 6aa20991..40957669 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -4,38 +4,34 @@ feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +#vad +vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg +nnet_type=spinenet + +spinenet_type=spine2net49s batch_size_1gpu=16 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=spine2net49s dropout=0 embed_dim=256 - -loss_type=arc-softmax +lr=0.05 s=30 margin_warmup=20 margin=0.3 - width_factor=1.625 scale=4 ws_tag=w26s4 +nnet_num_epochs=70 -nnet_opt="--spinenet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" 
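# The dotted CLI keys patch the YAML tree of the base config (dashes in flag names
# become underscores in YAML keys, judging from the flag/key pairs visible in this
# patch), so the xvec_train_args line that follows effectively amends
# conf/spinenet49.yaml with:
#
#   spinenet_type: spine2net49s
#   res2net_width_factor: 1.625
#   res2net_scale: 4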
+xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" + +nnet_name=${feat_type}_${spinenet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -52,4 +48,3 @@ plda_type=splda lda_dim=200 plda_y_dim=150 plda_z_dim=200 - diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh index 28418a2e..43f539f9 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -4,35 +4,31 @@ feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +#vad +vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg +nnet_type=spinenet + +spinenet_type=spinenet49 batch_size_1gpu=16 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=spinenet49 dropout=0 embed_dim=256 - -loss_type=arc-softmax +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--spinenet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${spinenet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh index 8be0e057..f834b2cb 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -4,35 +4,31 @@ feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +#vad +vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=64 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg +nnet_type=spinenet -nnet_type=spinenet49s 
+spinenet_type=spinenet49s +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 - -loss_type=arc-softmax +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--spinenet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" +xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${spinenet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -49,4 +45,3 @@ plda_type=splda lda_dim=200 plda_y_dim=150 plda_z_dim=200 - diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index 7a3b0351..243dab65 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -1,4 +1,4 @@ -# Time SE Res2Net50 w26s4 x-vector with mixed precision training +# Time-Squeeze-Excitation Res2Net50 w26s4 x-vector with mixed precision training # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -9,39 +9,34 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 +# x-vector cfg -nnet_type=tseres2net50 +nnet_type=resnet + +resnet_type=tseres2net50 +batch_size_1gpu=24 +eff_batch_size=512 # effective batch size dropout=0 embed_dim=256 +lr=0.05 +s=30 +margin_warmup=20 +margin=0.3 width_factor=1.625 scale=4 ws_tag=w26s4 +nnet_num_epochs=70 se_r=256 -s=30 -margin_warmup=20 -margin=0.3 +xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale --se-r $se_r" +nnet_name=${feat_type}_${resnet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" 
-nnet_name=${feat_type}_${nnet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=6 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh index 35a146a5..749ca557 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -9,32 +9,29 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg + +nnet_type=resnet + +resnet_type=tseresnet34 batch_size_1gpu=32 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=tseresnet34 dropout=0 embed_dim=256 se_r=16 +lr=0.05 s=30 margin_warmup=20 margin=0.3 +nnet_num_epochs=70 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --se-r $se_r" +xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.se-r $se_r" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +nnet_name=${feat_type}_${resnet_type}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh index 48f54f8b..d3a5595c 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -4,39 +4,35 @@ feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +# vad +vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg +nnet_type=spinenet + +spinenet_type=tsespine2net49 batch_size_1gpu=16 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=tsespine2net49 dropout=0 embed_dim=256 - -loss_type=arc-softmax +lr=0.05 s=30 margin_warmup=20 margin=0.3 - width_factor=1.625 scale=4 -se_r=256 ws_tag=w26s4 +se_r=256 +nnet_num_epochs=70 -nnet_opt="--spinenet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale --se-r $se_r" 
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" + +nnet_name=${feat_type}_${spinenet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh index 6253ee54..4ffdd48b 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -4,39 +4,35 @@ feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +# vad +vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" +# x-vector cfg +nnet_type=spinenet + +spinenet_type=tsespine2net49s batch_size_1gpu=16 eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=tsespine2net49s dropout=0 embed_dim=256 - -loss_type=arc-softmax +lr=0.05 s=30 margin_warmup=20 margin=0.3 - width_factor=1.625 scale=4 -se_r=256 ws_tag=w26s4 +se_r=256 +nnet_num_epochs=70 -nnet_opt="--spinenet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale --se-r $se_r" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" +xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" + +nnet_name=${feat_type}_${spinenet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_name=${feat_type}_${nnet_type}_${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth @@ -53,4 +49,3 @@ plda_type=splda lda_dim=200 plda_y_dim=150 plda_z_dim=200 - diff --git a/hyp_utils/conda_env.sh b/hyp_utils/conda_env.sh index 1aea9eb9..1ed39a7d 100755 --- a/hyp_utils/conda_env.sh +++ b/hyp_utils/conda_env.sh @@ -50,27 +50,28 @@ conda activate $conda_env command="python" if [ 
$num_gpus -gt 0 ];then # set CUDA_VISIBLE_DEVICES + if [ ! -z "$SGE_HGR_gpu" ]; then echo "SGE_HGR_gpu=$SGE_HGR_gpu" - if [ ! -z "$SGE_HGR_gpu" ]; then - export CUDA_VISIBLE_DEVICES=$(echo $SGE_HGR_gpu | sed 's@ @,@g') - else - # seach location of free-gpu program in the PATH or hyp_utils directory - free_gpu=$(which free-gpu) - if [ -z "$free_gpu" ];then - free_gpu=$(which hyp_utils/free-gpu) - fi - - if [ ! -z "$free_gpu" ];then - # if free-gpu found set env var, otherwise we assume that you can use any gpu - export CUDA_VISIBLE_DEVICES=$($free_gpu -n $num_gpus) - fi + export CUDA_VISIBLE_DEVICES=$(echo $SGE_HGR_gpu | sed 's@ @,@g') + else + # search location of free-gpu program in the PATH or hyp_utils directory + free_gpu=$(which free-gpu) + if [ -z "$free_gpu" ];then + free_gpu=$(which hyp_utils/free-gpu) fi - echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" - if [ $num_gpus -gt 1 ];then - [[ $(type -P "$torchrun") ]] && command="torchrun" \ - || command="python -m torch.distributed.run" - command="$command --nproc_per_node=$num_gpus --standalone --nnodes=1" + + if [ ! -z "$free_gpu" ];then + # if free-gpu is found, set the env var; otherwise we assume that you can use any gpu + export CUDA_VISIBLE_DEVICES=$($free_gpu -n $num_gpus) fi + fi + echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" + # export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters + if [ $num_gpus -gt 1 ];then + [[ $(type -P "$torchrun") ]] && command="torchrun" \ + || command="python -m torch.distributed.run" + command="$command --nproc_per_node=$num_gpus --standalone --nnodes=1" + fi fi py_exec=$(which $1) diff --git a/hyp_utils/feats/make_evad.sh b/hyp_utils/feats/make_evad.sh index 8717fc3c..373fc4a6 100755 --- a/hyp_utils/feats/make_evad.sh +++ b/hyp_utils/feats/make_evad.sh @@ -86,7 +86,7 @@ fi $cmd JOB=1:$nj $logdir/make_vad_${name}.JOB.log \ hyp_utils/conda_env.sh \ - compute-energy-vad.py --cfg $vad_config $opt_args \ + compute_energy_vad.py --cfg $vad_config $opt_args \ --input $scp --output ark,scp:$vaddir/vad_$name.JOB.ark,$vaddir/vad_$name.JOB.scp \ --part-idx JOB --num-parts $nj || exit 1 diff --git a/hyp_utils/xvectors/extract_xvectors_from_wav.sh b/hyp_utils/xvectors/extract_xvectors_from_wav.sh index 2aa0d460..ef06d94d 100755 --- a/hyp_utils/xvectors/extract_xvectors_from_wav.sh +++ b/hyp_utils/xvectors/extract_xvectors_from_wav.sh @@ -85,7 +85,7 @@ if [ $stage -le 0 ];then set +e $cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - torch-extract-xvectors-from-wav.py \ + extract_xvectors_from_wav.py \ --feats $feat_config ${args} $write_num_frames_opt \ --part-idx JOB --num-parts $nj \ --input $data_dir/wav.scp \ @@ -107,7 +107,7 @@ if [ $stage -le 1 ];then fi $cmd $output_dir/log/extract_xvectors.$i.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - torch-extract-xvectors-from-wav.py \ + extract_xvectors_from_wav.py \ --feats $feat_config ${args} $write_num_frames_opt \ --part-idx $i --num-parts $nj \ --input $data_dir/wav.scp \ diff --git a/hyp_utils/xvectors/make_babble_noise_for_nnet_train.sh b/hyp_utils/xvectors/make_babble_noise_for_nnet_train.sh index 92256004..27c77454 100755 --- a/hyp_utils/xvectors/make_babble_noise_for_nnet_train.sh +++ b/hyp_utils/xvectors/make_babble_noise_for_nnet_train.sh @@ -51,7 +51,7 @@ output_dir=$(utils/make_absolute.sh $dir) args="" $cmd $dir/log/make_babble_noise_${name}.log \ hyp_utils/conda_env.sh \ - make-babble-noise-audio-files.py ${args} \
--output-audio-format $file_format $args $proc_opts \ --min-spks $min_spks --max-spks $max_spks --num-reuses $num_reuses \ --write-time-durs $data_out/utt2dur \ diff --git a/hyp_utils/xvectors/pack_rirs_for_nnet_train.sh b/hyp_utils/xvectors/pack_rirs_for_nnet_train.sh index 9c122f1e..c6634135 100755 --- a/hyp_utils/xvectors/pack_rirs_for_nnet_train.sh +++ b/hyp_utils/xvectors/pack_rirs_for_nnet_train.sh @@ -63,7 +63,7 @@ utils/create_data_link.pl $output_dir/rirs_${name}.${file_format} args="" $cmd $dir/log/pack_rirs_${name}.log \ hyp_utils/conda_env.sh \ - pack-wav-rirs.py ${args} --input $data_in/wav.scp \ + pack_wav_rirs.py ${args} --input $data_in/wav.scp \ --output ${file_format},scp:$output_dir/rirs_${name}.${file_format},$data_out/rirs.scp || exit 1; diff --git a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh index 35794d65..1a1fd7ad 100755 --- a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh +++ b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh @@ -88,7 +88,7 @@ fi $cmd JOB=1:$nj $dir/log/preproc_audios_${name}.JOB.log \ hyp_utils/conda_env.sh \ - preprocess-audio-files.py ${args} --output-audio-format $file_format $args $proc_opts \ + preprocess_audio_files.py ${args} --output-audio-format $file_format $args $proc_opts \ --write-time-durs $output_dir/utt2dur.${name}.JOB \ --part-idx JOB --num-parts $nj \ --input $data_in/wav.scp \ diff --git a/hyperion/bin/compute-energy-vad.py b/hyperion/bin/compute_energy_vad.py similarity index 100% rename from hyperion/bin/compute-energy-vad.py rename to hyperion/bin/compute_energy_vad.py diff --git a/hyperion/bin/torch-extract-xvectors-from-wav.py b/hyperion/bin/extract_xvectors_from_wav.py similarity index 100% rename from hyperion/bin/torch-extract-xvectors-from-wav.py rename to hyperion/bin/extract_xvectors_from_wav.py diff --git a/hyperion/bin/make-babble-noise-audio-files.py b/hyperion/bin/make_babble_noise_audio_files.py similarity index 100% rename from hyperion/bin/make-babble-noise-audio-files.py rename to hyperion/bin/make_babble_noise_audio_files.py diff --git a/hyperion/bin/pack-wav-rirs.py b/hyperion/bin/pack_wav_rirs.py similarity index 100% rename from hyperion/bin/pack-wav-rirs.py rename to hyperion/bin/pack_wav_rirs.py diff --git a/hyperion/bin/preprocess-audio-files.py b/hyperion/bin/preprocess_audio_files.py similarity index 100% rename from hyperion/bin/preprocess-audio-files.py rename to hyperion/bin/preprocess_audio_files.py diff --git a/hyperion/bin/torch_train_xvec_from_wav.py b/hyperion/bin/torch_train_xvec_from_wav.py index df948b87..c488b5c5 100755 --- a/hyperion/bin/torch_train_xvec_from_wav.py +++ b/hyperion/bin/torch_train_xvec_from_wav.py @@ -131,7 +131,6 @@ def init_feats(rank, **kwargs): def init_xvector(num_classes, rank, xvec_class, **kwargs): - xvec_args = xvec_class.filter_args(**kwargs["model"]) if rank == 0: logging.info("xvector network args={}".format(xvec_args)) @@ -171,7 +170,7 @@ def train_xvec(gpu_id, args): device=device, metrics=metrics, ddp=world_size > 1, - **trn_args + **trn_args, ) trainer.load_last_checkpoint() trainer.fit(train_loader, val_loader) diff --git a/hyperion/torch/layer_blocks/res2net_blocks.py b/hyperion/torch/layer_blocks/res2net_blocks.py index daf391be..83ce7601 100644 --- a/hyperion/torch/layer_blocks/res2net_blocks.py +++ b/hyperion/torch/layer_blocks/res2net_blocks.py @@ -96,7 +96,7 @@ def __init__( width_in = in_channels // scale width_mid = int(width_factor * channels) // scale self.width_in 
= width_in - self.has_proj1 = width_in != width_mid + self.has_proj1 = width_in != width_mid and stride == 1 self.scale = scale channels_mid = width_mid * scale if scale == 1: @@ -189,7 +189,7 @@ def forward(self, x, x_mask=None): x_i = self.bn1s[i](x_i) x_i = self.act1(x_i) if not self.norm_before: - x_i = self.bn1(x_i) + x_i = self.bn1s[i](x_i) x.append(x_i) if self.scale > 1: diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index b6b3569e..5001bfd0 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -750,7 +750,9 @@ def _standardize_weights(self, x, x_lengths=None, weights=None): multiplied by the input data. """ if weights is None: - return seq_lengths_to_mask(x, x.size(self.dim), dtype=x.dtype, time_dim=-1) + return seq_lengths_to_mask( + x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=-1 + ) if weights.dim() == x.dim(): return weights.transpose(self.dim, -1) diff --git a/hyperion/torch/layers/pool_factory.py b/hyperion/torch/layers/pool_factory.py index fa1032a8..723c64a4 100644 --- a/hyperion/torch/layers/pool_factory.py +++ b/hyperion/torch/layers/pool_factory.py @@ -93,6 +93,8 @@ def create( keepdim=keepdim, ) + raise ValueError(f"Invalid pooling type {pool_type}") + @staticmethod def filter_args(**kwargs): """Filters the arguments corresponding to the creation of a pooling layer. diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 3e9e9fcd..d11fb020 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -563,6 +563,7 @@ def filter_args(**kwargs): @staticmethod def add_class_args(parser, prefix=None, skip=set()): + if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index ecdb4dd8..2755bbbe 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -149,7 +149,9 @@ def __init__( oss = False if ddp_type == DDPType.DDP else True self.optimizer = self._make_optimizer(optim, self.model, oss=oss) self.model = TorchDDP( - self.model, device_ids=[device], output_device=device + self.model, + device_ids=[device], + output_device=device, ) elif ddp_type == DDPType.OSS_SHARDED_DDP: self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) @@ -454,7 +456,7 @@ def _compute_grad_acc_steps(self, data_loader): math.ceil(self.eff_batch_size / batch_size / self.world_size) ) logging.info( - "Setting grad_acc_steps=%d for" + "Setting grad_acc_steps=%d for " "eff_batch_size=%d, avg_batch_size=%d, world_size=%d", self.grad_acc_steps, self.eff_batch_size, From ed08db306b4ec584c654f438427dcd7439eb22f9 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 29 Apr 2022 16:29:14 -0400 Subject: [PATCH 009/154] hugging face x-vectors working on multi-gpu --- egs/voxceleb/v1.1/default_config.sh | 55 +- egs/voxceleb/v1.1/run_011_train_xvector.sh | 2 +- egs/voxceleb/v2/README.md | 205 ++++++ egs/voxceleb/v2/cmd.sh | 28 + egs/voxceleb/v2/conf/clsp.conf | 11 + egs/voxceleb/v2/conf/coe_gpu_bigmem.conf | 11 + egs/voxceleb/v2/conf/coe_gpu_long.conf | 13 + egs/voxceleb/v2/conf/coe_gpu_rtx.conf | 11 + egs/voxceleb/v2/conf/coe_gpu_short.conf | 11 + egs/voxceleb/v2/conf/coe_gpu_v100.conf | 11 + .../v2/conf/hubertbase_ecapatdnn512x2.yaml | 37 + egs/voxceleb/v2/conf/lrsched_exp_default.yaml | 7 + egs/voxceleb/v2/conf/optim_adam_default.yaml | 6 + 
egs/voxceleb/v2/conf/reverb_noise_aug.yaml | 35 + egs/voxceleb/v2/conf/train_data_default.yaml | 11 + ...ain_hubertbase_ecapatdnn512x2_default.yaml | 6 + ...v2vec2base960h_ecapatdnn512x2_default.yaml | 6 + ...n_wav2vec2base_ecapatdnn512x2_default.yaml | 6 + ...rain_wavlmbase_ecapatdnn512x2_default.yaml | 6 + egs/voxceleb/v2/conf/trainer_swa_default.yaml | 9 + egs/voxceleb/v2/conf/vad_16k.yaml | 8 + egs/voxceleb/v2/conf/val_data_default.yaml | 11 + .../conf/wav2vec2base960h_ecapatdnn512x2.yaml | 37 + .../v2/conf/wav2vec2base_ecapatdnn512x2.yaml | 37 + .../v2/conf/wavlmbase_ecapatdnn512x2.yaml | 37 + egs/voxceleb/v2/datapath.sh | 22 + egs/voxceleb/v2/default_config.sh | 1 + ...dnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh | 48 ++ ...tdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh | 48 ++ ...dnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh | 51 ++ ...dnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh | 49 ++ egs/voxceleb/v2/hyp_utils | 1 + egs/voxceleb/v2/local | 1 + egs/voxceleb/v2/path.sh | 5 + egs/voxceleb/v2/run_001_prepare_data.sh | 28 + egs/voxceleb/v2/run_002_compute_evad.sh | 57 ++ .../v2/run_010_prepare_xvec_train_data.sh | 42 ++ egs/voxceleb/v2/run_011_train_xvector.sh | 59 ++ egs/voxceleb/v2/run_030_extract_xvectors.sh | 60 ++ egs/voxceleb/v2/run_040_eval_be.sh | 125 ++++ egs/voxceleb/v2/steps | 1 + egs/voxceleb/v2/steps_be | 1 + egs/voxceleb/v2/steps_pyfe | 1 + egs/voxceleb/v2/steps_xvec | 1 + egs/voxceleb/v2/utils | 1 + hyp_utils/conda_env.sh | 2 +- .../xvectors/extract_wav2vec2xvectors.sh | 155 ++++ hyperion/bin/extract_wav2vec2xvectors.py | 313 ++++++++ hyperion/bin/train_wav2vec2xvector.py | 197 ++++++ hyperion/bin/train_xvector_from_feats.py | 204 ++++++ ..._from_wav.py => train_xvector_from_wav.py} | 1 - hyperion/torch/data/audio_dataset.py | 3 - hyperion/torch/data/feat_seq_dataset.py | 24 +- hyperion/torch/models/__init__.py | 6 + .../torch/models/wav2xvectors/__init__.py | 4 + .../hf_hubert2resnet1d_xvector.py | 74 ++ .../hf_wav2vec2resnet1d_xvector.py | 54 +- .../models/wav2xvectors/hf_wav2xvector.py | 229 +++++- .../wav2xvectors/hf_wavlm2resnet1d_xvector.py | 74 ++ .../torch/models/wav2xvectors/wav2xvector.py | 1 + .../torch/models/xvectors/resnet1d_xvector.py | 2 +- hyperion/torch/models/xvectors/xvector.py | 45 +- hyperion/torch/tpm/__init__.py | 6 + hyperion/torch/tpm/hf/__init__.py | 8 + hyperion/torch/tpm/hf/hf_hubert.py | 553 +++++++++++++++ hyperion/torch/tpm/hf/hf_wav2vec2.py | 668 ++++++++++++++++++ hyperion/torch/tpm/hf/hf_wav2vec_base.py | 331 +++++++++ hyperion/torch/tpm/hf/hf_wavlm.py | 622 ++++++++++++++++ hyperion/torch/trainers/torch_trainer.py | 7 +- hyperion/torch/trainers/xvector_trainer.py | 12 + hyperion/torch/utils/__init__.py | 2 +- hyperion/torch/utils/ddp.py | 25 +- hyperion/torch/utils/masking.py | 2 +- 73 files changed, 4756 insertions(+), 87 deletions(-) mode change 100644 => 120000 egs/voxceleb/v1.1/default_config.sh create mode 100644 egs/voxceleb/v2/README.md create mode 100755 egs/voxceleb/v2/cmd.sh create mode 100644 egs/voxceleb/v2/conf/clsp.conf create mode 100644 egs/voxceleb/v2/conf/coe_gpu_bigmem.conf create mode 100644 egs/voxceleb/v2/conf/coe_gpu_long.conf create mode 100644 egs/voxceleb/v2/conf/coe_gpu_rtx.conf create mode 100644 egs/voxceleb/v2/conf/coe_gpu_short.conf create mode 100644 egs/voxceleb/v2/conf/coe_gpu_v100.conf create mode 100644 egs/voxceleb/v2/conf/hubertbase_ecapatdnn512x2.yaml create mode 100644 egs/voxceleb/v2/conf/lrsched_exp_default.yaml create mode 100644 egs/voxceleb/v2/conf/optim_adam_default.yaml create mode 100644 
egs/voxceleb/v2/conf/reverb_noise_aug.yaml create mode 100644 egs/voxceleb/v2/conf/train_data_default.yaml create mode 100644 egs/voxceleb/v2/conf/train_hubertbase_ecapatdnn512x2_default.yaml create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2base_ecapatdnn512x2_default.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbase_ecapatdnn512x2_default.yaml create mode 100644 egs/voxceleb/v2/conf/trainer_swa_default.yaml create mode 100644 egs/voxceleb/v2/conf/vad_16k.yaml create mode 100644 egs/voxceleb/v2/conf/val_data_default.yaml create mode 100644 egs/voxceleb/v2/conf/wav2vec2base960h_ecapatdnn512x2.yaml create mode 100644 egs/voxceleb/v2/conf/wav2vec2base_ecapatdnn512x2.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbase_ecapatdnn512x2.yaml create mode 100644 egs/voxceleb/v2/datapath.sh create mode 120000 egs/voxceleb/v2/default_config.sh create mode 100644 egs/voxceleb/v2/global_conf/config_hubertbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2base960h_ecapatdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh create mode 120000 egs/voxceleb/v2/hyp_utils create mode 120000 egs/voxceleb/v2/local create mode 100755 egs/voxceleb/v2/path.sh create mode 100755 egs/voxceleb/v2/run_001_prepare_data.sh create mode 100755 egs/voxceleb/v2/run_002_compute_evad.sh create mode 100755 egs/voxceleb/v2/run_010_prepare_xvec_train_data.sh create mode 100755 egs/voxceleb/v2/run_011_train_xvector.sh create mode 100755 egs/voxceleb/v2/run_030_extract_xvectors.sh create mode 100755 egs/voxceleb/v2/run_040_eval_be.sh create mode 120000 egs/voxceleb/v2/steps create mode 120000 egs/voxceleb/v2/steps_be create mode 120000 egs/voxceleb/v2/steps_pyfe create mode 120000 egs/voxceleb/v2/steps_xvec create mode 120000 egs/voxceleb/v2/utils create mode 100755 hyp_utils/xvectors/extract_wav2vec2xvectors.sh create mode 100755 hyperion/bin/extract_wav2vec2xvectors.py create mode 100755 hyperion/bin/train_wav2vec2xvector.py create mode 100755 hyperion/bin/train_xvector_from_feats.py rename hyperion/bin/{torch_train_xvec_from_wav.py => train_xvector_from_wav.py} (99%) create mode 100644 hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py create mode 100644 hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py create mode 100644 hyperion/torch/tpm/__init__.py create mode 100644 hyperion/torch/tpm/hf/__init__.py create mode 100644 hyperion/torch/tpm/hf/hf_hubert.py create mode 100644 hyperion/torch/tpm/hf/hf_wav2vec2.py create mode 100644 hyperion/torch/tpm/hf/hf_wav2vec_base.py create mode 100644 hyperion/torch/tpm/hf/hf_wavlm.py diff --git a/egs/voxceleb/v1.1/default_config.sh b/egs/voxceleb/v1.1/default_config.sh deleted file mode 100644 index 652b4d61..00000000 --- a/egs/voxceleb/v1.1/default_config.sh +++ /dev/null @@ -1,54 +0,0 @@ -# Default parameters -# LResNet34 x-vector without mixed precision training - -# acoustic features -feat_config=conf/fbank80_stmn_16k.yaml -feat_type=fbank80_stmn - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -batch_size_1gpu=128 
-eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=lresnet34 #light resnet -dropout=0 -embed_dim=256 - -s=30 -margin_warmup=20 -margin=0.3 - -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}.v1 -nnet_num_epochs=70 -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v1.1/default_config.sh b/egs/voxceleb/v1.1/default_config.sh new file mode 120000 index 00000000..8f713463 --- /dev/null +++ b/egs/voxceleb/v1.1/default_config.sh @@ -0,0 +1 @@ +global_conf/config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh \ No newline at end of file diff --git a/egs/voxceleb/v1.1/run_011_train_xvector.sh b/egs/voxceleb/v1.1/run_011_train_xvector.sh index 9b30369e..17d50722 100755 --- a/egs/voxceleb/v1.1/run_011_train_xvector.sh +++ b/egs/voxceleb/v1.1/run_011_train_xvector.sh @@ -44,7 +44,7 @@ if [ $stage -le 1 ]; then $cuda_cmd \ --gpu $ngpu $nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch_train_xvec_from_wav.py $nnet_type --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ + train_xvector_from_wav.py $nnet_type --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ diff --git a/egs/voxceleb/v2/README.md b/egs/voxceleb/v2/README.md new file mode 100644 index 00000000..5b5b93e5 --- /dev/null +++ b/egs/voxceleb/v2/README.md @@ -0,0 +1,205 @@
+# VoxCeleb V2
+
+Recipe for the VoxCeleb Speaker Verification Task
+
+## Differences w.r.t VoxCeleb V1 recipe
+
+In recipe version V1:
+ - We compute speech augmentations and acoustic features offline and dump them to disk.
+ - Augmentation is performed using Kaldi scripts and the wav-reverberate tool.
+ - Babble noise is created on-the-fly when computing features by mixing 3-7 single speaker files.
+
+In this recipe:
+ - Speech augmentations and acoustic features are always computed on-the-fly;
+ we don't dump any features to disk.
+ - Augmentation is performed using the Hyperion SpeechAugment class.
+ - The behavior of this class is controlled
+ by the configuration file `conf/reverb_noise_aug.yaml`,
+ which mimics the proportions of noise and RIR types, and the SNRs used in V1 of the recipe
+ (see the structural sketch after this list).
+ - Babble noise is created offline by mixing 3-10 single speaker files.
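For orientation, here is an abridged structural sketch of that augmentation config. It is not the recipe file itself; the complete `conf/reverb_noise_aug.yaml` is added later in this same patch, and only a subset of its RIR and noise entries is reproduced here:

```yaml
# Sketch of the SpeechAugment config layout; abridged from the full
# conf/reverb_noise_aug.yaml added later in this patch.
reverb_aug:
  reverb_prob: 0.45            # probability of convolving with a RIR
  max_reverb_context: 0.5
  rir_types:                   # RIR sources, sampled according to weight
    smallroom:
      weight: 1
      rir_path: scp:data/rirs_smallroom/rirs.scp
      rir_norm: max
noise_aug:
  noise_prob: 0.7              # probability of adding additive noise
  noise_types:
    noise:
      weight: 1
      noise_path: data/musan_noise_proc_audio/wav.scp
      min_snr: 0               # SNR range in dB
      max_snr: 18
```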
+
+
+## Citing
+
+## Training Data
+
+ - The x-vector network is trained on VoxCeleb2 dev + test with augmentations
+   - MUSAN noise
+   - RIR reverberation
+
+## Test Data
+
+ - Test data is VoxCeleb 1
+ - We evaluate 6 conditions:
+   - VoxCeleb-O (Original): original VoxCeleb test set with 40 speakers
+   - VoxCeleb-O-cleaned: VoxCeleb-O with some trial-list errors cleaned up
+   - VoxCeleb-E (Entire): list using all utterances of VoxCeleb1
+   - VoxCeleb-E-cleaned: VoxCeleb-E with some trial-list errors cleaned up
+   - VoxCeleb-H (Hard): list of hard trials between all utterances of VoxCeleb1; same-gender and same-nationality trials.
+   - VoxCeleb-H-cleaned: VoxCeleb-H with some trial-list errors cleaned up
+
+## Usage
+
+ - Run the run_0*.sh scripts in sequence
+ - By default it will use Light ResNet (16 base channels)
+ - For better performance, use the full ResNet (64 base channels) config file `config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh` as follows:
+```bash
+run_011_train_xvector.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh
+run_030_extract_xvectors.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh --use-gpu true
+run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh
+```
+
+ - To train with mixed precision, use the config file `config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh`
+
+## Recipe Steps:
+
+ - `run_001_prepare_data.sh`
+   - Data preparation script to generate Kaldi style data directories for
+     - VoxCeleb2 train+test
+     - VoxCeleb1 O/E/H eval sets
+
+ - `run_002_compute_evad.sh`
+   - Computes Energy VAD for all datasets
+
+ - `run_003_prepare_noises_rirs.sh`
+   - Prepares MUSAN noise and music to be used by the SpeechAugment class.
+   - Creates Babble noise from MUSAN speech to be used by the SpeechAugment class.
+   - Prepares RIRs by compacting them into HDF5 files, to be used by the SpeechAugment class.
+
+ - `run_010_prepare_xvec_train_data.sh`
+   - Transforms all the audio files used to train the x-vector into a common format, e.g., .flac.
+   - Removes silence from the audio
+   - Removes utterances shorter than 4 secs and speakers with less than 8 utterances.
+   - Creates training and validation lists for x-vector training
+
+ - `run_011_train_xvector.sh`
+   - Trains the x-vector network
+
+ - `run_030_extract_xvectors.sh`
+   - Extracts x-vectors for VoxCeleb2 or VoxCeleb2+augmentation for PLDA training
+   - Extracts x-vectors for VoxCeleb1 test sets
+
+ - `run_040_eval_be.sh`
+   - Trains PLDA and evaluates PLDA and cosine scoring back-ends
+
+
+## Results
+
+### VoxCeleb 1 Original-Clean trial list
+
+| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
+| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
+| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.00 | 0.129 | 0.216 |
+| | | | Cosine | 2.04 | 0.138 | 0.210 |
+| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.35 | 0.091 | 0.159 |
+| | | | Cosine | 1.22 | 0.082 | 0.129 |
+| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 1.19 | 0.074 | 0.124 |
+| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 1.30 | 0.090 | 0.160 |
+| | | | Cosine | 1.44 | 0.100 | 0.173 |
+| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.091 | 0.143 |
+| | | | Cosine | 1.17 | 0.081 | 0.110 |
+| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121<br>ArcFace s=30/m=0.3 | PLDA | 1.37 | 0.104 | 0.179 |
+| | | | Cosine | 1.31 | 0.080 | 0.139 |
+| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121<br>Instance-Norm with affine transform in Encoder<br>Layer-Norm in head<br>ArcFace s=30/m=0.3 | PLDA | 1.29 | 0.088 | 0.129 |
+| | | | Cosine | 1.23 | 0.083 | 0.136 |
+| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 1.20 | 0.095 | 0.156 |
+| | | | Cosine | 1.29 | 0.089 | 0.146 |
+| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.20 | 0.084 | 0.136 |
+| | | | Cosine | 1.18 | 0.078 | 0.115 |
+| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.11 | 0.084 | 0.145 |
+| | | | Cosine | 1.12 | 0.073 | 0.131 |
+| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16<br>ArcFace s=30/m=0.3 | PLDA | 1.53 | 0.104 | 0.189 |
+| | | | Cosine | 1.31 | 0.084 | 0.132 |
+| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256<br>ArcFace s=30/m=0.3 | PLDA | 0.98 | 0.066 | 0.116 |
+| | | | Cosine | 1.12 | 0.071 | 0.103 |
+| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 1.05 | 0.077 | 0.123 |
+| | | | Cosine | 0.96 | 0.065 | 0.110 |
+| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.04 | 0.071 | 0.118 |
+| | | | Cosine | 0.93 | 0.067 | 0.108 |
+| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1_swa.sh | Res2Net50 width=26x8 | + SWA | PLDA | 0.90 | 0.067 | 0.118 |
+| | | | Cosine | 0.85 | 0.060 | 0.094 |
+| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.44 | 0.102 | 0.169 |
+| | | | Cosine | 1.29 | 0.084 | 0.140 |
+| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.071 | 0.116 |
+| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.074 | 0.116 |
+| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.09 | 0.081 | 0.150 |
+
+
+### VoxCeleb 1 Entire-Clean trial list
+
+| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
+| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
+| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.86 | 0.124 | 0.210 |
+| | | | Cosine | 1.93 | 0.122 | 0.201 |
+| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.43 | 0.091 | 0.159 |
+| | | | Cosine | 1.24 | 0.080 | 0.136 |
+| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 1.19 | 0.077 | 0.132 |
+| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 1.27 | 0.084 | 0.150 |
+| | | | Cosine | 1.30 | 0.082 | 0.150 |
+| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.30 | 0.083 | 0.146 |
+| | | | Cosine | 1.09 | 0.071 | 0.124 |
+| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121<br>ArcFace s=30/m=0.3 | PLDA | 1.45 | 0.097 | 0.165 |
+| | | | Cosine | 1.15 | 0.076 | 0.132 |
+| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121<br>Instance-Norm with affine transform in Encoder<br>Layer-Norm in head<br>ArcFace s=30/m=0.3 | PLDA | 1.47 | 0.094 | 0.165 |
+| | | | Cosine | 1.27 | 0.082 | 0.148 |
+| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 1.31 | 0.086 | 0.149 |
+| | | | Cosine | 1.22 | 0.079 | 0.134 |
+| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.27 | 0.082 | 0.145 |
+| | | | Cosine | 1.16 | 0.074 | 0.130 |
+| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.077 | 0.136 |
+| | | | Cosine | 1.11 | 0.071 | 0.125 |
+| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16<br>ArcFace s=30/m=0.3 | PLDA | 1.46 | 0.097 | 0.173 |
+| | | | Cosine | 1.24 | 0.080 | 0.140 |
+| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256<br>ArcFace s=30/m=0.3 | PLDA | 1.11 | 0.071 | 0.127 |
+| | | | Cosine | 1.05 | 0.067 | 0.117 |
+| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.078 | 0.134 |
+| | | | Cosine | 1.05 | 0.069 | 0.121 |
+| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.18 | 0.075 | 0.131 |
+| | | | Cosine | 0.98 | 0.063 | 0.110 |
+| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 1.17 | 0.072 | 0.123 |
+| | | | Cosine | 0.94 | 0.061 | 0.107 |
+| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.56 | 0.095 | 0.166 |
+| | | | Cosine | 1.27 | 0.079 | 0.142 |
+| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.19 | 0.077 | 0.137 |
+| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.073 | 0.129 |
+| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.068 | 0.120 |
+
+
+### VoxCeleb 1 Hard-Clean trial list
+
+| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
+| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
+| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 3.29 | 0.195 | 0.318 |
+| | | | Cosine | 3.27 | 0.188 | 0.303 |
+| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.66 | 0.160 | 0.258 |
+| | | | Cosine | 2.32 | 0.139 | 0.232 |
+| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 2.19 | 0.133 | 0.215 |
+| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 2.33 | 0.139 | 0.227 |
+| | | | Cosine | 2.33 | 0.142 | 0.235 |
+| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.46 | 0.142 | 0.237 |
+| | | | Cosine | 2.14 | 0.126 | 0.203 |
+| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121<br>ArcFace s=30/m=0.3 | PLDA | 2.57 | 0.153 | 0.255 |
+| | | | Cosine | 2.11 | 0.127 | 0.205 |
+| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121<br>Instance-Norm with affine transform in Encoder<br>Layer-Norm in head<br>ArcFace s=30/m=0.3 | PLDA | 2.64 | 0.157 | 0.244 |
+| | | | Cosine | 2.33 | 0.141 | 0.232 |
+| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 2.42 | 0.144 | 0.245 |
+| | | | Cosine | 2.26 | 0.133 | 0.224 |
+| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 2.39 | 0.141 | 0.235 |
+| | | | Cosine | 2.17 | 0.128 | 0.215 |
+| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 2.28 | 0.131 | 0.225 |
+| | | | Cosine | 2.11 | 0.124 | 0.204 |
+| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16<br>ArcFace s=30/m=0.3 | PLDA | 2.77 | 0.172 | 0.271 |
+| | | | Cosine | 2.45 | 0.141 | 0.225 |
+| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256<br>ArcFace s=30/m=0.3 | PLDA | 2.07 | 0.124 | 0.201 |
+| | | | Cosine | 1.95 | 0.113 | 0.181 |
+| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 2.34 | 0.136 | 0.230 |
+| | | | Cosine | 1.99 | 0.119 | 0.196 |
+| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 2.18 | 0.127 | 0.211 |
+| | | | Cosine | 1.89 | 0.112 | 0.184 |
+| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1_swa.sh | Res2Net50 width=26x8 | + SWA | PLDA | 2.14 | 0.125 | 0.209 |
+| | | | Cosine | 1.84 | 0.110 | 0.186 |
+| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 2.78 | 0.156 | 0.252 |
+| | | | Cosine | 2.26 | 0.134 | 0.214 |
+| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 2.24 | 0.134 | 0.221 |
+| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.20 | 0.132 | 0.219 |
+| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.02 | 0.123 | 0.203 |
diff --git a/egs/voxceleb/v2/cmd.sh b/egs/voxceleb/v2/cmd.sh new file mode 100755 index 00000000..040f458b --- /dev/null +++ b/egs/voxceleb/v2/cmd.sh @@ -0,0 +1,28 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
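As the header comment suggests, a local-machine setup only needs the queue commands swapped for `run.pl`. A minimal sketch, assuming the Kaldi-style `utils/run.pl` wrapper that these recipes link in via their `utils` symlink:

```bash
# Local fallback (sketch): run jobs directly on this host instead of
# submitting to GridEngine/SLURM; run.pl generally ignores resource
# options such as --mem, so run heavy stages one at a time.
export train_cmd="run.pl"
export cuda_cmd="run.pl"
export cuda_eval_cmd="run.pl"
```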
+ +if [ "$(hostname -d)" == "cm.gemini" ];then + #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" + export cuda_eval_cmd="$train_cmd" +fi + + + diff --git a/egs/voxceleb/v2/conf/clsp.conf b/egs/voxceleb/v2/conf/clsp.conf new file mode 100644 index 00000000..4ed38246 --- /dev/null +++ b/egs/voxceleb/v2/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/voxceleb/v2/conf/coe_gpu_bigmem.conf b/egs/voxceleb/v2/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/voxceleb/v2/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/voxceleb/v2/conf/coe_gpu_long.conf b/egs/voxceleb/v2/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/voxceleb/v2/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/voxceleb/v2/conf/coe_gpu_rtx.conf b/egs/voxceleb/v2/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/voxceleb/v2/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/voxceleb/v2/conf/coe_gpu_short.conf b/egs/voxceleb/v2/conf/coe_gpu_short.conf new file mode 100644 index 00000000..81de5cb7 --- /dev/null +++ b/egs/voxceleb/v2/conf/coe_gpu_short.conf @@ 
-0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/voxceleb/v2/conf/coe_gpu_v100.conf b/egs/voxceleb/v2/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/voxceleb/v2/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/voxceleb/v2/conf/hubertbase_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/hubertbase_ecapatdnn512x2.yaml new file mode 100644 index 00000000..94bb31cc --- /dev/null +++ b/egs/voxceleb/v2/conf/hubertbase_ecapatdnn512x2.yaml @@ -0,0 +1,37 @@ +hf_feats: + pretrained_model_path: facebook/hubert-base-ls960 +xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +feat_fusion_start: 2 +feat_fusion_method: weighted-avg diff --git a/egs/voxceleb/v2/conf/lrsched_exp_default.yaml b/egs/voxceleb/v2/conf/lrsched_exp_default.yaml new file mode 100644 index 00000000..fe08b704 --- /dev/null +++ b/egs/voxceleb/v2/conf/lrsched_exp_default.yaml @@ -0,0 +1,7 @@ +lrsch_type: exp_lr +decay_rate: 0.5 +decay_steps: 8000 +hold_steps: 40000 +min_lr: 1.0e-05 +update_lr_on_opt_step: true +warmup_steps: 1000 diff --git a/egs/voxceleb/v2/conf/optim_adam_default.yaml b/egs/voxceleb/v2/conf/optim_adam_default.yaml new file mode 100644 index 00000000..b6620069 --- /dev/null +++ b/egs/voxceleb/v2/conf/optim_adam_default.yaml @@ -0,0 +1,6 @@ +opt_type: adam +lr: 0.05 +amsgrad: true +beta1: 0.9 +beta2: 0.95 +weight_decay: 1.0e-05 diff --git a/egs/voxceleb/v2/conf/reverb_noise_aug.yaml b/egs/voxceleb/v2/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/voxceleb/v2/conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: 
data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/voxceleb/v2/conf/train_data_default.yaml b/egs/voxceleb/v2/conf/train_data_default.yaml new file mode 100644 index 00000000..72c77204 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_data_default.yaml @@ -0,0 +1,11 @@ +dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 +sampler: + batch_size: 32 + iters_per_epoch: 6 +data_loader: + num_workers: 8 + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_hubertbase_ecapatdnn512x2_default.yaml b/egs/voxceleb/v2/conf/train_hubertbase_ecapatdnn512x2_default.yaml new file mode 100644 index 00000000..6cec83c8 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_hubertbase_ecapatdnn512x2_default.yaml @@ -0,0 +1,6 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +model: hubertbase_ecapatdnn512x2.yaml +trainer: trainer_swa_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml b/egs/voxceleb/v2/conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml new file mode 100644 index 00000000..a7fc925e --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml @@ -0,0 +1,6 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +model: wav2vec2base960h_ecapatdnn512x2.yaml +trainer: trainer_swa_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wav2vec2base_ecapatdnn512x2_default.yaml b/egs/voxceleb/v2/conf/train_wav2vec2base_ecapatdnn512x2_default.yaml new file mode 100644 index 00000000..90f35805 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2base_ecapatdnn512x2_default.yaml @@ -0,0 +1,6 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +model: wav2vec2base_ecapatdnn512x2.yaml +trainer: trainer_swa_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbase_ecapatdnn512x2_default.yaml b/egs/voxceleb/v2/conf/train_wavlmbase_ecapatdnn512x2_default.yaml new file mode 100644 index 00000000..424c9bd6 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbase_ecapatdnn512x2_default.yaml @@ -0,0 +1,6 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +model: wavlmbase_ecapatdnn512x2.yaml +trainer: trainer_swa_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/trainer_swa_default.yaml b/egs/voxceleb/v2/conf/trainer_swa_default.yaml new file mode 100644 index 00000000..c45e3eb5 --- /dev/null +++ b/egs/voxceleb/v2/conf/trainer_swa_default.yaml @@ -0,0 +1,9 @@ +optim: optim_adam_default.yaml +lrsched: lrsched_exp_default.yaml +use_amp: true +log_interval: 1000 +epochs: 63 +eff_batch_size: 512 +swa_start: 60 +swa_lr: 1e-4 +swa_anneal_epochs: 1 diff --git a/egs/voxceleb/v2/conf/vad_16k.yaml b/egs/voxceleb/v2/conf/vad_16k.yaml new file mode 100644 index 00000000..5fb0111c --- /dev/null +++ b/egs/voxceleb/v2/conf/vad_16k.yaml @@ -0,0 +1,8 @@ +sample_frequency: 16000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: 5.5 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 diff --git a/egs/voxceleb/v2/conf/val_data_default.yaml b/egs/voxceleb/v2/conf/val_data_default.yaml new file mode 100644 index 00000000..72c77204 --- /dev/null +++ b/egs/voxceleb/v2/conf/val_data_default.yaml @@ -0,0 +1,11 @@ +dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + 
wav_scale: 1 +sampler: + batch_size: 32 + iters_per_epoch: 6 +data_loader: + num_workers: 8 + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/wav2vec2base960h_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wav2vec2base960h_ecapatdnn512x2.yaml new file mode 100644 index 00000000..85964372 --- /dev/null +++ b/egs/voxceleb/v2/conf/wav2vec2base960h_ecapatdnn512x2.yaml @@ -0,0 +1,37 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h +xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +feat_fusion_start: 2 +feat_fusion_method: weighted-avg diff --git a/egs/voxceleb/v2/conf/wav2vec2base_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wav2vec2base_ecapatdnn512x2.yaml new file mode 100644 index 00000000..fb7c7cde --- /dev/null +++ b/egs/voxceleb/v2/conf/wav2vec2base_ecapatdnn512x2.yaml @@ -0,0 +1,37 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base +xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +feat_fusion_start: 2 +feat_fusion_method: weighted-avg diff --git a/egs/voxceleb/v2/conf/wavlmbase_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wavlmbase_ecapatdnn512x2.yaml new file mode 100644 index 00000000..b5d14412 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbase_ecapatdnn512x2.yaml @@ -0,0 +1,37 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base +xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +feat_fusion_start: 2 +feat_fusion_method: weighted-avg diff --git a/egs/voxceleb/v2/datapath.sh b/egs/voxceleb/v2/datapath.sh new file mode 100644 index 00000000..9a2f7529 --- /dev/null +++ b/egs/voxceleb/v2/datapath.sh @@ -0,0 +1,22 @@ +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + # voxceleb1_root=/export/corpora5/VoxCeleb1_v1 #voxceleb1 v1 + voxceleb1_root=/export/corpora5/VoxCeleb1_v2 #voxceleb1 v2 + voxceleb2_root=/export/corpora5/VoxCeleb2 + musan_root=/export/corpora5/JHU/musan +elif [ "$(hostname --domain)" == "cm.gemini" ];then + # 
voxceleb1_root=/expscratch/dsnyder/VoxCeleb1 #voxceleb1 v1 + voxceleb1_root=/exp/jvillalba/corpora/voxceleb1 #voxceleb1 v2 + voxceleb2_root=/expscratch/dgromero/corpora-open/vox2 + musan_root=/expscratch/dgromero/corpora-open/musan +else + echo "Put your database paths here" + exit 1 +fi + + diff --git a/egs/voxceleb/v2/default_config.sh b/egs/voxceleb/v2/default_config.sh new file mode 120000 index 00000000..65108e89 --- /dev/null +++ b/egs/voxceleb/v2/default_config.sh @@ -0,0 +1 @@ +global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh \ No newline at end of file diff --git a/egs/voxceleb/v2/global_conf/config_hubertbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_hubertbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh new file mode 100644 index 00000000..9ea07c9c --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_hubertbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh @@ -0,0 +1,48 @@ +# Hubert base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 + +# hugging face model +hf_model_name=hubertbase + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_hubert2resnet1d + +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size +dropout=0 +embed_dim=256 +lr=0.05 +s=30 +margin_warmup=20 +margin=0.3 +nnet_num_epochs=70 + +lr=0.002 +xvec_train_base_cfg=conf/train_hubertbase_ecapatdnn512x2_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" + +nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 + +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0070.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=6 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base960h_ecapatdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base960h_ecapatdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh new file mode 100644 index 00000000..b6cbdf30 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wav2vec2base960h_ecapatdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -0,0 +1,48 @@ +# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 + +# hugging face model +hf_model_name=wav2vec2base960h + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size +dropout=0 +embed_dim=256 +lr=0.05 +s=30 +margin_warmup=20 +margin=0.3 +nnet_num_epochs=70 + +lr=0.002 +xvec_train_base_cfg=conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" + +nnet_name=${hf_model_name}_ecapatdnn512x3_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 + +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0070.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=6 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git 
a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh new file mode 100644 index 00000000..a021d5a1 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh @@ -0,0 +1,51 @@ +# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 + +# hugging face model +hf_model_name=wav2vec2base + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size +dropout=0 +embed_dim=256 +lr=0.05 +s=30 +margin_warmup=20 +margin=0.3 +nnet_num_epochs=70 + +lr=0.002 +lr=0.001 +lr=0.005 +xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 10000 --trainer.lrsched.hold-steps 20000" + +nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v2 #v1 + +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0070.pth +nnet=$nnet_dir/model_ep0060.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=6 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_wavlmbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh new file mode 100644 index 00000000..ba4272a2 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 + +# hugging face model +hf_model_name=wavlmbase + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size +dropout=0 +embed_dim=256 +lr=0.05 +s=30 +margin_warmup=20 +margin=0.3 +nnet_num_epochs=70 + +lr=0.002 +lr=0.001 +xvec_train_base_cfg=conf/train_wavlmbase_ecapatdnn512x2_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" + +nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 + +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0070.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=6 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/hyp_utils b/egs/voxceleb/v2/hyp_utils new file mode 120000 index 00000000..f6d1eb7a --- /dev/null +++ b/egs/voxceleb/v2/hyp_utils @@ -0,0 +1 @@ +../../../hyp_utils \ No newline at end of file diff --git a/egs/voxceleb/v2/local b/egs/voxceleb/v2/local new file mode 120000 index 00000000..740b697d --- /dev/null +++ b/egs/voxceleb/v2/local @@ -0,0 +1 @@ +../v1/local/ \ No newline at end of file diff --git a/egs/voxceleb/v2/path.sh b/egs/voxceleb/v2/path.sh new file mode 100755 index 00000000..6994fdab --- 
/dev/null +++ b/egs/voxceleb/v2/path.sh @@ -0,0 +1,5 @@ + +export HYP_ROOT=$(readlink -f `pwd -P`/../../..) +export TOOLS_ROOT=$HYP_ROOT/tools + +. $TOOLS_ROOT/path.sh diff --git a/egs/voxceleb/v2/run_001_prepare_data.sh b/egs/voxceleb/v2/run_001_prepare_data.sh new file mode 100755 index 00000000..7bf15448 --- /dev/null +++ b/egs/voxceleb/v2/run_001_prepare_data.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. datapath.sh + + +if [ $stage -le 1 ];then + # Prepare the VoxCeleb2 dataset for training. + local/make_voxceleb2cat.pl $voxceleb2_root dev 16 data/voxceleb2cat_train +fi + +if [ $stage -le 2 ];then + # prepare voxceleb1 for test + # This script is for the old version of the dataset + # local/make_voxceleb1_oeh.pl $voxceleb1_root data + # Use this for the newer version of voxceleb1: + local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data +fi diff --git a/egs/voxceleb/v2/run_002_compute_evad.sh b/egs/voxceleb/v2/run_002_compute_evad.sh new file mode 100755 index 00000000..eeae00ac --- /dev/null +++ b/egs/voxceleb/v2/run_002_compute_evad.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e +nodes=fs01 +storage_name=$(date +'%m_%d_%H_%M') +vaddir=`pwd`/exp/vad_e +vad_config=conf/vad_16k.yaml + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. $config_file + + +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $vaddir/storage ]; then + dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage + if [ "$nodes" == "b0" ];then + utils/create_split_dir.pl \ + /export/b{04,05,06,07}/$dir_name $vaddir/storage + elif [ "$nodes" == "b1" ];then + utils/create_split_dir.pl \ + /export/b{14,15,16,17}/$dir_name $vaddir/storage + elif [ "$nodes" == "c0" ];then + utils/create_split_dir.pl \ + /export/c{06,07,08,09}/$dir_name $vaddir/storage + elif [ "$nodes" == "fs01" ];then + utils/create_split_dir.pl \ + /export/fs01/$dir_name $vaddir/storage + else + echo "we don't distribute data between multiple machines" + fi + fi +fi + +#Train datasets +if [ $stage -le 2 ];then + for name in voxceleb2cat_train voxceleb1_test + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 40 ? $num_spk:40)) + hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ + --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ + data/${name} exp/make_vad/$name $vaddir + utils/fix_data_dir.sh data/${name} + done +fi + + diff --git a/egs/voxceleb/v2/run_010_prepare_xvec_train_data.sh b/egs/voxceleb/v2/run_010_prepare_xvec_train_data.sh new file mode 100755 index 00000000..5936fbf4 --- /dev/null +++ b/egs/voxceleb/v2/run_010_prepare_xvec_train_data.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. 
$config_file + +if [ $stage -le 2 ]; then + # This script preprocess audio for x-vector training + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ + --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') --use-bin-vad true \ + data/${nnet_data} data/${nnet_data}_proc_audio_no_sil exp/${nnet_data}_proc_audio_no_sil + hyp_utils/kaldi/utils/fix_data_dir.sh data/${nnet_data}_proc_audio_no_sil + +fi + +if [ $stage -le 3 ]; then + # Now, we remove files with less than 4s + hyp_utils/remove_short_audios.sh --min-len 4 data/${nnet_data}_proc_audio_no_sil + + # We also want several utterances per speaker. Now we'll throw out speakers + # with fewer than 4 utterances. + hyp_utils/remove_spk_few_utts.sh --min-num-utts 4 data/${nnet_data}_proc_audio_no_sil + +fi + +if [ $stage -le 4 ]; then + # Prepare train and validation lists for x-vectors + local/make_train_lists_sup_embed_with_augm.sh \ + data/${nnet_data}_proc_audio_no_sil \ + data/${nnet_data}_proc_audio_no_sil/lists_xvec +fi + +exit diff --git a/egs/voxceleb/v2/run_011_train_xvector.sh b/egs/voxceleb/v2/run_011_train_xvector.sh new file mode 100755 index 00000000..0b9a092e --- /dev/null +++ b/egs/voxceleb/v2/run_011_train_xvector.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +list_dir=data/${nnet_data}_proc_audio_no_sil + +#add extra args from the command line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" +fi +if [ "$use_tb" == "true" ];then + extra_args="$extra_args --trainer.use-tensorboard" +fi +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)" +fi + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +# Network Training +if [ $stage -le 1 ]; then + + + mkdir -p $nnet_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_wav2vec2xvector.py $nnet_type --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ + --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --trainer.exp-path $nnet_dir $args \ + --num-gpus $ngpu \ + +fi + diff --git a/egs/voxceleb/v2/run_030_extract_xvectors.sh b/egs/voxceleb/v2/run_030_extract_xvectors.sh new file mode 100755 index 00000000..90186a42 --- /dev/null +++ b/egs/voxceleb/v2/run_030_extract_xvectors.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +use_gpu=false +xvec_chunk_length=120 #seconds +. parse_options.sh || exit 1; +. 
$config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" + xvec_cmd="$cuda_eval_cmd --mem 6G" +else + xvec_cmd="$train_cmd --mem 12G" +fi + +xvector_dir=exp/xvectors/$nnet_name + +if [ $stage -le 1 ]; then + # Extract xvectors for training LDA/PLDA + for name in voxceleb2cat_train + do + if [ $plda_num_augs -eq 0 ]; then + steps_xvec/extract_wav2vec2xvectors.sh --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ + --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ + $nnet data/${name} \ + $xvector_dir/${name} + else + steps_xvec/extract_wav2vec2xvectors.sh --cmd "$xvec_cmd" --nj 300 ${xvec_args} \ + --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ + --aug-config $plda_aug_config --num-augs $plda_num_augs \ + $nnet data/${name} \ + $xvector_dir/${name}_augx${plda_num_augs} \ + data/${name}_augx${plda_num_augs} + fi + done +fi + + +if [ $stage -le 2 ]; then + # Extracts x-vectors for evaluation + for name in voxceleb1_test + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 100 ? $num_spk:100)) + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd" --nj $nj ${xvec_args} \ + $nnet data/$name \ + $xvector_dir/$name + done +fi + +exit diff --git a/egs/voxceleb/v2/run_040_eval_be.sh b/egs/voxceleb/v2/run_040_eval_be.sh new file mode 100755 index 00000000..cd168180 --- /dev/null +++ b/egs/voxceleb/v2/run_040_eval_be.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +plda_label=${plda_type}y${plda_y_dim}_v1 +be_name=lda${lda_dim}_${plda_label}_${plda_data} + +xvector_dir=exp/xvectors/$nnet_name +be_dir=exp/be/$nnet_name/$be_name +score_dir=exp/scores/$nnet_name/${be_name} +score_plda_dir=$score_dir/plda +score_cosine_dir=exp/scores/$nnet_name/cosine + +if [ $stage -le 1 ]; then + + echo "Train PLDA on Voxceleb2" + steps_be/train_be_v1.sh --cmd "$train_cmd" \ + --lda_dim $lda_dim \ + --plda_type $plda_type \ + --y_dim $plda_y_dim --z_dim $plda_z_dim \ + $xvector_dir/$plda_data/xvector.scp \ + data/$plda_data \ + $be_dir & + + + wait + +fi + + +if [ $stage -le 2 ];then + + echo "Eval Voxceleb 1 with LDA+CentWhiten+LNorm+PLDA" + steps_be/eval_be_v1.sh --cmd "$train_cmd" --plda_type $plda_type \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $be_dir/lda_lnorm.h5 \ + $be_dir/plda.h5 \ + $score_plda_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + +fi + + +score_plda_dir=$score_cosine_dir + +if [ $stage -le 3 ];then + + echo "Eval Voxceleb 1 with Cosine scoring" + steps_be/eval_be_cos.sh --cmd "$train_cmd" \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $score_plda_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + +fi + +be_dir=exp/be/$nnet_name/cw +score_plda_dir=$score_dir/cw_cosine + +if [ $stage -le 4 ]; then + echo "Train centering+whitening 
on Voxceleb2" + steps_be/train_be_v2.sh --cmd "$train_cmd" \ + $xvector_dir/$plda_data/xvector.scp \ + data/$plda_data \ + $be_dir +fi + + +if [ $stage -le 5 ];then + + echo "Eval Voxceleb 1 with CentWhiten + Cosine scoring" + steps_be/eval_be_v2.sh --cmd "$train_cmd" \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $be_dir/cw.h5 \ + $score_plda_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + +fi + +exit + diff --git a/egs/voxceleb/v2/steps b/egs/voxceleb/v2/steps new file mode 120000 index 00000000..aede39fe --- /dev/null +++ b/egs/voxceleb/v2/steps @@ -0,0 +1 @@ +hyp_utils/kaldi/steps \ No newline at end of file diff --git a/egs/voxceleb/v2/steps_be b/egs/voxceleb/v2/steps_be new file mode 120000 index 00000000..b2098c2a --- /dev/null +++ b/egs/voxceleb/v2/steps_be @@ -0,0 +1 @@ +../v1/steps_be \ No newline at end of file diff --git a/egs/voxceleb/v2/steps_pyfe b/egs/voxceleb/v2/steps_pyfe new file mode 120000 index 00000000..7b9d122a --- /dev/null +++ b/egs/voxceleb/v2/steps_pyfe @@ -0,0 +1 @@ +hyp_utils/feats \ No newline at end of file diff --git a/egs/voxceleb/v2/steps_xvec b/egs/voxceleb/v2/steps_xvec new file mode 120000 index 00000000..af66a94d --- /dev/null +++ b/egs/voxceleb/v2/steps_xvec @@ -0,0 +1 @@ +hyp_utils/xvectors \ No newline at end of file diff --git a/egs/voxceleb/v2/utils b/egs/voxceleb/v2/utils new file mode 120000 index 00000000..3d590a1d --- /dev/null +++ b/egs/voxceleb/v2/utils @@ -0,0 +1 @@ +hyp_utils/kaldi/utils \ No newline at end of file diff --git a/hyp_utils/conda_env.sh b/hyp_utils/conda_env.sh index 1ed39a7d..283a7a49 100755 --- a/hyp_utils/conda_env.sh +++ b/hyp_utils/conda_env.sh @@ -66,7 +66,7 @@ if [ $num_gpus -gt 0 ];then fi fi echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" - # export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters + export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters if [ $num_gpus -gt 1 ];then [[ $(type -P "$torchrun") ]] && command="torchrun" \ || command="python -m torch.distributed.run" diff --git a/hyp_utils/xvectors/extract_wav2vec2xvectors.sh b/hyp_utils/xvectors/extract_wav2vec2xvectors.sh new file mode 100755 index 00000000..56ed6b56 --- /dev/null +++ b/hyp_utils/xvectors/extract_wav2vec2xvectors.sh @@ -0,0 +1,155 @@ +#!/bin/bash +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +nj=30 +cmd="run.pl" + +chunk_length=0 # The chunk size over which the embedding is extracted. +use_gpu=false +write_utt2speech_dur=true # If true writes utt2speech_dur. +stage=0 +min_utt_length=5 +max_utt_length=120 +random_utt_length=false +aug_config="" +num_augs=0 +use_bin_vad=true + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 3 ] && [ $# != 4 ]; then + echo "Usage: $0 [options] []" + echo " e.g.: $0 --aug-config conf/noise_aug.yml exp/xvector_nnet/model.pt data/train exp/xvectors_train [data/train_aug]" + echo "main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --use-gpu # If true, use GPU." 
+ echo " --nj # Number of jobs" + echo " --stage # To control partial reruns" + echo " --use-bin-vad # If true, uses binary VAD from vad.scp" + echo " --write-utt2speech-dur # If true, write utt2speech_dur (in secs) file." + echo " --chunk-length # If provided, applies encoder with specified chunk-length and " + echo " # concatenates the chunks outputs before pooling" + echo " --aug-config # augmentation config file" + echo " --random-utt-length # If true, extracts a random chunk from the utterance between " + echo " # min_utt_length and max_utt_length" + echo " --min-utt-length # " + echo " --max-utt-length # " + + +fi + +nnet_file=$1 +data_dir=$2 +output_dir=$3 +data_out_dir=$4 + +for f in $data_dir/wav.scp ; do + [ ! -f $f ] && echo "No such file $f" && exit 1; +done + +log_dir=$output_dir/log +mkdir -p $log_dir + +num_gpus=0 +args="" +if [ "$use_gpu" == "true" ];then + cmd="$cmd --gpu 1" + num_gpus=1 + args="--use-gpu" +fi + +if [ "$use_bin_vad" == "true" ];then + f=$data_dir/vad.scp + [ ! -f $f ] && echo "No such file $f" && exit 1; + args="${args} --vad scp:$f" +fi + +if [ -n "$aug_config" ];then + args="${args} --aug-cfg $aug_config --num-augs $num_augs --aug-info-path $output_dir/aug_info.JOB.csv" +fi + +if [ "$random_utt_length" == "true" ];then + args="${args} --random-utt-length --min-utt-length $min_utt_length --max-utt-length $max_utt_length" +fi + +if [ "$write_utt2speech_dur" == "true" ];then + write_speech_dur_opt="--write-speech-dur $output_dir/utt2speech_dur.JOB" +fi + +if [ $stage -le 0 ];then + set +e + $cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + extract_wav2vec2xvectors.py \ + ${args} $write_speech_dur_opt \ + --part-idx JOB --num-parts $nj \ + --input $data_dir/wav.scp \ + --model-path $nnet_file --chunk-length $chunk_length \ + --output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp + set -e +fi + +if [ $stage -le 1 ];then + for((i=1;i<=$nj;i++)) + do + status=$(tail -n 1 $output_dir/log/extract_xvectors.$i.log | \ + awk '/status 0/ { print 0} + !/status 0/ { print 1}') + if [ $status -eq 1 ];then + echo "JOB $i failed, resubmitting" + if [ "$write_utt2speech_dur" == "true" ];then + write_speech_dur_opt="--write-speech-dur $output_dir/utt2speech_dur.$i" + fi + $cmd $output_dir/log/extract_xvectors.$i.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + extract_wav2vec2xvectors.py \ + ${args} $write_speech_dur_opt \ + --part-idx $i --num-parts $nj \ + --input $data_dir/wav.scp \ + --model-path $nnet_file --chunk-length $chunk_length \ + --output ark,scp:$output_dir/xvector.$i.ark,$output_dir/xvector.$i.scp & + fi + done + wait +fi + +if [ $stage -le 2 ]; then + echo "$0: combining xvectors across jobs" + for j in $(seq $nj); do cat $output_dir/xvector.$j.scp; done > $output_dir/xvector.scp || exit 1; + if [ "$write_utt2speech_dur" == "true" ];then + for n in $(seq $nj); do + cat $output_dir/utt2speech_dur.$n || exit 1; + done > $output_dir/utt2speech_dur || exit 1 + fi + + if [ -f $output_dir/aug_info.1.csv ];then + cat $output_dir/aug_info.1.csv > $output_dir/aug_info.csv + for j in $(seq 2 $nj); + do + tail -n +2 $output_dir/aug_info.$j.csv + done >> $output_dir/aug_info.csv + fi +fi + +if [ $stage -le 3 ]; then + if [ -n "$data_out_dir" ];then + echo "$0: creating data dir $data_out_dir for augmented x-vectors" + mkdir -p $data_out_dir + awk -F "," '$1 != "key_aug" { print $1,$2}' $output_dir/aug_info.csv \ + > $data_out_dir/augm2clean + awk -v u2s=$data_dir/utt2spk 'BEGIN{ 
+while(getline < u2s) +{ + spk[$1]=$2 +} +} +{ print $1,spk[$2]}' $data_out_dir/augm2clean > $data_out_dir/utt2spk + utils/utt2spk_to_spk2utt.pl $data_out_dir/utt2spk > $data_out_dir/spk2utt + cp $output_dir/utt2speech_dur $data_out_dir + else + cp $output_dir/utt2speech_dur $data_dir + fi +fi diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py new file mode 100755 index 00000000..5675ace1 --- /dev/null +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python +""" + Copyright 2019 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np +import pandas as pd + +import torch + +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.utils import Utt2Info +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.augment import SpeechAugment + +from hyperion.torch.utils import open_device +from hyperion.torch import TorchModelLoader as TML + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def load_model(model_path, device): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("xvector-model={}".format(model)) + model.to(device) + model.eval() + return model + + +def augment(key0, x0, augmenter, aug_df, aug_id): + if augmenter is None: + x = x0 + key = key0 + else: + x, aug_info = augmenter(x0) + key = "%s-aug-%02d" % (key0, aug_id) + aug_df_row = { + "key_aug": key, + "key_orig": key0, + "noise_type": aug_info["noise"]["noise_type"], + "snr": aug_info["noise"]["snr"], + "rir_type": aug_info["reverb"]["rir_type"], + "srr": aug_info["reverb"]["srr"], + "sdr": aug_info["sdr"], + } + + aug_df.append(pd.DataFrame(aug_df_row, index=[0])) + + return key, x + + +def select_random_chunk(key, x, fs, min_utt_length, max_utt_length, rng): + utt_length = rng.randint(low=fs * min_utt_length, high=fs * max_utt_length + 1) + if utt_length < x.shape[1]: + first_frame = rng.randint(low=0, high=x.shape[1] - utt_length) + x = x[:, first_frame : first_frame + utt_length] + logging.info( + "extract-random-utt %s of length=%d first-frame=%d", + key, + x.shape[1], + first_frame, + ) + return x + + +def extract_xvectors( + input_spec, + output_spec, + vad_spec, + write_speech_dur, + scp_sep, + vad_path_prefix, + model_path, + chunk_length, + embed_layer, + random_utt_length, + min_utt_length, + max_utt_length, + aug_cfg, + num_augs, + aug_info_path, + use_gpu, + **kwargs +): + + rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + device = init_device(use_gpu) + model = load_model(model_path, device) + + if write_speech_dur is not None: + keys = [] + info = [] + + if aug_cfg is not None: + augmenter = SpeechAugment.create(aug_cfg, rng=rng) + aug_df = [] + else: + augmenter = None + aug_df = None + num_augs = 1 + + ar_args = AR.filter_args(**kwargs) + logging.info("opening output stream: %s", output_spec) + with DWF.create(output_spec, scp_sep=scp_sep) as writer: + + logging.info( + "opening input stream: {} with args={}".format(input_spec, ar_args) + 
) + with AR(input_spec, **ar_args) as reader: + + if vad_spec is not None: + logging.info("opening VAD stream: %s", vad_spec) + v_reader = VRF.create( + vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep + ) + + while not reader.eof(): + t1 = time.time() + key, x0, fs = reader.read(1) + if len(key) == 0: + break + + x0 = x0[0] + key0 = key[0] + fs = fs[0] + t2 = time.time() + + logging.info("processing utt %s", key0) + for aug_id in range(num_augs): + t3 = time.time() + key, x = augment(key0, x0, augmenter, aug_df, aug_id) + t4 = time.time() + with torch.no_grad(): + x = torch.tensor( + x[None, :], dtype=torch.get_default_dtype() + ).to(device) + + t5 = time.time() + tot_samples = x.shape[1] + if vad_spec is not None: + vad = v_reader.read(key0)[0] + vad = torch.tensor( + vad[None, None, :], dtype=torch.float + ).to(device) + vad = torch.nn.functional.interpolate( + vad, size=x.size(-1), mode="nearest" + ).bool()[0, 0] + x = x[:, vad] + + logging.info( + "utt %s detected %d/%d (%.2f %%) speech samples", + key, + x.shape[1], + tot_samples, + x.shape[1] / tot_samples * 100, + ) + + if random_utt_length: + x = select_random_chunk( + key, x, fs, min_utt_length, max_utt_length, rng + ) + + t6 = time.time() + if x.shape[1] == 0: + y = np.zeros((model.embed_dim,), dtype=float_cpu()) + else: + y = ( + model.extract_embed( + x, + chunk_length=fs * chunk_length, + embed_layer=embed_layer, + ) + .cpu() + .numpy()[0] + ) + + t7 = time.time() + writer.write([key], [y]) + if write_speech_dur is not None: + keys.append(key) + info.append(str(x.shape[1] / fs)) # duration in secs = samples / sample-freq + + t8 = time.time() + read_time = t2 - t1 + tot_time = read_time + t8 - t3 + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f " + "aug-time=%.3f feat-time=%.3f " + "vad-time=%.3f embed-time=%.3f write-time=%.3f " + "rt-factor=%.2f" + ), + key, + tot_time, + read_time, + t4 - t3, + t5 - t4, + t6 - t5, + t7 - t6, + t8 - t7, + x.shape[1] / fs / tot_time, + ) + + if write_speech_dur is not None: + logging.info("writing speech duration in secs to %s", write_speech_dur) + u2sd = Utt2Info.create(keys, info) + u2sd.save(write_speech_dur) + + if aug_info_path is not None: + aug_df = pd.concat(aug_df, ignore_index=True) + aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") + + +if __name__ == "__main__": + + parser = ArgumentParser( + description=( + "Extracts x-vectors from waveform computing " "acoustic features on the fly" + ) + ) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--input", dest="input_spec", required=True) + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument("--write-speech-dur", default=None) + parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) + parser.add_argument( + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad") + ) + + AR.add_class_args(parser) + + parser.add_argument("--aug-cfg", default=None) + parser.add_argument("--aug-info-path", default=None) + parser.add_argument( + "--num-augs", default=1, type=int, help="number of augmentations per utterance" + ) + + parser.add_argument("--model-path", required=True) + parser.add_argument( + "--chunk-length", + type=int, + default=0, + help=( + "number of frames used in each forward pass " + "of the x-vector encoder," + "if 0 the full utterance is used" + ), + ) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from, " + "if None, it uses layer set in training phase" + ), + ) + + 
parser.add_argument( + "--random-utt-length", + default=False, + action="store_true", + help="calculates x-vector from a random chunk", + ) + parser.add_argument( + "--min-utt-length", + type=int, + default=5, + help=("minimum utterance length in secs when using random utt length"), + ) + parser.add_argument( + "--max-utt-length", + type=int, + default=120, + help=("maximum utterance length in secs when using random utt length"), + ) + + parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + extract_xvectors(**namespace_to_dict(args)) diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py new file mode 100755 index 00000000..cbb37bb3 --- /dev/null +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from pathlib import Path +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging +import multiprocessing + +import numpy as np + +import torch +import torch.nn as nn + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.utils import ddp +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import ( + HFWav2Vec2ResNet1dXVector, + HFHubert2ResNet1dXVector, + HFWavLM2ResNet1dXVector, +) + +model_dict = { + "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, + "hf_hubert2resnet1d": HFHubert2ResNet1dXVector, + "hf_wavlm2resnet1d": HFWavLM2ResNet1dXVector, +} + + +def init_data(partition, rank, num_gpus, **kwargs): + + kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**kwargs["dataset"]) + sampler_args = Sampler.filter_args(**kwargs["sampler"]) + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + ad_args["is_val"] = partition == "val" + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = Sampler(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_model(num_classes, rank, model_class, **kwargs): + model_args = model_class.filter_args(**kwargs["model"]) + if rank == 0: + logging.info("model network args={}".format(model_args)) + model_args["xvector"]["num_classes"] = num_classes + model = model_class(**model_args) + if rank == 0: + logging.info("model={}".format(model)) + return model + + +def train_model(gpu_id, args): + + config_logger(args.verbose) + 
del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + model = init_model(train_loader.dataset.num_classes, **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(model_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + train_parser = ArgumentParser(prog="") + AD.add_class_args(train_parser, prefix="dataset", skip={}) + Sampler.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + Sampler.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + "data.train.dataset.class_file", "data.val.dataset.class_file" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + parser.link_arguments( + "data.train.sampler.batch_size", "data.val.sampler.batch_size" + ) + + model_class.add_class_args(parser, prefix="model") + Trainer.add_class_args(parser, prefix="trainer") + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train Wav2Vec2XVector model from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + + for k, v in model_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + model_type = args.subcommand + args_sc = vars(args)[model_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.model_class = model_dict[model_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_model(gpu_id, args_sc) diff --git a/hyperion/bin/train_xvector_from_feats.py b/hyperion/bin/train_xvector_from_feats.py new file mode 100755 index 00000000..c09f15a4 --- /dev/null +++ b/hyperion/bin/train_xvector_from_feats.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + 
Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from pathlib import Path +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging +import multiprocessing + +import numpy as np + +import torch +import torch.nn as nn + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.utils import ddp +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.data import FeatSeqDataset as SD +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import ResNetXVector as RXVec +from hyperion.torch.models import ResNet1dXVector as R1dXVec +from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import TDNNXVector as TDXVec +from hyperion.torch.models import TransformerXVectorV1 as TFXVec +from hyperion.torch.models import SpineNetXVector as SpineXVec + +xvec_dict = { + "resnet": RXVec, + "resnet1d": R1dXVec, + "efficientnet": EXVec, + "tdnn": TDXVec, + "transformer": TFXVec, + "spinenet": SpineXVec, +} + + +def init_data(partition, rank, num_gpus, **kwargs): + + kwargs = kwargs["data"][partition] + sd_args = SD.filter_args(**kwargs["dataset"]) + sampler_args = Sampler.filter_args(**kwargs["sampler"]) + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, sd_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + sd_args["is_val"] = partition == "val" + dataset = SD(**sd_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = Sampler(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_xvector(num_classes, rank, xvec_class, **kwargs): + xvec_args = xvec_class.filter_args(**kwargs["model"]) + if rank == 0: + logging.info("xvector network args={}".format(xvec_args)) + xvec_args["num_classes"] = num_classes + model = xvec_class(**xvec_args) + if rank == 0: + logging.info("x-vector-model={}".format(model)) + return model + + +def train_xvec(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + model = init_xvector(train_loader.dataset.num_classes, **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(xvec_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + + train_parser = 
ArgumentParser(prog="") + + SD.add_class_args(train_parser, prefix="dataset", skip={}) + Sampler.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + SD.add_class_args(val_parser, prefix="dataset", skip={}) + Sampler.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + "data.train.dataset.class_file", "data.val.dataset.class_file" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + parser.link_arguments( + "data.train.sampler.batch_size", "data.val.sampler.batch_size" + ) + + xvec_class.add_class_args(parser, prefix="model") + Trainer.add_class_args(parser, prefix="trainer") + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train XVector from audio files") + + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + + for k, v in xvec_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + xvec_type = args.subcommand + args_sc = vars(args)[xvec_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.xvec_class = xvec_dict[xvec_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_xvec(gpu_id, args_sc) diff --git a/hyperion/bin/torch_train_xvec_from_wav.py b/hyperion/bin/train_xvector_from_wav.py similarity index 99% rename from hyperion/bin/torch_train_xvec_from_wav.py rename to hyperion/bin/train_xvector_from_wav.py index c488b5c5..39b037ba 100755 --- a/hyperion/bin/torch_train_xvec_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -22,7 +22,6 @@ import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device from hyperion.torch.utils import ddp from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.data import AudioDataset as AD diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 38da8eb9..f86ad0a2 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -387,9 +387,6 @@ def add_class_args(parser, prefix=None, skip={"audio_file", "key_file"}): outer_parser = parser parser = ArgumentParser(prog="") - # parser.add_argument('--path-prefix', - # default='', - # help=('path prefix for rspecifier scp file')) if "audio_file" not in skip: parser.add_argument( "--audio-file", diff --git a/hyperion/torch/data/feat_seq_dataset.py b/hyperion/torch/data/feat_seq_dataset.py index 
2774c899..462bfe41 100644 --- a/hyperion/torch/data/feat_seq_dataset.py +++ b/hyperion/torch/data/feat_seq_dataset.py @@ -26,7 +26,7 @@ class FeatSeqDataset(Dataset): def __init__( self, - rspecifier, + feat_file, key_file, class_file=None, num_frames_file=None, @@ -39,8 +39,8 @@ def __init__( is_val=False, ): - logging.info("opening dataset %s" % rspecifier) - self.r = RF.create(rspecifier, path_prefix=path_prefix, scp_sep=" ") + logging.info("opening dataset %s", feat_file) + self.r = RF.create(feat_file, path_prefix=path_prefix, scp_sep=" ") logging.info("loading utt2info file %s" % key_file) self.u2c = Utt2Info.load(key_file, sep=" ") logging.info("dataset contains %d seqs" % self.num_seqs) @@ -264,6 +264,8 @@ def _get_random_chunk(self, index): @staticmethod def filter_args(**kwargs): valid_args = ( + "feat_file", + "key_file", "path_prefix", "class_file", "num_frames_file", @@ -276,11 +278,25 @@ def filter_args(**kwargs): return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod - def add_class_args(parser, prefix=None): + def add_class_args(parser, prefix=None, skip={"feat_file", "key_file"}): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") + if "feat_file" not in skip: + parser.add_argument( + "--feat-file", + required=True, + help=("acoustic features manifest file"), + ) + + if "key_file" not in skip: + parser.add_argument( + "--key-file", + required=True, + help=("key manifest file"), + ) + parser.add_argument( "--path-prefix", default="", help=("path prefix for rspecifier scp file") ) diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index be4e0441..e953f58c 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -12,5 +12,11 @@ from .xvectors.spinenet_xvector import SpineNetXVector from .xvectors.resnet1d_xvector import ResNet1dXVector +from .wav2xvectors import ( + HFWav2Vec2ResNet1dXVector, + HFHubert2ResNet1dXVector, + HFWavLM2ResNet1dXVector, +) + from .vae.vae import VAE from .vae.vq_vae import VQVAE diff --git a/hyperion/torch/models/wav2xvectors/__init__.py b/hyperion/torch/models/wav2xvectors/__init__.py index d1e65dd0..015c8d0f 100644 --- a/hyperion/torch/models/wav2xvectors/__init__.py +++ b/hyperion/torch/models/wav2xvectors/__init__.py @@ -11,3 +11,7 @@ # from .wav2transformer_xvector_v1 import Wav2TransformerXVectorV1 # from .wav2spinenet_xvector import Wav2SpineNetXVector from .wav2resnet1d_xvector import Wav2ResNet1dXVector + +from .hf_wav2vec2resnet1d_xvector import HFWav2Vec2ResNet1dXVector +from .hf_hubert2resnet1d_xvector import HFHubert2ResNet1dXVector +from .hf_wavlm2resnet1d_xvector import HFWavLM2ResNet1dXVector diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py new file mode 100644 index 00000000..d585567f --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py @@ -0,0 +1,74 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from jsonargparse import ArgumentParser, ActionParser +from typing import Union, Dict, Optional + +import torch +import torch.nn as nn + +from ..xvectors import ResNet1dXVector +from ...tpm import HFHubert +from .hf_wav2xvector import HFWav2XVector + + +class HFHubert2ResNet1dXVector(HFWav2XVector): + """Class extracting Hubert + ResNet1d x-vectors from waveform. 
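+    The HuBERT hidden states are fused according to "feat_fusion_method" and fed to the ResNet1d x-vector encoder (see HFWav2XVector).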
+ + Attributes: + hf_feats: HFHubert configuration dictionary or object. + This is a wrapper over Hugging Face Hubert model. + xvector: ResNet1dXVector configuration dictionary or object. + feat_fusion_start: the input to x-vector model will fuse the Hubert layers from "feat_fusion_start" to + the Hubert "num_layers". + feat_fusion_method: method to fuse the hidden layers from the Hubert model, when more + than one layer is used. + """ + + def __init__( + self, + hf_feats: Union[Dict, HFHubert], + xvector: Union[Dict, ResNet1dXVector], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): + + if isinstance(hf_feats, dict): + hf_feats = HFHubert(**hf_feats) + else: + assert isinstance(hf_feats, HFHubert) + + if isinstance(xvector, dict): + xvector["resnet_enc"]["in_feats"] = hf_feats.hidden_size + xvector = ResNet1dXVector(**xvector) + else: + assert isinstance(xvector, ResNet1dXVector) + assert xvector.encoder_net.in_feats == hf_feats.hidden_size + + super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + + @staticmethod + def filter_args(**kwargs): + + base_args = HFWav2XVector.filter_args(**kwargs) + child_args = HFHubert.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ResNet1dXVector.filter_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFHubert.add_class_args(parser, prefix="hf_feats") + ResNet1dXVector.add_class_args(parser, prefix="xvector") + HFWav2XVector.add_class_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py index 78724174..3b44c53f 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py @@ -4,37 +4,75 @@ """ import logging from jsonargparse import ArgumentParser, ActionParser +from typing import Union, Dict, Optional import torch import torch.nn as nn from ..xvectors import ResNet1dXVector -from ...tpm import HFWav2Vec +from ...tpm import HFWav2Vec2 from .hf_wav2xvector import HFWav2XVector class HFWav2Vec2ResNet1dXVector(HFWav2XVector): - """Class extracting ResNet1d x-vectors from waveform. - It contains acoustic feature extraction, feature normalization and - ResNet1dXVector extractor. + """Class extracting Wav2Vec2 + ResNet1d x-vectors from waveform. Attributes: hf_feats: HFWav2Vec configuration dictionary or object. This is a wrapper over Hugging Face Wav2Vec model. xvector: ResNet1dXVector configuration dictionary or object. + feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to + the wav2vec "num_layers". + feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more + than one layer is used.
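+ + A construction sketch (values are illustrative, taken from the recipe configs in this patch): + + >>> model = HFWav2Vec2ResNet1dXVector( + ... hf_feats={"pretrained_model_path": "facebook/wav2vec2-base"}, + ... xvector={"resnet_enc": {...}, "embed_dim": 256}, + ... feat_fusion_start=2, + ... feat_fusion_method="weighted-avg")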
""" - def __init__(self, hf_feats, xvector): + def __init__( + self, + hf_feats: Union[Dict, HFWav2Vec2], + xvector: Union[Dict, ResNet1dXVector], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): if isinstance(hf_feats, dict): - hf_feats = HFWav2Vec(**hf_feats) + if "class_name" in hf_feats: + del hf_feats["class_name"] + hf_feats = HFWav2Vec2(**hf_feats) else: - assert isinstance(hf_feats, HFWav2Vec) + assert isinstance(hf_feats, HFWav2Vec2) if isinstance(xvector, dict): + xvector["resnet_enc"]["in_feats"] = hf_feats.hidden_size + if "class_name" in xvector: + del xvector["class_name"] xvector = ResNet1dXVector(**xvector) else: assert isinstance(xvector, ResNet1dXVector) + assert xvector.encoder_net.in_feats == hf_feats.hidden_size - super().__init__(hf_feats, xvector) + super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + + @staticmethod + def filter_args(**kwargs): + + base_args = HFWav2XVector.filter_args(**kwargs) + child_args = HFWav2Vec2.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ResNet1dXVector.filter_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_class_args(parser, prefix="hf_feats") + ResNet1dXVector.add_class_args(parser, prefix="xvector") + HFWav2XVector.add_class_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index a471343c..f5f2c840 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -8,8 +8,10 @@ import torch import torch.nn as nn +# import torch.nn.functional as nnf from ...torch_model import TorchModel +from ...utils import remove_silence class HFWav2XVector(TorchModel): @@ -18,9 +20,234 @@ class HFWav2XVector(TorchModel): Attributes: hf_feats: hugging face model wrapper object. xvector: x-vector model object. + feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to + the wav2vec "num_layers". + feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more + than one layer is used. """ - def __init__(self, hf_feats, xvector): + def __init__( + self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg" + ): + super().__init__() self.hf_feats = hf_feats self.xvector = xvector + self.feat_fusion_start = feat_fusion_start + self.feat_fusion_method = feat_fusion_method + self._make_fuser() + + def _make_fuser(self): + if self.feat_fusion_method == "last": + self.feat_fuser = None + return + + num_layers = self.hf_feats.num_encoder_layers + 1 - self.feat_fusion_start + layer_dim = self.hf_feats.hidden_size + if self.feat_fusion_method == "weighted-avg": + self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) + elif self.feat_fusion_method == "linear": + self.feat_fuser = nn.Linear(num_layers, 1, bias=False) + self.feat_fuser.weights.data = torch.ones(num_layers) / num_layers + elif self.feat_fusion_method == "cat": + self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) + + def _fuse_hid_feats(self, hid_feats): + """Fuses the hidden features from the Wav2Vec model. 
+ + Args: + hid_feats: list of hidden features Tensors from Wav2Vec model. + + Returns: + Tensor of fused features (batch, channels, time) + """ + if len(hid_feats) == 1: + # There is only one layer of features + return hid_feats[0] + + hid_feats = hid_feats[self.feat_fusion_start :] + if self.feat_fusion_method == "weighted-avg": + hid_feats = torch.stack(hid_feats, dim=-1) + norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) + feats = torch.sum(hid_feats * norm_weights, dim=-1) + elif self.feat_fusion_method == "linear": + hid_feats = torch.stack(hid_feats, dim=-1) + feats = self.feat_fuser(hid_feats) + elif self.feat_fusion_method == "cat": + hid_feats = torch.cat(hid_feats, dim=-1) + feats = self.feat_fuser(hid_feats) + elif self.feat_fusion_method == "last": + feats = hid_feats[-1] + + return feats + + def update_loss_margin(self, epoch): + """Updates the value of the margin in AAM/AM-softmax losses + given the epoch number + + Args: + epoch: epoch which is about to start + """ + self.xvector.update_loss_margin(epoch) + + def forward_feats(self, x, x_lengths, return_feat_layers=None): + return_hid_states = ( + False + if return_feat_layers is None and self.feat_fusion_method == "last" + else True + ) + hf_output = self.hf_feats(x, x_lengths, return_hid_states=return_hid_states) + feat_lengths = hf_output["hidden_states_lengths"] + if return_hid_states: + hid_feats = hf_output["hidden_states"] + feats = self._fuse_hid_feats(hid_feats) + else: + hid_feats = None + feats = hf_output["last_hidden_state"] + + feats = feats.transpose(1, 2) + if return_feat_layers is not None: + # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time) + # as the hidden features of the x-vector encoder. + hid_feats = [ + f.transpose(1, 2) + for i, f in enumerate(hid_feats) + if i in return_feat_layers + ] + else: + hid_feats = None + + return feats, hid_feats, feat_lengths + + def forward( + self, + x, + x_lengths=None, + y=None, + return_feat_layers=None, + return_enc_layers=None, + return_classif_layers=None, + return_logits=True, + ): + """Forward function. It returns the class logits (posteriors). + It can also return the hidden representations of the wav2vec feature extractor, + the x-vector encoder and the + classification head. In that case the output variable is a dictionary. + + Args: + x: input features tensor with shape=(batch, in_feats, time) + x_lengths: time lengths of the features with shape=(batch,) + y: target classes torch.long tensor with shape=(batch,) + return_feat_layers: list of integers indicating which wav2vec layers + we should return. If None, no wav2vec layers are returned. + return_enc_layers: list of integers indicating which encoder layers + we should return. If None, no encoder layers are returned. + return_classif_layers: list of integers indicating which classification head layers + we should return. If None, no head layers are returned. + return_logits: if True, it adds the logits to the output dictionary.
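+ e.g., model(x, x_lengths, y, return_feat_layers=[4, 8]) returns a dictionary with "logits" and "h_feats" (the layer indices here are illustrative).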
+        Returns:
+          Tensor with class logits with shape=(batch, num_classes) or
+          Dictionary with "logits", "h_enc" (list of hidden encoder layers),
+          "h_classif" (list of hidden classification head layers), "h_feats" (wav2vec features)
+        """
+        feats, hid_feats, feat_lengths = self.forward_feats(
+            x, x_lengths, return_feat_layers
+        )
+        output = self.xvector(
+            feats,
+            feat_lengths,
+            y,
+            return_enc_layers=return_enc_layers,
+            return_classif_layers=return_classif_layers,
+            return_logits=return_logits,
+        )
+
+        if not return_feat_layers:
+            return output
+
+        if not isinstance(output, dict):
+            # if the xvector just returned the logits, we put them into a dictionary
+            # to append the hid feats later.
+            output = {"logits": output}
+
+        output["h_feats"] = hid_feats
+        return output
+
+    def extract_embed(
+        self,
+        x,
+        x_lengths=None,
+        vad_samples=None,
+        chunk_length=0,
+        embed_layer=None,
+        detach_chunks=False,
+    ):
+
+        if vad_samples is not None:
+            x, x_lengths = remove_silence(x, x_lengths)
+
+        feats, _, feat_lengths = self.forward_feats(x, x_lengths)
+        # rescale the chunk length from waveform samples to wav2vec frames
+        xvec_chunk_length = int(chunk_length * feats.size(-1) // x.size(-1))
+        return self.xvector.extract_embed(
+            feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks
+        )
+
+    @staticmethod
+    def filter_args(**kwargs):
+        valid_args = (
+            "hf_feats",
+            "xvector",
+            "feat_fusion_start",
+            "feat_fusion_method",
+        )
+        args = dict((k, kwargs[k]) for k in valid_args if k in kwargs)
+        return args
+
+    def get_config(self):
+
+        hf_cfg = self.hf_feats.get_config()
+        xvec_cfg = self.xvector.get_config()
+        del hf_cfg["class_name"]
+        del xvec_cfg["class_name"]
+        config = {
+            "hf_feats": hf_cfg,
+            "xvector": xvec_cfg,
+            "feat_fusion_start": self.feat_fusion_start,
+            "feat_fusion_method": self.feat_fusion_method,
+        }
+
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @staticmethod
+    def add_class_args(parser, prefix=None, skip=set()):
+
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        parser.add_argument(
+            "--feat-fusion-start",
+            default=0,
+            type=int,
+            help=(
+                "the input to the x-vector model fuses the wav2vec layers from "
+                "feat_fusion_start to the wav2vec num_layers"
+            ),
+        )
+        parser.add_argument(
+            "--feat-fusion-method",
+            default="weighted-avg",
+            choices=["weighted-avg", "linear", "cat", "last"],
+            help=(
+                "method to fuse the hidden layers from the wav2vec model "
+                "in [weighted-avg, linear, cat, last]"
+            ),
+        )
+
+        if prefix is not None:
+            outer_parser.add_argument(
+                "--" + prefix,
+                action=ActionParser(parser=parser),
+                help="xvector options",
+            )
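A quick worked example of the chunk-length rescaling done in extract_embed above; the numbers are illustrative (wav2vec-style convolutional front-ends downsample 16 kHz audio by roughly 320x):

    x_len = 160000          # 10 s of 16 kHz waveform, in samples
    feat_len = 499          # wav2vec frames produced for that waveform
    chunk_length = 48000    # requested chunk size, in samples
    xvec_chunk_length = int(chunk_length * feat_len // x_len)  # -> 149 frames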
diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py
new file mode 100644
index 00000000..89e7120e
--- /dev/null
+++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py
@@ -0,0 +1,74 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+from jsonargparse import ArgumentParser, ActionParser
+from typing import Union, Dict, Optional
+
+import torch
+import torch.nn as nn
+
+from ..xvectors import ResNet1dXVector
+from ...tpm import HFWavLM
+from .hf_wav2xvector import HFWav2XVector
+
+
+class HFWavLM2ResNet1dXVector(HFWav2XVector):
+    """Class extracting WavLM + ResNet1d x-vectors from waveform.
+
+    Attributes:
+      hf_feats: HFWavLM configuration dictionary or object.
+        This is a wrapper over the Hugging Face WavLM model.
+      xvector: ResNet1dXVector configuration dictionary or object.
+      feat_fusion_start: the input to the x-vector model fuses the WavLM layers
+        from "feat_fusion_start" to the last WavLM layer ("num_layers").
+      feat_fusion_method: method used to fuse the hidden layers of the WavLM
+        model when more than one layer is used.
+    """
+
+    def __init__(
+        self,
+        hf_feats: Union[Dict, HFWavLM],
+        xvector: Union[Dict, ResNet1dXVector],
+        feat_fusion_start: int = 0,
+        feat_fusion_method: str = "weighted-avg",
+    ):
+
+        if isinstance(hf_feats, dict):
+            hf_feats = HFWavLM(**hf_feats)
+        else:
+            assert isinstance(hf_feats, HFWavLM)
+
+        if isinstance(xvector, dict):
+            xvector["resnet_enc"]["in_feats"] = hf_feats.hidden_size
+            xvector = ResNet1dXVector(**xvector)
+        else:
+            assert isinstance(xvector, ResNet1dXVector)
+            assert xvector.encoder_net.in_feats == hf_feats.hidden_size
+
+        super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method)
+
+    @staticmethod
+    def filter_args(**kwargs):
+        base_args = HFWav2XVector.filter_args(**kwargs)
+        child_args = HFWavLM.filter_args(**kwargs["hf_feats"])
+        base_args["hf_feats"] = child_args
+        child_args = ResNet1dXVector.filter_args(**kwargs["xvector"])
+        base_args["xvector"] = child_args
+        return base_args
+
+    @staticmethod
+    def add_class_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        HFWavLM.add_class_args(parser, prefix="hf_feats")
+        ResNet1dXVector.add_class_args(parser, prefix="xvector")
+        HFWav2XVector.add_class_args(parser)
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py
index 0c5a1698..27268e44 100644
--- a/hyperion/torch/models/wav2xvectors/wav2xvector.py
+++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py
@@ -75,6 +75,7 @@ def extract_embed(
         if vad_feats is not None:
             feats, feat_lengths = remove_silence(feats, feat_lengths)

+        feats = feats.transpose(1, 2)
         return self.xvector.extract_embed(
             feats, feat_lengths, chunk_length, embed_layer, detach_chunks
         )
diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py
index 295824f3..706ee4ef 100644
--- a/hyperion/torch/models/xvectors/resnet1d_xvector.py
+++ b/hyperion/torch/models/xvectors/resnet1d_xvector.py
@@ -133,7 +133,6 @@ def get_config(self):
     def load(cls, file_path=None, cfg=None, state_dict=None):

         cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict)
-
         try:
             del cfg["in_feats"]
         except:
@@ -145,6 +144,7 @@ def load(cls, file_path=None, cfg=None, state_dict=None):

         return model

+    @staticmethod
     def filter_args(**kwargs):

         base_args = XVector.filter_args(**kwargs)
diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py
index d11fb020..e07487d7 100644
--- a/hyperion/torch/models/xvectors/xvector.py
+++ b/hyperion/torch/models/xvectors/xvector.py
@@ -12,7 +12,7 @@
 from ...layer_blocks import TDNNBlock
 from ...narchs import ClassifHead, TorchNALoader
 from ...torch_model import TorchModel
-from ...utils import eval_nnet_by_chunks, scale_lengths
+from ...utils import eval_nnet_by_chunks, scale_seq_lengths


 class XVector(TorchModel):
@@ -209,7 +209,7 @@ def _post_enc(self, x, in_lengths=None, max_in_length=None):
             x = self.proj(x)

         if in_lengths is not None:
-            out_lengths = scale_lengths(in_lengths, x.size(-1), max_in_length)
+            out_lengths = scale_seq_lengths(in_lengths, x.size(-1), max_in_length)
         else:
             out_lengths = None

@@ -224,6 +224,24 @@ def forward(
         return_classif_layers=None,
         return_logits=True,
     ):
+        """Forward function. Returns the class logits and, optionally, the hidden
+        representations of the encoder and the classification head. When hidden
+        representations are requested, the output is a dictionary.
+
+        Args:
+          x: input features tensor with shape=(batch, in_feats, time).
+          x_lengths: time lengths of the features with shape=(batch,).
+          y: target classes torch.long tensor with shape=(batch,).
+          return_enc_layers: list of integers indicating which encoder layers
+            we should return. If None, no encoder layers are returned.
+          return_classif_layers: list of integers indicating which classification head
+            layers we should return. If None, no head layers are returned.
+          return_logits: if True, it adds the logits to the output dictionary.
+        Returns:
+          Tensor with class logits with shape=(batch, num_classes) or
+          Dictionary with "logits", "h_enc" (list of hidden encoder layers),
+          "h_classif" (list of hidden classification head layers).
+        """

         if return_enc_layers is None and return_classif_layers is None:
             return self.forward_logits(x, x_lengths, y)

@@ -236,11 +254,12 @@ def forward_logits(self, x, x_lengths=None, y=None):
         """Forward function

         Args:
-          x: input features tensor with shape=(batch, in_feats, time)
-          y: target classes torch.long tensor with shape=(batch,)
+          x: input features tensor with shape=(batch, in_feats, time).
+          x_lengths: time lengths of the features with shape=(batch,).
+          y: target classes torch.long tensor with shape=(batch,).

         Returns:
-          class logits tensor with shape=(batch, num_classes)
+          class logits tensor with shape=(batch, num_classes).
         """
         max_in_length = x.size(-1)
         x = self._pre_enc(x)
@@ -259,7 +278,21 @@ def forward_hid_feats(
         return_classif_layers=None,
         return_logits=False,
     ):
-        """forwards hidden representations in the x-vector network"""
+        """forwards hidden representations in the x-vector network
+
+        Args:
+          x: input features tensor with shape=(batch, in_feats, time).
+          x_lengths: time lengths of the features with shape=(batch,).
+          y: target classes torch.long tensor with shape=(batch,).
+          return_enc_layers: list of integers indicating which encoder layers
+            we should return. If None, no encoder layers are returned.
+          return_classif_layers: list of integers indicating which classification head
+            layers we should return. If None, no head layers are returned.
+          return_logits: if True, it adds the logits to the output dictionary.
+        Returns:
+          Dictionary with "logits", "h_enc" (list of hidden encoder layers),
+          "h_classif" (list of hidden classification head layers).
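+
+        Example:
+          A minimal usage sketch; the layer indices and the ``model`` handle
+          are illustrative::
+
+            out = model.forward_hid_feats(
+                x, return_enc_layers=[2, 5], return_classif_layers=[0],
+                return_logits=True,
+            )
+            h_enc = out["h_enc"]    # hidden feats of encoder layers 2 and 5
+            logits = out["logits"]  # class logits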
+ """ max_in_length = x.size(-1) x = self._pre_enc(x) h_enc, x = self.encoder_net.forward_hid_feats( diff --git a/hyperion/torch/tpm/__init__.py b/hyperion/torch/tpm/__init__.py new file mode 100644 index 00000000..dfa5c14b --- /dev/null +++ b/hyperion/torch/tpm/__init__.py @@ -0,0 +1,6 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from .hf import HFWav2Vec2, HFHubert, HFWavLM diff --git a/hyperion/torch/tpm/hf/__init__.py b/hyperion/torch/tpm/hf/__init__.py new file mode 100644 index 00000000..4db1c95d --- /dev/null +++ b/hyperion/torch/tpm/hf/__init__.py @@ -0,0 +1,8 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from .hf_wav2vec2 import HFWav2Vec2 +from .hf_hubert import HFHubert +from .hf_wavlm import HFWavLM diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py new file mode 100644 index 00000000..889aed03 --- /dev/null +++ b/hyperion/torch/tpm/hf/hf_hubert.py @@ -0,0 +1,553 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import os +import logging +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +from typing import Optional, Tuple, Union, List, Callable + +import torch +import torch.nn as nn + +from transformers import HubertModel, HubertConfig + +from ...utils.ddp import ddp_wait_for_all_procs, ddp_get_rank +from .hf_wav2vec_base import HFWav2VecBase + + +class HFHubert(HFWav2VecBase): + r"""This is wrapper over HuggingFace Hubert model. + See documentation: https://huggingface.co/docs/transformers/main/en/model_doc/hubert + + This wrapper makes the HugginFace model to have the same interface + as other hyperion models. It also add extra functionalities. + + The config. parameters are the same as in the HuggingFace HubertConfig class. + + Attributes: + pretrained_model_path (`str`, defaults to None): file path or HuggingFace Hub path to + pre-trained model. + normalize_input (`bool`, defaults to True): whether or not to zero-mean unit-variance + normalize the input. + use_input_attention_mask (`bool`, defaults to False): whether we should input an + attention mask to the wav2vec model. + vocab_size (`int`, defaults to 32): vocabulary size of the + model. Defines the different tokens that can be represented by the + *inputs_ids* passed to the forward method. + hidden_size (`int`, defaults to 768): dimensionality of the encoder layers and + the pooler layer. + num_hidden_layers (`int`, defaults to 12): number of hidden layers in the + Transformer encoder. + num_attention_heads (`int`, defaults to 12): number of attention heads for + each attention layer in the Transformer encoder. + intermediate_size (`int`, defaults to 3072): dimensionality of the + feed-forward layer in the Transformer encoder. + hidden_act (`str` or `function`, defaults to `"gelu"`): the non-linear + activation function (function or string) in the encoder and pooler. + If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported. + hidden_dropout (`float`, defaults to 0.1): the dropout probability for all + fully connected layers in the embeddings, encoder, and pooler. + activation_dropout (`float`, defaults to 0.1): the dropout probability for all + intermediate layer in feedforward transformer layers. 
+      attention_dropout (`float`, defaults to 0.1): the dropout ratio for the
+        attention probabilities.
+      layerdrop (`float`, defaults to 0.1): prob. of dropping a layer.
+      initializer_range (`float`, defaults to 0.02): the standard deviation of the
+        truncated_normal_initializer for initializing all weight matrices.
+      layer_norm_eps (`float`, defaults to 1e-12): the epsilon used by the layer
+        normalization layers.
+      feat_extract_norm (`str`, defaults to `"group"`):
+        the norm to be applied to 1D convolutional layers in feature encoder.
+        One of `"group"` for group normalization of only the first 1D convolutional
+        layer or `"layer"` for layer normalization of all 1D convolutional layers.
+      feat_proj_dropout (`float`, defaults to 0.0): the dropout probability for output
+        of the feature encoder.
+      feat_extract_activation (`str`, `optional`, defaults to `"gelu"`): the non-linear
+        activation function (function or string) in the 1D convolutional layers of the feature
+        extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
+      conv_dim (`Tuple[int]`, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
+        a tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
+        feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
+      conv_stride (`Tuple[int]`, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
+        a tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
+        of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
+      conv_kernel (`Tuple[int]`, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
+        a tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
+        length of *conv_kernel* defines the number of convolutional layers and has to match the length of
+        *conv_dim*.
+      conv_bias (`bool`, defaults to `False`): whether the 1D convolutional layers have a bias.
+      num_conv_pos_embeddings (`int`, defaults to 128):
+        number of convolutional positional embeddings. Defines the kernel size of 1D convolutional positional
+        embeddings layer.
+      num_conv_pos_embedding_groups (`int`, defaults to 16):
+        number of groups of 1D convolutional positional embeddings layer.
+      do_stable_layer_norm (`bool`, defaults to `False`):
+        whether to apply *stable* layer norm architecture of the Transformer encoder. `do_stable_layer_norm is
+        True` corresponds to applying layer norm before the attention layer, whereas `do_stable_layer_norm is
+        False` corresponds to applying layer norm after the attention layer.
+      apply_spec_augment (`bool`, defaults to `True`):
+        whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder. For reference see
+        [SpecAugment: A Simple Data Augmentation Method for Automatic Speech
+        Recognition](https://arxiv.org/abs/1904.08779).
+      mask_time_prob (`float`, defaults to 0.05):
+        percentage (between 0 and 1) of all feature vectors along the time axis which will be masked. The masking
+        procedure generates ''mask_time_prob*len(time_axis)/mask_time_length'' independent masks over the axis. If
+        reasoning from the probability of each feature vector to be chosen as the start of the vector span to be
+        masked, *mask_time_prob* should be `prob_vector_start*mask_time_length`. Note that overlap may decrease the
+        actual percentage of masked vectors. This is only relevant if `apply_spec_augment is True`.
+      mask_time_length (`int`, defaults to 10):
+        length of vector span along the time axis.
+      mask_time_min_masks (`int`, defaults to 2):
+        the minimum number of masks of length `mask_time_length` generated along the time axis, each time step,
+        irrespective of `mask_feature_prob`. Only relevant if ''mask_time_prob*len(time_axis)/mask_time_length <
+        mask_time_min_masks''
+      mask_feature_prob (`float`, defaults to 0.0):
+        percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked. The
+        masking procedure generates ''mask_feature_prob*len(feature_axis)/mask_time_length'' independent masks over
+        the axis. If reasoning from the probability of each feature vector to be chosen as the start of the vector
+        span to be masked, *mask_feature_prob* should be `prob_vector_start*mask_feature_length`. Note that overlap
+        may decrease the actual percentage of masked vectors. This is only relevant if `apply_spec_augment is
+        True`.
+      mask_feature_length (`int`, defaults to 10):
+        length of vector span along the feature axis.
+      mask_feature_min_masks (`int`, defaults to 0):
+        the minimum number of masks of length `mask_feature_length` generated along the feature axis, each time
+        step, irrespective of `mask_feature_prob`. Only relevant if
+        ''mask_feature_prob*len(feature_axis)/mask_feature_length < mask_feature_min_masks''
+      cache_dir (str or os.PathLike): path to a directory in which a downloaded pretrained
+        model configuration should be cached if the standard cache should not be used.
+      force_download (`bool`, defaults to `False`): whether or not to force (re-)downloading
+        the model weights and configuration files, overriding the
+        cached versions if they exist.
+      resume_download (`bool`, defaults to `False`): whether or not to delete incompletely
+        received files. Will attempt to resume the download if such a file exists.
+      revision (`str`, defaults to `"main"`): the specific model version to use.
+        It can be a branch name, a tag name, or a commit id.
+      ignore_pretrained (`bool`, defaults to False): if True, it ignores the pretrained_model_path
+        and inits the model from the configuration. This is set to True for models that have already
+        been finetuned.
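+
+    Example:
+      A minimal construction sketch; the Hub path is illustrative, and any
+      HubertModel checkpoint id or local path should work::
+
+        hf_feats = HFHubert(pretrained_model_path="facebook/hubert-base-ls960")
+        hf_feats.hidden_size         # 768 for the base model
+        hf_feats.num_encoder_layers  # 12 for the base model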
+ """ + + def __init__( + self, + pretrained_model_path: Optional[Union[str, os.PathLike]] = None, + normalize_input: bool = True, + use_input_attention_mask: bool = False, + vocab_size: int = 32, + hidden_size: int = 768, + num_hidden_layers: int = 12, + num_attention_heads: int = 12, + intermediate_size: int = 3072, + hidden_act: Union[str, Callable] = "gelu", + hidden_dropout: float = 0.1, + activation_dropout: float = 0.1, + attention_dropout: float = 0.1, + layerdrop: float = 0.1, + initializer_range: float = 0.02, + layer_norm_eps: float = 1e-12, + feat_extract_norm: str = "group", + feat_proj_dropout: float = 0.0, + feat_extract_activation: Union[str, Callable] = "gelu", + conv_dim: Tuple[int] = (512, 512, 512, 512, 512, 512, 512), + conv_stride: Tuple[int] = (5, 2, 2, 2, 2, 2, 2), + conv_kernel: Tuple[int] = (10, 3, 3, 3, 3, 3, 3), + conv_bias: bool = False, + num_conv_pos_embeddings: int = 128, + num_conv_pos_embedding_groups: int = 16, + do_stable_layer_norm: bool = False, + apply_spec_augment: bool = True, + mask_time_prob: float = 0.05, + mask_time_length: int = 10, + mask_time_min_masks: int = 2, + mask_feature_prob: float = 0.0, + mask_feature_length: int = 10, + mask_feature_min_masks: int = 0, + cache_dir: Union[str, os.PathLike] = "./.cache/hyperion_hf", + force_download: bool = False, + resume_download: bool = False, + revision: str = "main", + drop_layers_gt: Optional[int] = None, + ignore_pretrained: bool = False, + ): + + super().__init__( + pretrained_model_path=pretrained_model_path, + normalize_input=normalize_input, + use_input_attention_mask=use_input_attention_mask, + cache_dir=cache_dir, + force_download=force_download, + resume_download=resume_download, + revision=revision, + drop_layers_gt=drop_layers_gt, + ignore_pretrained=ignore_pretrained, + ) + + if pretrained_model_path is not None and not ignore_pretrained: + logging.info(f"Downloading HF model from {pretrained_model_path}") + rank = ddp_get_rank() + if rank == 0: + # rank 0 downloads the model from HF web + self.hf_model = HubertModel.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=force_download, + resume_download=resume_download, + revision=revision, + ) + # all ranks wait until the model is downloaded + ddp_wait_for_all_procs() + if rank > 0: + # the rest of ranks should read the configuration from the cache. 
+                self.hf_model = HubertModel.from_pretrained(
+                    pretrained_model_path,
+                    cache_dir=cache_dir,
+                    force_download=False,
+                    resume_download=False,
+                    revision=revision,
+                )
+            ddp_wait_for_all_procs()
+            self.hf_model.config.layerdrop = 0.0
+        else:
+            hf_config = HubertConfig(
+                vocab_size=vocab_size,
+                hidden_size=hidden_size,
+                num_hidden_layers=num_hidden_layers,
+                num_attention_heads=num_attention_heads,
+                intermediate_size=intermediate_size,
+                hidden_act=hidden_act,
+                hidden_dropout=hidden_dropout,
+                activation_dropout=activation_dropout,
+                attention_dropout=attention_dropout,
+                feat_proj_dropout=feat_proj_dropout,
+                layerdrop=0.0,  # layerdrop,
+                initializer_range=initializer_range,
+                layer_norm_eps=layer_norm_eps,
+                feat_extract_norm=feat_extract_norm,
+                feat_extract_activation=feat_extract_activation,
+                conv_dim=conv_dim,
+                conv_stride=conv_stride,
+                conv_kernel=conv_kernel,
+                conv_bias=conv_bias,
+                num_conv_pos_embeddings=num_conv_pos_embeddings,
+                num_conv_pos_embedding_groups=num_conv_pos_embedding_groups,
+                do_stable_layer_norm=do_stable_layer_norm,
+                apply_spec_augment=apply_spec_augment,
+                mask_time_prob=mask_time_prob,
+                mask_time_length=mask_time_length,
+                mask_time_min_masks=mask_time_min_masks,
+                mask_feature_prob=mask_feature_prob,
+                mask_feature_length=mask_feature_length,
+                mask_feature_min_masks=mask_feature_min_masks,
+            )
+            self.hf_model = HubertModel(hf_config)
+
+        if drop_layers_gt is not None:
+            self.drop_upper_layers(drop_layers_gt)
+
+        self.ignore_pretrained = True
+
+    @property
+    def num_encoder_layers(self):
+        return self.hf_config.num_hidden_layers
+
+    @property
+    def hidden_size(self):
+        return self.hf_config.hidden_size
+
+    def drop_upper_layers(self, max_layers: int):
+        if max_layers >= self.hf_config.num_hidden_layers:
+            return
+
+        layers = self.hf_model.encoder.layers
+        self.hf_model.encoder.layers = nn.ModuleList(
+            [l for i, l in enumerate(layers) if i < max_layers]
+        )
+        self.hf_config.num_hidden_layers = max_layers
+
+    def get_config(self):
+        """Returns the configuration arguments for the object in a dictionary."""
+        config = self.hf_model.config.to_dict()
+        config = self.filter_args(**config)
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @staticmethod
+    def filter_args(**kwargs):
+        args_base = HFWav2VecBase.filter_args(**kwargs)
+        valid_args = (
+            "vocab_size",
+            "hidden_size",
+            "num_hidden_layers",
+            "num_attention_heads",
+            "intermediate_size",
+            "hidden_act",
+            "hidden_dropout",
+            "activation_dropout",
+            "attention_dropout",
+            "feat_proj_dropout",
+            "layerdrop",
+            "initializer_range",
+            "layer_norm_eps",
+            "feat_extract_norm",
+            "feat_extract_activation",
+            "conv_dim",
+            "conv_stride",
+            "conv_kernel",
+            "conv_bias",
+            "num_conv_pos_embeddings",
+            "num_conv_pos_embedding_groups",
+            "do_stable_layer_norm",
+            "apply_spec_augment",
+            "mask_time_prob",
+            "mask_time_length",
+            "mask_time_min_masks",
+            "mask_feature_prob",
+            "mask_feature_length",
+            "mask_feature_min_masks",
+        )
+        args = dict((k, kwargs[k]) for k in valid_args if k in kwargs)
+        args.update(args_base)
+        return args
+
+    @staticmethod
+    def add_class_args(parser, prefix=None, skip=set()):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        HFWav2VecBase.add_class_args(parser)
+
+        parser.add_argument(
+            "--vocab-size",
+            default=32,
+            type=int,
+            help=(
+                "vocabulary size of the "
+                "model. Defines the different tokens that can be represented by the "
+                "*inputs_ids* passed to the forward method."
+ ), + ) + parser.add_argument( + "--hidden-size", + default=768, + type=int, + help=("dimensionality of the encoder layers and the pooler layer."), + ) + parser.add_argument( + "--num-hidden-layers", + default=12, + type=int, + help=("number of hidden layers in the Transformer encoder"), + ) + parser.add_argument( + "--num-attention-heads", + default=12, + type=int, + help=( + "number of attention heads for " + "each attention layer in the Transformer encoder" + ), + ) + parser.add_argument( + "--intermediate-size", + default=3072, + type=int, + help=( + "dimensionality of the " "feed-forward layer in the Transformer encoder" + ), + ) + parser.add_argument( + "--hidden-act", + default="gelu", + choices=["gelu", "relu", "selu", "gelu_new"], + help=( + "the non-linear " + "activation function (function or string) in the encoder and pooler" + ), + ) + parser.add_argument( + "--hidden-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "fully connected layers in the embeddings, encoder, and pooler" + ), + ) + parser.add_argument( + "--activation-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "intermediate layer in feedforward transformer layers" + ), + ) + parser.add_argument( + "--attention-dropout", + default=0.1, + type=float, + help=("the dropout ratio for the attention probabilities"), + ) + parser.add_argument( + "--layerdrop", + default=0.1, + type=float, + help=("prob. of dropping a layer"), + ) + parser.add_argument( + "--initializer-range", + default=0.02, + type=float, + help=( + "the standard deviation of the " + "truncated_normal_initializer for initializing all weight matrices" + ), + ) + parser.add_argument( + "--layer-norm-eps", + default=1e-12, + type=float, + help=( + "the standard deviation of the " + "truncated_normal_initializer for initializing all weight matrices" + ), + ) + parser.add_argument( + "--feat-extract-norm", + default="group", + choices=["group", "layer"], + help=( + "the norm to be applied to 1D convolutional layers in feature encoder. " + "One of `group` for group normalization of only the first 1D convolutional " + "layer or `layer` for layer normalization of all 1D convolutional layers" + ), + ) + parser.add_argument( + "--feat-proj-dropout", + default=0.1, + type=float, + help=("the dropout probability for output of the feature encoder"), + ) + parser.add_argument( + "--feat-extract-activation", + default="gelu", + choices=["gelu", "relu", "selu", "gelu_new"], + help=( + "the non-linear activation function (function or string) in the 1D " + "convolutional layers of the feature extractor" + ), + ) + parser.add_argument( + "--conv-dim", + default=[512, 512, 512, 512, 512, 512, 512], + nargs="+", + type=int, + help=( + "a tuple of integers defining the number of input and output channels of each 1D convolutional layer in the " + "feature encoder. 
+        parser.add_argument(
+            "--conv-stride",
+            default=[5, 2, 2, 2, 2, 2, 2],
+            nargs="+",
+            type=int,
+            help=(
+                "a tuple of integers defining the stride of each 1D convolutional layer in the feature encoder"
+            ),
+        )
+        parser.add_argument(
+            "--conv-kernel",
+            default=[10, 3, 3, 3, 3, 3, 3],
+            nargs="+",
+            type=int,
+            help=(
+                "a tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder"
+            ),
+        )
+        parser.add_argument(
+            "--conv-bias",
+            default=False,
+            action=ActionYesNo,
+            help=("whether the 1D convolutional layers have a bias"),
+        )
+        parser.add_argument(
+            "--num-conv-pos-embeddings",
+            default=128,
+            type=int,
+            help=(
+                "number of convolutional positional embeddings. Defines the kernel size of 1D convolutional positional "
+                "embeddings layer"
+            ),
+        )
+        parser.add_argument(
+            "--num-conv-pos-embedding-groups",
+            default=16,
+            type=int,
+            help=("number of groups of 1D convolutional positional embeddings layer"),
+        )
+        parser.add_argument(
+            "--do-stable-layer-norm",
+            default=False,
+            action=ActionYesNo,
+            help=(
+                "whether to apply *stable* layer norm architecture of the Transformer encoder"
+            ),
+        )
+        parser.add_argument(
+            "--apply-spec-augment",
+            default=True,
+            action=ActionYesNo,
+            help=(
+                "whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder"
+            ),
+        )
+        parser.add_argument(
+            "--mask-time-prob",
+            default=0.05,
+            type=float,
+            help=(
+                "percentage (between 0 and 1) of all feature vectors along the time axis which will be masked"
+            ),
+        )
+        parser.add_argument(
+            "--mask-time-length",
+            default=10,
+            type=int,
+            help=("length of vector span along the time axis"),
+        )
+        parser.add_argument(
+            "--mask-time-min-masks",
+            default=2,
+            type=int,
+            help=(
+                "the minimum number of masks of length `mask_time_length` generated along the time axis"
+            ),
+        )
+        parser.add_argument(
+            "--mask-feature-prob",
+            default=0.0,
+            type=float,
+            help=(
+                "percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked"
+            ),
+        )
+        parser.add_argument(
+            "--mask-feature-length",
+            default=10,
+            type=int,
+            help=("length of vector span along the feature axis"),
+        )
+        parser.add_argument(
+            "--mask-feature-min-masks",
+            default=0,
+            type=int,
+            help=(
+                "the minimum number of masks of length `mask_feature_length` generated along the feature axis"
+            ),
+        )
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py
new file mode 100644
index 00000000..63a7cf99
--- /dev/null
+++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py
@@ -0,0 +1,668 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import os
+import logging
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+from typing import Optional, Tuple, Union, List, Callable
+
+import torch
+import torch.nn as nn
+
+from transformers import Wav2Vec2Model, Wav2Vec2Config
+
+from ...utils.ddp import ddp_wait_for_all_procs, ddp_get_rank
+from .hf_wav2vec_base import HFWav2VecBase
+
+
+class HFWav2Vec2(HFWav2VecBase):
+    r"""This is a wrapper over the HuggingFace Wav2Vec2 model.
+    See documentation: https://huggingface.co/docs/transformers/model_doc/wav2vec2
+
+    This wrapper makes the HuggingFace model have the same interface
+    as other hyperion models. It also adds extra functionality.
+
+    The config parameters are the same as in the HuggingFace Wav2Vec2Config class.
+
+    Attributes:
+      pretrained_model_path (`str`, defaults to None): file path or HuggingFace Hub path to
+        pre-trained model.
+      normalize_input (`bool`, defaults to True): whether or not to zero-mean unit-variance
+        normalize the input.
+      use_input_attention_mask (`bool`, defaults to False): whether we should input an
+        attention mask to the wav2vec model.
+      vocab_size (`int`, defaults to 32): vocabulary size of the
+        model. Defines the different tokens that can be represented by the
+        *inputs_ids* passed to the forward method.
+      hidden_size (`int`, defaults to 768): dimensionality of the encoder layers and
+        the pooler layer.
+      num_hidden_layers (`int`, defaults to 12): number of hidden layers in the
+        Transformer encoder.
+      num_attention_heads (`int`, defaults to 12): number of attention heads for
+        each attention layer in the Transformer encoder.
+      intermediate_size (`int`, defaults to 3072): dimensionality of the
+        feed-forward layer in the Transformer encoder.
+      hidden_act (`str` or `function`, defaults to `"gelu"`): the non-linear
+        activation function (function or string) in the encoder and pooler.
+        If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
+      hidden_dropout (`float`, defaults to 0.1): the dropout probability for all
+        fully connected layers in the embeddings, encoder, and pooler.
+      activation_dropout (`float`, defaults to 0.1): the dropout probability for all
+        intermediate layers in the feedforward transformer layers.
+      attention_dropout (`float`, defaults to 0.1): the dropout ratio for the
+        attention probabilities.
+      layerdrop (`float`, defaults to 0.1): prob. of dropping a layer.
+      initializer_range (`float`, defaults to 0.02): the standard deviation of the
+        truncated_normal_initializer for initializing all weight matrices.
+      layer_norm_eps (`float`, defaults to 1e-12): the epsilon used by the layer
+        normalization layers.
+      feat_extract_norm (`str`, defaults to `"group"`):
+        the norm to be applied to 1D convolutional layers in feature encoder.
+        One of `"group"` for group normalization of only the first 1D convolutional
+        layer or `"layer"` for layer normalization of all 1D convolutional layers.
+      feat_proj_dropout (`float`, defaults to 0.0): the dropout probability for output
+        of the feature encoder.
+      feat_extract_activation (`str`, `optional`, defaults to `"gelu"`): the non-linear
+        activation function (function or string) in the 1D convolutional layers of the feature
+        extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
+      conv_dim (`Tuple[int]`, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
+        a tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
+        feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
+      conv_stride (`Tuple[int]`, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
+        a tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
+        of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
+      conv_kernel (`Tuple[int]`, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
+        a tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
+        length of *conv_kernel* defines the number of convolutional layers and has to match the length of
+        *conv_dim*.
+      conv_bias (`bool`, defaults to `False`): whether the 1D convolutional layers have a bias.
+      num_conv_pos_embeddings (`int`, defaults to 128):
+        number of convolutional positional embeddings. Defines the kernel size of 1D convolutional positional
+        embeddings layer.
+      num_conv_pos_embedding_groups (`int`, defaults to 16):
+        number of groups of 1D convolutional positional embeddings layer.
+      do_stable_layer_norm (`bool`, defaults to `False`):
+        whether to apply *stable* layer norm architecture of the Transformer encoder. `do_stable_layer_norm is
+        True` corresponds to applying layer norm before the attention layer, whereas `do_stable_layer_norm is
+        False` corresponds to applying layer norm after the attention layer.
+      apply_spec_augment (`bool`, defaults to `True`):
+        whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder. For reference see
+        [SpecAugment: A Simple Data Augmentation Method for Automatic Speech
+        Recognition](https://arxiv.org/abs/1904.08779).
+      mask_time_prob (`float`, defaults to 0.05):
+        percentage (between 0 and 1) of all feature vectors along the time axis which will be masked. The masking
+        procedure generates ''mask_time_prob*len(time_axis)/mask_time_length'' independent masks over the axis. If
+        reasoning from the probability of each feature vector to be chosen as the start of the vector span to be
+        masked, *mask_time_prob* should be `prob_vector_start*mask_time_length`. Note that overlap may decrease the
+        actual percentage of masked vectors. This is only relevant if `apply_spec_augment is True`.
+      mask_time_length (`int`, defaults to 10):
+        length of vector span along the time axis.
+      mask_time_min_masks (`int`, defaults to 2):
+        the minimum number of masks of length `mask_time_length` generated along the time axis, each time step,
+        irrespective of `mask_feature_prob`. Only relevant if ''mask_time_prob*len(time_axis)/mask_time_length <
+        mask_time_min_masks''
+      mask_feature_prob (`float`, defaults to 0.0):
+        percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked. The
+        masking procedure generates ''mask_feature_prob*len(feature_axis)/mask_time_length'' independent masks over
+        the axis. If reasoning from the probability of each feature vector to be chosen as the start of the vector
+        span to be masked, *mask_feature_prob* should be `prob_vector_start*mask_feature_length`. Note that overlap
+        may decrease the actual percentage of masked vectors. This is only relevant if `apply_spec_augment is
+        True`.
+      mask_feature_length (`int`, defaults to 10):
+        length of vector span along the feature axis.
+      mask_feature_min_masks (`int`, defaults to 0):
+        the minimum number of masks of length `mask_feature_length` generated along the feature axis, each time
+        step, irrespective of `mask_feature_prob`. Only relevant if
+        ''mask_feature_prob*len(feature_axis)/mask_feature_length < mask_feature_min_masks''
+      add_adapter (`bool`, defaults to `False`):
+        whether a convolutional network should be stacked on top of the Wav2Vec2 Encoder. Can be very useful for
+        warm-starting Wav2Vec2 for SpeechEncoderDecoder models.
+      adapter_kernel_size (`int`, defaults to 3):
+        kernel size of the convolutional layers in the adapter network. Only relevant if `add_adapter is True`.
+      adapter_stride (`int`, defaults to 2):
+        stride of the convolutional layers in the adapter network. Only relevant if `add_adapter is True`.
+      num_adapter_layers (`int`, defaults to 3):
+        number of convolutional layers that should be used in the adapter network. Only relevant if `add_adapter is
+        True`.
+      output_hidden_size (`int`, defaults to None):
+        dimensionality of the encoder output layer. If not defined, this defaults to *hidden-size*. Only relevant
+        if `add_adapter is True`.
+      cache_dir (str or os.PathLike): path to a directory in which a downloaded pretrained
+        model configuration should be cached if the standard cache should not be used.
+      force_download (`bool`, defaults to `False`): whether or not to force (re-)downloading
+        the model weights and configuration files, overriding the
+        cached versions if they exist.
+      resume_download (`bool`, defaults to `False`): whether or not to delete incompletely
+        received files. Will attempt to resume the download if such a file exists.
+      revision (`str`, defaults to `"main"`): the specific model version to use.
+        It can be a branch name, a tag name, or a commit id.
+      ignore_pretrained (`bool`, defaults to False): if True, it ignores the pretrained_model_path
+        and inits the model from the configuration. This is set to True for models that have already
+        been finetuned.
+    """
+
+    def __init__(
+        self,
+        pretrained_model_path: Optional[Union[str, os.PathLike]] = None,
+        normalize_input: bool = True,
+        use_input_attention_mask: bool = False,
+        vocab_size: int = 32,
+        hidden_size: int = 768,
+        num_hidden_layers: int = 12,
+        num_attention_heads: int = 12,
+        intermediate_size: int = 3072,
+        hidden_act: Union[str, Callable] = "gelu",
+        hidden_dropout: float = 0.1,
+        activation_dropout: float = 0.1,
+        attention_dropout: float = 0.1,
+        layerdrop: float = 0.1,
+        initializer_range: float = 0.02,
+        layer_norm_eps: float = 1e-12,
+        feat_extract_norm: str = "group",
+        feat_proj_dropout: float = 0.0,
+        feat_extract_activation: Union[str, Callable] = "gelu",
+        conv_dim: Tuple[int] = (512, 512, 512, 512, 512, 512, 512),
+        conv_stride: Tuple[int] = (5, 2, 2, 2, 2, 2, 2),
+        conv_kernel: Tuple[int] = (10, 3, 3, 3, 3, 3, 3),
+        conv_bias: bool = False,
+        num_conv_pos_embeddings: int = 128,
+        num_conv_pos_embedding_groups: int = 16,
+        do_stable_layer_norm: bool = False,
+        apply_spec_augment: bool = True,
+        mask_time_prob: float = 0.05,
+        mask_time_length: int = 10,
+        mask_time_min_masks: int = 2,
+        mask_feature_prob: float = 0.0,
+        mask_feature_length: int = 10,
+        mask_feature_min_masks: int = 0,
+        add_adapter: bool = False,
+        adapter_kernel_size: int = 3,
+        adapter_stride: int = 2,
+        num_adapter_layers: int = 3,
+        output_hidden_size: Optional[int] = None,
+        cache_dir: Union[str, os.PathLike] = "./.cache/hyperion_hf",
+        force_download: bool = False,
+        resume_download: bool = False,
+        revision: str = "main",
+        drop_layers_gt: Optional[int] = None,
+        ignore_pretrained: bool = False,
+    ):
+
+        super().__init__(
+            pretrained_model_path=pretrained_model_path,
+            normalize_input=normalize_input,
+            use_input_attention_mask=use_input_attention_mask,
+            cache_dir=cache_dir,
+            force_download=force_download,
+            resume_download=resume_download,
+            revision=revision,
+            drop_layers_gt=drop_layers_gt,
+            ignore_pretrained=ignore_pretrained,
+        )
+
+        if pretrained_model_path is not None and not ignore_pretrained:
logging.info(f"Downloading HF model from {pretrained_model_path}") + rank = ddp_get_rank() + if rank == 0: + # rank 0 downloads the model from HF web + self.hf_model = Wav2Vec2Model.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=force_download, + resume_download=resume_download, + revision=revision, + ) + # all ranks wait until the model is downloaded + ddp_wait_for_all_procs() + if rank > 0: + # the rest of ranks should read the configuration from the cache. + self.hf_model = Wav2Vec2Model.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=False, + resume_download=False, + revision=revision, + ) + ddp_wait_for_all_procs() + self.hf_model.config.layerdrop = 0.0 + else: + hf_config = Wav2Vec2Config( + vocab_size=vocab_size, + hidden_size=hidden_size, + num_hidden_layers=num_hidden_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + hidden_act=hidden_act, + hidden_dropout=hidden_dropout, + activation_dropout=activation_dropout, + attention_dropout=attention_dropout, + feat_proj_dropout=feat_proj_dropout, + layerdrop=0.0, # layerdrop, + initializer_range=initializer_range, + layer_norm_eps=layer_norm_eps, + feat_extract_norm=feat_extract_norm, + feat_extract_activation=feat_extract_activation, + conv_dim=conv_dim, + conv_stride=conv_stride, + conv_kernel=conv_kernel, + conv_bias=conv_bias, + num_conv_pos_embeddings=num_conv_pos_embeddings, + num_conv_pos_embedding_groups=num_conv_pos_embedding_groups, + do_stable_layer_norm=do_stable_layer_norm, + apply_spec_augment=apply_spec_augment, + mask_time_prob=mask_time_prob, + mask_time_length=mask_time_length, + mask_time_min_masks=mask_time_min_masks, + mask_feature_prob=mask_feature_prob, + mask_feature_length=mask_feature_length, + mask_feature_min_masks=mask_feature_min_masks, + add_adapter=add_adapter, + adapter_kernel_size=adapter_kernel_size, + adapter_stride=adapter_stride, + num_adapter_layers=num_adapter_layers, + output_hidden_size=output_hidden_size, + ) + self.hf_model = Wav2Vec2Model(hf_config) + + if drop_layers_gt is not None: + self.drop_upper_layers(drop_layers_gt) + + self.ignore_pretrained = True + + @property + def num_encoder_layers(self): + return self.hf_config.num_hidden_layers + + @property + def hidden_size(self): + return self.hf_config.hidden_size + + def drop_upper_layers(self, max_layers: int): + if max_layers >= self.hf_config.num_hidden_layers: + return + + layers = self.hf_model.encoder.layers + self.hf_model.encoder.layers = nn.ModuleList( + [l for i, l in enumerate(layers) if i < max_layers] + ) + self.hf_config.num_hidden_layers = max_layers + + if self.hf_model.adapter is not None: + del self.hf_model.adapter + self.hf_model.adapter = None + self.hf_config.add_adapter = False + + def get_config(self): + """Returns the configuration arguments for the object in a dictionary.""" + config = self.hf_model.config.to_dict() + config = self.filter_args(**config) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @staticmethod + def filter_args(**kwargs): + args_base = HFWav2VecBase.filter_args(**kwargs) + valid_args = ( + "vocab_size", + "hidden_size", + "num_hidden_layers", + "num_attention_heads", + "intermediate_size", + "hidden_act", + "hidden_dropout", + "activation_dropout", + "attention_dropout", + "feat_proj_dropout", + "layerdrop", + "initializer_range", + "layer_norm_eps", + "feat_extract_norm", + "feat_extract_activation", + "conv_dim", + 
"conv_stride", + "conv_kernel", + "conv_bias", + "num_conv_pos_embeddings", + "num_conv_pos_embedding_groups", + "do_stable_layer_norm", + "apply_spec_augment", + "mask_time_prob", + "mask_time_length", + "mask_time_min_masks", + "mask_feature_prob", + "mask_feature_length", + "mask_feature_min_masks", + "add_adapter", + "adapter_kernel_size", + "adapter_stride", + "num_adapter_layers", + "output_hidden_size", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args.update(args_base) + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2VecBase.add_class_args(parser) + + parser.add_argument( + "--vocab-size", + default=32, + type=int, + help=( + "vocabulary size of the " + "model. Defines the different tokens that can be represented by the " + "*inputs_ids* passed to the forward method." + ), + ) + parser.add_argument( + "--hidden-size", + default=768, + type=int, + help=("dimensionality of the encoder layers and the pooler layer."), + ) + parser.add_argument( + "--num-hidden-layers", + default=12, + type=int, + help=("number of hidden layers in the Transformer encoder"), + ) + parser.add_argument( + "--num-attention-heads", + default=12, + type=int, + help=( + "number of attention heads for " + "each attention layer in the Transformer encoder" + ), + ) + parser.add_argument( + "--intermediate-size", + default=3072, + type=int, + help=( + "dimensionality of the " "feed-forward layer in the Transformer encoder" + ), + ) + parser.add_argument( + "--hidden-act", + default="gelu", + choices=["gelu", "relu", "selu", "gelu_new"], + help=( + "the non-linear " + "activation function (function or string) in the encoder and pooler" + ), + ) + parser.add_argument( + "--hidden-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "fully connected layers in the embeddings, encoder, and pooler" + ), + ) + parser.add_argument( + "--activation-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "intermediate layer in feedforward transformer layers" + ), + ) + parser.add_argument( + "--attention-dropout", + default=0.1, + type=float, + help=("the dropout ratio for the attention probabilities"), + ) + parser.add_argument( + "--layerdrop", + default=0.1, + type=float, + help=("prob. of dropping a layer"), + ) + parser.add_argument( + "--initializer-range", + default=0.02, + type=float, + help=( + "the standard deviation of the " + "truncated_normal_initializer for initializing all weight matrices" + ), + ) + parser.add_argument( + "--layer-norm-eps", + default=1e-12, + type=float, + help=( + "the standard deviation of the " + "truncated_normal_initializer for initializing all weight matrices" + ), + ) + parser.add_argument( + "--feat-extract-norm", + default="group", + choices=["group", "layer"], + help=( + "the norm to be applied to 1D convolutional layers in feature encoder. 
" + "One of `group` for group normalization of only the first 1D convolutional " + "layer or `layer` for layer normalization of all 1D convolutional layers" + ), + ) + parser.add_argument( + "--feat-proj-dropout", + default=0.1, + type=float, + help=("the dropout probability for output of the feature encoder"), + ) + parser.add_argument( + "--feat-extract-activation", + default="gelu", + choices=["gelu", "relu", "selu", "gelu_new"], + help=( + "the non-linear activation function (function or string) in the 1D " + "convolutional layers of the feature extractor" + ), + ) + parser.add_argument( + "--conv-dim", + default=[512, 512, 512, 512, 512, 512, 512], + nargs="+", + type=int, + help=( + "a tuple of integers defining the number of input and output channels of each 1D convolutional layer in the " + "feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers" + ), + ) + parser.add_argument( + "--conv-stride", + default=[5, 2, 2, 2, 2, 2, 2], + nargs="+", + type=int, + help=( + "a tuple of integers defining the stride of each 1D convolutional layer in the feature encoder" + ), + ) + parser.add_argument( + "--conv-kernel", + default=[10, 3, 3, 3, 3, 3, 3], + nargs="+", + type=int, + help=( + "a tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder" + ), + ) + parser.add_argument( + "--conv-bias", + default=False, + action=ActionYesNo, + help=("whether the 1D convolutional layers have a bias"), + ) + parser.add_argument( + "--num-conv-pos-embeddings", + default=128, + type=int, + help=( + "number of convolutional positional embeddings. Defines the kernel size of 1D convolutional positional " + "embeddings layer" + ), + ) + parser.add_argument( + "--num-conv-pos-embedding-groups", + default=16, + type=int, + help=("number of groups of 1D convolutional positional embeddings layer"), + ) + parser.add_argument( + "--do-stable-layer-norm", + default=False, + action=ActionYesNo, + help=( + "whether to apply *stable* layer norm architecture of the Transformer encoder" + ), + ) + parser.add_argument( + "--apply-spec-augment", + default=True, + action=ActionYesNo, + help=( + "whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder" + ), + ) + parser.add_argument( + "--mask-time-prob", + default=0.05, + type=float, + help=( + "percentage (between 0 and 1) of all feature vectors along the time axis which will be masked" + ), + ) + parser.add_argument( + "--mask-time-length", + default=10, + type=int, + help=("length of vector span along the time axis"), + ) + parser.add_argument( + "--mask-time-min-masks", + default=2, + type=int, + help=( + "the minimum number of masks of length `mask_time_length` generated along the time axis" + ), + ) + parser.add_argument( + "--mask-feature-prob", + default=0.0, + type=float, + help=( + "percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked" + ), + ) + parser.add_argument( + "--mask-feature-length", + default=10, + type=int, + help=(" length of vector span along the feature axis"), + ) + parser.add_argument( + "--mask-feature-min-masks", + default=0, + type=int, + help=( + "The minimum number of masks of length `mask_feature_length` generated along the feature axis" + ), + ) + parser.add_argument( + "--add-adapter", + default=False, + action=ActionYesNo, + help=( + "whether a convolutional network should be stacked on top of the Wav2Vec2 Encoder" + ), + ) + parser.add_argument( + "--adapter-kernel-size", + default=3, + 
+            type=int,
+            help=("kernel size of the convolutional layers in the adapter network"),
+        )
+        parser.add_argument(
+            "--adapter-stride",
+            default=2,
+            type=int,
+            help=("stride of the convolutional layers in the adapter network"),
+        )
+        parser.add_argument(
+            "--num-adapter-layers",
+            default=3,
+            type=int,
+            help=(
+                "number of convolutional layers that should be used in the adapter network"
+            ),
+        )
+        parser.add_argument(
+            "--output-hidden-size",
+            default=None,
+            type=int,
+            help=(
+                "dimensionality of the encoder output layer. If not defined, this defaults to *hidden-size*."
+                " Only relevant if `add_adapter is True`"
+            ),
+        )
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
+
+    """
+    Things I think I don't need:
+      feat_quantizer_dropout (`float`, defaults to 0.0): the dropout probability for quantized feature encoder states.
+      final_dropout (`float`, defaults to 0.1): the dropout probability for the
+        final projection layer of [`Wav2Vec2ForCTC`].
+      num_codevectors_per_group (`int`, defaults to 320):
+        number of entries in each quantization codebook (group).
+      num_codevector_groups (`int`, defaults to 2):
+        number of codevector groups for product codevector quantization.
+      contrastive_logits_temperature (`float`, defaults to 0.1):
+        the temperature *kappa* in the contrastive loss.
+      feat_quantizer_dropout (`float`, defaults to 0.0):
+        the dropout probability for the output of the feature encoder that's used by the quantizer.
+      num_negatives (`int`, defaults to 100):
+        number of negative samples for the contrastive loss.
+      codevector_dim (`int`, defaults to 256):
+        dimensionality of the quantized feature vectors.
+      proj_codevector_dim (`int`, defaults to 256):
+        dimensionality of the final projection of both the quantized and the transformer features.
+      diversity_loss_weight (`int`, defaults to 0.1):
+        the weight of the codebook diversity loss component.
+      ctc_loss_reduction (`str`, defaults to `"sum"`):
+        specifies the reduction to apply to the output of `torch.nn.CTCLoss`. Only relevant when training an
+        instance of [`Wav2Vec2ForCTC`].
+      ctc_zero_infinity (`bool`, defaults to `False`):
+        whether to zero infinite losses and the associated gradients of `torch.nn.CTCLoss`. Infinite losses mainly
+        occur when the inputs are too short to be aligned to the targets. Only relevant when training an instance
+        of [`Wav2Vec2ForCTC`].
+      use_weighted_layer_sum (`bool`, defaults to `False`):
+        whether to use a weighted average of layer outputs with learned weights. Only relevant when using an
+        instance of [`Wav2Vec2ForSequenceClassification`].
+      classifier_proj_size (`int`, defaults to 256):
+        dimensionality of the projection before token mean-pooling for classification.
+      tdnn_dim (`Tuple[int]`, defaults to `(512, 512, 512, 512, 1500)`):
+        a tuple of integers defining the number of output channels of each 1D convolutional layer in the *TDNN*
+        module of the *XVector* model. The length of *tdnn_dim* defines the number of *TDNN* layers.
+      tdnn_kernel (`Tuple[int]`, defaults to `(5, 3, 3, 1, 1)`):
+        a tuple of integers defining the kernel size of each 1D convolutional layer in the *TDNN* module of the
+        *XVector* model. The length of *tdnn_kernel* has to match the length of *tdnn_dim*.
+      tdnn_dilation (`Tuple[int]`, defaults to `(1, 2, 3, 1, 1)`):
+        a tuple of integers defining the dilation factor of each 1D convolutional layer in *TDNN* module of the
+        *XVector* model. The length of *tdnn_dilation* has to match the length of *tdnn_dim*.
+      xvector_output_dim (`int`, defaults to 512):
+        dimensionality of the *XVector* embedding vectors.
+    """
diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py
new file mode 100644
index 00000000..0b862d62
--- /dev/null
+++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py
@@ -0,0 +1,331 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import os
+import logging
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+
+from typing import Optional, Tuple, Union, List
+
+import torch
+import torch.nn as nn
+
+from transformers import Wav2Vec2Processor, Wav2Vec2FeatureExtractor
+
+from ...torch_model import TorchModel
+from ...utils import seq_lengths_to_mask, scale_seq_lengths
+from ...utils.ddp import ddp_wait_for_all_procs, ddp_get_rank
+
+
+class HFWav2VecBase(TorchModel):
+    """Base class for Wav2Vec style models (Wav2Vec2, Hubert, WavLM, ...) in HuggingFace.
+
+    This class includes the preprocessing steps common to all models.
+
+    Attributes:
+      pretrained_model_path (`str`, or os.PathLike, defaults to None): file path or
+        HuggingFace Hub path to pre-trained model.
+      normalize_input (`bool`, defaults to True): whether or not to zero-mean unit-variance
+        normalize the input.
+      use_input_attention_mask (`bool`, defaults to False): whether we should input an
+        attention mask to the wav2vec model.
+      cache_dir (str or os.PathLike): path to a directory in which a downloaded pretrained
+        model configuration should be cached if the standard cache should not be used.
+      force_download (`bool`, defaults to `False`): whether or not to force (re-)downloading
+        the model weights and configuration files, overriding the
+        cached versions if they exist.
+      resume_download (`bool`, defaults to `False`): whether or not to delete incompletely
+        received files. Will attempt to resume the download if such a file exists.
+      revision (`str`, defaults to `"main"`): the specific model version to use.
+        It can be a branch name, a tag name, or a commit id.
+      drop_layers_gt (`int`, defaults to None): drop encoder layers greater than this value (in [1, num_encoder_layers]).
+        If None, the model is not changed.
+      ignore_pretrained (`bool`, defaults to False): if True, it ignores the pretrained_model_path
+        and inits the model from the configuration. This is set to True for models that have already
+        been finetuned.
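+
+    Example:
+      A minimal sketch of building a subclass from a pre-trained checkpoint and
+      truncating its encoder with drop_layers_gt; the Hub path is illustrative::
+
+        feats = HFWav2Vec2(
+            pretrained_model_path="facebook/wav2vec2-base", drop_layers_gt=6
+        )
+        feats.num_encoder_layers  # 6 after dropping the upper layers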
+ """ + + def __init__( + self, + pretrained_model_path: Optional[Union[str, os.PathLike]] = None, + normalize_input: bool = True, + use_input_attention_mask: bool = False, + cache_dir: Union[str, os.PathLike] = "./.cache/hyperion_hf", + force_download: bool = False, + resume_download: bool = False, + revision: str = "main", + drop_layers_gt: Optional[int] = None, + ignore_pretrained: bool = False, + ): + super().__init__() + self.pretrained_model_path = pretrained_model_path + self.cache_dir = cache_dir + self.force_download = force_download + self.resume_download = resume_download + self.revision = revision + self.drop_layers_gt = drop_layers_gt + self.ignore_pretrained = ignore_pretrained + + if pretrained_model_path is not None and not ignore_pretrained: + logging.info( + f"Downloading config for HF preprocessor from {pretrained_model_path}" + ) + rank = ddp_get_rank() + if rank == 0: + # rank 0 downloads the model from HF web + try: + # some models donot have config for processor because do not have + # tokenizer, first we try to donwload feature_extractor config + feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=force_download, + resume_download=resume_download, + revision=revision, + ) + except: + # if fails, we try to download full processor config + processor = Wav2Vec2Processor.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=force_download, + resume_download=resume_download, + revision=revision, + ) + feature_extractor = processor.feature_extractor + + # all ranks wait until the model is downloaded + ddp_wait_for_all_procs() + if rank > 0: + # the rest of ranks should read the configuration from the cache. + try: + feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=False, + resume_download=False, + revision=revision, + ) + except: + # if fails, we try to download full processor config + processor = Wav2Vec2Processor.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=False, + resume_download=False, + revision=revision, + ) + feature_extractor = processor.feature_extractor + + ddp_wait_for_all_procs() + normalize_input = feature_extractor.do_normalize + use_input_attention_mask = feature_extractor.return_attention_mask + + self.normalize_input = normalize_input + self.use_input_attention_mask = use_input_attention_mask + + def __deepcopy__(self, memo): + """Reimplementation of deepcopy for Hugging Face models. + The weight_norm in the Conv. Pos. Encoder of Wav2Vec models make the default deepcopy to fail. 
+ """ + cls = self.__class__ # Extract the class of the object + cfg = self.get_config() + del cfg["class_name"] + # Create a new instance of the object based on extracted class + new_obj = cls(**cfg) + memo[id(self)] = new_obj + new_obj.load_state_dict(self.state_dict()) + device = next(self.parameters()).device + new_obj.to(device) + print( + "deepcopy", + next(self.parameters()).device, + next(new_obj.parameters()).device, + flush=True, + ) + return new_obj + + @property + def hf_config(self): + return self.hf_model.config + + def _normalize(self, x, x_mask=None): + """Normalizes the audio to have zero mean and unit variance.""" + if x_mask is None: + x = x - x.mean(dim=1, keepdim=True) + std = torch.sqrt((x ** 2).mean(dim=1, keepdim=True) + 1e-7) + x = x / std + else: + x_mask = x_mask.to(dtype=x.dtype) + x_samples = torch.mean(x_mask, dim=1, keepdim=True) + x_mean = torch.mean(x * x_mask, dim=1, keepdim=True) / x_samples + x2_mean = torch.mean(x ** 2 * x_mask, dim=1, keepdim=True) / x_samples + std = torch.sqrt(x2_mean - x_mean ** 2 + 1e-7) + x = (x - x_mean) / std + return x + + def _preprocess(self, x, x_lengths=None): + """Prepares input audio to be used as input to wav2vec style model.""" + x_mask = seq_lengths_to_mask(x_lengths, x.size(-1), dtype=torch.long) + if self.normalize_input: + x = self._normalize(x, x_lengths) + + if self.use_input_attention_mask: + x_mask = None + + return x, x_mask + + def forward( + self, + x: torch.Tensor, + x_lengths: Optional[torch.LongTensor] = None, + return_attentions: bool = False, + return_hid_states: bool = False, + ): + r"""Forward function for wav2vec style models. + + Args: + x: input audio of shape = (batch, sequence_length). + x_lengths: lengths of the audio waveforms in samples with shape = (batch,). + return_attentions: whether or not to return the attentions tensors of + all attention layers. + return_hid_states: whether or not to return the hidden states of all layers. + + Returns: + Dictionary with: + last_hidden_state: sequence of hidden-states at the output of the last + layer of the model (torch.FloatTensor of shape + (batch_size, sequence_length, hidden_size)). + extract_features: sequence of extracted feature vectors of the last + convolutional layer of the model. (torch.FloatTensor of shape + (batch_size, sequence_length, conv_dim[-1]) + hidden_states: hidden-states of the model at the output of each layer + plus the initial embedding outputs (tuple(torch.FloatTensor)). + attentions: Attentions weights after the attention softmax, used to + compute the weighted average in the self-attention heads + (tuple(torch.FloatTensor)). 
+ """ + max_in_length = x.size(-1) + x, x_mask = self._preprocess(x, x_lengths) + output = self.hf_model( + x, + x_mask, + output_attentions=return_attentions, + output_hidden_states=return_hid_states, + ) + max_out_length = output.last_hidden_state.size(1) + feat_lengths = ( + None + if x_lengths is None + else scale_seq_lengths(x_lengths, max_out_length, max_in_length) + ) + output["hidden_states_lengths"] = feat_lengths + + return output + + def get_config(self): + """Returns the configuration arguments for the object in a dictionary.""" + + config = { + "pretrained_model_path": self.pretrained_model_path, + "normalize_input": self.normalize_input, + "use_input_attention_mask": self.use_input_attention_mask, + "cache_dir": self.cache_dir, + "force_download": self.force_download, + "resume_download": self.resume_download, + "revision": self.revision, + "drop_layers_gt": self.drop_layers_gt, + "ignore_pretrained": self.ignore_pretrained, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def save(self, file_path: str): + """Saves the model to disk.""" + self.ignore_pretrained = True + self.save(file_path) + + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "pretrained_model_path", + "normalize_input", + "use_input_attention_mask", + "cache_dir", + "force_download", + "resume_download", + "revision", + "drop_layers_gt", + "ignore_pretrained", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--pretrained-model-path", + default=None, + help=("file path or HuggingFace Hub path to pre-trained model"), + ) + parser.add_argument( + "--normalize-input", + default=True, + action=ActionYesNo, + help=("whether or not to zero-mean unit-variance normalize the input"), + ) + parser.add_argument( + "--use-input-attention-mask", + default=False, + action=ActionYesNo, + help=("whether we should input an attention mask to the wav2vec model"), + ) + parser.add_argument( + "--cache-dir", + default="./.cache/hyperion_hf", + help=( + "path to a directory in which a downloaded pretrained model " + "configuration should be cached if the standard cache should not be used" + ), + ) + parser.add_argument( + "--force-download", + default=False, + action=ActionYesNo, + help=( + "whether or not to force the (re-)download the model weights " + "and configuration files and override thecached versions if they exist" + ), + ) + parser.add_argument( + "--resume-download", + default=False, + action=ActionYesNo, + help=( + "whether or not to delete incompletely received files. " + "Will attempt to resume the download if such a file exists" + ), + ) + parser.add_argument( + "--revision", + default="main", + help=( + "the specific model version to use. It can be a branch name, " + "a tag name, or a commit id. 
" + ), + ) + parser.add_argument( + "--drop-layers-gt", + default=None, + type=int, + help=("drop encoder layers greater than this value."), + ) + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py new file mode 100644 index 00000000..1e8a5e8d --- /dev/null +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -0,0 +1,622 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import os +import logging +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +from typing import Optional, Tuple, Union, List, Callable + +import torch +import torch.nn as nn + +from transformers import WavLMModel, WavLMConfig + +from ...utils.ddp import ddp_wait_for_all_procs, ddp_get_rank +from .hf_wav2vec_base import HFWav2VecBase + + +class HFWavLM(HFWav2VecBase): + r"""This is wrapper over HuggingFace WavLM model. + See documentation: https://huggingface.co/docs/transformers/model_doc/wavlm + + This wrapper makes the HugginFace model to have the same interface + as other hyperion models. It also add extra functionalities. + + The config. parameters are the same as in the HuggingFace WavLMConfig class. + + Attributes: + pretrained_model_path (`str`, defaults to None): file path or HuggingFace Hub path to + pre-trained model. + normalize_input (`bool`, defaults to True): whether or not to zero-mean unit-variance + normalize the input. + use_input_attention_mask (`bool`, defaults to False): whether we should input an + attention mask to the wav2vec model. + vocab_size (`int`, defaults to 32): vocabulary size of the + model. Defines the different tokens that can be represented by the + *inputs_ids* passed to the forward method. + hidden_size (`int`, defaults to 768): dimensionality of the encoder layers and + the pooler layer. + num_hidden_layers (`int`, defaults to 12): number of hidden layers in the + Transformer encoder. + num_attention_heads (`int`, defaults to 12): number of attention heads for + each attention layer in the Transformer encoder. + intermediate_size (`int`, defaults to 3072): dimensionality of the + feed-forward layer in the Transformer encoder. + hidden_act (`str` or `function`, defaults to `"gelu"`): the non-linear + activation function (function or string) in the encoder and pooler. + If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported. + hidden_dropout (`float`, defaults to 0.1): the dropout probability for all + fully connected layers in the embeddings, encoder, and pooler. + activation_dropout (`float`, defaults to 0.1): the dropout probability for all + intermediate layer in feedforward transformer layers. + attention_dropout (`float`, defaults to 0.1): the dropout ratio for the + attention probabilities. + layerdrop (`float`, defaults to 0.1): prob. of dropping a layer. + initializer_range (`float`, defaults to 0.02): the standard deviation of the + truncated_normal_initializer for initializing all weight matrices. + layer_norm_eps (`float`, defaults to 1e-12): the epsilon used by the layer + normalization layers. + feat_extract_norm (`str`, defaults to `"group"`): + the norm to be applied to 1D convolutional layers in feature encoder. + One of `"group"` for group normalization of only the first 1D convolutional + layer or `"layer"` for layer normalization of all 1D convolutional layers. 
+ feat_proj_dropout (`float`, defaults to 0.0): the dropout probability for output
+ of the feature encoder.
+ feat_extract_activation (`str`, defaults to `"gelu"`): the non-linear
+ activation function (function or string) in the 1D convolutional layers of the feature
+ extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
+ conv_dim (`Tuple[int]`, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
+ a tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
+ feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
+ conv_stride (`Tuple[int]`, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
+ a tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
+ of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
+ conv_kernel (`Tuple[int]`, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
+ a tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
+ length of *conv_kernel* defines the number of convolutional layers and has to match the length of
+ *conv_dim*.
+ conv_bias (`bool`, defaults to `False`): whether the 1D convolutional layers have a bias.
+ num_conv_pos_embeddings (`int`, defaults to 128):
+ number of convolutional positional embeddings. Defines the kernel size of 1D convolutional positional
+ embeddings layer.
+ num_conv_pos_embedding_groups (`int`, defaults to 16):
+ number of groups of 1D convolutional positional embeddings layer.
+ do_stable_layer_norm (`bool`, defaults to `False`):
+ whether to apply *stable* layer norm architecture of the Transformer encoder. `do_stable_layer_norm is
+ True` corresponds to applying layer norm before the attention layer, whereas `do_stable_layer_norm is
+ False` corresponds to applying layer norm after the attention layer.
+ apply_spec_augment (`bool`, defaults to `True`):
+ whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder. For reference see
+ [SpecAugment: A Simple Data Augmentation Method for Automatic Speech
+ Recognition](https://arxiv.org/abs/1904.08779).
+ mask_time_prob (`float`, defaults to 0.05):
+ percentage (between 0 and 1) of all feature vectors along the time axis which will be masked. The masking
+ procedure generates ''mask_time_prob*len(time_axis)/mask_time_length'' independent masks over the axis. If
+ reasoning from the probability of each feature vector to be chosen as the start of the vector span to be
+ masked, *mask_time_prob* should be `prob_vector_start*mask_time_length`. Note that overlap may decrease the
+ actual percentage of masked vectors. This is only relevant if `apply_spec_augment is True`.
+ mask_time_length (`int`, defaults to 10):
+ length of vector span along the time axis.
+ mask_time_min_masks (`int`, defaults to 2):
+ the minimum number of masks of length `mask_time_length` generated along the time axis, each time step,
+ irrespective of `mask_time_prob`. Only relevant if ''mask_time_prob*len(time_axis)/mask_time_length <
+ mask_time_min_masks''
+ mask_feature_prob (`float`, defaults to 0.0):
+ percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked. The
+ masking procedure generates ''mask_feature_prob*len(feature_axis)/mask_feature_length'' independent masks over
+ the axis.
If reasoning from the probability of each feature vector to be chosen as the start of the vector
+ span to be masked, *mask_feature_prob* should be `prob_vector_start*mask_feature_length`. Note that overlap
+ may decrease the actual percentage of masked vectors. This is only relevant if `apply_spec_augment is
+ True`.
+ mask_feature_length (`int`, defaults to 10):
+ length of vector span along the feature axis.
+ mask_feature_min_masks (`int`, defaults to 0):
+ the minimum number of masks of length `mask_feature_length` generated along the feature axis, each time
+ step, irrespective of `mask_feature_prob`. Only relevant if
+ ''mask_feature_prob*len(feature_axis)/mask_feature_length < mask_feature_min_masks''
+ add_adapter (`bool`, defaults to `False`):
+ whether a convolutional network should be stacked on top of the WavLM Encoder. Can be very useful for
+ warm-starting WavLM for SpeechEncoderDecoder models.
+ adapter_kernel_size (`int`, defaults to 3):
+ kernel size of the convolutional layers in the adapter network. Only relevant if `add_adapter is True`.
+ adapter_stride (`int`, defaults to 2):
+ stride of the convolutional layers in the adapter network. Only relevant if `add_adapter is True`.
+ num_adapter_layers (`int`, defaults to 3):
+ number of convolutional layers that should be used in the adapter network. Only relevant if `add_adapter is
+ True`.
+ output_hidden_size (`int`, defaults to None):
+ dimensionality of the encoder output layer. If not defined, this defaults to *hidden-size*. Only relevant
+ if `add_adapter is True`.
+ cache_dir (str or os.PathLike): path to a directory in which a downloaded pretrained
+ model configuration should be cached if the standard cache should not be used.
+ force_download (`bool`, defaults to `False`): whether or not to force the (re-)download
+ of the model weights and configuration files and override the
+ cached versions if they exist.
+ resume_download (`bool`, defaults to `False`): whether or not to delete incompletely
+ received files. Will attempt to resume the download if such a file exists.
+ revision(`str`, defaults to `"main"`): the specific model version to use.
+ It can be a branch name, a tag name, or a commit id.
+ ignore_pretrained (`bool` defaults to False): if True, it ignores the pretrained_model_path
+ and inits the model from the configuration. This is set to True for models that have already
+ been finetuned.
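+
+ Example (illustrative sketch; the model path and shapes are assumptions):
+
+ >>> model = HFWavLM(pretrained_model_path="microsoft/wavlm-base-plus")
+ >>> x = torch.randn(2, 32000)  # two 2-second waveforms at 16 kHz
+ >>> out = model(x)
+ >>> out.last_hidden_state.shape[-1] == model.hidden_size
+ True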
+ """ + + def __init__( + self, + pretrained_model_path: Optional[Union[str, os.PathLike]] = None, + normalize_input: bool = True, + use_input_attention_mask: bool = False, + vocab_size: int = 32, + hidden_size: int = 768, + num_hidden_layers: int = 12, + num_attention_heads: int = 12, + intermediate_size: int = 3072, + hidden_act: Union[str, Callable] = "gelu", + hidden_dropout: float = 0.1, + activation_dropout: float = 0.1, + attention_dropout: float = 0.1, + layerdrop: float = 0.1, + initializer_range: float = 0.02, + layer_norm_eps: float = 1e-12, + feat_extract_norm: str = "group", + feat_proj_dropout: float = 0.0, + feat_extract_activation: Union[str, Callable] = "gelu", + conv_dim: Tuple[int] = (512, 512, 512, 512, 512, 512, 512), + conv_stride: Tuple[int] = (5, 2, 2, 2, 2, 2, 2), + conv_kernel: Tuple[int] = (10, 3, 3, 3, 3, 3, 3), + conv_bias: bool = False, + num_conv_pos_embeddings: int = 128, + num_conv_pos_embedding_groups: int = 16, + do_stable_layer_norm: bool = False, + apply_spec_augment: bool = True, + mask_time_prob: float = 0.05, + mask_time_length: int = 10, + mask_time_min_masks: int = 2, + mask_feature_prob: float = 0.0, + mask_feature_length: int = 10, + mask_feature_min_masks: int = 0, + add_adapter: bool = False, + adapter_kernel_size: int = 3, + adapter_stride: int = 2, + num_adapter_layers: int = 3, + output_hidden_size: Optional[int] = None, + cache_dir: Union[str, os.PathLike] = "./.cache/hyperion_hf", + force_download: bool = False, + resume_download: bool = False, + revision: str = "main", + drop_layers_gt: Optional[int] = None, + ignore_pretrained: bool = False, + ): + + super().__init__( + pretrained_model_path=pretrained_model_path, + normalize_input=normalize_input, + use_input_attention_mask=use_input_attention_mask, + cache_dir=cache_dir, + force_download=force_download, + resume_download=resume_download, + revision=revision, + drop_layers_gt=drop_layers_gt, + ignore_pretrained=ignore_pretrained, + ) + + if pretrained_model_path is not None and not ignore_pretrained: + logging.info(f"Downloading HF model from {pretrained_model_path}") + rank = ddp_get_rank() + if rank == 0: + # rank 0 downloads the model from HF web + self.hf_model = WavLMModel.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=force_download, + resume_download=resume_download, + revision=revision, + ) + # all ranks wait until the model is downloaded + ddp_wait_for_all_procs() + if rank > 0: + # the rest of ranks should read the configuration from the cache. 
+ self.hf_model = WavLMModel.from_pretrained( + pretrained_model_path, + cache_dir=cache_dir, + force_download=False, + resume_download=False, + revision=revision, + ) + ddp_wait_for_all_procs() + self.hf_model.config.layerdrop = 0.0 + else: + hf_config = WavLMConfig( + vocab_size=vocab_size, + hidden_size=hidden_size, + num_hidden_layers=num_hidden_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + hidden_act=hidden_act, + hidden_dropout=hidden_dropout, + activation_dropout=activation_dropout, + attention_dropout=attention_dropout, + feat_proj_dropout=feat_proj_dropout, + layerdrop=0.0, # layerdrop, + initializer_range=initializer_range, + layer_norm_eps=layer_norm_eps, + feat_extract_norm=feat_extract_norm, + feat_extract_activation=feat_extract_activation, + conv_dim=conv_dim, + conv_stride=conv_stride, + conv_kernel=conv_kernel, + conv_bias=conv_bias, + num_conv_pos_embeddings=num_conv_pos_embeddings, + num_conv_pos_embedding_groups=num_conv_pos_embedding_groups, + do_stable_layer_norm=do_stable_layer_norm, + apply_spec_augment=apply_spec_augment, + mask_time_prob=mask_time_prob, + mask_time_length=mask_time_length, + mask_time_min_masks=mask_time_min_masks, + mask_feature_prob=mask_feature_prob, + mask_feature_length=mask_feature_length, + mask_feature_min_masks=mask_feature_min_masks, + add_adapter=add_adapter, + adapter_kernel_size=adapter_kernel_size, + adapter_stride=adapter_stride, + num_adapter_layers=num_adapter_layers, + output_hidden_size=output_hidden_size, + ) + self.hf_model = WavLMModel(hf_config) + + if drop_layers_gt is not None: + self.drop_upper_layers(drop_layers_gt) + + self.ignore_pretrained = True + + @property + def num_encoder_layers(self): + return self.hf_config.num_hidden_layers + + @property + def hidden_size(self): + return self.hf_config.hidden_size + + def drop_upper_layers(self, max_layers: int): + if max_layers >= self.hf_config.num_hidden_layers: + return + + layers = self.hf_model.encoder.layers + self.hf_model.encoder.layers = nn.ModuleList( + [l for i, l in enumerate(layers) if i < max_layers] + ) + self.hf_config.num_hidden_layers = max_layers + + if self.hf_model.adapter is not None: + del self.hf_model.adapter + self.hf_model.adapter = None + self.hf_config.add_adapter = False + + def get_config(self): + """Returns the configuration arguments for the object in a dictionary.""" + config = self.hf_model.config.to_dict() + config = self.filter_args(**config) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @staticmethod + def filter_args(**kwargs): + args_base = HFWav2VecBase.filter_args(**kwargs) + valid_args = ( + "vocab_size", + "hidden_size", + "num_hidden_layers", + "num_attention_heads", + "intermediate_size", + "hidden_act", + "hidden_dropout", + "activation_dropout", + "attention_dropout", + "feat_proj_dropout", + "layerdrop", + "initializer_range", + "layer_norm_eps", + "feat_extract_norm", + "feat_extract_activation", + "conv_dim", + "conv_stride", + "conv_kernel", + "conv_bias", + "num_conv_pos_embeddings", + "num_conv_pos_embedding_groups", + "do_stable_layer_norm", + "apply_spec_augment", + "mask_time_prob", + "mask_time_length", + "mask_time_min_masks", + "mask_feature_prob", + "mask_feature_length", + "mask_feature_min_masks", + "add_adapter", + "adapter_kernel_size", + "adapter_stride", + "num_adapter_layers", + "output_hidden_size", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args.update(args_base) + return 
args
+
+ @staticmethod
+ def add_class_args(parser, prefix=None, skip=set()):
+ if prefix is not None:
+ outer_parser = parser
+ parser = ArgumentParser(prog="")
+
+ HFWav2VecBase.add_class_args(parser)
+
+ parser.add_argument(
+ "--vocab-size",
+ default=32,
+ type=int,
+ help=(
+ "vocabulary size of the "
+ "model. Defines the different tokens that can be represented by the "
+ "*inputs_ids* passed to the forward method."
+ ),
+ )
+ parser.add_argument(
+ "--hidden-size",
+ default=768,
+ type=int,
+ help=("dimensionality of the encoder layers and the pooler layer."),
+ )
+ parser.add_argument(
+ "--num-hidden-layers",
+ default=12,
+ type=int,
+ help=("number of hidden layers in the Transformer encoder"),
+ )
+ parser.add_argument(
+ "--num-attention-heads",
+ default=12,
+ type=int,
+ help=(
+ "number of attention heads for "
+ "each attention layer in the Transformer encoder"
+ ),
+ )
+ parser.add_argument(
+ "--intermediate-size",
+ default=3072,
+ type=int,
+ help=(
+ "dimensionality of the " "feed-forward layer in the Transformer encoder"
+ ),
+ )
+ parser.add_argument(
+ "--hidden-act",
+ default="gelu",
+ choices=["gelu", "relu", "selu", "gelu_new"],
+ help=(
+ "the non-linear "
+ "activation function (function or string) in the encoder and pooler"
+ ),
+ )
+ parser.add_argument(
+ "--hidden-dropout",
+ default=0.1,
+ type=float,
+ help=(
+ "the dropout probability for all "
+ "fully connected layers in the embeddings, encoder, and pooler"
+ ),
+ )
+ parser.add_argument(
+ "--activation-dropout",
+ default=0.1,
+ type=float,
+ help=(
+ "the dropout probability for all "
+ "intermediate layers in feedforward transformer layers"
+ ),
+ )
+ parser.add_argument(
+ "--attention-dropout",
+ default=0.1,
+ type=float,
+ help=("the dropout ratio for the attention probabilities"),
+ )
+ parser.add_argument(
+ "--layerdrop",
+ default=0.1,
+ type=float,
+ help=("prob. of dropping a layer"),
+ )
+ parser.add_argument(
+ "--initializer-range",
+ default=0.02,
+ type=float,
+ help=(
+ "the standard deviation of the "
+ "truncated_normal_initializer for initializing all weight matrices"
+ ),
+ )
+ parser.add_argument(
+ "--layer-norm-eps",
+ default=1e-12,
+ type=float,
+ help=("the epsilon used by the layer normalization layers"),
+ )
+ parser.add_argument(
+ "--feat-extract-norm",
+ default="group",
+ choices=["group", "layer"],
+ help=(
+ "the norm to be applied to 1D convolutional layers in feature encoder. "
+ "One of `group` for group normalization of only the first 1D convolutional "
+ "layer or `layer` for layer normalization of all 1D convolutional layers"
+ ),
+ )
+ parser.add_argument(
+ "--feat-proj-dropout",
+ default=0.0,
+ type=float,
+ help=("the dropout probability for output of the feature encoder"),
+ )
+ parser.add_argument(
+ "--feat-extract-activation",
+ default="gelu",
+ choices=["gelu", "relu", "selu", "gelu_new"],
+ help=(
+ "the non-linear activation function (function or string) in the 1D "
+ "convolutional layers of the feature extractor"
+ ),
+ )
+ parser.add_argument(
+ "--conv-dim",
+ default=[512, 512, 512, 512, 512, 512, 512],
+ nargs="+",
+ type=int,
+ help=(
+ "a tuple of integers defining the number of input and output channels of each 1D convolutional layer in the "
+ "feature encoder.
The length of *conv_dim* defines the number of 1D convolutional layers" + ), + ) + parser.add_argument( + "--conv-stride", + default=[5, 2, 2, 2, 2, 2, 2], + nargs="+", + type=int, + help=( + "a tuple of integers defining the stride of each 1D convolutional layer in the feature encoder" + ), + ) + parser.add_argument( + "--conv-kernel", + default=[10, 3, 3, 3, 3, 3, 3], + nargs="+", + type=int, + help=( + "a tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder" + ), + ) + parser.add_argument( + "--conv-bias", + default=False, + action=ActionYesNo, + help=("whether the 1D convolutional layers have a bias"), + ) + parser.add_argument( + "--num-conv-pos-embeddings", + default=128, + type=int, + help=( + "number of convolutional positional embeddings. Defines the kernel size of 1D convolutional positional " + "embeddings layer" + ), + ) + parser.add_argument( + "--num-conv-pos-embedding-groups", + default=16, + type=int, + help=("number of groups of 1D convolutional positional embeddings layer"), + ) + parser.add_argument( + "--do-stable-layer-norm", + default=False, + action=ActionYesNo, + help=( + "whether to apply *stable* layer norm architecture of the Transformer encoder" + ), + ) + parser.add_argument( + "--apply-spec-augment", + default=True, + action=ActionYesNo, + help=( + "whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder" + ), + ) + parser.add_argument( + "--mask-time-prob", + default=0.05, + type=float, + help=( + "percentage (between 0 and 1) of all feature vectors along the time axis which will be masked" + ), + ) + parser.add_argument( + "--mask-time-length", + default=10, + type=int, + help=("length of vector span along the time axis"), + ) + parser.add_argument( + "--mask-time-min-masks", + default=2, + type=int, + help=( + "the minimum number of masks of length `mask_time_length` generated along the time axis" + ), + ) + parser.add_argument( + "--mask-feature-prob", + default=0.0, + type=float, + help=( + "percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked" + ), + ) + parser.add_argument( + "--mask-feature-length", + default=10, + type=int, + help=(" length of vector span along the feature axis"), + ) + parser.add_argument( + "--mask-feature-min-masks", + default=0, + type=int, + help=( + "The minimum number of masks of length `mask_feature_length` generated along the feature axis" + ), + ) + parser.add_argument( + "--add-adapter", + default=False, + action=ActionYesNo, + help=( + "whether a convolutional network should be stacked on top of the WavLM Encoder" + ), + ) + parser.add_argument( + "--adapter-kernel-size", + default=3, + type=int, + help=("kernel size of the convolutional layers in the adapter network"), + ) + parser.add_argument( + "--adapter-stride", + default=2, + type=int, + help=("stride of the convolutional layers in the adapter network"), + ) + parser.add_argument( + "--num-adapter-layers", + default=3, + type=int, + help=( + "number of convolutional layers that should be used in the adapter network" + ), + ) + parser.add_argument( + "--output-hidden-size", + default=None, + type=int, + help=( + "dimensionality of the encoder output layer. If not defined, this defaults to *hidden-size*." 
+ " Only relevant if `add_adapter is True" + ), + ) + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 2755bbbe..1821b674 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -99,7 +99,6 @@ def __init__( ): self.model = model - # self.optimizer = optim self.loss = loss self.epochs = epochs self.cur_epoch = cur_epoch @@ -334,7 +333,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_size = data.shape[0] with self.amp_autocast(): - output = self.model(data, **self.amp_args) + output = self.model(data) loss = self.loss(output, target) batch_metrics["loss"] = loss.mean().item() @@ -374,7 +373,7 @@ def _clip_grad_norm(self, model, optim, grad_clip, grad_clip_norm): ) def update_model(self): - + """Updates the model and does gradding clipping.""" if self.use_amp: if self.grad_clip > 0: self.grad_scaler.unscale_(self.optimizer) @@ -393,6 +392,7 @@ def update_model(self): self.optimizer.step() def _make_optimizer(self, optim, model, oss=False): + """Makes an optimizer object.""" if isinstance(optim, torch.optim.Optimizer): return optim @@ -405,6 +405,7 @@ def _make_optimizer(self, optim, model, oss=False): return optimizer def _make_lr_sched(self, lr_sched, optim): + """Makes a Learning Rate scheduler object.""" if lr_sched is None or isinstance(lr_sched, LRS): return lr_sched diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 3e704bd5..a643ca7f 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -12,6 +12,7 @@ from ..utils import MetricAcc from .torch_trainer import TorchTrainer +from torch.distributed.elastic.multiprocessing.errors import record class XVectorTrainer(TorchTrainer): @@ -107,6 +108,7 @@ def __init__( cpu_offload=cpu_offload, ) + @record def train_epoch(self, data_loader): """Training epoch loop @@ -129,18 +131,28 @@ def train_epoch(self, data_loader): batch_size = data.shape[0] with self.amp_autocast(): + # logging.info( + # f"in_model rank={self.rank} batch={batch} x={data} mxx={data.max()} avgx={data.mean()}" + # ) output = self.model(data, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps + # logging.info( + # f"out_model rank={self.rank} batch={batch} y={output} loss={loss.item()}" + # ) if self.use_amp: + # logging.info("in_backward rank=%d batch=%d", self.rank, batch) self.grad_scaler.scale(loss).backward() + # logging.info("out_backward rank=%d batch=%d", self.rank, batch) else: loss.backward() if (batch + 1) % self.grad_acc_steps == 0: if self.lr_scheduler is not None and not self.in_swa: self.lr_scheduler.on_opt_step() + # logging.info("in_update rank=%d batch=%d", self.rank, batch) self.update_model() + # logging.info("out_update rank=%d batch=%d", self.rank, batch) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): diff --git a/hyperion/torch/utils/__init__.py b/hyperion/torch/utils/__init__.py index 22af492c..3a4692dc 100644 --- a/hyperion/torch/utils/__init__.py +++ b/hyperion/torch/utils/__init__.py @@ -5,7 +5,7 @@ from .devices import open_device from .metric_acc import MetricAcc -from .masking import seq_lengths_to_mask, scale_lengths +from .masking import seq_lengths_to_mask, scale_seq_lengths from .collation import collate_seq_1d, collate_seq_2d, collate_seq_nd 
from .eval_utils import eval_nnet_by_chunks, eval_nnet_overlap_add from .vad_utils import remove_silence diff --git a/hyperion/torch/utils/ddp.py b/hyperion/torch/utils/ddp.py index 48a8bcfe..7038cff3 100644 --- a/hyperion/torch/utils/ddp.py +++ b/hyperion/torch/utils/ddp.py @@ -4,7 +4,7 @@ """ import os import logging - +import datetime import torch import torch.nn as nn import torch.distributed as dist @@ -61,7 +61,11 @@ def ddp_init( logging.info( f"init ddp rank={rank} world_size={world_size} master={master_addr}:{master_port}" ) - dist.init_process_group("nccl", rank=rank, world_size=world_size) + dist.init_process_group( + "nccl", + rank=rank, + world_size=world_size, + ) torch.tensor([0]).to(gpu_id) return gpu_id, rank, world_size @@ -73,6 +77,23 @@ def ddp_cleanup(): pass +def ddp_wait_for_all_procs(): + if dist.is_initialized(): + dist.barrier() + + +def ddp_get_rank_world_size(): + if dist.is_initialized(): + return dist.get_rank(), dist.get_world_size() + return 0, 1 + + +def ddp_get_rank(): + if dist.is_initialized(): + return dist.get_rank() + return 0 + + class TorchDDP(nn.parallel.DistributedDataParallel): def __getattr__(self, name): try: diff --git a/hyperion/torch/utils/masking.py b/hyperion/torch/utils/masking.py index b6ccd5ef..1bb5a644 100644 --- a/hyperion/torch/utils/masking.py +++ b/hyperion/torch/utils/masking.py @@ -7,7 +7,7 @@ import torch.nn as nn -def scale_lengths(lengths, max_out_length, max_in_length=None): +def scale_seq_lengths(lengths, max_out_length, max_in_length=None): if lengths is None: return None From e9cb8a36975692c9ecd6c9a11171a4d719a826df Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 9 May 2022 19:23:27 -0400 Subject: [PATCH 010/154] added noam and triangular schedulers, added option to change dropout and specaugment options in wav2vec models --- ...dnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh | 51 +++++++++++++ ...dnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh | 2 +- hyperion/torch/lr_schedulers/__init__.py | 3 + hyperion/torch/lr_schedulers/factory.py | 43 ++++++++++- hyperion/torch/lr_schedulers/invpow_lr.py | 4 +- hyperion/torch/lr_schedulers/noam_lr.py | 44 +++++++++++ hyperion/torch/lr_schedulers/triangular_lr.py | 75 +++++++++++++++++++ hyperion/torch/torch_model.py | 6 ++ hyperion/torch/tpm/hf/hf_hubert.py | 20 +++++ hyperion/torch/tpm/hf/hf_wav2vec2.py | 46 ++++++++++++ hyperion/torch/tpm/hf/hf_wav2vec_base.py | 65 ++++++++++++++-- hyperion/torch/tpm/hf/hf_wavlm.py | 20 +++++ 12 files changed, 368 insertions(+), 11 deletions(-) create mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh create mode 100644 hyperion/torch/lr_schedulers/noam_lr.py create mode 100644 hyperion/torch/lr_schedulers/triangular_lr.py diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh new file mode 100644 index 00000000..b40ff3d1 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh @@ -0,0 +1,51 @@ +# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 + +# hugging face model +hf_model_name=wav2vec2base + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size +dropout=0 +embed_dim=256 +lr=0.05 +s=30 +margin_warmup=20 +margin=0.3 
+nnet_num_epochs=70
+
+
+lr=0.001
+#lr=0.005
+xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml
+xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 20000 --trainer.lrsched.hold-steps 20000 --trainer.lrsched.min-lr 1e-6 --trainer.epochs 75"
+
+nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v3 #v1
+
+nnet_dir=exp/xvector_nnets/$nnet_name
+nnet=$nnet_dir/model_ep0060.pth
+nnet=$nnet_dir/swa_model_ep0076.pth
+nnet=$nnet_dir/model_ep0060.pth
+
+# back-end
+plda_aug_config=conf/reverb_noise_aug.yaml
+plda_num_augs=6
+if [ $plda_num_augs -eq 0 ]; then
+ plda_data=voxceleb2cat_train
+else
+ plda_data=voxceleb2cat_train_augx${plda_num_augs}
+fi
+plda_type=splda
+lda_dim=200
+plda_y_dim=150
+plda_z_dim=200
+
diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh
index a021d5a1..24bc799a 100644
--- a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh
+++ b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh
@@ -33,7 +33,7 @@ nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${d
 nnet_dir=exp/xvector_nnets/$nnet_name
 nnet=$nnet_dir/model_ep0070.pth
-nnet=$nnet_dir/model_ep0060.pth
+nnet=$nnet_dir/swa_model_ep0064.pth
 # back-end
diff --git a/hyperion/torch/lr_schedulers/__init__.py b/hyperion/torch/lr_schedulers/__init__.py
index f0a3465e..be77dc15 100644
--- a/hyperion/torch/lr_schedulers/__init__.py
+++ b/hyperion/torch/lr_schedulers/__init__.py
@@ -8,4 +8,7 @@
 from .red_lr_on_plateau import ReduceLROnPlateau
 from .exp_lr import ExponentialLR
 from .cos_lr import CosineLR, AdamCosineLR
+from .invpow_lr import InvPowLR
+from .noam_lr import NoamLR
+from .triangular_lr import TriangularLR
 from .factory import LRSchedulerFactory
diff --git a/hyperion/torch/lr_schedulers/factory.py b/hyperion/torch/lr_schedulers/factory.py
index 9e185a7c..10b47ab2 100644
--- a/hyperion/torch/lr_schedulers/factory.py
+++ b/hyperion/torch/lr_schedulers/factory.py
@@ -10,6 +10,8 @@
 from .exp_lr import ExponentialLR
 from .invpow_lr import InvPowLR
 from .cos_lr import CosineLR, AdamCosineLR
+from .noam_lr import NoamLR
+from .triangular_lr import TriangularLR
 class LRSchedulerFactory(object):
@@ -34,6 +36,8 @@ def create(
 eps=1e-8,
 min_lr=0,
 warmup_steps=0,
+ d_model=None,
+ lr_factor=1,
 update_lr_on_opt_step=False,
 ):
@@ -61,6 +65,15 @@
 update_lr_on_opt_step=update_lr_on_opt_step,
 )
+ if lrsch_type == "noam_lr":
+ return NoamLR(
+ optimizer,
+ d_model,
+ lr_factor,
+ min_lr=min_lr,
+ warmup_steps=warmup_steps,
+ )
+
 if lrsch_type == "cos_lr":
 return CosineLR(
 optimizer,
@@ -73,6 +86,16 @@
 update_lr_on_opt_step=update_lr_on_opt_step,
 )
+ if lrsch_type == "triangular_lr":
+ return TriangularLR(
+ optimizer,
+ t,
+ t_mul,
+ min_lr=min_lr,
+ gamma=gamma,
+ update_lr_on_opt_step=update_lr_on_opt_step,
+ )
+
 if lrsch_type == "adamcos_lr":
 return AdamCosineLR(
 optimizer,
@@ -122,6 +145,8 @@ def filter_args(**kwargs):
 "eps",
 "min_lr",
 "warmup_steps",
+ "lr_factor",
+ "d_model",
 "update_lr_on_opt_step",
 )
@@ -144,6 +169,8 @@ def add_class_args(parser, prefix=None):
 "cos_lr",
 "adamcos_lr",
 "red_lr_on_plateau",
+ "noam_lr",
+ "triangular_lr",
 ],
 help=(
 "Learning rate schedulers: None,
Exponential," @@ -173,13 +200,13 @@ def add_class_args(parser, prefix=None): "--t-mul", default=1, type=int, - help=("Period multiplicator for each restart in cos lr"), + help=("Period multiplicator for each restart in cos/triangular lr"), ) parser.add_argument( "--gamma", default=1 / 100, type=float, - help=("LR decay rate for each restart in cos lr"), + help=("LR decay rate for each restart in cos/triangular lr"), ) parser.add_argument( @@ -248,6 +275,18 @@ def add_class_args(parser, prefix=None): help=("Number of batches to warmup lr"), ) + parser.add_argument( + "--d-model", + default=None, + type=int, + help=("Transformer model hidden dimension"), + ) + parser.add_argument( + "--lr-factor", + default=1, + type=float, + help=("learning rate scaling factor for Noam schedule"), + ) parser.add_argument( "--update-lr-on-opt-step", default=False, diff --git a/hyperion/torch/lr_schedulers/invpow_lr.py b/hyperion/torch/lr_schedulers/invpow_lr.py index 53aa28dc..7590a64c 100644 --- a/hyperion/torch/lr_schedulers/invpow_lr.py +++ b/hyperion/torch/lr_schedulers/invpow_lr.py @@ -10,7 +10,7 @@ class InvPowLR(LRScheduler): - """inverse power learning rate scheduler.""" + """inverse power decay learning rate scheduler.""" def __init__( self, @@ -23,7 +23,7 @@ def __init__( step=0, update_lr_on_opt_step=False, ): - super(InvPowLR, self).__init__( + super().__init__( optimizer, min_lr, warmup_steps, epoch, step, update_lr_on_opt_step ) self.power = power diff --git a/hyperion/torch/lr_schedulers/noam_lr.py b/hyperion/torch/lr_schedulers/noam_lr.py new file mode 100644 index 00000000..edce0605 --- /dev/null +++ b/hyperion/torch/lr_schedulers/noam_lr.py @@ -0,0 +1,44 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import math +from turtle import up +import torch + +from .invpow_lr import InvPowLR + + +class NoamLR(InvPowLR): + """Optimizer used for Transformers in + Attention is all You Need: https://arxiv.org/pdf/1706.03762.pdf + + This is Inverse Power Law decay scheduler with parameters that depend on + the transformer hidden dimension. + + Attributes: + + """ + + def __init__( + self, + optimizer, + d_model, + lr_factor=1, + min_lr=0, + warmup_steps=0, + epoch=0, + step=0, + ): + lr = lr_factor / math.sqrt(d_model * warmup_steps) + print("noam_lr", lr, flush=True) + for group in optimizer.param_groups: + group["lr"] = lr + super().__init__( + optimizer, + min_lr=min_lr, + warmup_steps=warmup_steps, + epoch=epoch, + step=step, + update_lr_on_opt_step=True, + ) diff --git a/hyperion/torch/lr_schedulers/triangular_lr.py b/hyperion/torch/lr_schedulers/triangular_lr.py new file mode 100644 index 00000000..add8a13c --- /dev/null +++ b/hyperion/torch/lr_schedulers/triangular_lr.py @@ -0,0 +1,75 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + + +import math +import logging + +import torch + +from .lr_scheduler import LRScheduler + + +class TriangularLR(LRScheduler): + r"""Sets cyclid triangular learning rate schedule as proposed in + .. Cyclical Learning Rates for Training Neural Networks: + https://arxiv.org/abs/1506.01186 + + .. 
math::
+ \mathrm{cycle} = \mathrm{floor}(1 + \frac{T_{cur}}{T_{max}})
+ x = \mathrm{abs}(2\frac{T_{cur}}{T_{max}}-2\mathrm{cycle}+1)
+ \eta_t = \eta_{min} + (\eta_{max} - \eta_{min})\max(0, 1-x)
+
+ """
+
+ def __init__(
+ self,
+ optimizer,
+ T,
+ T_mul=1,
+ min_lr=0,
+ gamma=1,
+ last_restart=0,
+ num_restarts=0,
+ epoch=0,
+ step=0,
+ update_lr_on_opt_step=False,
+ ):
+
+ super().__init__(optimizer, min_lr, 0, epoch, step, update_lr_on_opt_step)
+ self.T = T
+ self.T_mul = T_mul
+ self.last_restart = last_restart
+ self.num_restarts = num_restarts
+ self.gamma = gamma
+
+ def on_epoch_begin(self, epoch=None, epoch_updates=1, **kwargs):
+ super().on_epoch_begin(epoch)
+ if self.update_lr_on_opt_step:
+ # T has to correspond to an integer number of epochs
+ T = int(math.ceil(self.T / epoch_updates) * epoch_updates)
+ if self.T != T:
+ logging.info("readjusting triangular_lr T %d -> %d" % (self.T, T))
+ self.T = T
+
+ def get_lr(self, step):
+ x = step - self.last_restart
+
+ if x >= self.T:
+ self.last_restart = step
+ x = 0
+ self.T *= self.T_mul
+ self.num_restarts += 1
+ logging.info(
+ "triangular_lr warm-restart=%d T=%d" % (self.num_restarts, self.T)
+ )
+
+ alpha = self.gamma ** self.num_restarts
+ x = abs(2 * x / self.T - 1)
+
+ return [
+ eta_min + (alpha * eta_max - eta_min) * max(0, 1 - x)
+ for eta_max, eta_min in zip(self.base_lrs, self.min_lrs)
+ ]
diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py
index dc5de737..2e4deac6 100644
--- a/hyperion/torch/torch_model.py
+++ b/hyperion/torch/torch_model.py
@@ -8,8 +8,14 @@
 import torch
 import torch.nn as nn
+torch_model_registry = {}
+
 class TorchModel(nn.Module):
+ def __init_subclass__(cls, **kwargs):
+ super().__init_subclass__(**kwargs)
+ torch_model_registry[cls.__name__] = cls
+
 def get_config(self):
 config = {"class_name": self.__class__.__name__}
diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py
index 889aed03..34f40cc8 100644
--- a/hyperion/torch/tpm/hf/hf_hubert.py
+++ b/hyperion/torch/tpm/hf/hf_hubert.py
@@ -127,6 +127,10 @@ class HFHubert(HFWav2VecBase):
 ignore_pretrained (`bool` defaults to False): if True, it ignores the pretrained_model_path
 and inits the model from the configuration. This is set to True for models that have already
 been finetuned.
+ override_dropouts (`bool` defaults to False): if True, it ignores the dropout probs. in the pretrained model
+ and uses the ones passed as arguments.
+ override_spec_augment (`bool` defaults to False): if True, it ignores the spec. augment.
+ configuration in the pretrained model and uses the ones passed in the arguments.
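+
+ Example (illustrative sketch; the model path and values are assumptions):
+
+ >>> model = HFHubert(pretrained_model_path="facebook/hubert-base-ls960",
+ ... override_dropouts=True, hidden_dropout=0.0)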
""" def __init__( @@ -169,6 +173,8 @@ def __init__( revision: str = "main", drop_layers_gt: Optional[int] = None, ignore_pretrained: bool = False, + override_dropouts: bool = False, + override_spec_augment: bool = False, ): super().__init__( @@ -181,6 +187,8 @@ def __init__( revision=revision, drop_layers_gt=drop_layers_gt, ignore_pretrained=ignore_pretrained, + override_dropouts=override_dropouts, + override_spec_augment=override_spec_augment, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -208,6 +216,18 @@ def __init__( ) ddp_wait_for_all_procs() self.hf_model.config.layerdrop = 0.0 + self.change_hyperparams( + hidden_dropout=hidden_dropout, + activation_dropout=activation_dropout, + attention_dropout=attention_dropout, + feat_proj_dropout=feat_proj_dropout, + mask_time_prob=mask_time_prob, + mask_time_length=mask_time_length, + mask_time_min_masks=mask_time_min_masks, + mask_feature_prob=mask_feature_prob, + mask_feature_length=mask_feature_length, + mask_feature_min_masks=mask_feature_min_masks, + ) else: hf_config = HubertConfig( vocab_size=vocab_size, diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index 63a7cf99..0b0302eb 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -140,6 +140,10 @@ class HFWav2Vec2(HFWav2VecBase): ignore_pretrained (`bool` defaults to False): if True, it ignores the pretrained_model_path and inits the model from the configuration. This is set to True for models that have already been finetuned. + override_dropouts (`bool` defaults to False): if True, it ingnores the dropout probs. in the pretrained model + and uses the ones passed as arguments. + override_spec_augment (`bool` defaults to False): if True, it ingnores the spec. augment. + configuration in the pretrained model and uses the ones passed in the arguments. 
""" def __init__( @@ -187,6 +191,8 @@ def __init__( revision: str = "main", drop_layers_gt: Optional[int] = None, ignore_pretrained: bool = False, + override_dropouts: bool = False, + override_spec_augment: bool = False, ): super().__init__( @@ -199,6 +205,8 @@ def __init__( revision=revision, drop_layers_gt=drop_layers_gt, ignore_pretrained=ignore_pretrained, + override_dropouts=override_dropouts, + override_spec_augment=override_spec_augment, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -226,6 +234,18 @@ def __init__( ) ddp_wait_for_all_procs() self.hf_model.config.layerdrop = 0.0 + self.change_hyperparams( + hidden_dropout=hidden_dropout, + activation_dropout=activation_dropout, + attention_dropout=attention_dropout, + feat_proj_dropout=feat_proj_dropout, + mask_time_prob=mask_time_prob, + mask_time_length=mask_time_length, + mask_time_min_masks=mask_time_min_masks, + mask_feature_prob=mask_feature_prob, + mask_feature_length=mask_feature_length, + mask_feature_min_masks=mask_feature_min_masks, + ) else: hf_config = Wav2Vec2Config( vocab_size=vocab_size, @@ -278,6 +298,32 @@ def num_encoder_layers(self): def hidden_size(self): return self.hf_config.hidden_size + def change_dropouts( + self, + hidden_dropout: float = 0.1, + activation_dropout: float = 0.1, + attention_dropout: float = 0.1, + feat_proj_dropout: float = 0.0, + **kwargs, + ): + import transformers.models.wav2vec2.modeling_wav2vec2 as t + + self.hf_model.config.hidden_dropout = hidden_dropout + self.hf_model.config.activation_dropout = activation_dropout + self.hf_model.config.attention_dropout = attention_dropout + self.hf_model.config.feat_proj_dropout = feat_proj_dropout + + self.hf_model.feature_projection.dropout.p = feat_proj_dropout + for module in self.hf_model.encoder.modules(): + if isinstance(module, nn.Dropout): + t.p = hidden_dropout + + for module in self.hf_model.encoder.modules(): + if isinstance(module, t.Wav2Vec2Attention): + module.dropout = activation_dropout + if isinstance(module, t.Wav2Vec2FeatureProjection): + module.intermediate_dropout.p = activation_dropout + def drop_upper_layers(self, max_layers: int): if max_layers >= self.hf_config.num_hidden_layers: return diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index 0b862d62..9f5c353b 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -45,6 +45,10 @@ class HFWav2VecBase(TorchModel): ignore_pretrained (`bool` defaults to False): if True, it ignores the pretrained_model_path and inits the model from the configuration. This is set to True for models that have already been finetuned. + override_dropouts (`bool` defaults to False): if True, it ingnores the dropout probs. in the pretrained model + and uses the ones passed as arguments. + override_spec_augment (`bool` defaults to False): if True, it ingnores the spec. augment. + configuration in the pretrained model and uses the ones passed in the arguments. 
""" def __init__( @@ -58,6 +62,8 @@ def __init__( revision: str = "main", drop_layers_gt: Optional[int] = None, ignore_pretrained: bool = False, + override_dropouts: bool = False, + override_spec_augment: bool = False, ): super().__init__() self.pretrained_model_path = pretrained_model_path @@ -67,6 +73,8 @@ def __init__( self.revision = revision self.drop_layers_gt = drop_layers_gt self.ignore_pretrained = ignore_pretrained + self.override_dropouts = override_dropouts + self.override_spec_augment = override_spec_augment if pretrained_model_path is not None and not ignore_pretrained: logging.info( @@ -139,14 +147,37 @@ def __deepcopy__(self, memo): new_obj.load_state_dict(self.state_dict()) device = next(self.parameters()).device new_obj.to(device) - print( - "deepcopy", - next(self.parameters()).device, - next(new_obj.parameters()).device, - flush=True, - ) return new_obj + def change_hyperparams(self, **kwargs): + if self.override_spec_augment: + self.change_spec_augment(**kwargs) + + if self.override_dropouts: + self.change_dropouts(**kwargs) + + def change_spec_augment( + self, + apply_spec_augment: bool = True, + mask_time_prob: float = 0.05, + mask_time_length: int = 10, + mask_time_min_masks: int = 2, + mask_feature_prob: float = 0.0, + mask_feature_length: int = 10, + mask_feature_min_masks: int = 0, + **kwargs, + ): + self.hf_model.config.apply_spec_augment = apply_spec_augment + self.hf_model.config.mask_time_prob = mask_time_prob + self.hf_model.config.mask_time_length = mask_time_length + self.hf_model.config.mask_time_min_masks = mask_time_min_masks + self.hf_model.config.mask_feature_prob = mask_feature_prob + self.hf_model.config.mask_feature_length = mask_feature_length + self.hf_model.config.mask_feature_min_masks = mask_feature_min_masks + + def change_dropouts(self, **kwargs): + pass # needs to be overloaded + @property def hf_config(self): return self.hf_model.config @@ -238,6 +269,8 @@ def get_config(self): "revision": self.revision, "drop_layers_gt": self.drop_layers_gt, "ignore_pretrained": self.ignore_pretrained, + "override_dropouts": self.override_dropouts, + "override_spec_augment": self.override_spec_augment, } base_config = super().get_config() @@ -260,6 +293,8 @@ def filter_args(**kwargs): "revision", "drop_layers_gt", "ignore_pretrained", + "override_dropouts", + "override_spec_augment", ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) return args @@ -327,5 +362,23 @@ def add_class_args(parser, prefix=None, skip=set()): type=int, help=("drop encoder layers greater than this value."), ) + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model." + ), + ) + parser.add_argument( + "--override-spec-augment", + default=False, + action=ActionYesNo, + help=( + "whether to use the spec augment config. passed in the " + "arguments instead of the defaults in the pretrained model." + ), + ) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py index 1e8a5e8d..229c1871 100644 --- a/hyperion/torch/tpm/hf/hf_wavlm.py +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -140,6 +140,10 @@ class HFWavLM(HFWav2VecBase): ignore_pretrained (`bool` defaults to False): if True, it ignores the pretrained_model_path and inits the model from the configuration. 
This is set to True for models that have already been finetuned.
+ override_dropouts (`bool` defaults to False): if True, it ignores the dropout probs. in the pretrained model
+ and uses the ones passed as arguments.
+ override_spec_augment (`bool` defaults to False): if True, it ignores the spec. augment.
+ configuration in the pretrained model and uses the ones passed in the arguments.
 """
 def __init__(
@@ -187,6 +191,8 @@
 revision: str = "main",
 drop_layers_gt: Optional[int] = None,
 ignore_pretrained: bool = False,
+ override_dropouts: bool = False,
+ override_spec_augment: bool = False,
 ):
 super().__init__(
@@ -199,6 +205,8 @@
 revision=revision,
 drop_layers_gt=drop_layers_gt,
 ignore_pretrained=ignore_pretrained,
+ override_dropouts=override_dropouts,
+ override_spec_augment=override_spec_augment,
 )
 if pretrained_model_path is not None and not ignore_pretrained:
@@ -226,6 +234,18 @@
 )
 ddp_wait_for_all_procs()
 self.hf_model.config.layerdrop = 0.0
+ self.change_hyperparams(
+ hidden_dropout=hidden_dropout,
+ activation_dropout=activation_dropout,
+ attention_dropout=attention_dropout,
+ feat_proj_dropout=feat_proj_dropout,
+ mask_time_prob=mask_time_prob,
+ mask_time_length=mask_time_length,
+ mask_time_min_masks=mask_time_min_masks,
+ mask_feature_prob=mask_feature_prob,
+ mask_feature_length=mask_feature_length,
+ mask_feature_min_masks=mask_feature_min_masks,
+ )
 else:
 hf_config = WavLMConfig(
 vocab_size=vocab_size,
From 5c5ef7c0421ba0603d1dfa366e998fc5cb3613dd Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Mon, 16 May 2022 12:27:44 -0400
Subject: [PATCH 011/154] changed way that training modes work in TorchModels
---
 .../conf/wav2vec2base_do1_ecapatdnn512x2.yaml | 42 ++++++
 .../wav2vec2base_specaug1_ecapatdnn512x2.yaml | 39 ++++++
 .../wav2vec2base_specaug2_ecapatdnn512x2.yaml | 39 ++++++
 .../wav2vec2base_specaug3_ecapatdnn512x2.yaml | 39 ++++++
 .../wav2vec2base_specaug4_ecapatdnn512x2.yaml | 39 ++++++
 .../wav2vec2base_specaug5_ecapatdnn512x2.yaml | 40 ++++++
 ...nn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh | 56 ++++++++
 ...nn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh} | 16 ++-
 hyperion/torch/torch_model.py | 127 ++++++++++++++++--
 hyperion/torch/trainers/torch_trainer.py | 13 +-
 hyperion/torch/trainers/xvector_trainer.py | 10 --
 hyperion/utils/ext_segment_list.py | 10 +-
 12 files changed, 436 insertions(+), 34 deletions(-)
 create mode 100644 egs/voxceleb/v2/conf/wav2vec2base_do1_ecapatdnn512x2.yaml
 create mode 100644 egs/voxceleb/v2/conf/wav2vec2base_specaug1_ecapatdnn512x2.yaml
 create mode 100644 egs/voxceleb/v2/conf/wav2vec2base_specaug2_ecapatdnn512x2.yaml
 create mode 100644 egs/voxceleb/v2/conf/wav2vec2base_specaug3_ecapatdnn512x2.yaml
 create mode 100644 egs/voxceleb/v2/conf/wav2vec2base_specaug4_ecapatdnn512x2.yaml
 create mode 100644 egs/voxceleb/v2/conf/wav2vec2base_specaug5_ecapatdnn512x2.yaml
 create mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh
 rename egs/voxceleb/v2/global_conf/{config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh => config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh} (65%)
diff --git a/egs/voxceleb/v2/conf/wav2vec2base_do1_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wav2vec2base_do1_ecapatdnn512x2.yaml
new file mode 100644
index 00000000..f616073c
--- /dev/null
+++ b/egs/voxceleb/v2/conf/wav2vec2base_do1_ecapatdnn512x2.yaml
@@ -0,0 +1,42 @@
+hf_feats:
+ pretrained_model_path: facebook/wav2vec2-base
+ override_dropouts:
+  activation_dropout: 0.1
+  attention_dropout: 0.2
+  hidden_dropout: 0.2
+  feat_proj_dropout: 0.2
+xvector:
+  resnet_enc:
+    in_feats: 80
+    in_conv_channels: 512
+    in_kernel_size: 5
+    in_stride: 1
+    resb_type: seres2bn
+    resb_repeats:
+    - 1
+    - 1
+    resb_channels:
+    - 512
+    resb_kernel_sizes:
+    - 3
+    resb_dilations:
+    - 2
+    - 3
+    resb_strides:
+    - 1
+    res2net_width_factor: 1
+    res2net_scale: 8
+    se_r: 4
+    multilayer: true
+    multilayer_concat: true
+    endpoint_channels: 1536
+  pool_net:
+    pool_type: ch-wise-att-mean+stddev
+    inner_feats: 128
+  embed_dim: 256
+  cos_scale: 30.0
+  margin: 0.3
+  margin_warmup_epochs: 20.0
+  dropout_rate: 0.0
+feat_fusion_start: 2
+feat_fusion_method: weighted-avg
diff --git a/egs/voxceleb/v2/conf/wav2vec2base_specaug1_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wav2vec2base_specaug1_ecapatdnn512x2.yaml
new file mode 100644
index 00000000..921f21a6
--- /dev/null
+++ b/egs/voxceleb/v2/conf/wav2vec2base_specaug1_ecapatdnn512x2.yaml
@@ -0,0 +1,39 @@
+hf_feats:
+  pretrained_model_path: facebook/wav2vec2-base
+  override_spec_augment: true
+  mask_time_prob: 0.5
+xvector:
+  resnet_enc:
+    in_feats: 80
+    in_conv_channels: 512
+    in_kernel_size: 5
+    in_stride: 1
+    resb_type: seres2bn
+    resb_repeats:
+    - 1
+    - 1
+    resb_channels:
+    - 512
+    resb_kernel_sizes:
+    - 3
+    resb_dilations:
+    - 2
+    - 3
+    resb_strides:
+    - 1
+    res2net_width_factor: 1
+    res2net_scale: 8
+    se_r: 4
+    multilayer: true
+    multilayer_concat: true
+    endpoint_channels: 1536
+  pool_net:
+    pool_type: ch-wise-att-mean+stddev
+    inner_feats: 128
+  embed_dim: 256
+  cos_scale: 30.0
+  margin: 0.3
+  margin_warmup_epochs: 20.0
+  dropout_rate: 0.0
+feat_fusion_start: 2
+feat_fusion_method: weighted-avg
diff --git a/egs/voxceleb/v2/conf/wav2vec2base_specaug2_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wav2vec2base_specaug2_ecapatdnn512x2.yaml
new file mode 100644
index 00000000..410fd521
--- /dev/null
+++ b/egs/voxceleb/v2/conf/wav2vec2base_specaug2_ecapatdnn512x2.yaml
@@ -0,0 +1,39 @@
+hf_feats:
+  pretrained_model_path: facebook/wav2vec2-base
+  override_spec_augment: true
+  mask_time_prob: 0.25
+xvector:
+  resnet_enc:
+    in_feats: 80
+    in_conv_channels: 512
+    in_kernel_size: 5
+    in_stride: 1
+    resb_type: seres2bn
+    resb_repeats:
+    - 1
+    - 1
+    resb_channels:
+    - 512
+    resb_kernel_sizes:
+    - 3
+    resb_dilations:
+    - 2
+    - 3
+    resb_strides:
+    - 1
+    res2net_width_factor: 1
+    res2net_scale: 8
+    se_r: 4
+    multilayer: true
+    multilayer_concat: true
+    endpoint_channels: 1536
+  pool_net:
+    pool_type: ch-wise-att-mean+stddev
+    inner_feats: 128
+  embed_dim: 256
+  cos_scale: 30.0
+  margin: 0.3
+  margin_warmup_epochs: 20.0
+  dropout_rate: 0.0
+feat_fusion_start: 2
+feat_fusion_method: weighted-avg
diff --git a/egs/voxceleb/v2/conf/wav2vec2base_specaug3_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wav2vec2base_specaug3_ecapatdnn512x2.yaml
new file mode 100644
index 00000000..96c70f98
--- /dev/null
+++ b/egs/voxceleb/v2/conf/wav2vec2base_specaug3_ecapatdnn512x2.yaml
@@ -0,0 +1,39 @@
+hf_feats:
+  pretrained_model_path: facebook/wav2vec2-base
+  override_spec_augment: true
+  mask_time_prob: 0.125
+xvector:
+  resnet_enc:
+    in_feats: 80
+    in_conv_channels: 512
+    in_kernel_size: 5
+    in_stride: 1
+    resb_type: seres2bn
+    resb_repeats:
+    - 1
+    - 1
+    resb_channels:
+    - 512
+    resb_kernel_sizes:
+    - 3
+    resb_dilations:
+    - 2
+    - 3
+    resb_strides:
+    - 1
+    res2net_width_factor: 1
+    res2net_scale: 8
+    se_r: 4
+    multilayer: true
+    multilayer_concat: true
+    endpoint_channels: 1536
+  pool_net:
+    pool_type: ch-wise-att-mean+stddev
+    inner_feats: 128
+  embed_dim: 256
+  cos_scale: 30.0
+  margin: 0.3
+  margin_warmup_epochs: 20.0
+  dropout_rate: 0.0
+feat_fusion_start: 2
+feat_fusion_method: weighted-avg
diff --git a/egs/voxceleb/v2/conf/wav2vec2base_specaug4_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wav2vec2base_specaug4_ecapatdnn512x2.yaml
new file mode 100644
index 00000000..bb4613da
--- /dev/null
+++ b/egs/voxceleb/v2/conf/wav2vec2base_specaug4_ecapatdnn512x2.yaml
@@ -0,0 +1,39 @@
+hf_feats:
+  pretrained_model_path: facebook/wav2vec2-base
+  override_spec_augment: true
+  mask_time_prob: 0.0625
+xvector:
+  resnet_enc:
+    in_feats: 80
+    in_conv_channels: 512
+    in_kernel_size: 5
+    in_stride: 1
+    resb_type: seres2bn
+    resb_repeats:
+    - 1
+    - 1
+    resb_channels:
+    - 512
+    resb_kernel_sizes:
+    - 3
+    resb_dilations:
+    - 2
+    - 3
+    resb_strides:
+    - 1
+    res2net_width_factor: 1
+    res2net_scale: 8
+    se_r: 4
+    multilayer: true
+    multilayer_concat: true
+    endpoint_channels: 1536
+  pool_net:
+    pool_type: ch-wise-att-mean+stddev
+    inner_feats: 128
+  embed_dim: 256
+  cos_scale: 30.0
+  margin: 0.3
+  margin_warmup_epochs: 20.0
+  dropout_rate: 0.0
+feat_fusion_start: 2
+feat_fusion_method: weighted-avg
diff --git a/egs/voxceleb/v2/conf/wav2vec2base_specaug5_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wav2vec2base_specaug5_ecapatdnn512x2.yaml
new file mode 100644
index 00000000..bf67ce48
--- /dev/null
+++ b/egs/voxceleb/v2/conf/wav2vec2base_specaug5_ecapatdnn512x2.yaml
@@ -0,0 +1,40 @@
+hf_feats:
+  pretrained_model_path: facebook/wav2vec2-base
+  override_spec_augment: true
+  mask_time_prob: 0.150
+  mask_feature_prob: 0.150
+xvector:
+  resnet_enc:
+    in_feats: 80
+    in_conv_channels: 512
+    in_kernel_size: 5
+    in_stride: 1
+    resb_type: seres2bn
+    resb_repeats:
+    - 1
+    - 1
+    resb_channels:
+    - 512
+    resb_kernel_sizes:
+    - 3
+    resb_dilations:
+    - 2
+    - 3
+    resb_strides:
+    - 1
+    res2net_width_factor: 1
+    res2net_scale: 8
+    se_r: 4
+    multilayer: true
+    multilayer_concat: true
+    endpoint_channels: 1536
+  pool_net:
+    pool_type: ch-wise-att-mean+stddev
+    inner_feats: 128
+  embed_dim: 256
+  cos_scale: 30.0
+  margin: 0.3
+  margin_warmup_epochs: 20.0
+  dropout_rate: 0.0
+feat_fusion_start: 2
+feat_fusion_method: weighted-avg
diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh
new file mode 100644
index 00000000..8e4e4d93
--- /dev/null
+++ b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh
@@ -0,0 +1,56 @@
+# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2
+
+# hugging face model
+hf_model_name=wav2vec2base
+
+#vad
+vad_config=conf/vad_16k.yaml
+
+# x-vector training
+nnet_data=voxceleb2cat_train
+
+# x-vector cfg
+
+nnet_type=hf_wav2vec2resnet1d
+
+batch_size_1gpu=32
+eff_batch_size=512 # effective batch size
+dropout=0
+embed_dim=256
+lr=0.05
+s=30
+margin_warmup=20
+margin=0.3
+nnet_num_epochs=70
+
+
+lr=0.001
+#lr=0.005
+xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml
+xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 20000 --trainer.lrsched.hold-steps 20000 --trainer.lrsched.min-lr 1e-6 --trainer.epochs 75 --model conf/wav2vec2base_specaug5_ecapatdnn512x2.yaml --data.train.dataset.max-chunk-length 2 --data.train.dataset.min-chunk-length 2"
+
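+# Note: the dotted --section.key flags in xvec_train_args are jsonargparse-style
+# overrides (an assumption based on the ArgumentParser used by the hyperion
+# training scripts): they take precedence over the matching keys in
+# $xvec_train_base_cfg, so the base YAML keeps the defaults and this string
+# carries only the per-experiment changes.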
+nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v12 #v1
+
+nnet_dir=exp/xvector_nnets/$nnet_name
+nnet=$nnet_dir/model_ep0060.pth
+nnet=$nnet_dir/swa_model_ep0076.pth
+nnet=$nnet_dir/model_ep0060.pth
+nnet=$nnet_dir/model_ep0030.pth
+nnet=$nnet_dir/model_ep0040.pth
+nnet=$nnet_dir/model_ep0030.pth
+nnet=$nnet_dir/model_ep0020.pth
+
+
+# back-end
+plda_aug_config=conf/reverb_noise_aug.yaml
+plda_num_augs=6
+if [ $plda_num_augs -eq 0 ]; then
+    plda_data=voxceleb2cat_train
+else
+    plda_data=voxceleb2cat_train_augx${plda_num_augs}
+fi
+plda_type=splda
+lda_dim=200
+plda_y_dim=150
+plda_z_dim=200
+
diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh
similarity index 65%
rename from egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh
rename to egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh
index 24bc799a..1509e46e 100644
--- a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh
+++ b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh
@@ -23,18 +23,20 @@ margin_warmup=20
 margin=0.3
 nnet_num_epochs=70

-lr=0.002
+
 lr=0.001
-lr=0.005
+#lr=0.005
 xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml
-xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 10000 --trainer.lrsched.hold-steps 20000"
+xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 20000 --trainer.lrsched.hold-steps 20000 --trainer.lrsched.min-lr 1e-6 --trainer.epochs 75 --data.train.dataset.max-chunk-length 2 --data.train.dataset.min-chunk-length 2"

-nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v2 #v1
+nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v4 #v1

 nnet_dir=exp/xvector_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0070.pth
-nnet=$nnet_dir/swa_model_ep0064.pth
-
+nnet=$nnet_dir/model_ep0060.pth
+nnet=$nnet_dir/swa_model_ep0076.pth
+nnet=$nnet_dir/model_ep0060.pth
+nnet=$nnet_dir/model_ep0030.pth
+nnet=$nnet_dir/model_ep0020.pth

 # back-end
 plda_aug_config=conf/reverb_noise_aug.yaml
diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py
index 2e4deac6..a9cffa33 100644
--- a/hyperion/torch/torch_model.py
+++ b/hyperion/torch/torch_model.py
@@ -3,7 +3,9 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 import os
+from collections import OrderedDict as ODict
 from copy import deepcopy
+from enum import Enum

 import torch
 import torch.nn as nn
@@ -16,23 +18,29 @@ def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
         torch_model_registry[cls.__name__] = cls

+    def __init__(self):
+        super().__init__()
+        self._train_mode = "full"
+
     def get_config(self):
         config = {"class_name": self.__class__.__name__}
-
         return config

     def copy(self):
         return deepcopy(self)

-    def save(self, file_path):
-        file_dir = os.path.dirname(file_path)
-        if not (os.path.isdir(file_dir)):
-            os.makedirs(file_dir, exist_ok=True)
+    def clone(self):
+        return deepcopy(self)

-        config = self.get_config()
-        torch.save(
-            {"model_cfg": self.get_config(), "model_state_dict": self.state_dict()}
-        )
+    def trainable_parameters(self, recurse: bool = True):
+        for param in self.parameters(recurse=recurse):
+            if param.requires_grad:
+                yield param
+
+    def non_trainable_parameters(self, recurse: bool = True):
+        for param in self.parameters(recurse=recurse):
+            if not param.requires_grad:
+                yield param

     def freeze(self):
         for param in self.parameters():
@@ -42,6 +50,47 @@ def unfreeze(self):
         for param in self.parameters():
             param.requires_grad = True

+    @property
+    def train_mode(self):
+        return self._train_mode
+
+    @train_mode.setter
+    def train_mode(self, mode):
+        self.set_train_mode(mode)
+
+    def set_train_mode(self, mode):
+        if mode == self._train_mode:
+            return
+
+        if mode == "full":
+            self.unfreeze()
+        elif mode == "frozen":
+            self.freeze()
+
+        self._train_mode = mode
+
+    def train(self, mode=None):
+        train_mode = self.train_mode if mode is None else mode
+        if train_mode == "full":
+            super().train()
+        elif train_mode == "frozen":
+            super().eval()
+        else:
+            raise ValueError(f"invalid train_mode={train_mode}")
+
+    def valid_train_modes(self):
+        return ["full", "frozen"]
+
+    def save(self, file_path):
+        file_dir = os.path.dirname(file_path)
+        if not (os.path.isdir(file_dir)):
+            os.makedirs(file_dir, exist_ok=True)
+
+        config = self.get_config()
+        torch.save(
+            {"model_cfg": config, "model_state_dict": self.state_dict()},
+            file_path,
+        )
+
     @staticmethod
     def _load_cfg_state_dict(file_path=None, cfg=None, state_dict=None):
         model_data = None
@@ -86,3 +135,63 @@ def device(self):
         )
         return next(iter(devices))
+
+    @staticmethod
+    def _fix_cfg_compatibility(class_obj, cfg):
+        """Function that fixes compatibility issues with deprecated models
+
+        Args:
+          class_obj: class type of the model.
+          cfg: configuration dictionary that inits the model.
+
+        Returns:
+          Fixed configuration dictionary.
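+
+        A sketch of the fix applied to old x-vector configs
+        (hypothetical values):
+
+            cfg = {"s": 30.0}
+            cfg = TorchModel._fix_cfg_compatibility(XVector, cfg)
+            # cfg == {"cos_scale": 30.0}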
+        """
+        # for compatibility with older x-vector models
+        XVector = torch_model_registry["xvector"]
+        if issubclass(class_obj, XVector):
+            # We renamed the AM-softmax scale parameter s to cos_scale
+            if "s" in cfg:
+                cfg["cos_scale"] = cfg["s"]
+                del cfg["s"]
+
+        return cfg
+
+    @staticmethod
+    def auto_load(file_path, extra_objs={}, map_location=None):
+
+        if map_location is None:
+            map_location = torch.device("cpu")
+
+        model_data = torch.load(file_path, map_location=map_location)
+        cfg = model_data["model_cfg"]
+        class_name = cfg["class_name"]
+        del cfg["class_name"]
+        if class_name in torch_model_registry:
+            class_obj = torch_model_registry[class_name]
+        elif class_name in extra_objs:
+            class_obj = extra_objs[class_name]
+        else:
+            raise Exception("unknown object with class_name=%s" % (class_name))
+
+        state_dict = model_data["model_state_dict"]
+
+        if "n_averaged" in state_dict:
+            del state_dict["n_averaged"]
+
+        cfg = TorchModel._fix_cfg_compatibility(class_obj, cfg)
+
+        import re
+
+        p = re.compile(r"^module\.")
+        num_tries = 3
+        for tries in range(num_tries):
+            try:
+                return class_obj.load(cfg=cfg, state_dict=state_dict)
+            except RuntimeError as err:
+                if tries == num_tries - 1:
+                    # if it failed all the tries, raise the exception
+                    raise err
+                # remove the "module." prefix added when the model was trained
+                # with DataParallel
+                state_dict = ODict((p.sub("", k), v) for k, v in state_dict.items())
diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py
index 1821b674..1c8c89e3 100644
--- a/hyperion/torch/trainers/torch_trainer.py
+++ b/hyperion/torch/trainers/torch_trainer.py
@@ -54,7 +54,7 @@ class TorchTrainer(object):
       loggers: LoggerList object, loggers write training progress to std. output and file.
       ddp: if True use distributed data parallel training
       ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp)
-      train_mode: training mode in ['train', 'ft-full', 'ft-last-layer']
+      train_mode: training mode in ['full', 'frozen']
       use_amp: uses mixed precision training.
       log_interval: number of optim. steps between log outputs
      use_tensorboard: use tensorboard logger
@@ -84,7 +84,7 @@ def __init__(
         self,
         loggers=None,
         ddp=False,
         ddp_type="ddp",
-        train_mode="train",
+        train_mode="full",
         use_amp=False,
         log_interval=10,
         use_tensorboard=False,
@@ -625,7 +625,7 @@ def filter_args(**kwargs):
         return args

     @staticmethod
-    def add_class_args(parser, prefix=None, skip=[]):
+    def add_class_args(parser, prefix=None, train_modes=None, skip=[]):
         if prefix is not None:
             outer_parser = parser
             parser = ArgumentParser(prog="")
@@ -649,6 +649,13 @@ def add_class_args(parser, prefix=None, skip=[]):
             help="effective total batch size, if given, it overrides grad_acc_steps",
         )
         parser.add_argument("--epochs", type=int, default=200, help="number of epochs")
+        if train_modes is not None:
+            parser.add_argument(
+                "--train-mode",
+                default="full",
+                choices=train_modes,
+                help=f"Available train modes for the model in {train_modes}",
+            )
         parser.add_argument(
             "--log-interval",
             type=int,
diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py
index a643ca7f..4fbbc895 100644
--- a/hyperion/torch/trainers/xvector_trainer.py
+++ b/hyperion/torch/trainers/xvector_trainer.py
@@ -131,28 +131,18 @@ def train_epoch(self, data_loader):
             batch_size = data.shape[0]

             with self.amp_autocast():
-                # logging.info(
-                #     f"in_model rank={self.rank} batch={batch} x={data} mxx={data.max()} avgx={data.mean()}"
-                # )
                 output = self.model(data, y=target)
                 loss = self.loss(output, target).mean() / self.grad_acc_steps
-                # logging.info(
-                #     f"out_model rank={self.rank} batch={batch} y={output} loss={loss.item()}"
-                # )

             if self.use_amp:
-                # logging.info("in_backward rank=%d batch=%d", self.rank, batch)
                 self.grad_scaler.scale(loss).backward()
-                # logging.info("out_backward rank=%d batch=%d", self.rank, batch)
             else:
                 loss.backward()

             if (batch + 1) % self.grad_acc_steps == 0:
                 if self.lr_scheduler is not None and not self.in_swa:
                     self.lr_scheduler.on_opt_step()
-                # logging.info("in_update rank=%d batch=%d", self.rank, batch)
                 self.update_model()
-                # logging.info("out_update rank=%d batch=%d", self.rank, batch)

             batch_metrics["loss"] = loss.item() * self.grad_acc_steps
             for k, metric in self.metrics.items():
diff --git a/hyperion/utils/ext_segment_list.py b/hyperion/utils/ext_segment_list.py
index 38a4a1b4..9c7d81d3 100644
--- a/hyperion/utils/ext_segment_list.py
+++ b/hyperion/utils/ext_segment_list.py
@@ -78,11 +78,11 @@ def create(
         )

         if series_id is None:
-            u_file_id = self.segments["file_id"].unique()
+            u_file_id = segments.segments["file_id"].unique()
             files = pd.DataFrame({"file_id": u_file_id, "series_id": u_file_id})
         else:
-            file_id = [f for f in v for k, v in series_id.items()]
-            series_id = [k for f in v for k, v in series_id.items()]
+            file_id = [f for k, v in series_id.items() for f in v]
+            series_id = [k for k, v in series_id.items() for f in v]
             files = pd.DataFrame({"file_id": file_id, "series_id": series_id})

         if isinstance(name, str):
@@ -128,8 +128,8 @@ def create_from_segment_list(
             u_file_id = segments["file_id"].unique()
             files = pd.DataFrame({"file_id": u_file_id, "series_id": u_file_id})
         else:
-            file_id = [f for f in v for k, v in series_id.items()]
-            series_id = [k for f in v for k, v in series_id.items()]
+            file_id = [f for k, v in series_id.items() for f in v]
+            series_id = [k for k, v in series_id.items() for f in v]
             files = pd.DataFrame({"file_id": file_id, "series_id": series_id})

         return cls(segments, ext_segments, files, index_column)

From d7599f9ed8f8b6ed881362897d8651736c9ce86a Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Mon, 16 May 2022 18:43:51 -0400
Subject: [PATCH 012/154] fixed bugs introduced in TorchModel.train()

---
 ...nn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh |  1 -
 hyperion/torch/models/xvectors/xvector.py     | 47 +++++++++++++++----
 hyperion/torch/torch_model.py                 | 15 ++++--
 hyperion/torch/trainers/ae_trainer.py         |  4 +-
 hyperion/torch/trainers/dvae_trainer.py       |  4 +-
 hyperion/torch/trainers/plda_trainer.py       |  4 +-
 hyperion/torch/trainers/torch_trainer.py      | 10 ++--
 hyperion/torch/trainers/vae_trainer.py        |  4 +-
 hyperion/torch/trainers/vq_dvae_trainer.py    |  4 +-
 hyperion/torch/trainers/vq_vae_trainer.py     |  4 +-
 .../torch/trainers/xvector_adv_trainer.py     | 14 ++----
 .../trainers/xvector_adv_trainer_from_wav.py  |  8 ++--
 hyperion/torch/trainers/xvector_trainer.py    |  2 +-
 .../trainers/xvector_trainer_deep_feat_reg.py |  2 +-
 .../xvector_trainer_deep_feat_reg_from_wav.py |  4 +-
 .../trainers/xvector_trainer_from_wav.py      |  5 +-
 16 files changed, 78 insertions(+), 54 deletions(-)

diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh
index 8e4e4d93..942fb336 100644
--- a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh
+++ b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh
@@ -37,7 +37,6 @@ nnet=$nnet_dir/swa_model_ep0076.pth
 nnet=$nnet_dir/model_ep0060.pth
 nnet=$nnet_dir/model_ep0030.pth
 nnet=$nnet_dir/model_ep0040.pth
-nnet=$nnet_dir/model_ep0030.pth
 nnet=$nnet_dir/model_ep0020.pth


diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py
index e07487d7..008f595c 100644
--- a/hyperion/torch/models/xvectors/xvector.py
+++ b/hyperion/torch/models/xvectors/xvector.py
@@ -3,7 +3,9 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 import logging
+from enum import Enum
 from jsonargparse import ArgumentParser, ActionParser
+from typing import Optional

 import torch
 import torch.nn as nn
@@ -15,6 +17,12 @@
 from ...utils import eval_nnet_by_chunks, scale_seq_lengths


+class XVectorTrainMode(Enum):
+    full = 0
+    frozen = 1
+    ft_embed_affine = 2
+
+
 class XVector(TorchModel):
     """x-Vector base class"""

@@ -543,19 +551,38 @@ def freeze_preembed_layers(self):
         layer_list = [l for l in range(self.embed_layer)]
         self.classif_net.freeze_layers(layer_list)

-    def train_mode(self, mode="ft-embed-affine"):
-        if mode == "ft-full" or mode == "train":
-            self.train()
+    def set_train_mode(self, mode):
+        if mode == self._train_mode:
             return

-        self.encoder_net.eval()
-        if self.proj is not None:
-            self.proj.eval()
+        if mode == "full":
+            self.unfreeze()
+        elif mode == "frozen":
+            self.freeze()
+        elif mode == "ft-embed-affine":
+            self.freeze_preembed_layers()
+        else:
+            raise ValueError(f"invalid train_mode={mode}")
+
+        self._train_mode = mode
+
+    def _train(self, train_mode: str):
+        if train_mode in ["full", "frozen"]:
+            super()._train(train_mode)
+        elif train_mode == "ft-embed-affine":
+            self.encoder_net.eval()
+            if self.proj is not None:
+                self.proj.eval()
+
+            self.pool_net.eval()
+            self.classif_net.train()
+            layer_list = [l for l in range(self.embed_layer)]
+            self.classif_net.put_layers_in_eval_mode(layer_list)
+        else:
+            raise ValueError(f"invalid train_mode={train_mode}")

-        self.pool_net.eval()
-        self.classif_net.train()
-        layer_list = [l for l in range(self.embed_layer)]
-        self.classif_net.put_layers_in_eval_mode(layer_list)
+    def valid_train_modes(self):
+        return ["full", "frozen", "ft-embed-affine"]

     @staticmethod
     def filter_args(**kwargs):
diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py
index a9cffa33..fdd17701 100644
--- a/hyperion/torch/torch_model.py
+++ b/hyperion/torch/torch_model.py
@@ -6,6 +6,7 @@
 from collections import OrderedDict as ODict
 from copy import deepcopy
 from enum import Enum
+from typing import Optional

 import torch
 import torch.nn as nn
@@ -69,15 +70,21 @@ def set_train_mode(self, mode):

         self._train_mode = mode

-    def train(self, mode=None):
-        train_mode = self.train_mode if mode is None else mode
+    def _train(self, train_mode: str):
         if train_mode == "full":
-            super().train()
+            super().train(True)
         elif train_mode == "frozen":
-            super().eval()
+            super().train(False)
         else:
             raise ValueError(f"invalid train_mode={train_mode}")

+    def train(self, mode: bool = True):
+        if not mode:
+            super().train(False)
+            return
+
+        self._train(self.train_mode)
+
     def valid_train_modes(self):
         return ["full", "frozen"]

diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py
index 4bd6790a..c1debdb6 100644
--- a/hyperion/torch/trainers/ae_trainer.py
+++ b/hyperion/torch/trainers/ae_trainer.py
@@ -117,7 +117,7 @@ def train_epoch(self, data_loader):
         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, data in enumerate(data_loader):
             if isinstance(data, (tuple, list)):
@@ -167,7 +167,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py
index 5649cc01..02c4fb6e 100644
--- a/hyperion/torch/trainers/dvae_trainer.py
+++ b/hyperion/torch/trainers/dvae_trainer.py
@@ -113,7 +113,7 @@ def train_epoch(self, data_loader):
         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, data in enumerate(data_loader):

@@ -174,7 +174,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
diff --git a/hyperion/torch/trainers/plda_trainer.py b/hyperion/torch/trainers/plda_trainer.py
index dd797996..ea5e57af 100644
--- a/hyperion/torch/trainers/plda_trainer.py
+++ b/hyperion/torch/trainers/plda_trainer.py
@@ -131,7 +131,7 @@ def train_epoch(self, data_loader):
         metric_acc = MetricAcc()
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, (data, target) in enumerate(data_loader):
             self.loggers.on_batch_begin(batch)
@@ -205,7 +205,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = ""
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py
index 1c8c89e3..4cf90d1a 100644
--- a/hyperion/torch/trainers/torch_trainer.py
+++ b/hyperion/torch/trainers/torch_trainer.py
@@ -214,6 +214,7 @@ def fit(self, train_data, val_data=None):
         """
         self.exp_path.mkdir(parents=True, exist_ok=True)
         self._compute_grad_acc_steps(train_data)
+        self.set_train_mode()

         if self.do_swa and self.cur_epoch >= self.swa_start:
            self.in_swa = True
@@ -260,10 +261,7 @@ def fit(self, train_data, val_data=None):
             self.save_swa_model(logs)

     def set_train_mode(self):
-        if self.train_mode == "train":
-            self.model.train()
-        else:
-            self.model.train_mode(self.train_mode)
+        self.model.train_mode = self.train_mode

     def train_epoch(self, data_loader):
         """Training epoch loop
@@ -273,7 +271,7 @@ def train_epoch(self, data_loader):
         """
         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()
         for batch, (data, target) in enumerate(data_loader):
             self.loggers.on_batch_begin(batch)
             if batch % self.grad_acc_steps == 0:
@@ -323,7 +321,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py
index bc72bbe2..ae193209 100644
--- a/hyperion/torch/trainers/vae_trainer.py
+++ b/hyperion/torch/trainers/vae_trainer.py
@@ -108,7 +108,7 @@ def train_epoch(self, data_loader):
         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, data in enumerate(data_loader):

@@ -162,7 +162,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py
index ac87ba5a..840d70d6 100644
--- a/hyperion/torch/trainers/vq_dvae_trainer.py
+++ b/hyperion/torch/trainers/vq_dvae_trainer.py
@@ -107,7 +107,7 @@ def train_epoch(self, data_loader):
         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, data in enumerate(data_loader):

@@ -166,7 +166,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = "train"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py
index 1b13bac1..cb09ba00 100644
--- a/hyperion/torch/trainers/vq_vae_trainer.py
+++ b/hyperion/torch/trainers/vq_vae_trainer.py
@@ -107,7 +107,7 @@ def train_epoch(self, data_loader):
         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, data in enumerate(data_loader):

@@ -166,7 +166,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py
index 7dee1303..91c75823 100644
--- a/hyperion/torch/trainers/xvector_adv_trainer.py
+++ b/hyperion/torch/trainers/xvector_adv_trainer.py
@@ -128,19 +128,13 @@ def __init__(
                 % (p_attack, 1.0 / self.grad_acc_steps)
             )

-        # if data_parallel:
-        #     # change model in attack by the data parallel version
-        #     self.attack.model = self.model
-        #     # make loss function in attack data parallel
-        #     self.attack.make_data_parallel()
-
     def train_epoch(self, data_loader):

         self.model.update_loss_margin(self.cur_epoch)

         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, (data, target) in enumerate(data_loader):
             self.loggers.on_batch_begin(batch)
@@ -157,7 +151,7 @@ def train_epoch(self, data_loader):
                 max_delta = torch.max(torch.abs(data_adv - data)).item()
                 logging.info("adv attack max perturbation=%f" % (max_delta))
             data = data_adv
-            self.set_train_mode()
+            self.model.train()

             self.optimizer.zero_grad()

@@ -196,7 +190,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):

             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
@@ -210,7 +204,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
                 self.model.eval()
                 data = self.attack.generate(data, target)
                 if swa_update_bn:
-                    self.set_train_mode()
+                    self.model.train()

             with torch.no_grad():
                 with self.amp_autocast():
diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py
index 0719f350..2797e678 100644
--- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py
+++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py
@@ -137,7 +137,7 @@ def train_epoch(self, data_loader):

         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, (data, target) in enumerate(data_loader):
             self.loggers.on_batch_begin(batch)
@@ -156,7 +156,7 @@ def train_epoch(self, data_loader):
                 # logging.info('zz {} {}'.format(data[z], data_adv[z]))
                 # logging.info('adv attack max perturbation=%f' % (max_delta))
             data = data_adv
-            self.set_train_mode()
+            self.model.train()

             self.optimizer.zero_grad()

@@ -198,7 +198,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):

             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
@@ -212,7 +212,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
                 self.model.eval()
                 data = self.attack.generate(data, target)
                 if swa_update_bn:
-                    self.set_train_mode()
+                    self.model.train()

             with torch.no_grad():
                 feats = self.feat_extractor(data)
diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py
index 4fbbc895..8c39a345 100644
--- a/hyperion/torch/trainers/xvector_trainer.py
+++ b/hyperion/torch/trainers/xvector_trainer.py
@@ -120,7 +120,7 @@ def train_epoch(self, data_loader):

         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()
         for batch, (data, target) in enumerate(data_loader):
             self.loggers.on_batch_begin(batch)
diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py
index e6014750..58235961 100644
--- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py
+++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py
@@ -141,7 +141,7 @@ def train_epoch(self, data_loader):

         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()
         for batch, (data, target) in enumerate(data_loader):
             self.loggers.on_batch_begin(batch)
diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py
index dafeb0c5..da9d064a 100644
--- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py
+++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py
@@ -138,7 +138,7 @@ def train_epoch(self, data_loader):

         metric_acc = MetricAcc(device=self.device)
        batch_metrics = ODict()
-        self.set_train_mode()
+        self.model.train()

         for batch, (data, target) in enumerate(data_loader):
             self.loggers.on_batch_begin(batch)
@@ -239,7 +239,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()
diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py
index a8f9da99..6445ae3e 100644
--- a/hyperion/torch/trainers/xvector_trainer_from_wav.py
+++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py
@@ -125,8 +125,7 @@ def train_epoch(self, data_loader):
         metric_acc = MetricAcc(device=self.device)
         batch_metrics = ODict()
         self.feat_extractor.train()
-        self.set_train_mode()
-
+        self.model.train()
         for batch, (data, target) in enumerate(data_loader):
             self.loggers.on_batch_begin(batch)
             if batch % self.grad_acc_steps == 0:
@@ -177,7 +176,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False):
         with torch.no_grad():
             if swa_update_bn:
                 log_tag = "train_"
-                self.set_train_mode()
+                self.model.train()
             else:
                 log_tag = "val_"
                 self.model.eval()

From 8a7a6fdafbc565baa86decf1f210fa3d06a571a8 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Fri, 20 May 2022 16:20:43 -0400
Subject: [PATCH 013/154] added training modes to xvector and wav2vec2xvector
 models

---
 egs/voxceleb/v2/conf/train_data_default.yaml  |  4 +-
 hyperion/torch/lr_schedulers/cos_lr.py        | 52 +++++++++--
 hyperion/torch/lr_schedulers/exp_lr.py        | 18 +++-
 hyperion/torch/lr_schedulers/factory.py       | 47 ++++++++++
 hyperion/torch/lr_schedulers/invpow_lr.py     | 15 +++-
 hyperion/torch/lr_schedulers/lr_scheduler.py  | 13 ++-
 hyperion/torch/lr_schedulers/noam_lr.py       | 24 ++++-
 .../torch/lr_schedulers/red_lr_on_plateau.py  |  4 +-
 hyperion/torch/lr_schedulers/triangular_lr.py | 17 ++++
 .../models/wav2xvectors/hf_wav2xvector.py     | 90 ++++++++++++++++++-
 hyperion/torch/torch_model.py                 |  4 +-
 hyperion/torch/tpm/hf/hf_hubert.py            |  2 +-
 hyperion/torch/tpm/hf/hf_wav2vec2.py          |  2 +-
 hyperion/torch/tpm/hf/hf_wav2vec_base.py      |  9 +-
 hyperion/torch/tpm/hf/hf_wavlm.py             |  2 +-
 hyperion/torch/trainers/torch_trainer.py      | 12 ++-
 .../trainers/xvector_trainer_from_wav.py      |  3 +-
 17 files changed, 285 insertions(+), 33 deletions(-)

diff --git a/egs/voxceleb/v2/conf/train_data_default.yaml b/egs/voxceleb/v2/conf/train_data_default.yaml
index 72c77204..d41c1507 100644
--- a/egs/voxceleb/v2/conf/train_data_default.yaml
+++ b/egs/voxceleb/v2/conf/train_data_default.yaml
@@ -1,6 +1,6 @@
 dataset:
-  max_chunk_length: 4.0
-  min_chunk_length: 4.0
+  max_chunk_length: 3.0
+  min_chunk_length: 3.0
   aug_cfg: conf/reverb_noise_aug.yaml
   wav_scale: 1
 sampler:
diff --git a/hyperion/torch/lr_schedulers/cos_lr.py b/hyperion/torch/lr_schedulers/cos_lr.py
index 83b9206f..5caf12bb 100644
--- a/hyperion/torch/lr_schedulers/cos_lr.py
+++ b/hyperion/torch/lr_schedulers/cos_lr.py
@@ -25,16 +25,27 @@ class CosineLR(LRScheduler):
     When epoch=-1, sets initial lr as lr.

     It has been proposed in
     `SGDR: Stochastic Gradient Descent with Warm Restarts`_.

-    Args:
-        optimizer (Optimizer): Wrapped optimizer.
-        T_max (int): Maximum number of iterations.
-        eta_min (float): Minimum learning rate. Default: 0.
-        epoch (int): The index of last epoch. Default: -1.
-
     .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
         https://arxiv.org/abs/1608.03983
+
+    Attributes:
+      optimizer: Pytorch optimizer object.
+      T: period of the cycle.
+      T_mul: period multiplier, after each cycle the period is multiplied by T_mul.
+      hold_steps: number of steps until the lr starts decaying.
+      min_lr: minimum learning rate.
+      warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
+      warm_restarts: whether or not to do warm restarts.
+      gamma: after each period, the maximum lr is multiplied by gamma.
+      last_restart: the step at which the last restart happened, this is used
+        to restart the training from a checkpoint.
+      num_restarts: how many restarts we have done, this is used to restart the
+        training from a checkpoint.
+      epoch: initial training epoch, this is needed to restart the model
+        training.
+      step: initial training step, this is needed to restart the model training.
+      update_lr_on_opt_step: if True, updates the lr each time we update the model,
+        otherwise after each epoch.
     """

     def __init__(
@@ -53,7 +64,7 @@ def __init__(
         update_lr_on_opt_step=False,
     ):

-        super(CosineLR, self).__init__(
+        super().__init__(
             optimizer, min_lr, warmup_steps, epoch, step, update_lr_on_opt_step
         )
         self.T = T
@@ -108,6 +119,29 @@ def get_lr(self, step):


 class AdamCosineLR(CosineLR):
+    r"""Set the learning rate of each parameter group using a cosine annealing
+    schedule when using adam optimizer

+    Attributes:
+      optimizer: Pytorch optimizer object.
+      T: period of the cycle.
+      T_mul: period multiplier, after each cycle the period is multiplied by T_mul.
+      hold_steps: number of steps until the lr starts decaying.
+      min_lr: minimum learning rate.
+      warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
+      warm_restarts: whether or not to do warm restarts.
+      gamma: after each period, the maximum lr is multiplied by gamma.
+      last_restart: the step at which the last restart happened, this is used
+        to restart the training from a checkpoint.
+      num_restarts: how many restarts we have done, this is used to restart the
+        training from a checkpoint.
+      epoch: initial training epoch, this is needed to restart the model
+        training.
+      step: initial training step, this is needed to restart the model training.
+      update_lr_on_opt_step: if True, updates the lr each time we update the model,
+        otherwise after each epoch.
+    """
+
     def __init__(
         self,
         optimizer,
diff --git a/hyperion/torch/lr_schedulers/exp_lr.py b/hyperion/torch/lr_schedulers/exp_lr.py
index cbe00a01..66edf436 100644
--- a/hyperion/torch/lr_schedulers/exp_lr.py
+++ b/hyperion/torch/lr_schedulers/exp_lr.py
@@ -10,7 +10,21 @@


 class ExponentialLR(LRScheduler):
-    """Exponential learning rate scheduler."""
+    """Exponential learning rate scheduler.
+
+    Attributes:
+      optimizer: Pytorch optimizer object.
+      decay_rate: the lr is multiplied by `decay_rate` after `decay_steps`.
+      decay_steps: number of decay steps.
+      hold_steps: number of steps until the lr starts decaying.
+      min_lr: minimum learning rate.
+      warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
+      epoch: initial training epoch, this is needed to restart the model
+        training.
+      step: initial training step, this is needed to restart the model training.
+      update_lr_on_opt_step: if True, updates the lr each time we update the model,
+        otherwise after each epoch.
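+
+    A usage sketch, with values borrowed from the trainer configs in this
+    repo; `on_opt_step` is the hook the trainers call after each optimizer
+    update:
+
+        scheduler = ExponentialLR(optimizer, decay_rate=0.5, decay_steps=8000,
+                                  hold_steps=40000, warmup_steps=1000,
+                                  update_lr_on_opt_step=True)
+        scheduler.on_opt_step()  # warm up, hold, then halve the lr every 8k steps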
+    """

     def __init__(
         self,
         optimizer,
         decay_rate,
         decay_steps,
         hold_steps=0,
         min_lr=0,
         warmup_steps=0,
         epoch=0,
         step=0,
         update_lr_on_opt_step=False,
     ):
-        super(ExponentialLR, self).__init__(
+        super().__init__(
             optimizer, min_lr, warmup_steps, epoch, step, update_lr_on_opt_step
         )
         self.decay_rate = decay_rate
diff --git a/hyperion/torch/lr_schedulers/factory.py b/hyperion/torch/lr_schedulers/factory.py
index 10b47ab2..1a542bf2 100644
--- a/hyperion/torch/lr_schedulers/factory.py
+++ b/hyperion/torch/lr_schedulers/factory.py
@@ -40,6 +40,53 @@ def create(
         lr_factor=1,
         update_lr_on_opt_step=False,
     ):
+        """Creates a learning rate scheduler object.
+
+        Args:
+          optimizer: Pytorch optimizer object.
+          lrsch_type: type of scheduler in ["none", "exp_lr", "invpow_lr",
+            "cos_lr", "adamcos_lr", "red_lr_on_plateau", "noam_lr",
+            "triangular_lr"].
+          decay_rate: the lr is multiplied by `decay_rate` after `decay_steps`.
+          decay_steps: number of decay steps.
+          power: the step/epoch number is elevated to this power to compute the decay.
+          hold_steps: number of steps until the lr starts decaying.
+          t: period of the cycle.
+          t_mul: period multiplier, after each cycle the period is multiplied by T_mul.
+          warm_restarts: whether or not to do warm restarts.
+          gamma: after each period, the maximum lr is multiplied by gamma, in cyclic schedulers.
+          monitor: which metric to monitor in RedLROnPlateau scheduler.
+          mode (str): One of `min`, `max`. In `min` mode, lr will
+            be reduced when the quantity monitored has stopped
+            decreasing; in `max` mode it will be reduced when the
+            quantity monitored has stopped increasing. Default: 'min'.
+          factor (float): Factor by which the learning rate will be
+            reduced. new_lr = lr * factor. Default: 0.1.
+          patience (int): Number of epochs with no improvement after
+            which learning rate will be reduced. For example, if
+            `patience = 2`, then we will ignore the first 2 epochs
+            with no improvement, and will only decrease the LR after the
+            3rd epoch if the loss still hasn't improved then.
+          threshold (float): Threshold for measuring the new optimum,
+            to only focus on significant changes. Default: 1e-4.
+          threshold_mode (str): One of `rel`, `abs`. In `rel` mode,
+            dynamic_threshold = best * ( 1 + threshold ) in 'max'
+            mode or best * ( 1 - threshold ) in `min` mode.
+            In `abs` mode, dynamic_threshold = best + threshold in
+            `max` mode or best - threshold in `min` mode. Default: 'rel'.
+          cooldown (int): Number of epochs to wait before resuming
+            normal operation after lr has been reduced. Default: 0.
+          eps (float): Minimal decay applied to lr. If the difference
+            between new and old lr is smaller than eps, the update is
+            ignored. Default: 1e-8.
+          min_lr: minimum learning rate.
+          warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
+          d_model: hidden dimension of transformer model.
+          lr_factor: multiplies the Noam lr by this number.
+          update_lr_on_opt_step: if True, updates the lr each time we update the model,
+            otherwise after each epoch.
+        """
         if lrsch_type == "none":
             return None
diff --git a/hyperion/torch/lr_schedulers/invpow_lr.py b/hyperion/torch/lr_schedulers/invpow_lr.py
index 7590a64c..db420a0f 100644
--- a/hyperion/torch/lr_schedulers/invpow_lr.py
+++ b/hyperion/torch/lr_schedulers/invpow_lr.py
@@ -10,7 +10,20 @@


 class InvPowLR(LRScheduler):
-    """inverse power decay learning rate scheduler."""
+    """Inverse power decay learning rate scheduler.
+
+    Attributes:
+      optimizer: Pytorch optimizer object.
+      power: the step/epoch number is elevated to this power to compute the decay.
+      hold_steps: number of steps until the lr starts decaying.
+      min_lr: minimum learning rate.
+      warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
+      epoch: initial training epoch, this is needed to restart the model
+        training.
+      step: initial training step, this is needed to restart the model training.
+      update_lr_on_opt_step: if True, updates the lr each time we update the model,
+        otherwise after each epoch.
+    """

     def __init__(
         self,
diff --git a/hyperion/torch/lr_schedulers/lr_scheduler.py b/hyperion/torch/lr_schedulers/lr_scheduler.py
index 2ad1740e..5cbb3ff1 100644
--- a/hyperion/torch/lr_schedulers/lr_scheduler.py
+++ b/hyperion/torch/lr_schedulers/lr_scheduler.py
@@ -9,7 +9,18 @@


 class LRScheduler(object):
-    """Base class for learning rate schedulers"""
+    """Base class for learning rate schedulers.
+
+    Attributes:
+      optimizer: Pytorch optimizer object.
+      min_lr: minimum learning rate.
+      warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
+      epoch: initial training epoch, this is needed to restart the model
+        training.
+      step: initial training step, this is needed to restart the model training.
+      update_lr_on_opt_step: if True, updates the lr each time we update the model,
+        otherwise after each epoch.
+    """

     def __init__(
         self,
diff --git a/hyperion/torch/lr_schedulers/noam_lr.py b/hyperion/torch/lr_schedulers/noam_lr.py
index edce0605..4acdc3b9 100644
--- a/hyperion/torch/lr_schedulers/noam_lr.py
+++ b/hyperion/torch/lr_schedulers/noam_lr.py
@@ -3,8 +3,9 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 import math
-from turtle import up
-import torch
+import logging
+
+# import torch

 from .invpow_lr import InvPowLR

@@ -17,6 +18,14 @@ class NoamLR(InvPowLR):
     the transformer hidden dimension.

     Attributes:
+      optimizer: Pytorch optimizer object.
+      d_model: hidden dimension of transformer model.
+      lr_factor: multiplies the Noam lr by this number.
+      min_lr: minimum learning rate.
+      warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
+      epoch: initial training epoch, this is needed to restart the model
+        training.
+      step: initial training step, this is needed to restart the model training.
     """

     def __init__(
@@ -31,9 +40,16 @@ def __init__(
         step=0,
     ):
         lr = lr_factor / math.sqrt(d_model * warmup_steps)
-        print("noam_lr", lr, flush=True)
+        logging.info("Noam lr=%f", lr)
+        # we scale the lr taking into account the relative
+        # learning rates in the param_groups
+        # in order to be able to have different lr for
+        # different modules of the model
+        max_lr = 0
+        for group in optimizer.param_groups:
+            max_lr = max(group["lr"], max_lr)
         for group in optimizer.param_groups:
-            group["lr"] = lr
+            group["lr"] = lr * group["lr"] / max_lr
         super().__init__(
             optimizer,
             min_lr=min_lr,
diff --git a/hyperion/torch/lr_schedulers/red_lr_on_plateau.py b/hyperion/torch/lr_schedulers/red_lr_on_plateau.py
index 8d9eb4bf..7a2e82f8 100644
--- a/hyperion/torch/lr_schedulers/red_lr_on_plateau.py
+++ b/hyperion/torch/lr_schedulers/red_lr_on_plateau.py
@@ -21,6 +21,7 @@ class ReduceLROnPlateau(LRScheduler):

     Attributes:
       optimizer (Optimizer): optimizer.
+      monitor: which metric to monitor.
       mode (str): One of `min`, `max`. In `min` mode, lr will
         be reduced when the quantity monitored has stopped
         decreasing; in `max` mode it will be reduced when the
@@ -45,6 +46,7 @@ class ReduceLROnPlateau(LRScheduler):
       min_lr (float or list): A scalar or a list of scalars.
        A lower bound on the learning rate of all param groups
        or each group respectively. Default: 0.
+      warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
       eps (float): Minimal decay applied to lr. If the difference
         between new and old lr is smaller than eps, the update is
         ignored. Default: 1e-8.
@@ -64,7 +66,7 @@ def __init__(
         warmup_steps=0,
         eps=1e-8,
     ):
-        super(ReduceLROnPlateau, self).__init__(
+        super().__init__(
             optimizer,
             min_lr,
             warmup_steps,
diff --git a/hyperion/torch/lr_schedulers/triangular_lr.py b/hyperion/torch/lr_schedulers/triangular_lr.py
index add8a13c..c2b66c42 100644
--- a/hyperion/torch/lr_schedulers/triangular_lr.py
+++ b/hyperion/torch/lr_schedulers/triangular_lr.py
@@ -22,6 +22,23 @@ class TriangularLR(LRScheduler):
     x = \mathrm{abs}(2\frac{T_{cur}}{T_{max}}-2\mathrm{cycle}+1)
     \eta_t = \eta_{min} + (\eta_{max} - \eta_{min})\max(0, 1-x)

+    Attributes:
+      optimizer: Pytorch optimizer object.
+      T: period of the cycle.
+      T_mul: period multiplier, after each cycle the period is multiplied by T_mul.
+      hold_steps: number of steps until the lr starts decaying.
+      min_lr: minimum learning rate.
+      warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr.
+      gamma: after each period, the maximum lr is multiplied by gamma.
+      last_restart: the step at which the last restart happened, this is used
+        to restart the training from a checkpoint.
+      num_restarts: how many restarts we have done, this is used to restart the
+        training from a checkpoint.
+      epoch: initial training epoch, this is needed to restart the model
+        training.
+      step: initial training step, this is needed to restart the model training.
+      update_lr_on_opt_step: if True, updates the lr each time we update the model,
+        otherwise after each epoch.
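+
+    A usage sketch (constructor keywords as documented above):
+
+        scheduler = TriangularLR(optimizer, T=10000, T_mul=2, gamma=0.9,
+                                 min_lr=1e-5, update_lr_on_opt_step=True)
+        # the lr ramps linearly up and back down once per cycle of T steps;
+        # each new cycle is T_mul times longer, with the max lr scaled by gamma.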
""" def __init__( diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index f5f2c840..cb8ff1d0 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -3,6 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging +import contextlib from jsonargparse import ArgumentParser, ActionParser import torch @@ -35,6 +36,7 @@ def __init__( self.xvector = xvector self.feat_fusion_start = feat_fusion_start self.feat_fusion_method = feat_fusion_method + self._hf_context = contextlib.nullcontext self._make_fuser() def _make_fuser(self): @@ -96,7 +98,8 @@ def forward_feats(self, x, x_lengths, return_feat_layers=None): if return_feat_layers is None and self.feat_fusion_method == "last" else True ) - hf_output = self.hf_feats(x, x_lengths, return_hid_states=return_hid_states) + with self._hf_context: + hf_output = self.hf_feats(x, x_lengths, return_hid_states=return_hid_states) feat_lengths = hf_output["hidden_states_lengths"] if return_hid_states: hid_feats = hf_output["hidden_states"] @@ -192,6 +195,91 @@ def extract_embed( feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks ) + def freeze_feat_fuser(self): + if self.feat_fuser is None: + return + + if self.feat_fusion_method == "weighted-avg": + self.feat_fuser.requires_grad = False + return + + for param in self.feat_fuser.parameters(): + param.requires_grad = False + + def freeze_hf_feats(self): + self.hf_feats.freeze() + + def freeze_hf_feature_encoder(self): + self.hf_feats.freeze_feature_encoder() + + def set_train_mode(self, mode): + if mode == self._train_mode: + return + + if mode == "full": + self.unfreeze() + elif mode == "frozen": + self.freeze() + elif mode == "ft-embed-affine": + self.unfreeze() + self.freeze_feat_fuser() + self.freeze_hf_feats() + self.xvector.freeze_preembed_layers() + elif mode in ["ft-xvector", "ft-xvector-nograd"]: + self.unfreeze() + self.freeze_hf_feats() + self.freeze_feat_fuser() + elif mode in ["hf-feats-frozen", "hf-feats-frozen-nograd"]: + self.unfreeze() + self.freeze_hf_feats() + elif mode == "hf-feat-extractor-frozen": + self.unfreeze() + self.freeze_hf_feature_encoder() + else: + raise ValueError(f"invalid train_mode={mode}") + + logging.info("train mode set to %s", mode) + + if "nograd" in mode: + logging.info("using torch.no_grad for hf_feats") + self._hf_context = torch.no_grad() + else: + self._hf_context = contextlib.nullcontext + + self._train_mode = mode + + def _train(self, train_mode: str): + + if train_mode in ["full", "frozen"]: + super()._train(train_mode) + elif train_mode == "ft-embed-affine": + self.hf_feats.train() + self.xvector._train("ft-embed_affine") + elif train_mode in [ + "ft-xvector", + "hf-feats-frozen", + "ft-xvector-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", + ]: + self.hf_feats.train() + self.xvector._train("full") + else: + raise ValueError(f"invalid train_mode={train_mode}") + + @staticmethod + def valid_train_modes(): + return [ + "full", + "frozen", + "ft-embed-affine", + "ft-xvector", + "hf-feats-frozen", + "ft-xvector-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", + ] + @staticmethod def filter_args(**kwargs): valid_args = ( diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index fdd17701..af3a305c 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -57,6 +57,7 @@ def train_mode(self): 
@@ -85,7 +86,8 @@ def train(self, mode: bool = True):

         self._train(self.train_mode)

-    def valid_train_modes(self):
+    @staticmethod
+    def valid_train_modes():
         return ["full", "frozen"]

     def save(self, file_path):
diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py
index 34f40cc8..82ce70bd 100644
--- a/hyperion/torch/tpm/hf/hf_hubert.py
+++ b/hyperion/torch/tpm/hf/hf_hubert.py
@@ -192,9 +192,9 @@ def __init__(
         )

         if pretrained_model_path is not None and not ignore_pretrained:
-            logging.info(f"Downloading HF model from {pretrained_model_path}")
             rank = ddp_get_rank()
             if rank == 0:
+                logging.info(f"Downloading HF model from {pretrained_model_path}")
                 # rank 0 downloads the model from HF web
                 self.hf_model = HubertModel.from_pretrained(
                     pretrained_model_path,
diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py
index 0b0302eb..e91fe8c4 100644
--- a/hyperion/torch/tpm/hf/hf_wav2vec2.py
+++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py
@@ -210,10 +210,10 @@ def __init__(
         )

         if pretrained_model_path is not None and not ignore_pretrained:
-            logging.info(f"Downloading HF model from {pretrained_model_path}")
             rank = ddp_get_rank()
             if rank == 0:
                 # rank 0 downloads the model from HF web
+                logging.info(f"Downloading HF model from {pretrained_model_path}")
                 self.hf_model = Wav2Vec2Model.from_pretrained(
                     pretrained_model_path,
                     cache_dir=cache_dir,
diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py
index 9f5c353b..87f19835 100644
--- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py
+++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py
@@ -77,11 +77,11 @@ def __init__(
         self.override_spec_augment = override_spec_augment

         if pretrained_model_path is not None and not ignore_pretrained:
-            logging.info(
-                f"Downloading config for HF preprocessor from {pretrained_model_path}"
-            )
             rank = ddp_get_rank()
             if rank == 0:
+                logging.info(
+                    f"Downloading config for HF preprocessor from {pretrained_model_path}"
+                )
                 # rank 0 downloads the model from HF web
                 try:
                     # some models do not have config for processor because do not have
@@ -178,6 +178,9 @@ def change_spec_augment(
     def change_dropouts(self, **kwargs):
         pass  # needs to be overloaded

+    def freeze_feature_encoder(self):
+        self.hf_model.freeze_feature_encoder()
+
     @property
     def hf_config(self):
         return self.hf_model.config
diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py
index 229c1871..c75cb6e8 100644
--- a/hyperion/torch/tpm/hf/hf_wavlm.py
+++ b/hyperion/torch/tpm/hf/hf_wavlm.py
@@ -210,9 +210,9 @@ def __init__(
         )

         if pretrained_model_path is not None and not ignore_pretrained:
-            logging.info(f"Downloading HF model from {pretrained_model_path}")
             rank = ddp_get_rank()
             if rank == 0:
+                logging.info(f"Downloading HF model from {pretrained_model_path}")
                 # rank 0 downloads the model from HF web
                 self.hf_model = WavLMModel.from_pretrained(
                     pretrained_model_path,
diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py
index 4cf90d1a..8dfad9ce 100644
--- a/hyperion/torch/trainers/torch_trainer.py
+++ b/hyperion/torch/trainers/torch_trainer.py
@@ -127,6 +127,8 @@ def __init__(
         self.swa_anneal_epochs = swa_anneal_epochs
         self.amp_args = {}

+        self.set_train_mode()
+
         if device is not None:
             self.model.to(device)
         if loss is not None:
@@ -214,7 +216,6 @@ def fit(self, train_data, val_data=None):
""" self.exp_path.mkdir(parents=True, exist_ok=True) self._compute_grad_acc_steps(train_data) - self.set_train_mode() if self.do_swa and self.cur_epoch >= self.swa_start: self.in_swa = True @@ -261,7 +262,8 @@ def fit(self, train_data, val_data=None): self.save_swa_model(logs) def set_train_mode(self): - self.model.train_mode = self.train_mode + # self.model.train_mode = self.train_mode + self.model.set_train_mode(self.train_mode) def train_epoch(self, data_loader): """Training epoch loop @@ -313,7 +315,8 @@ def validation_epoch(self, data_loader, swa_update_bn=False): """Validation epoch loop Args: - data_loader: PyTorch data loader return input/output pairs + data_loader: PyTorch data loader return input/output pairs. + sw_update_bn: wheter or not, update batch-norm layers in SWA. """ metric_acc = MetricAcc(self.device) @@ -607,6 +610,7 @@ def filter_args(**kwargs): "use_amp", "ddp_type", "grad_clip", + "grad_clip_norm", "swa_start", "swa_lr", "swa_anneal_epochs", @@ -617,9 +621,9 @@ def filter_args(**kwargs): "use_tensorboard", "use_wandb", "wandb", + "train_mode", ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - return args @staticmethod diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 6445ae3e..d75936d8 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -168,7 +168,8 @@ def validation_epoch(self, data_loader, swa_update_bn=False): """Validation epoch loop Args: - data_loader: PyTorch data loader return input/output pairs + data_loader: PyTorch data loader return input/output pairs. + sw_update_bn: wheter or not, update batch-norm layers in SWA. """ metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() From 90ef40a49c1845f53b9dd59f6c5179d542b9d3b8 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 2 Jun 2022 10:27:25 -0400 Subject: [PATCH 014/154] wavlm phase 1 --- ..._wavlmbaseplus_ecapatdnn512x3_default.yaml | 6 + ...aseplus_ecapatdnn512x3_phase1_default.yaml | 6 + .../v2/conf/trainer_phase1_adam_default.yaml | 20 + .../v2/conf/trainer_phase1_sgd_default.yaml | 18 + .../v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml | 38 ++ ...nfig_wavlmbaseplus_ecapatdnn512x3_v1.10.sh | 36 ++ ...onfig_wavlmbaseplus_ecapatdnn512x3_v1.9.sh | 36 ++ hyperion/bin/train_wav2vec2xvector.py | 4 +- hyperion/np/classifiers/linear_gbe1.py | 264 ------------- hyperion/np/np_model.py | 101 ++++- hyperion/np/pdfs/core/exp_family.py | 133 ++++++- hyperion/np/pdfs/core/normal.py | 156 ++++++-- hyperion/np/pdfs/core/normal_diag_cov.py | 125 ++++++- hyperion/np/pdfs/core/pdf.py | 30 +- hyperion/np/pdfs/hmm/hmm.py | 37 +- hyperion/np/pdfs/jfa/jfa_total.py | 169 +++++++-- .../np/pdfs/mixtures/exp_family_mixture.py | 335 +++++++++++++++-- hyperion/np/pdfs/mixtures/gmm.py | 161 +++++++- hyperion/np/pdfs/mixtures/gmm_diag_cov.py | 163 +++++++- .../np/pdfs/mixtures/gmm_tied_diag_cov.py | 102 ++++- hyperion/np/pdfs/plda/frplda.py | 132 ++++++- hyperion/np/pdfs/plda/plda.py | 172 +++++++-- hyperion/np/pdfs/plda/plda_base.py | 351 +++++++++++++----- hyperion/np/pdfs/plda/splda.py | 151 +++++++- hyperion/np/score_norm/adapt_s_norm.py | 24 +- hyperion/np/score_norm/s_norm.py | 14 +- hyperion/np/score_norm/score_norm.py | 16 +- hyperion/np/score_norm/t_norm.py | 8 + hyperion/np/score_norm/tz_norm.py | 16 +- hyperion/np/score_norm/z_norm.py | 8 + hyperion/np/score_norm/zt_norm.py | 18 +- hyperion/np/transforms/cent_whiten_up.py | 6 +- 
hyperion/np/transforms/lda.py | 8 - hyperion/torch/layers/margin_losses.py | 154 +++++++- .../models/wav2xvectors/hf_wav2xvector.py | 4 +- .../models/xvectors/efficient_net_xvector.py | 4 + .../torch/models/xvectors/resnet1d_xvector.py | 56 +-- .../torch/models/xvectors/resnet_xvector.py | 4 + .../torch/models/xvectors/spinenet_xvector.py | 4 + .../torch/models/xvectors/tdnn_xvector.py | 4 + .../models/xvectors/transformer_xvector_v1.py | 4 + hyperion/torch/models/xvectors/xvector.py | 50 ++- hyperion/torch/narchs/classif_head.py | 36 ++ hyperion/torch/trainers/torch_trainer.py | 7 +- notebooks/tutorial_jsalt22/ivectors.ipynb | 226 +++++++++++ 45 files changed, 2783 insertions(+), 634 deletions(-) create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_default.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml create mode 100644 egs/voxceleb/v2/conf/trainer_phase1_adam_default.yaml create mode 100644 egs/voxceleb/v2/conf/trainer_phase1_sgd_default.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.9.sh delete mode 100644 hyperion/np/classifiers/linear_gbe1.py create mode 100644 notebooks/tutorial_jsalt22/ivectors.ipynb diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_default.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_default.yaml new file mode 100644 index 00000000..8574a1cf --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_default.yaml @@ -0,0 +1,6 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +model: wavlmbaseplus_ecapatdnn512x3.yaml +trainer: trainer_phase1_sgd_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml new file mode 100644 index 00000000..8574a1cf --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml @@ -0,0 +1,6 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +model: wavlmbaseplus_ecapatdnn512x3.yaml +trainer: trainer_phase1_sgd_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/trainer_phase1_adam_default.yaml b/egs/voxceleb/v2/conf/trainer_phase1_adam_default.yaml new file mode 100644 index 00000000..03c5cc84 --- /dev/null +++ b/egs/voxceleb/v2/conf/trainer_phase1_adam_default.yaml @@ -0,0 +1,20 @@ +optim: + opt_type: adam + lr: 0.05 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 4e-4 +lrsched: + lrsch_type: exp_lr + decay_steps: 8000 + hold_steps: 40000 + min_lr: 1.0e-05 + decay_rate: 0.5 + warmup_steps: 1000 + update_lr_on_opt_step: true +use_amp: true +log_interval: 1000 +epochs: 30 +eff_batch_size: 1024 +train_mode: hf-feats-frozen-nograd diff --git a/egs/voxceleb/v2/conf/trainer_phase1_sgd_default.yaml b/egs/voxceleb/v2/conf/trainer_phase1_sgd_default.yaml new file mode 100644 index 00000000..7fc848a0 --- /dev/null +++ b/egs/voxceleb/v2/conf/trainer_phase1_sgd_default.yaml @@ -0,0 +1,18 @@ +optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 +lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 2100 + hold_steps: 1000 + min_lr: 4e-4 + warmup_steps: 1000 + update_lr_on_opt_step: true +use_amp: true +log_interval: 1000 
+epochs: 30 +eff_batch_size: 1024 +train_mode: hf-feats-frozen-nograd diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml new file mode 100644 index 00000000..787e3718 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml @@ -0,0 +1,38 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 3 + dropout_rate: 0.0 +feat_fusion_method: last diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh new file mode 100644 index 00000000..47af1f43 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh @@ -0,0 +1,36 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +xvec_train_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml +xvec_train_args="--model.xvector.margin-warmup-epochs 5 --trainer.lrsched.decay-steps 4200 --trainer.lrsched.warmup-steps 1500 --trainer.lrsched.hold-steps 1500 --trainer.epochs 60 --model.feat-fusion-method weighted-avg --model.feat-fusion-start 2 --model.xvector.intertop-margin 0.1" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.10 + +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0060.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.9.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.9.sh new file mode 100644 index 00000000..dccd01e1 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.9.sh @@ -0,0 +1,36 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +xvec_train_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml +xvec_train_args="--model.xvector.margin-warmup-epochs 5 --trainer.lrsched.decay-steps 3150 --trainer.lrsched.warmup-steps 1500 --trainer.lrsched.hold-steps 1500 --trainer.epochs 45 --model.feat-fusion-method weighted-avg --model.feat-fusion-start 2 --model.xvector.intertop-margin 0.1" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.9 + +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0045.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + 
plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index cbb37bb3..c673f5c9 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -154,7 +154,9 @@ def make_parser(model_class): ) model_class.add_class_args(parser, prefix="model") - Trainer.add_class_args(parser, prefix="trainer") + Trainer.add_class_args( + parser, prefix="trainer", train_modes=model_class.valid_train_modes() + ) ddp.add_ddp_args(parser) parser.add_argument("--seed", type=int, default=1123581321, help="random seed") parser.add_argument( diff --git a/hyperion/np/classifiers/linear_gbe1.py b/hyperion/np/classifiers/linear_gbe1.py deleted file mode 100644 index 8c5df381..00000000 --- a/hyperion/np/classifiers/linear_gbe1.py +++ /dev/null @@ -1,264 +0,0 @@ -""" - Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" - -import numpy as np - -from ...hyp_defs import float_cpu -from ..np_model import NPModel -from ...utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax - - -class LinearGBE(NPModel): - def __init__( - self, - mu=None, - W=None, - update_mu=True, - update_W=True, - x_dim=1, - num_classes=None, - balance_class_weight=True, - do_map=False, - r_mu=16, - r_W=16, - **kwargs - ): - super(LinearGBE, self).__init__(**kwargs) - if mu is not None: - num_classes = mu.shape[0] - x_dim = mu.shape[1] - - self.mu = mu - self.W = W - self.update_mu = update_mu - self.update_W = update_W - self.x_dim = x_dim - self.num_classes = num_classes - self.balance_class_weight = balance_class_weight - self.A = None - self.b = None - self.do_map = do_map - self.r_mu = r_mu - self.r_W = r_W - - self._compute_Ab() - - def get_config(self): - config = { - "update_mu": self.update_mu, - "update_W": self.update_W, - "x_dim": self.x_dim, - "num_classes": self.num_classes, - "balance_class_weight": self.balance_class_weight, - "do_map": self.do_map, - "r_mu": self.r_mu, - "r_W": self.r_W, - } - base_config = super(LinearGBE, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def predict(self, x, normalize=False, return_full_llk=False): - logp = np.dot(x, self.A) + self.b - - if return_full_llk: - K = 0.5 * logdet_pdmat(self.W) - 0.5 * self.x_dim * np.log(2 * np.pi) - K += -0.5 * np.sum(np.dot(x, self.W) * x, axis=1, keepdims=True) - logp += K - - if normalize: - logp = np.log(softmax(logp, axis=1)) - - return logp - - def fit(self, x, class_ids=None, p_theta=None, sample_weight=None): - - assert class_ids is not None or p_theta is not None - - self.x_dim = x.shape[-1] - if self.num_classes is None: - if class_ids is not None: - self.num_classes = np.max(class_ids) + 1 - else: - self.num_classes = p_theta.shape[-1] - - if class_ids is not None: - p_theta = int2onehot(class_ids, self.num_classes) - - if sample_weight is not None: - p_theta = sample_weight[:, None] * p_theta - - N = np.sum(p_theta, axis=0) - - F = np.dot(p_theta.T, x) - - mu0 = self.mu - xbar = mu0 - if self.update_mu: - xbar = F / N[:, None] - if self.do_map: - alpha = (N / (N + self.r_mu))[:, None] - self.mu = (1 - alpha) * mu0 + alpha * xbar - else: - self.mu = xbar - - if self.update_W: - if self.do_map: - r_W = self.r_W - alpha = (N / (N + r_W))[:, None] - S0 = invert_pdmat(self.W, return_inv=True)[-1] - if 
self.balance_class_weight: - S = (self.num_classes - np.sum(alpha)) * S0 - else: - S = self.num_classes * self.r_W * S0 - else: - r_W = 0 - S = np.zeros((x.shape[1], x.shape[1]), dtype=float_cpu()) - - for k in range(self.num_classes): - delta = x - xbar[k] - S_k = np.dot(p_theta[:, k] * delta.T, delta) - if self.do_map: - mu_delta = xbar[k] - mu0[k] - S_k += self.r_W * alpha[k] * np.outer(mu_delta, mu_delta) - - if self.balance_class_weight: - S_k /= N[k] + r_W - - S += S_k - - if self.balance_class_weight: - S /= self.num_classes - else: - S /= self.num_classes * r_w + np.sum(N) - - self.W = invert_pdmat(S, return_inv=True)[-1] - - self._compute_Ab() - - def save_params(self, f): - params = {"mu": self.mu, "W": self.W} - self._save_params_from_dict(f, params) - - @classmethod - def load_params(cls, f, config): - param_list = ["mu", "W"] - params = cls._load_params_to_dict(f, config["name"], param_list) - kwargs = dict(list(config.items()) + list(params.items())) - return cls(**kwargs) - - def _compute_Ab(self): - if self.mu is not None and self.W is not None: - self.A = np.dot(self.W, self.mu.T) - self.b = -0.5 * np.sum(self.mu.T * self.A, axis=0) - - @staticmethod - def filter_args(**kwargs): - - valid_args = ( - "update_mu", - "update_W", - "balance_class_weight", - "do_map", - "r_mu", - "r_W", - "name", - ) - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - filter_train_args = filter_args - - @staticmethod - def add_class_args(parser, prefix=None): - if prefix is None: - p1 = "--" - p2 = "" - else: - p1 = "--" + prefix + "." - p2 = prefix + "." - - parser.add_argument( - p1 + "no-update-mu", - default=True, - action="store_false", - help="not update mu", - ) - parser.add_argument( - p1 + "no-update-W", - dest=(p2 + "update_W"), - default=True, - action="store_false", - help="not update W", - ) - parser.add_argument( - p1 + "balance-class-weight", - dest=(p2 + "balance_class_weight"), - default=False, - action="store_true", - help="Balances the weight of each class when computing W", - ) - parser.add_argument( - p1 + "do-map", - dest=(p2 + "do_map"), - default=False, - action="store_true", - help="does MAP adaptation", - ) - parser.add_argument( - p1 + "r-mu", - dest=(p2 + "r_mu"), - default=16, - type=float, - help="relevance factor for the means", - ) - parser.add_argument( - p1 + "r-w", - dest=(p2 + "r_W"), - default=16, - type=float, - help="relevance factor for the variances", - ) - - parser.add_argument( - p1 + "name", dest=(p2 + "name"), default="lgbe", help="model name" - ) - - @staticmethod - def filter_eval_args(**kwargs): - valid_args = ("model_file", "normalize", "return_full_llk") - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - @staticmethod - def add_argparse_eval_args(parser, prefix=None): - if prefix is None: - p1 = "--" - p2 = "" - else: - p1 = "--" + prefix + "." - p2 = prefix + "." 
-
-        parser.add_argument(
-            p1 + "model-file",
-            dest=(p2 + "model_file"),
-            required=True,
-            help=("model file"),
-        )
-        parser.add_argument(
-            p1 + "normalize",
-            dest=(p2 + "normalize"),
-            default=False,
-            action="store_true",
-            help=("normalizes the ouput probabilities to sum to one"),
-        )
-        parser.add_argument(
-            p1 + "return-full-llk",
-            dest=(p2 + "return_full_llk"),
-            default=False,
-            action="store_true",
-            help=("evaluates full gaussian likelihood instead of linear function"),
-        )
-
-    add_argparse_args = add_class_args
-    add_argparse_train_args = add_class_args
-    add_argparse_eval_args = add_eval_args
diff --git a/hyperion/np/np_model.py b/hyperion/np/np_model.py
index 35717a82..a53135e6 100644
--- a/hyperion/np/np_model.py
+++ b/hyperion/np/np_model.py
@@ -2,7 +2,6 @@
  Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-from abc import ABCMeta, abstractmethod
 import os
 import json
 from copy import deepcopy
@@ -14,36 +13,62 @@


 class NPModel(object):
-    __metaclass__ = ABCMeta
+    """Base class for machine learning models based on numpy.
+
+    Attributes:
+      name: optional identifier for the model.
+    """

     def __init__(self, name=None, **kwargs):
         self.name = name
         self._is_init = False

     def copy(self):
+        """Returns a clone of the model."""
+        return deepcopy(self)
+
+    def clone(self):
+        """Returns a clone of the model."""
         return deepcopy(self)

     @property
     def is_init(self):
+        """Returns True if the model has been initialized."""
         return self._is_init

     def init_to_false(self):
+        """Sets the model as non-initialized."""
         self._is_init = False

-    @abstractmethod
     def initialize(self):
         pass

-    @abstractmethod
-    def fit(self, x, sample_weights=None, x_val=None, sample_weights_val=None):
-        pass
+    def fit(self, x, sample_weight=None, x_val=None, sample_weight_val=None):
+        """Trains the model.
+
+        Args:
+          x: train data matrix with shape (num_samples, x_dim).
+          sample_weight: weight of each sample in the training loss with shape (num_samples,).
+          x_val: validation data matrix with shape (num_val_samples, x_dim).
+          sample_weight_val: weight of each sample in the val. loss.
+        """
+        raise NotImplementedError()

-    @abstractmethod
     def fit_generator(self, x, x_val=None):
-        pass
+        """Trains the model from a data generator function.
+
+        Args:
+          x: train data generation function.
+          x_val: validation data generation function.
+        """
+        raise NotImplementedError()

-    @abstractmethod
     def save(self, file_path):
+        """Saves the model to file.
+
+        Args:
+          file_path: filename path.
+        """
         file_dir = os.path.dirname(file_path)
         if not (os.path.isdir(file_dir)):
             os.makedirs(file_dir, exist_ok=True)
@@ -52,11 +77,24 @@ def save(self, file_path):
             f.create_dataset("config", data=np.array(config, dtype="S"))
             self.save_params(f)

-    @abstractmethod
     def save_params(self, f):
-        assert True, "save_params method not defined for %s" % (self.__class__.__name__)
+        """Saves the model parameters into the file.
+
+        Args:
+          f: file handle.
+        """
+        raise NotImplementedError(
+            f"save_params method not defined for {self.__class__.__name__}"
+        )

     def _save_params_from_dict(self, f, params, dtypes=None):
+        """Saves a dictionary of model parameters into the file.
+
+        Args:
+          f: file handle.
+          params: dictionary of model parameters.
+          dtypes: dictionary indicating the dtypes of the model parameters.
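+
+        Example (illustrative sketch of how a subclass can call this helper;
+        ``Normal.save_params`` below uses the same pattern):
+
+            params = {"mu": self.mu, "Lambda": self.Lambda}
+            self._save_params_from_dict(f, params)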
+ """ if dtypes is None: dtypes = dict((k, float_save()) for k in params) @@ -74,6 +112,14 @@ def _save_params_from_dict(self, f, params, dtypes=None): @classmethod def load_config(cls, file_path): + """Loads the model configuration from file. + + Args: + file_path: path to the file where the model is stored. + + Returns: + Dictionary containing the model configuration. + """ try: with h5py.File(file_path, "r") as f: json_str = str(np.asarray(f["config"]).astype("U")) @@ -84,6 +130,14 @@ def load_config(cls, file_path): @classmethod def load(cls, file_path): + """Loads the model from file. + + Args: + file_path: path to the file where the model is stored. + + Returns: + Model object. + """ with h5py.File(file_path, "r") as f: json_str = str(np.asarray(f["config"]).astype("U")) config = cls.load_config_from_json(json_str) @@ -91,10 +145,31 @@ def load(cls, file_path): @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. + config: configuration dictionary. + + Returns: + Model object. + """ return cls(name=config["name"]) @staticmethod def _load_params_to_dict(f, name, params, dtypes=None): + """Loads the model parameters from file to a dictionary. + + Args: + f: file handle. + name: model identifier or None. + params: parameter names. + dtypes: dictionary containing the dtypes of the parameters. + + Returns: + Dictionary with model parameters. + """ if dtypes is None: dtypes = dict((k, float_cpu()) for k in params) if name is None: @@ -113,12 +188,13 @@ def _load_params_to_dict(f, name, params, dtypes=None): param_dict[k] = None return param_dict - @abstractmethod def get_config(self): + """Returns the model configuration dict.""" config = {"class_name": self.__class__.__name__, "name": self.name} return config def to_json(self, **kwargs): + """Returns model config as json string.""" # Piece of code borrowed from keras def get_json_type(obj): # if obj is any numpy type @@ -136,4 +212,5 @@ def get_json_type(obj): @staticmethod def load_config_from_json(json_str): + """Converts json string into dict.""" return json.loads(json_str) diff --git a/hyperion/np/pdfs/core/exp_family.py b/hyperion/np/pdfs/core/exp_family.py index 44fc172c..c91469e7 100644 --- a/hyperion/np/pdfs/core/exp_family.py +++ b/hyperion/np/pdfs/core/exp_family.py @@ -5,20 +5,27 @@ import numpy as np -from abc import ABCMeta, abstractmethod from .pdf import PDF class ExpFamily(PDF): - __metaclass__ = ABCMeta + """Base class for exponential family distribution. + + p(x) = h(x) exp(\eta u(x) - A) + + Attributes: + eta: natural parameters of the distribution. + x_dim: data dimension. + """ def __init__(self, eta=None, **kwargs): - super(ExpFamily, self).__init__(**kwargs) + super().__init__(**kwargs) self.eta = eta self.A = None @property def is_init(self): + """Returns True if the model has been initialized.""" if not self._is_init: self._compute_nat_std() if self.eta is not None and self.A is not None: @@ -29,6 +36,21 @@ def is_init(self): def fit( self, x, sample_weight=None, x_val=None, sample_weight_val=None, batch_size=None ): + """Trains the model. + + Args: + x: train data matrix with shape (num_samples, x_dim). + sample_weight: weight of each sample in the training loss shape (num_samples,). + x_val: validation data matrix with shape (num_val_samples, x_dim). + sample_weight_val: weight of each sample in the val. loss. + batch_size: accumlates sufficient statistics in batch_size blocks. 
+ + Returns: + log p(X) of the training data. + log p(x) per sample. + log p(X) of the val. data, if present. + log p(x) of the val. data per sample, if present. + """ N, u_x = self.Estep(x=x, sample_weight=sample_weight, batch_size=batch_size) self.Mstep(N, u_x) @@ -44,23 +66,49 @@ def fit( return elbo def log_h(self, x): + """Computes log h(x) of the exp. family.""" return 0 def accum_log_h(self, x, sample_weight=None): + """Accumlates log h(x)""" if sample_weight is None: return np.sum(self.log_h(x)) return np.sum(sample_weight * self.log_h(x)) def compute_suff_stats(self, x): + """Computes sufficient stats for a data sample.""" return x def accum_suff_stats(self, x, u_x=None, sample_weight=None, batch_size=None): + """Accumlates sufficient statistis over several data samples. + + Args: + x: data samples of shape (num_samples, x_dim). + u_x: sufficient stats for x with shape = (num_samples, u(x)_dim) (optional). + sample_weight: weight of each sample in the accumalation. + batch_size: accumlates sufficient statistics in batch_size blocks. + + Returns: + N zero order sufficient statistics (number of samples). + Accumlated sufficient statistics \sum u(x) + """ if u_x is not None or batch_size is None: return self._accum_suff_stats_1batch(x, u_x, sample_weight) else: return self._accum_suff_stats_nbatches(x, sample_weight, batch_size) def _accum_suff_stats_1batch(self, x, u_x=None, sample_weight=None): + """Accumlates sufficient statistis over several data samples for a single batch. + + Args: + x: data samples of shape (num_samples, x_dim). + u_x: sufficient stats for x with shape = (num_samples, u(x)_dim) (optional). + sample_weight: weight of each sample in the accumalation. + + Returns: + N zero order sufficient statistics (number of samples). + Accumlated sufficient statistics \sum u(x) + """ if u_x is None: u_x = self.compute_suff_stats(x) if sample_weight is None: @@ -72,6 +120,18 @@ def _accum_suff_stats_1batch(self, x, u_x=None, sample_weight=None): return N, acc_u_x def _accum_suff_stats_nbatches(self, x, sample_weight, batch_size): + """Accumlates sufficient statistis over several data samples for multiple batches. + + Args: + x: data samples of shape (num_samples, x_dim). + u_x: sufficient stats for x with shape = (num_samples, u(x)_dim) (optional). + sample_weight: weight of each sample in the accumalation. + batch_size: accumlates sufficient statistics in batch_size blocks. + + Returns: + N zero order sufficient statistics (number of samples). + Accumlated sufficient statistics \sum u(x) + """ sw_i = None for i1 in range(0, x.shape[0], batch_size): i2 = np.minimum(i1 + batch_size, x.shape[0]) @@ -87,23 +147,56 @@ def _accum_suff_stats_nbatches(self, x, sample_weight, batch_size): u_x += u_x_i return N, u_x - def add_suff_stats(self, N, u_x): + def sum_suff_stats(self, N, u_x): + """Sums suff. stats from muttiple sub-processes. + + Args: + N: zero order stats with shape = (num_proc,) + u_x: higher order stats with shape = (num_proc, u(x)_dim). + + Args: + Accumalted N and u_x. + """ assert len(N) == len(u_x) acc_N = N[1] acc_u_x = u_x[1] for i in range(1, len(N)): acc_N += N - acc_u_x += u[i] + acc_u_x += u_x[i] return acc_N, acc_u_x def Estep(self, x, u_x=None, sample_weight=None, batch_size=None): + """Expectation step, accumlates suff. stats. + + Args: + x: data samples of shape (num_samples, x_dim). + u_x: sufficient stats for x with shape = (num_samples, u(x)_dim) (optional). + sample_weight: weight of each sample in the accumalation. 
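+
+        Example (minimal EM sketch mirroring what ``fit`` does internally;
+        ``model`` is assumed to be an instance of a subclass such as
+        ``Normal`` and ``x`` a (num_samples, x_dim) array):
+
+            N, u_x = model.Estep(x)
+            model.Mstep(N, u_x)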
+        """
         return self.accum_suff_stats(x, u_x, sample_weight, batch_size)

-    @abstractmethod
     def Mstep(self, stats):
+        """Maximization step."""
         pass

     def elbo(self, x, u_x=None, N=1, log_h=None, sample_weight=None, batch_size=None):
+        """Evidence lower bound.
+
+        Args:
+          x: data samples with shape = (num_samples, x_dim).
+          u_x: accumulated u(x) (optional).
+          log_h: accumulated log h(x) (optional).
+          sample_weight: weight of each sample in the loss function.
+          batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+          log p(X) of the data.
+        """
         assert self.is_init
         if u_x is None:
             N, u_x = self.accum_suff_stats(
@@ -114,12 +207,33 @@ def elbo(self, x, u_x=None, N=1, log_h=None, sample_weight=None, batch_size=None):
         return log_h + np.inner(u_x, self.eta) - N * self.A

     def log_prob(self, x, u_x=None, method="nat"):
+        """log p(x) of each data sample.
+
+        Args:
+          x: input data with shape (num_samples, x_dim).
+          u_x: sufficient stats u(x) with shape (num_samples, u_dim).
+          method: the probability is computed using standard ("std") or
+            natural parameters ("nat").
+
+        Returns:
+          log p(x) with shape (num_samples,)
+        """
         if method == "nat":
             return self.log_prob_nat(x, u_x)
         else:
             return self.log_prob_std(x)

     def log_prob_nat(self, x, u_x=None):
+        """log p(x) of each data sample computed using the
+        natural parameters of the distribution.
+
+        Args:
+          x: input data with shape (num_samples, x_dim).
+          u_x: sufficient stats u(x) with shape (num_samples, u_dim).
+
+        Returns:
+          log p(x) with shape (num_samples,)
+        """
         assert self.is_init
         if u_x is None:
             u_x = self.compute_suff_stats(x)
@@ -127,31 +241,32 @@ def log_prob_nat(self, x, u_x=None):

     @staticmethod
     def compute_A_nat(eta):
+        """Computes A_theta from the natural param."""
         raise NotImplementedError()

     @staticmethod
     def compute_A_std(params):
+        """Computes A_theta from the standard param."""
         raise NotImplementedError()

     @staticmethod
     def compute_eta(param):
+        """Computes the natural param. from the standard param."""
         raise NotImplementedError()

     @staticmethod
     def compute_std(eta):
+        """Computes the standard param. from the natural param."""
         raise NotImplementedError()

-    @abstractmethod
     def _compute_nat_params(self):
         pass

-    @abstractmethod
     def _compute_std_params(self):
         pass

     def _compute_nat_std(self):
         pass

-    @abstractmethod
     def validate(self):
         pass
diff --git a/hyperion/np/pdfs/core/normal.py b/hyperion/np/pdfs/core/normal.py
index ed60edb7..4c3c70cf 100644
--- a/hyperion/np/pdfs/core/normal.py
+++ b/hyperion/np/pdfs/core/normal.py
@@ -4,9 +4,7 @@
 """

 import numpy as np
-import h5py
 import scipy.linalg as la
-from scipy.special import erf

 from ....hyp_defs import float_cpu
 from ....utils.plotting import (
@@ -28,6 +26,17 @@


 class Normal(ExpFamily):
+    """Class for Normal distribution with full covariance.
+
+    Attributes:
+      mu: mean with shape (x_dim,) or None.
+      Lambda: precision with shape (x_dim, x_dim) or None.
+      var_floor: variance floor.
+      update_mu: whether or not to update mu when optimizing.
+      update_Lambda: whether or not to update Lambda when optimizing.
+      x_dim: data dim (inferred from mu if present)
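+
+    Example (illustrative usage sketch; ``x`` is assumed to be a
+    (num_samples, x_dim) numpy array of training data):
+
+        model = Normal(x_dim=x.shape[1])
+        elbo = model.fit(x)            # EM training
+        logp = model.log_prob(x)       # per-sample log-likelihoods
+        samples = model.sample(10)     # draw 10 samples from the model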
+    """
+
     def __init__(
         self,
         mu=None,
@@ -37,7 +46,7 @@ def __init__(
         update_Lambda=True,
         **kwargs
     ):
-        super(Normal, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.mu = mu
         self.Lambda = Lambda
         self.var_floor = var_floor
@@ -51,6 +60,7 @@ def __init__(
         self._Sigma = None

     def _compute_nat_std(self):
+        """Computes natural and standard parameters of the distribution."""
         if self.mu is not None and self.Lambda is not None:
             self._validate_mu()
             self._validate_Lambda()
@@ -62,6 +72,7 @@ def _compute_nat_std(self):

     @property
     def logLambda(self):
+        """log precision determinant."""
         if self._logLambda is None:
             assert self.is_init
             f, L, logL = invert_pdmat(self.Lambda, return_logdet=True)
@@ -71,6 +82,7 @@ def logLambda(self):

     @property
     def cholLambda(self):
+        """Cholesky decomp. of the precision."""
         if self._cholLambda is None:
             assert self.is_init
             f, L, logL = invert_pdmat(self.Lambda, return_logdet=True)
@@ -80,26 +92,42 @@ def cholLambda(self):

     @property
     def Sigma(self):
+        """Covariance."""
         if self._Sigma is None:
             assert self.is_init
             self._Sigma = invert_pdmat(self.Lambda, return_inv=True)[-1]
         return self._Sigma

     def initialize(self):
+        """Initializes the distribution."""
         self.validate()
         self._compute_nat_std()

     def stack_suff_stats(self, F, S=None):
+        """Stacks F and S suff stats into single vector."""
         if S is None:
             return F
         return np.hstack((F, S))

     def unstack_suff_stats(self, stats):
+        """Decomposes suff. stats vector into F and S."""
         F = stats[: self.x_dim]
         S = stats[self.x_dim :]
         return F, S

     def accum_suff_stats(self, x, u_x=None, sample_weight=None, batch_size=None):
+        """Accumulates sufficient statistics over several data samples.
+
+        Args:
+          x: data samples of shape (num_samples, x_dim).
+          u_x: sufficient stats for x with shape = (num_samples, u(x)_dim) (optional).
+          sample_weight: weight of each sample in the accumulation.
+          batch_size: unused
+
+        Returns:
+          N zero order sufficient statistics (number of samples).
+          Accumulated sufficient statistics \sum u(x)
+        """
         if u_x is None:
             if sample_weight is None:
                 N = x.shape[0]
@@ -115,12 +143,23 @@ def accum_suff_stats(self, x, u_x=None, sample_weight=None, batch_size=None):
         return self._accum_suff_stats_1batch(x, u_x, sample_weight)

     def norm_suff_stats(self, N, u_x, return_order2=False):
+        """Normalizes accumulated sufficient statistics with the
+        mean and covariance of the distribution.
+
+        Args:
+          N: zeroth order sufficient stats.
+          u_x: 1st and 2nd order stats.
+          return_order2: whether or not to return normalized 2nd order stats.
+
+        Returns:
+          Normalized N, F or N, [F, S].
+        """
         assert self.is_init

         F, S = self.unstack_suff_stats(u_x)
         F_norm = np.dot(F - N * self.mu, self.cholLambda.T)
         if return_order2:
-            SS = vec2symat(S)
+            SS = vec2symmat(S)
             Fmu = np.outer(F, self.mu)
             SS = SS - Fmu - Fmu.T + N * np.outer(self.mu, self.mu)
             SS = np.dot(self.cholLambda, np.dot(SS, self.cholLambda.T))
@@ -129,7 +168,13 @@ def norm_suff_stats(self, N, u_x, return_order2=False):
         return N, F_norm

     def Mstep(self, N, u_x):
+        """Maximization step.
+
+        Args:
+          N: zeroth order stats.
+          u_x: accumulated higher order stats.
+        """
         F, S = self.unstack_suff_stats(u_x)

         if self.update_mu:
@@ -147,6 +192,15 @@ def Mstep(self, N, u_x):
         self._compute_nat_params()

     def log_prob_std(self, x):
+        """log p(x) of each data sample computed using the
+        standard parameters of the distribution.
+
+        Args:
+          x: input data with shape (num_samples, x_dim).
+
+        Returns:
+          log p(x) with shape (num_samples,)
+        """
         assert self.is_init
         mah_dist2 = np.sum(np.dot(x - self.mu, self.cholLambda) ** 2, axis=1)
         return (
@@ -155,17 +209,17 @@ def log_prob_std(self, x):
             - 0.5 * mah_dist2
         )

-    # def eval_logcdf(self, x):
-    #     delta = np.dot((x-self.mu), self.cholLambda)
-    #     lk = 0.5*(1+erf(delta/np.sqrt(2)))
-    #     print(x-self.mu)
-    #     print(la.cholesky(self.Lambda,lower=True))
-    #     print(self.cholLambda)
-    #     print(delta)
-    #     print(lk)
-    #     return np.sum(np.log(lk+1e-20), axis=-1)
-
     def sample(self, num_samples, rng=None, seed=1024):
+        """Draws samples from the data distribution.
+
+        Args:
+          num_samples: number of samples.
+          rng: random number generator.
+          seed: random seed used if rng is None.
+
+        Returns:
+          Generated samples with shape (num_samples, x_dim).
+        """
         assert self.is_init

         if rng is None:
@@ -173,11 +227,9 @@ def sample(self, num_samples, rng=None, seed=1024):
         return rng.multivariate_normal(self.mu, self.Sigma, size=(num_samples,)).astype(
             float_cpu()
         )
-        # x=rng.normal(size=(num_samples, self.x_dim))
-        # cholS=la.cholesky(self.Sigma, lower=False, overwrite_a=True)
-        # return self.mu+np.dot(x, cholS)

     def get_config(self):
+        """Returns the model configuration dict."""
         config = {
             "var_floor": self.var_floor,
             "update_mu": self.update_mu,
@@ -187,7 +239,11 @@ def get_config(self):
         return dict(list(base_config.items()) + list(config.items()))

     def save_params(self, f):
+        """Saves the model parameters into the file.
+
+        Args:
+          f: file handle.
+        """
         assert self.is_init

         params = {"mu": self.mu, "Lambda": self.Lambda}
@@ -195,8 +251,18 @@ def save_params(self, f):

     @classmethod
     def load_params(cls, f, config):
+        """Initializes the model from the configuration and loads the model
+        parameters from file.
+
+        Args:
+          f: file handle.
+          config: configuration dictionary.
+
+        Returns:
+          Model object.
+        """
         param_list = ["mu", "Lambda"]
-        params = self._load_params_to_dict(f, config["name"], param_list)
+        params = cls._load_params_to_dict(f, config["name"], param_list)
         return cls(
             x_dim=config["x_dim"],
             mu=params["mu"],
@@ -217,6 +283,7 @@ def _validate_eta(self):
         assert self.eta.shape[0] == (self.x_dim ** 2 + 3 * self.x_dim) / 2

     def validate(self):
+        """Validates the parameters of the distribution."""
         if self.mu is not None and self.Lambda is not None:
             self._validate_mu()
             self._validate_Lambda()
@@ -226,18 +293,21 @@ def validate(self):

     @staticmethod
     def compute_eta(mu, Lambda):
+        """Computes nat param. from mean and precision."""
         Lmu = np.dot(mu, Lambda)
         eta = np.hstack((Lmu, -symmat2vec(Lambda, diag_factor=0.5)))
         return eta

     @staticmethod
     def compute_x_dim_from_eta(eta):
+        """Computes data dim. from natural param."""
         x_dim = 0.5 * (-3 + np.sqrt(9 + 8 * eta.shape[-1]))
         assert int(x_dim) == x_dim
         return int(x_dim)

     @staticmethod
     def compute_std(eta):
+        """Computes standard params. from the natural param."""
         x_dim = Normal.compute_x_dim_from_eta(eta)
         eta1 = eta[:x_dim]
         eta2 = vec2symmat(eta[x_dim:], diag_factor=2) / 2
@@ -248,6 +318,7 @@ def compute_std(eta):

     @staticmethod
     def compute_A_nat(eta):
+        """Computes A from the natural param."""
         x_dim = Normal.compute_x_dim_from_eta(eta)
         eta1 = eta[:x_dim]
         eta2 = vec2symmat(eta[x_dim:], diag_factor=2) / 2
@@ -259,6 +330,7 @@ def compute_A_nat(eta):

     @staticmethod
     def compute_A_std(mu, Lambda):
+        """Computes A from the standard params."""
         x_dim = mu.shape[0]
         r1 = 0.5 * x_dim * np.log(2 * np.pi)
         r2 = -0.5 * logdet_pdmat(Lambda)
@@ -266,15 +338,9 @@ def compute_A_std(mu, Lambda):
         return r1 + r2 + r3

     def _compute_nat_params(self):
+        """Computes all natural params from mean and precision."""
         self.eta = self.compute_eta(self.mu, self.Lambda)
         self.A = self.compute_A_std(self.mu, self.Lambda)
-        # self.A = self.compute_A_nat(self.eta)
-        # Lmu = np.dot(self.Lambda, self.mu[:, None])
-        # muLmu = np.dot(self.mu, Lmu)
-        # lnr = 0.5*self.lnLambda - 0.5*self.x_dim*np.log(2*np.pi)-0.5*muLmu
-        # Lambda=np.copy(self.Lambda)
-        # Lambda[np.diag_indices(self.x_dim)] /= 2
-        # self.eta=np.vstack((lnr, Lmu, symmat2vec(Lambda)[:, None]))

     def _compute_std_params(self):
         self.mu, self.Lambda = self.compute_std(self.eta)
@@ -284,6 +350,14 @@ def _compute_std_params(self):

     @staticmethod
     def compute_suff_stats(x):
+        """Computes the sufficient stats. for each sample.
+
+        Args:
+          x: data samples with shape (num_samples, x_dim).
+
+        Returns:
+          Sufficient stats. for each data sample with shape (num_samples, u_dim).
+        """
         d = x.shape[1]
         u = np.zeros((x.shape[0], int(d + d * (d + 1) / 2)), dtype=float_cpu())
         u[:, :d] = x
@@ -295,12 +369,28 @@ def compute_suff_stats(x):
         return u

     def plot1D(self, feat_idx=0, num_sigmas=2, num_pts=100, **kwargs):
+        """Plots one slice of the Gaussian in 1d.
+
+        Args:
+          feat_idx: feature index.
+          num_sigmas: size of the plot in number of standard devs.
+          num_pts: number of points in the graph.
+          **kwargs: pyplot options.
+        """
         assert self.is_init
         mu = self.mu[feat_idx]
         C = invert_pdmat(self.Lambda, return_inv=True)[-1][feat_idx, feat_idx]
         plot_gaussian_1D(mu, C, num_sigmas, num_pts, **kwargs)

     def plot2D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 2 dimensions of the Gaussian in 2d.
+
+        Args:
+          feat_idx: feature indices.
+          num_sigmas: size of the plot in number of standard devs.
+          num_pts: number of points in the graph.
+          **kwargs: pyplot options.
+        """
         assert self.is_init
         mu = self.mu[feat_idx]
         j, i = np.meshgrid(feat_idx, feat_idx)
@@ -308,6 +398,14 @@ def plot2D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
         plot_gaussian_ellipsoid_2D(mu, C, num_sigmas, num_pts, **kwargs)

     def plot3D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 2 dimensions of the Gaussian in 3d.
+
+        Args:
+          feat_idx: feature indices.
+          num_sigmas: size of the plot in number of standard devs.
+          num_pts: number of points in the graph.
+          **kwargs: pyplot options.
+        """
         assert self.is_init
         mu = self.mu[feat_idx]
         j, i = np.meshgrid(feat_idx, feat_idx)
@@ -315,6 +413,14 @@ def plot3D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
         plot_gaussian_3D(mu, C, num_sigmas, num_pts, **kwargs)

     def plot3D_ellipsoid(self, feat_idx=[0, 1, 2], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 3 dimensions of the Gaussian in 3d.
+
+        Args:
+          feat_idx: feature indices.
+          num_sigmas: size of the plot in number of standard devs.
+          num_pts: number of points in the graph.
+          **kwargs: pyplot options.
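+
+        Example (assuming an initialized model and an active matplotlib
+        figure; the arguments mirror the signature above):
+
+            model.plot3D_ellipsoid(feat_idx=[0, 1, 2], num_sigmas=2)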
+ """ assert self.is_init mu = self.mu[feat_idx] j, i = np.meshgrid(feat_idx, feat_idx) diff --git a/hyperion/np/pdfs/core/normal_diag_cov.py b/hyperion/np/pdfs/core/normal_diag_cov.py index cb21f84c..8a896cd5 100644 --- a/hyperion/np/pdfs/core/normal_diag_cov.py +++ b/hyperion/np/pdfs/core/normal_diag_cov.py @@ -4,12 +4,8 @@ """ import numpy as np -import h5py from scipy.special import erf -# import matplotlib.pyplot as plt -# import matplotlib.mlab as mlab - from ....hyp_defs import float_cpu from ....utils.plotting import ( plot_gaussian_1D, @@ -22,6 +18,17 @@ class NormalDiagCov(ExpFamily): + """Class for Normal distribution with diagonal covariance. + + Attributes: + mu: mean with shape (x_dim,) or None. + Lambda: precision with shape (x_dim, x_dim) or None. + var_floor: variance floor. + update_mu: whether or not update mu when optimizing. + update_Lambda: wether or not update Lambda when optimizing. + x_dim: data dim (infered from mu if present) + """ + def __init__( self, mu=None, @@ -31,7 +38,7 @@ def __init__( update_Lambda=True, **kwargs ): - super(NormalDiagCov, self).__init__(**kwargs) + super().__init__(**kwargs) self.mu = mu self.Lambda = Lambda self.var_floor = var_floor @@ -45,6 +52,7 @@ def __init__( self._Sigma = None def _compute_nat_std(self): + """Comptues natural and standard parameters of the distribution.""" if self.mu is not None and self.Lambda is not None: self._validate_mu() self._validate_Lambda() @@ -56,6 +64,7 @@ def _compute_nat_std(self): @property def logLambda(self): + """log precision determinant.""" if self._logLambda is None: assert self.is_init self._logLambda = np.sum(np.log(self.Lambda)) @@ -63,6 +72,7 @@ def logLambda(self): @property def cholLambda(self): + """Square root of precision.""" if self._cholLambda is None: assert self.is_init self._cholLambda = np.sqrt(self.Lambda) @@ -70,27 +80,43 @@ def cholLambda(self): @property def Sigma(self): + "Variance of the distribution." if self._Sigma is None: assert self.is_init self._Sigma = 1.0 / self.Lambda return self._Sigma def initialize(self): + """Initializes the distribution.""" self.validate() self._compute_nat_std() assert self.is_init def stack_suff_stats(self, F, S=None): + """Stacks F and S suff stats into single vector.""" + if S is None: return F return np.hstack((F, S)) def unstack_suff_stats(self, stats): + """Decomposes suff. stats vector into F and S.""" F = stats[: self.x_dim] S = stats[self.x_dim :] return F, S def norm_suff_stats(self, N, u_x=None, return_order2=False): + """Normalizes accumlated sufficient statistics with the + mean and covariance of the distribution. + + Args: + N: zeroth order sufficient stats. + u_x: 1st and 2nd order stats. + return_order2: whether or not return normalizes 2nd order stats. + + Return: + Normalized N, F or N, [F, S]. + """ assert self.is_init F, S = self.unstack_suff_stats(u_x) F_norm = self.cholLambda * (F - N * self.mu) @@ -101,7 +127,13 @@ def norm_suff_stats(self, N, u_x=None, return_order2=False): return N, F_norm def Mstep(self, N, u_x): + """Maximization step. + Args: + N: zeroth order stats. + u_x: accumlated higher order stats. + + """ F, S = self.unstack_suff_stats(u_x) if self.update_mu: @@ -118,6 +150,15 @@ def Mstep(self, N, u_x): self._compute_nat_params() def log_prob_std(self, x): + """log p(x) of each data sample computed using the + standard parameters of the distribution. + + Args: + x: input data with shape (num_samples, x_dim). 
+ + Returns: + log p(x) with shape (num_samples,) + """ assert self.is_init mah_dist2 = np.sum(((x - self.mu) * self.cholLambda) ** 2, axis=1) return ( @@ -127,12 +168,23 @@ def log_prob_std(self, x): ) def log_cdf(self, x): + """Log cumulative distribution function.""" assert self.is_init delta = (x - self.mu) * self.cholLambda lk = 0.5 * (1 + erf(delta / np.sqrt(2))) return np.sum(np.log(lk + 1e-10), axis=-1) def sample(self, num_samples, rng=None, seed=1024): + """Draws samples from the data distribution. + + Args: + num_samples: number of samples. + rng: random number generator. + seed: random seed used if rng is None. + + Returns: + Generated samples with shape (num_samples, x_dim). + """ assert self.is_init if rng is None: rng = np.random.RandomState(seed) @@ -140,6 +192,7 @@ def sample(self, num_samples, rng=None, seed=1024): return self.mu + 1.0 / self.cholLambda * x def get_config(self): + """Returns the model configuration dict.""" config = { "var_floor": self.var_floor, "update_mu": self.update_mu, @@ -149,14 +202,29 @@ def get_config(self): return dict(list(base_config.items()) + list(config.items())) def save_params(self, f): + """Saves the model paramters into the file. + + Args: + f: file handle. + """ assert self.is_init params = {"mu": self.mu, "Lambda": self.Lambda} self._save_params_from_dict(f, params) @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. + config: configuration dictionary. + + Returns: + Model object. + """ param_list = ["mu", "Lambda"] - params = self._load_params_to_dict(f, config["name"], param_list) + params = cls._load_params_to_dict(f, config["name"], param_list) return cls( x_dim=config["x_dim"], mu=params["mu"], @@ -178,6 +246,7 @@ def _validate_eta(self): assert self.eta.shape[0] == self.x_dim * 2 def validate(self): + """Validates the parameters of the distribution.""" if self.mu is not None and self.Lambda is not None: self._validate_mu() self._validate_Lambda() @@ -187,12 +256,14 @@ def validate(self): @staticmethod def compute_eta(mu, Lambda): + """Computes nat param. from mean and precision.""" Lmu = Lambda * mu eta = np.hstack((Lmu, -0.5 * Lambda)) return eta @staticmethod def compute_std(eta): + """Computes standard params. from the natural param.""" x_dim = int(eta.shape[0] / 2) eta1 = eta[:x_dim] eta2 = eta[x_dim:] @@ -202,6 +273,7 @@ def compute_std(eta): @staticmethod def compute_A_nat(eta): + """Computes A from the natural param.""" x_dim = int(eta.shape[0] / 2) eta1 = eta[:x_dim] eta2 = eta[x_dim:] @@ -212,6 +284,7 @@ def compute_A_nat(eta): @staticmethod def compute_A_std(mu, Lambda): + """Computes A from the standard params.""" x_dim = mu.shape[0] r1 = 0.5 * x_dim * np.log(2 * np.pi) r2 = -0.5 * np.sum(np.log(Lambda)) @@ -234,6 +307,14 @@ def _compute_std_params(self): @staticmethod def compute_suff_stats(x): + """Computes the sufficient stats. for each sample. + + Args: + x: data samples with shape (num_samples, x_dim). + + Returns: + Sufficient stats. for each data sample with shape (num_samples, u_dim). + """ d = x.shape[1] u = np.zeros((x.shape[0], 2 * d), dtype=float_cpu()) u[:, :d] = x @@ -241,21 +322,53 @@ def compute_suff_stats(x): return u def plot1D(self, feat_idx=0, num_sigmas=2, num_pts=100, **kwargs): + """Plots one slice of the Gaussian in 1d. + + Args: + feat_idx: feature index. + num_sigmas: size of the plot in number of standard devs. + num_pts: number of points in the graph. 
+ **kwargs: pyplot options. + """ mu = self.mu[feat_idx] C = 1 / self.Lambda[feat_idx] plot_gaussian_1D(mu, C, num_sigmas, num_pts, **kwargs) def plot2D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs): + """Plots 2 dimensions of the Gaussian in 2d. + + Args: + feat_idx: feature indeces. + num_sigmas: size of the plot in number of standard devs. + num_pts: number of points in the graph. + **kwargs: pyplot options. + """ mu = self.mu[feat_idx] C = np.diag(1.0 / self.Lambda[feat_idx]) plot_gaussian_ellipsoid_2D(mu, C, num_sigmas, num_pts, **kwargs) def plot3D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs): + """Plots 2 dimensions of the Gaussian in 3d. + + Args: + feat_idx: feature indeces. + num_sigmas: size of the plot in number of standard devs. + num_pts: number of points in the graph. + **kwargs: pyplot options. + """ mu = self.mu[feat_idx] C = np.diag(1.0 / self.Lambda[feat_idx]) plot_gaussian_3D(mu, C, num_sigmas, num_pts, **kwargs) def plot3D_ellipsoid(self, feat_idx=[0, 1, 2], num_sigmas=2, num_pts=100, **kwargs): + """Plots 3 dimensions of the Gaussian in 3d. + + Args: + feat_idx: feature indeces. + num_sigmas: size of the plot in number of standard devs. + num_pts: number of points in the graph. + **kwargs: pyplot options. + """ mu = self.mu[feat_idx] C = np.diag(1.0 / self.Lambda[feat_idx]) plot_gaussian_ellipsoid_3D(mu, C, num_sigmas, num_pts, **kwargs) diff --git a/hyperion/np/pdfs/core/pdf.py b/hyperion/np/pdfs/core/pdf.py index acd26105..82f4330d 100644 --- a/hyperion/np/pdfs/core/pdf.py +++ b/hyperion/np/pdfs/core/pdf.py @@ -5,32 +5,44 @@ import numpy as np -from abc import ABCMeta, abstractmethod from ...np_model import NPModel class PDF(NPModel): - __metaclass__ = ABCMeta + """Base class for probability density functions. + + Attributes: + x_dim: data dimension. + """ def __init__(self, x_dim=1, **kwargs): - super(PDF, self).__init__(**kwargs) + super().__init__(**kwargs) self.x_dim = x_dim def get_config(self): + """Returns the model configuration dict.""" config = {"x_dim": self.x_dim} - base_config = super(PDF, self).get_config() + base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) - @abstractmethod def log_prob(self, x): - pass + """Computes log probability of the data.""" + raise NotImplementedError() def eval_llk(self, x): + """Computes log likelihood of the data.""" return self.log_prob(x) - @abstractmethod def sample(self, num_samples): - pass + """Draws samples from the data distribution.""" + raise NotImplementedError() def generate(self, num_samples, **kwargs): - return self.generate(num_samples, **kwargs) + """Draws samples from the data distribution. + Args: + num_samples: number of samples to generate. 
+ + Returns: + np.array of generated samples with shape=(num_samples, x_dim) + """ + return self.sample(num_samples, **kwargs) diff --git a/hyperion/np/pdfs/hmm/hmm.py b/hyperion/np/pdfs/hmm/hmm.py index b8497b38..704f0991 100644 --- a/hyperion/np/pdfs/hmm/hmm.py +++ b/hyperion/np/pdfs/hmm/hmm.py @@ -11,18 +11,19 @@ class HMM(PDF): - def __init__(self, **kwargs): - super(HMM, self).__init__( - num_states=1, - pi=None, - trans=None, - trans_mask=None, - update_pi=True, - update_trans=True, - tied_trans=False, - left_to_right=False, - **kwargs - ) + def __init__( + self, + num_states=1, + pi=None, + trans=None, + trans_mask=None, + update_pi=True, + update_trans=True, + tied_trans=False, + left_to_right=False, + **kwargs + ): + super().__init__(**kwargs) if pi is not None: num_states = len(pi) @@ -36,13 +37,13 @@ def __init__(self, **kwargs): self.tied_trans = tied_trans self.left_to_right = left_to_right - if left_to_rigth and (trans_mask is None): + if left_to_right and (trans_mask is None): self.trans_mask = np.triu(np.ones_like(self.trans)) self._log_pi = None self._log_trans = None - def reset_aux(): + def reset_aux(self): self._log_pi = None self._log_trans = None @@ -132,11 +133,11 @@ def compute_pz(self, x, return_Nzz=False, return_log_px=False): pz = softmax(log_alpha + log_beta, axis=-1) - if not (return_Nzz or return_elbo or return_log_px): + if not (return_Nzz or return_log_px): return pz r = [pz] - if return_pzz_acc: + if return_Nzz: x_e = np.expand_dims(axis=1) log_alpha_e = np.expand_dims(axis=-1) log_beta_e = np.expand_dims(axis=1) @@ -169,7 +170,7 @@ def Estep(self, x, stats_0=None): pz, Nzz = self.compute_pz(x, return_Nzz=True) Nz += pz[0] - Nzz += pzz + Nzz += Nzz stats = (Nz, Nzz) return pz, stats @@ -238,7 +239,7 @@ def sample(self, num_seqs, num_steps, rng=None, seed=1024): for t in range(1, num_steps): for k in range(self.num_states): index = x[:, t - 1, k] == 1 - n_k = num.sum(index) + n_k = np.sum(index) if n_k == 0: continue x[index] = rng.multinomial(1, self.trans[k], size=(n_k,)) diff --git a/hyperion/np/pdfs/jfa/jfa_total.py b/hyperion/np/pdfs/jfa/jfa_total.py index 4a11b5cf..993da9d6 100644 --- a/hyperion/np/pdfs/jfa/jfa_total.py +++ b/hyperion/np/pdfs/jfa/jfa_total.py @@ -4,7 +4,7 @@ """ import numpy as np -from scipy import linalg as sla +from scipy import linalg as la from ....hyp_defs import float_cpu from ....utils.math import ( @@ -18,8 +18,17 @@ class JFATotal(PDF): + """Class for joint factor analysis with total variability matrix (i-vectors). + Args: + K: number of gaussian components. + y_dim: dimension of total variability sub-space. + T: Total variability matrix with shape (y_dim, K * x_dim). + x_dim: data dimension. + + """ + def __init__(self, K, y_dim=None, T=None, **kwargs): - super(JFATotal, self).__init__(**kwargs) + super().__init__(**kwargs) if T is not None: y_dim = T.shape[0] @@ -32,10 +41,12 @@ def __init__(self, K, y_dim=None, T=None, **kwargs): self.__upptr = None def reset_aux(self): + """Resets auxiliary variables.""" self._TT = None @property - def is_init(): + def is_init(self): + """Returns True if the model has been initialized.""" if self._is_init: return True if self.T is not None: @@ -43,15 +54,36 @@ def is_init(): return self._is_init def initialize(self, N, F): - assert N.shape[0] == self.K + """Initializes the model. + Args: + N: zero order statistics (num_utterances, K). 
+          F: first order statistics (num_utterances, K * x_dim)
+        """
+        assert N.shape[1] == self.K
         self.T = np.random.randn(self.y_dim, F.shape[1]).astype(float_cpu(), copy=False)

     def compute_py_g_x(
         self, N, F, G=None, return_cov=False, return_elbo=False, return_acc=False
     ):
+        """Computes the latent posterior P(Y|X).
+
+        Args:
+          N: zero order statistics (num_utterances, K).
+          F: first order statistics (num_utterances, K * x_dim).
+          G: logP(x| UBM, Z) to add to elbo (optional).
+          return_cov: whether or not to return the covariance of the posterior.
+          return_elbo: whether or not to return the ELBO.
+          return_acc: whether or not to return accumulated stats for EM algorithm.
+
+        Returns:
+          y: latent mean (i-vector).
+          Posterior covariances.
+          ELBO
+          Ry accumulator for ML step with shape (K, y_dim (y_dim+1)/2)
+          Py accumulator for MD step with shape (y_dim, y_dim)
+        """
         assert self.is_init

-        x_dim = int(F.shape[1] / self.K)
         M = F.shape[0]
         y_dim = self.y_dim
@@ -63,7 +95,7 @@ def compute_py_g_x(
         y = np.zeros((M, y_dim), dtype=float_cpu())

         if return_cov:
-            Sy = np.zeros((M, y_dim * (y_dim + 1) / 2), dtype=float_cpu())
+            Sy = np.zeros((M, int(y_dim * (y_dim + 1) // 2)), dtype=float_cpu())
         else:
             Sy = None

@@ -72,7 +104,7 @@ def compute_py_g_x(

         if return_acc:
             Py = np.zeros((y_dim, y_dim), dtype=float_cpu())
-            Ry = np.zeros((self.K, y_dim * (y_dim + 1) / 2), dtype=float_cpu())
+            Ry = np.zeros((self.K, int(y_dim * (y_dim + 1) // 2)), dtype=float_cpu())

         Li = np.zeros((self.y_dim, self.y_dim), dtype=float_cpu())
         for i in range(N.shape[0]):
@@ -94,7 +126,7 @@ def compute_py_g_x(
             if return_acc:
                 iL += np.outer(y[i], y[i])
                 Py += iL
-                Ry += iL[self.__uppr] * N[i][:, None]
+                Ry += iL[self.__upptr] * N[i][:, None]

         if not return_tuple:
             return y
@@ -107,7 +139,7 @@ def compute_py_g_x(
         if return_elbo:
             if G is not None:
                 elbo += G
-            elbo += 0.5 * np.sum(VF * y, axis=-1)
+            elbo += 0.5 * np.sum(TF * y, axis=-1)
             r += [elbo]

         if return_acc:
@@ -116,14 +148,24 @@ def compute_py_g_x(
         return tuple(r)

     def Estep(self, N, F, G=None):
+        """Computes the latent posterior P(Y|X).
+
+        Args:
+          N: zero order statistics (num_utterances, K).
+          F: first order statistics (num_utterances, K * x_dim).
+          G: logP(x| UBM, Z) to add to elbo (optional).
+
+        Returns:
+          Tuple with stats needed by the maximization step:
+          ELBO, number of utterances, accumulated y, Ry, Cy, Py
+        """
         y, elbo, Ry, Py = self.compute_py_g_x(
             N, F, G, return_elbo=True, return_acc=True
         )
         M = y.shape[0]
         y_acc = np.sum(y, axis=0)
-        Cy = np.dot(F, y)
+        Cy = np.dot(F.T, y)

         elbo = np.sum(elbo)

@@ -131,10 +173,15 @@ def Estep(self, N, F, G=None):
         return stats

     def MstepML(self, stats):
+        """Maximum likelihood step.
+
+        Args:
+          stats: tuple with statistics produced by the estimation step.
+        """
         _, M, y_acc, Ry, Cy, _ = stats
         T = np.zeros_like(self.T)
         Ryk = np.zeros((self.y_dim, self.y_dim), dtype=float_cpu())
-        x_dim = T.shape[1] / self.K
+        x_dim = T.shape[1] // self.K
         for k in range(self.K):
             idx = k * x_dim
             Ryk[self._upptr] = Ry[k]
@@ -145,9 +192,14 @@ def MstepML(self, stats):
         self.reset_aux()

     def MstepMD(self, stats):
+        """Minimum divergence step.
+
+        Args:
+          stats: tuple with statistics produced by the estimation step.
+        """
         _, M, y_acc, Ry, Cy, Py = stats
         mu_y = y_acc / M
-        Cy = Py / M - np.outer(my_y, mu_y)
+        Cy = Py / M - np.outer(mu_y, mu_y)
         chol_Cy = la.cholesky(Cy, lower=False, overwrite_a=True)
         self.T = np.dot(chol_Cy, self.T)

@@ -160,10 +212,30 @@ def fit(
         G=None,
         N_val=None,
         F_val=None,
+        G_val=None,
         epochs=20,
         ml_md="ml+md",
         md_epochs=None,
     ):
+        """Trains the model.
+
+        Args:
+          N: zero order sufficient statistics for training data with shape (num_utterances, K).
+          F: first order sufficient statistics for training data with shape (num_utterances, K*x_dim).
+          G: logP(x| UBM, Z) for training data to add to elbo (optional).
+          N_val: zero order sufficient statistics for val data with shape (num_utterances, K).
+          F_val: first order sufficient statistics for val data with shape (num_utterances, K*x_dim).
+          G_val: logP(x| UBM, Z) for val data to add to elbo (optional).
+          epochs: number of EM steps.
+          ml_md: whether to do maximum likelihood estimation ("ml"), minimum divergence ("md") or both ("ml+md").
+          md_epochs: in which epochs to do MD estimation, if None, MD is done in all epochs.
+
+        Returns:
+          log p(X) of the training data.
+          log p(x) per sample.
+          log p(X) of the val. data, if present.
+          log p(x) of the val. data per sample, if present.
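+
+        Example (illustrative sketch; ``N`` and ``F`` are assumed to be
+        sufficient statistics accumulated with an external UBM/GMM, and the
+        subspace dimension is arbitrary):
+
+            tv = JFATotal(K=N.shape[1], y_dim=400)
+            tv.initialize(N, F)
+            elbo, elbo_norm = tv.fit(N, F, epochs=20)
+            y = tv.compute_py_g_x(N, F)  # i-vectors, shape (num_utterances, y_dim)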
+ + Args: + N: zero order sufficient statistics for training data with shape (num_utterances, K). + F: first order sufficient statistics for training data with shape (num_utterances, K*x_dim). + G: logP(x| UBM, Z) for training data to add to elbo (optional). + N_val: zero order sufficient statistics for val data with shape (num_utterances, K). + F_val: first order sufficient statistics for val data with shape (num_utterances, K*x_dim). + G_val: logP(x| UBM, Z) for val data to add to elbo (optional). + epochs: number of EM steps. + ml_md: whether to do maximum likelihood estimation ("ml"), minimum divergence ("md") or both ("ml+md"). + md_epochs: in which epochs to do MD estimation, if None, MD is done in all epochs. + + Returns: + log p(X) of the training data. + log p(x) per sample. + log p(X) of the val. data, if present. + log p(x) of the val. data per sample, if present. + """ use_ml = False if ml_md == "md" else True use_md = False if ml_md == "ml" else True @@ -178,7 +250,7 @@ def fit( stats = self.Estep(N, F, G) elbo[epoch] = stats[0] if N_val is not None and F_val is not None: - _, elbo_val_e = self.compute_py_x(N, F, G, return_elbo=True) + _, elbo_val_e = self.compute_py_x(N_val, F_val, G_val, return_elbo=True) elbo_val[epoch] = np.sum(elbo_val_e) if use_ml: @@ -187,7 +259,7 @@ def fit( self.MstepMD(stats) elbo_norm = elbo / np.sum(N) - if x_val is None: + if N_val is None: return elbo, elbo_norm else: elbo_val_norm = elbo_val / np.sum(N_val) @@ -195,38 +267,72 @@ def fit( @property def TT(self): + """ + Returns: + Matrices T_k T_k.T for Gaussian component k. + Matrices are vectorized and keep the upper triangular matrix + with shape = (K, y_dim (y_dim-1)/2 ) + """ if self._TT is None: - self._TT = self.compute_TT(self.T, self.K) + self._TT = self.compute_TT(self.T, self.K, self._upptr) return self._TT @property def _upptr(self): + """Upper triangular mask.""" if self.__upptr is None: - I = np.eye(self.y_dim, dtype=float_cpu()) - self.__upptr = np.triu(I).ravel() + self.__upptr = np.triu(np.ones(self.y_dim, dtype=np.bool)) return self.__upptr @staticmethod - def compute_TT(self, T, K, upptr): + def compute_TT(T, K, upptr): + """Computes T_k T_k.T matrices. + + Args: + T: Total variability factor loading matrix. + K: number of Gaussian components. + upptr: upper triangular mask. + + Returns: + Matrices T_k T_k.T for Gaussian component k. + Matrices are vectorized and keep the upper triangular matrix + with shape = (K, y_dim (y_dim-1)/2 ) + """ x_dim = int(T.shape[1] / K) y_dim = T.shape[0] - TT = np.zeros((K, y_dim * (y_dim + 1) / 2), dtype=float_cpu()) + TT = np.zeros((K, int(y_dim * (y_dim + 1) / 2)), dtype=float_cpu()) for k in range(K): idx = k * x_dim T_k = T[:, idx : idx + x_dim] TT_k = np.dot(T_k, T_k.T) - TT[k] = TT_k[self._upptr] + TT[k] = TT_k[upptr] return TT @staticmethod def compute_L(TT, N, upptr): - y_dim = self._upptr.shape[0] - I = np.eye(y_dim, dtype=float_cpu())[self._upptr] + """Computes P(y|x) precision. + + Args: + TT: T_k T_k.T matrices. + N: zero order statistics. + upptr: upper triangular mask. + + Returns: + Posterior precision vectorized to keep just the upper triangular matrix. + """ + y_dim = upptr.shape[0] + I = np.eye(y_dim, dtype=float_cpu())[upptr] return I + np.dot(N, TT) @staticmethod def normalize_T(T, chol_prec): + """Normalizes T by the covariances of the GMM. + + Args: + T: original total variability matrix. + chol_prec: cholesqy decomp. of the precisions of the GMM components. 
+        """
         Tnorm = np.zeros_like(T)
         K = chol_prec.shape[0]
         x_dim = int(T.shape[1] / K)
@@ -239,20 +345,37 @@ def normalize_T(T, chol_prec):
         return Tnorm

     def get_config(self):
+        """Returns the model configuration dict."""
         config = {"K": self.K}
         base_config = super(JFATotal, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))

     def save_params(self, f):
+        """Saves the model parameters into the file.
+
+        Args:
+            f: file handle.
+        """
         params = {"T": self.T}
         self._save_params_from_dict(f, params)

     @classmethod
     def load_params(cls, f, config):
+        """Initializes the model from the configuration and loads the model
+        parameters from the file.
+
+        Args:
+            f: file handle.
+            config: configuration dictionary.
+
+        Returns:
+            Model object.
+        """
         param_list = ["T"]
         params = cls._load_params_to_dict(f, config["name"], param_list)
         kwargs = dict(list(config.items()) + list(params.items()))
         return cls(**kwargs)

     def sample(self, num_samples):
-        pass
+        """Draws samples from the i-vector model."""
+        raise NotImplementedError()
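With the file complete, a hedged end-to-end training sketch. The import path and constructor arguments are assumptions inferred from `get_config` and the attributes used above, not code from this patch:

    import numpy as np
    from hyperion.np.pdfs.jfa import JFATotal  # assumed export path

    K, x_dim = 256, 40
    N = np.random.rand(100, K)             # (num_utterances, K)
    F = np.random.randn(100, K * x_dim)    # (num_utterances, K * x_dim)

    model = JFATotal(y_dim=200, K=K)       # assumed constructor signature
    # fit runs EM, alternating the ML and MD steps defined above.
    elbo, elbo_norm = model.fit(N, F, epochs=10, ml_md="ml+md")
    ivecs = model.compute_py_g_x(N, F)     # extract i-vectors after training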
diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
index 143d7df5..f684e453 100644
--- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py
+++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
@@ -5,7 +5,6 @@
 import numpy as np
 import logging

-from abc import ABCMeta, abstractmethod

 from ....hyp_defs import float_cpu
 from ....utils.math import softmax, logsumexp
@@ -14,7 +13,18 @@

 class ExpFamilyMixture(PDF):
-    __metaclass__ = ABCMeta
+    """Base class for a mixture of exponential family distributions.
+
+    p(x) = \sum_k \pi_k h(x) \exp(\eta_k^T u(x) - A(\eta_k))
+
+    Attributes:
+      num_comp: number of components of the mixture.
+      pi: weights of the components.
+      eta: natural parameters of the distribution.
+      min_N: minimum number of samples for keeping a component.
+      update_pi: whether or not to update the weights when optimizing.
+      x_dim: data dimension.
+    """

     def __init__(
         self, num_comp=1, pi=None, eta=None, min_N=0, update_pi=True, **kwargs
@@ -32,6 +42,7 @@ def __init__(

     @property
     def is_init(self):
+        """Returns True if the model has been initialized."""
         if not self._is_init:
             if self.eta is not None and self.A is not None and self.pi is not None:
                 self.validate()
@@ -40,6 +51,7 @@ def is_init(self):

     @property
     def log_pi(self):
+        """Log weights."""
         if self._log_pi is None:
             self._log_pi = np.log(self.pi + 1e-15)
         return self._log_pi
@@ -56,6 +68,22 @@ def fit(
         epochs=10,
         batch_size=None,
     ):
+        """Trains the model.
+
+        Args:
+            x: train data matrix with shape (num_samples, x_dim).
+            sample_weight: weight of each sample in the training loss with shape (num_samples,).
+            x_val: validation data matrix with shape (num_val_samples, x_dim).
+            sample_weight_val: weight of each sample in the val. loss.
+            epochs: number of EM steps.
+            batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+            log p(X) of the training data.
+            log p(x) per sample.
+            log p(X) of the val. data, if present.
+            log p(x) of the val. data per sample, if present.
+        """
         if not self.is_init:
             self.initialize(x)
@@ -93,18 +121,35 @@ def fit_generator(
         workers=1,
         use_multiprocessing=False,
     ):
-
-        do_validation = bool(validation_data)
-        val_gen = (
-            hasattr(validation_data, "next")
-            or hasattr(validation_data, "__next__")
-            or isinstance(validation_data, Sequence)
-        )
-        if val_gen and not validation_steps:
+        """Trains the model from data read by a generator function.
+        This function is deprecated.
+
+        Args:
+            generator: train data generator function returning a tuple
+                (x, u_x, sample_weight), (x, u_x), (x, sample_weight) or x.
+            train_steps: number of training steps / epoch.
+            epochs: number of epochs.
+            val_data: val. data generator function returning a tuple
+                (x, u_x, sample_weight), (x, u_x), (x, sample_weight) or x.
+            val_steps: number of validation steps / epoch.
+            max_queue_size: max. size of the generator queue.
+            workers: number of workers in the generator.
+            use_multiprocessing: whether to use multi-processing in the generator queue.
+
+        Returns:
+            log p(X) of the training data.
+            log p(x) per sample.
+            log p(X) of the val. data, if present.
+            log p(x) of the val. data per sample, if present.
+        """
+
+        do_validation = bool(val_data)
+        val_gen = hasattr(val_data, "next") or hasattr(val_data, "__next__")
+        if val_gen and not val_steps:
             raise ValueError(
                 "When using a generator for validation data, "
                 "you must specify a value for "
-                "`validation_steps`."
+                "`val_steps`."
             )

         if do_validation and not val_gen:
@@ -129,7 +174,7 @@ def fit_generator(
             if val_data is not None:
                 if val_gen:
                     N, u_x, log_h_val = self.Estep_generator(
-                        generator,
-                        train_steps,
+                        val_data,
+                        val_steps,
                         return_log_h=True,
                         max_queue_size=max_queue_size,
@@ -137,52 +182,100 @@ def fit_generator(
                         use_multiprocessing=use_multiprocessing,
                     )
                 else:
-                    N, u_x = self.Estep(x_val, u_x_val, sample_weight_val)
+                    N, u_x = self.Estep(val_data, u_x_val, sample_weight_val)
                 elbo_val[epoch] = self.elbo(None, N=N, u_x=u_x, log_h=log_h_val)

-        if x_val is None:
+        if val_data is None:
             return elbo, elbo / x.shape[0]
         else:
             return elbo, elbo / x.shape[0], elbo_val, elbo_val / x.shape[0]

     def log_h(self, x):
+        """Computes log h(x) of the exp. family."""
         return 0

     def accum_log_h(self, x, sample_weight=None):
+        """Accumulates log h(x) over the data samples."""
         if sample_weight is None:
             return np.sum(self.log_h(x))
         return np.sum(sample_weight * self.log_h(x))

-    def compute_log_pz(self, x, u_x=None, mode="nat"):
-        if u_x is None:
-            u_x = self.compute_suff_stats(x)
-        return np.dot(u_x, self.eta.T) - self.A + self.log_pi
-
     def compute_pz(self, x, u_x=None, mode="nat"):
+        """Computes the responsibilities p(z|x).
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+            u_x: precomputed sufficient stats with shape (num_samples, u_dim).
+            mode: whether to use the natural ("nat") or standard ("std") parameters.
+
+        Returns:
+            p(z|x) with shape (num_samples, num_comp).
+        """
         if mode == "nat":
             return self.compute_pz_nat(x, u_x)
         else:
             return self.compute_pz_std(x)

     def compute_pz_nat(self, x, u_x=None):
+        """Computes p(z|x) using the natural parameters of the distribution.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+            u_x: precomputed sufficient stats with shape (num_samples, u_dim).
+
+        Returns:
+            p(z|x) with shape (num_samples, num_comp).
+        """
         if u_x is None:
             u_x = self.compute_suff_stats(x)
         logr = np.dot(u_x, self.eta.T) - self.A + self.log_pi
         return softmax(logr)

     def compute_pz_std(self, x):
+        """Computes p(z|x) using the standard parameters of the distribution.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+
+        Returns:
+            p(z|x) with shape (num_samples, num_comp).
+        """
         return self.compute_pz_nat(x)

     def compute_suff_stats(self, x):
+        """Computes the sufficient stats for each data sample."""
         return x
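`compute_pz_nat` above is the standard softmax over per-component log posteriors. A standalone numpy check of the same computation (a sketch, independent of hyperion's own `softmax` helper):

    import numpy as np

    def softmax(logr):
        logr = logr - logr.max(axis=-1, keepdims=True)  # numerical stability
        r = np.exp(logr)
        return r / r.sum(axis=-1, keepdims=True)

    u_x = np.random.randn(5, 8)        # sufficient stats (num_samples, u_dim)
    eta = np.random.randn(3, 8)        # natural params (num_comp, u_dim)
    A = np.random.rand(3)              # log-normalizers
    log_pi = np.log(np.ones(3) / 3)    # uniform weights
    pz = softmax(np.dot(u_x, eta.T) - A + log_pi)
    assert np.allclose(pz.sum(axis=-1), 1.0)  # responsibilities sum to one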
     def accum_suff_stats(self, x, u_x=None, sample_weight=None, batch_size=None):
+        """Accumulates sufficient statistics over several data samples.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            u_x: sufficient stats for x with shape (num_samples, u(x)_dim) (optional).
+            sample_weight: weight of each sample in the accumulation.
+            batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+            N: zero order sufficient statistics (number of samples).
+            Accumulated sufficient statistics \sum u(x).
+        """
         if u_x is not None or batch_size is None:
             return self._accum_suff_stats_1batch(x, u_x, sample_weight)
         else:
             return self._accum_suff_stats_nbatches(x, sample_weight, batch_size)

     def _accum_suff_stats_1batch(self, x, u_x=None, sample_weight=None):
+        """Accumulates sufficient statistics over several data samples in a single batch.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            u_x: sufficient stats for x with shape (num_samples, u(x)_dim) (optional).
+            sample_weight: weight of each sample in the accumulation.
+
+        Returns:
+            N: zero order sufficient statistics (number of samples).
+            Accumulated sufficient statistics \sum u(x).
+        """
         if u_x is None:
             u_x = self.compute_suff_stats(x)
         z = self.compute_pz_nat(x, u_x)
@@ -195,6 +288,18 @@ def _accum_suff_stats_1batch(self, x, u_x=None, sample_weight=None):
         return N, acc_u_x

     def _accum_suff_stats_nbatches(self, x, sample_weight, batch_size):
+        """Accumulates sufficient statistics over several data samples in multiple batches.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            sample_weight: weight of each sample in the accumulation.
+            batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+            N: zero order sufficient statistics (number of samples).
+            Accumulated sufficient statistics \sum u(x).
+        """
         sw_i = None
         for i1 in range(0, x.shape[0], batch_size):
             i2 = np.minimum(i1 + batch_size, x.shape[0])
@@ -213,6 +318,19 @@ def _accum_suff_stats_nbatches(self, x, sample_weight, batch_size):

     def accum_suff_stats_segments(
         self, x, segments, u_x=None, sample_weight=None, batch_size=None
     ):
+        """Accumulates sufficient statistics for each segment in an utterance.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            segments: segment t_start and t_end with shape (num_segments, 2).
+            u_x: sufficient stats for x with shape (num_samples, u(x)_dim) (optional).
+            sample_weight: weight of each sample in the accumulation.
+            batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+            N: zero order sufficient statistics (number of samples).
+            Accumulated sufficient statistics \sum u(x).
+        """
         K = self.num_comp
         num_segments = len(segments)
         N = np.zeros((num_segments, K), dtype=float_cpu())
@@ -238,6 +356,21 @@ def accum_suff_stats_segments(
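Batching matters here because u(x) can be large (for the full-covariance GMM below, u_dim = d + d(d+1)/2). The batched path should match the single-batch path; a sketch, assuming `gmm` is an initialized mixture such as the GMM class below:

    import numpy as np

    x = np.random.randn(10000, 40).astype("float32")
    N1, u1 = gmm.accum_suff_stats(x)                   # single pass over all frames
    N2, u2 = gmm.accum_suff_stats(x, batch_size=1000)  # 10 blocks of 1000 frames
    assert np.allclose(N1, N2) and np.allclose(u1, u2, atol=1e-3)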
     def accum_suff_stats_segments_prob(
         self, x, prob, u_x=None, sample_weight=None, batch_size=None
     ):
+        """Accumulates sufficient statistics for each segment in an utterance.
+        Segments are defined by the probability of each frame belonging to the
+        segment.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            prob: probability of belonging to a segment with shape (num_samples, num_segments).
+            u_x: sufficient stats for x with shape (num_samples, u(x)_dim) (optional).
+            sample_weight: weight of each sample in the accumulation.
+            batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+            N: zero order sufficient statistics (number of samples).
+            Accumulated sufficient statistics \sum u(x).
+        """
         if u_x is not None or batch_size is None:
             return self._accum_suff_stats_segments_prob_1batch(
                 x, prob, u_x, sample_weight
@@ -299,6 +432,20 @@ def accum_suff_stats_sorttime(
         sample_weight=None,
         batch_size=None,
     ):
+        """Accumulates sufficient statistics over a sliding window.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            frame_length: frame length.
+            frame_shift: frame shift.
+            u_x: sufficient stats for x with shape (num_samples, u(x)_dim) (optional).
+            sample_weight: weight of each sample in the accumulation.
+            batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+            N: zero order sufficient statistics (number of samples).
+            Accumulated sufficient statistics \sum u(x).
+        """
         if u_x is not None or batch_size is None:
             return self._accum_suff_stats_sorttime_1batch(
                 x, frame_length, frame_shift, u_x, sample_weight
@@ -352,7 +499,7 @@ def _accum_suff_stats_sorttime_nbatches(
         num_frames = x.shape[0]
         num_segments = int(np.floor((num_frames - frame_length) / frame_shift + 1))
         if num_segments == 1:
-            return self._accum_suff_stats_1batch(self, x, u_x, sample_weight)
+            return self._accum_suff_stats_1batch(x, None, sample_weight)

         num_segments_per_batch = np.floor((num_frames - frame_length) / frame_shift + 1)
         batch_size = int((num_segments_per_batch - 1) * frame_shift + frame_length)
@@ -378,6 +525,18 @@ def _accum_suff_stats_sorttime_nbatches(
         return N, acc_u_x

     def Estep(self, x, u_x=None, sample_weight=None, batch_size=None):
+        """Expectation step: accumulates the sufficient statistics.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            u_x: sufficient stats for x with shape (num_samples, u(x)_dim) (optional).
+            sample_weight: weight of each sample in the accumulation.
+            batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+            N: zero order sufficient statistics (number of samples).
+            Accumulated sufficient statistics \sum u(x).
+        """
         return self.accum_suff_stats(x, u_x, sample_weight, batch_size)

     def Estep_generator(
         self,
         generator,
         num_steps,
         return_log_h,
         max_queue_size=10,
         workers=1,
-        use_multiprocessin=False,
+        use_multiprocessing=False,
     ):
+        """Expectation step where the data is read from a generator function.
+
+        Args:
+            generator: data generator function returning a tuple
+                (x, u_x, sample_weight), (x, u_x), (x, sample_weight) or x.
+            num_steps: number of steps / epoch.
+            return_log_h: whether to also return the accumulated log h(x).
+            max_queue_size: max. size of the generator queue.
+            workers: number of workers in the generator.
+            use_multiprocessing: whether to use multi-processing in the generator queue.
+
+        Returns:
+            N: zero order sufficient statistics (number of samples).
+            Accumulated sufficient statistics \sum u(x).
+            Accumulated log h(x) (optional).
+        """
         wait_time = 0.01  # in secs
         queue = None
         N = None
@@ -415,8 +590,8 @@ def Estep_generator(
             N += N_i
             acc_u_x += u_x_i
         finally:
-            if enqueuer is not None:
-                enqueuer.stop()
+            if queue is not None:
+                queue.stop()

         if return_log_h:
             return N, acc_u_x, log_h
@@ -424,19 +599,41 @@ def Estep_generator(
         return N, acc_u_x

     def sum_suff_stats(self, N, u_x):
+        """Sums the sufficient stats coming from multiple sub-processes.
+
+        Args:
+            N: zero order stats with shape (num_proc,).
+            u_x: higher order stats with shape (num_proc, u(x)_dim).
+
+        Returns:
+            Accumulated N and u_x.
+        """
         assert len(N) == len(u_x)
-        acc_N = N[1]
-        acc_u_x = u_x[1]
+        acc_N = N[0]
+        acc_u_x = u_x[0]
         for i in range(1, len(N)):
-            acc_N += N
-            acc_u_x += u[i]
+            acc_N += N[i]
+            acc_u_x += u_x[i]

         return acc_N, acc_u_x

-    @abstractmethod
     def Mstep(self, stats):
+        """Maximization step."""
         pass

     def elbo(self, x, u_x=None, N=1, log_h=None, sample_weight=None, batch_size=None):
+        """Evidence lower bound.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            u_x: accumulated u(x) (optional).
+            N: zero order statistics (optional).
+            log_h: accumulated log h(x) (optional).
+            sample_weight: weight of each sample in the loss function.
+            batch_size: accumulates sufficient statistics in batch_size blocks.
+
+        Returns:
+            log p(X) of the data.
+        """
         if u_x is None:
             N, u_x = self.accum_suff_stats(
                 x, sample_weight=sample_weight, batch_size=batch_size
@@ -446,30 +643,84 @@ def elbo(self, x, u_x=None, N=1, log_h=None, sample_weight=None, batch_size=None
         return log_h + np.sum(u_x * self.eta) + np.inner(N, self.log_pi - self.A)

     def log_prob(self, x, u_x=None, mode="nat"):
+        """log p(x) of each data sample.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+            u_x: sufficient stats u(x) with shape (num_samples, u_dim).
+            mode: whether the probability is computed with the standard ("std")
+                or the natural ("nat") parameters.
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
         if mode == "nat":
             return self.log_prob_nat(x, u_x)
         else:
             return self.log_prob_std(x)

     def log_prob_nat(self, x, u_x=None):
+        """log p(x) of each data sample computed using the
+        natural parameters of the distribution.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+            u_x: sufficient stats u(x) with shape (num_samples, u_dim).
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
         if u_x is None:
             u_x = self.compute_suff_stats(x)
         llk_k = np.dot(u_x, self.eta.T) - self.A + self.log_pi
         llk = logsumexp(llk_k)
         return self.log_h(x) + llk

-    @abstractmethod
     def log_prob_std(self, x):
-        pass
+        """log p(x) of each data sample computed using the
+        standard parameters of the distribution.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
+        raise NotImplementedError()

-    def log_prob_nbest(self, x, u_x=None, mode="nat", nbest_mode="master", nbest=1):
+    def log_prob_nbest(self, x, u_x=None, mode="nat", nbest_mode="ubm", nbest=1):
+        """log p(x) of each data sample computed using only the N best components.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+            u_x: sufficient stats u(x) with shape (num_samples, u_dim).
+            mode: whether the probability is computed with the standard ("std")
+                or the natural ("nat") parameters.
+            nbest_mode: if "ubm", it selects the best components.
+            nbest: number of best components, or selected components.
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
         if mode == "nat":
             return self.log_prob_nbest_nat(x, u_x, nbest_mode=nbest_mode, nbest=nbest)
         else:
-            return self.log_prob_std(x, nbest_mode=nbest_mode, nbest=nbest)
+            return self.log_prob_nbest_std(x, nbest_mode=nbest_mode, nbest=nbest)
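`sum_suff_stats` above is the reduction step for a sharded E-step. A sketch of the intended pattern, assuming `gmm` is an initialized mixture; the sharding is illustrative only:

    import numpy as np

    x = np.random.randn(8000, 20).astype("float32")
    shards = np.array_split(x, 4)                  # e.g. one shard per worker
    stats = [gmm.Estep(x_i) for x_i in shards]     # per-shard (N, u_x) pairs
    N, u_x = gmm.sum_suff_stats([s[0] for s in stats], [s[1] for s in stats])
    gmm.Mstep(N, u_x)                              # single maximization step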
     def log_prob_nbest_nat(self, x, u_x=None, nbest_mode="master", nbest=1):
-
+        """log p(x) of each data sample computed using only the N best components
+        and the natural parameters.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+            u_x: sufficient stats u(x) with shape (num_samples, u_dim).
+            nbest_mode: if "ubm", it selects the best components.
+            nbest: number of best components, or selected components.
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
         if u_x is None:
             u_x = self.compute_suff_stats(x)
         if nbest_mode == "master":
@@ -482,11 +733,23 @@ def log_prob_nbest_nat(self, x, u_x=None, nbest_mode="master", nbest=1):
         llk = logsumexp(llk_k)
         return self.log_h(x) + llk

-    @abstractmethod
     def log_prob_nbest_std(self, x, nbest_mode="master", nbest=1):
-        pass
+        """log p(x) of each data sample computed using only the N best components
+        and the standard parameters.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+            nbest_mode: if "ubm", it selects the best components.
+            nbest: number of best components, or selected components.
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
+        raise NotImplementedError()

     def get_config(self):
+        """Returns the model configuration dict."""
         config = {"min_n": self.min_N, "update_pi": self.update_pi}
         base_config = super(ExpFamilyMixture, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -515,24 +778,26 @@ def tuple2data(data):

     @staticmethod
     def compute_A_nat(eta):
+        """Computes A_theta from the natural parameters."""
         raise NotImplementedError()

     @staticmethod
     def compute_A_std(params):
+        """Computes A_theta from the standard parameters."""
         raise NotImplementedError()

     @staticmethod
     def compute_eta(param):
+        """Computes the natural parameters from the standard parameters."""
         raise NotImplementedError()

     @staticmethod
     def compute_std(eta):
+        """Computes the standard parameters from the natural parameters."""
         raise NotImplementedError()

-    @abstractmethod
     def _compute_nat_params(self):
         pass

-    @abstractmethod
     def _compute_std_params(self):
         pass
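The concrete mixtures that follow all fill in the same contract. A minimal skeleton of what a subclass must provide; the placeholder bodies below are illustrative and do not implement a real distribution:

    import numpy as np

    class MyMixture(ExpFamilyMixture):
        def compute_suff_stats(self, x):
            return x                      # u(x); identity as a placeholder

        def log_prob_std(self, x):
            raise NotImplementedError()   # closed-form density goes here

        def Mstep(self, N, u_x):
            self.pi = N / np.sum(N)       # weights from the zero order stats
            # ...update eta / A from the accumulated u_x...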
diff --git a/hyperion/np/pdfs/mixtures/gmm.py b/hyperion/np/pdfs/mixtures/gmm.py
index 391c59ee..4f6d599e 100644
--- a/hyperion/np/pdfs/mixtures/gmm.py
+++ b/hyperion/np/pdfs/mixtures/gmm.py
@@ -32,8 +32,23 @@

 class GMM(ExpFamilyMixture):
+    """Class for GMM with full covariance.
+
+    Attributes:
+      num_comp: number of components of the mixture (inferred from pi).
+      pi: weights of the components.
+      mu: means with shape (num_comp, x_dim) or None.
+      Lambda: precisions with shape (num_comp, x_dim, x_dim) or None.
+      var_floor: variance floor.
+      update_mu: whether or not to update mu when optimizing.
+      update_Lambda: whether or not to update Lambda when optimizing.
+      x_dim: data dimension (inferred from mu if present).
+    """
+
     def __init__(
         self,
+        num_comp=1,
+        pi=None,
         mu=None,
         Lambda=None,
         var_floor=1e-3,
@@ -41,7 +56,10 @@ def __init__(
         update_Lambda=True,
         **kwargs
     ):
-        super().__init__(**kwargs)
+        if mu is not None:
+            assert mu.ndim == 2
+            kwargs["x_dim"] = mu.shape[1]
+        super().__init__(num_comp=num_comp, pi=pi, **kwargs)
         self.mu = mu
         self.Lambda = Lambda
         self.var_floor = var_floor
@@ -55,6 +73,7 @@ def __init__(
         self._Sigma = None

     def _compute_gmm_nat_std(self):
+        """Computes the natural and standard parameters of the distribution."""
         if self.mu is not None and self.Lambda is not None:
             self._validate_mu()
             self._validate_Lambda()
@@ -65,6 +84,7 @@ def _compute_gmm_nat_std(self):
             self._compute_std_params()

     def compute_Lambda_aux(self):
+        """Computes auxiliary variables derived from the precisions."""
         self._logLambda = np.zeros((self.num_comp,), dtype=float_cpu())
         self._cholLambda = np.zeros(
             (self.num_comp, self.x_dim, self.x_dim), dtype=float_cpu()
@@ -76,18 +96,21 @@ def compute_Lambda_aux(self):

     @property
     def logLambda(self):
+        """log determinants of the precisions."""
         if self._logLambda is None:
             self.compute_Lambda_aux()
         return self._logLambda

     @property
     def cholLambda(self):
+        """Cholesky decompositions of the precisions."""
         if self._cholLambda is None:
             self.compute_Lambda_aux()
         return self._cholLambda

     @property
     def Sigma(self):
+        """Covariances."""
         if self._Sigma is None:
             self._Sigma = np.zeros(
                 (self.num_comp, self.x_dim, self.x_dim), dtype=float_cpu()
@@ -97,6 +120,7 @@ def Sigma(self):
         return self._Sigma

     def initialize(self, x=None):
+        """Initializes the distribution."""
         if x is None and self.mu is None and self.eta is None:
             assert self.num_comp == 1
             self._initialize_stdnormal()
@@ -106,12 +130,19 @@ def initialize(self, x=None):
         self._compute_gmm_nat_std()

     def _initialize_stdnormal(self):
+        """Initializes a single component GMM with a standard Normal."""
         self.pi = np.array([1], dtype=float_cpu())
         self.mu = np.zeros((1, self.x_dim), dtype=float_cpu())
         self.Lambda = np.zeros((1, self.x_dim, self.x_dim), dtype=float_cpu())
         self.Lambda[0] = np.eye(self.x_dim, dtype=float_cpu())

     def _initialize_kmeans(self, num_comp, x):
+        """Initializes the GMM with k-means.
+
+        Args:
+            num_comp: number of components.
+            x: initialization data with shape (num_samples, x_dim).
+        """
         if num_comp == 1:
             self.pi = np.array([1], dtype=float_cpu())
             self.mu = np.mean(x, axis=0, keepdims=True)
@@ -138,22 +169,35 @@ def _initialize_kmeans(self, num_comp, x):
             self.Lambda[k] = invert_pdmat(S, return_inv=True)[-1]

     def stack_suff_stats(self, F, S=None):
+        """Stacks the F and S sufficient stats into a single vector."""
         if S is None:
             return F
         return np.hstack((F, S))

     def unstack_suff_stats(self, stats):
+        """Decomposes the sufficient stats vector into F and S."""
         F = stats[:, : self.x_dim]
         S = stats[:, self.x_dim :]
         return F, S

     def norm_suff_stats(self, N, u_x, return_order2=False):
+        """Normalizes the accumulated sufficient statistics with the
+        mean and covariance of the distribution.
+
+        Args:
+            N: zero order sufficient stats.
+            u_x: 1st and 2nd order stats.
+            return_order2: whether or not to return the normalized 2nd order stats.
+
+        Returns:
+            Normalized N, F or N, [F, S].
+        """
         F, S = self.unstack_suff_stats(u_x)
         F_norm = F - N[:, None] * self.mu
         for k in range(self.num_comp):
             F_norm[k] = np.dot(F_norm[k], self.cholLambda[k].T)
             if return_order2:
-                SS = vec2symat(S[k])
-                Fmu = np.outer(self.F[k], self.mu[k])
-                SS = SS - Fmu - Fmu.T + N * np.outer(self.mu[k], self.mu[k])
+                SS = vec2symmat(S[k])
+                Fmu = np.outer(F[k], self.mu[k])
+                SS = SS - Fmu - Fmu.T + N[k] * np.outer(self.mu[k], self.mu[k])
                 SS = np.dot(self.cholLambda[k], np.dot(SS, self.cholLambda[k].T))
@@ -163,7 +207,13 @@ def norm_suff_stats(self, N, u_x, return_order2=False):
         return N, F_norm

     def Mstep(self, N, u_x):
+        """Maximization step.
+
+        Args:
+            N: zero order stats.
+            u_x: accumulated higher order stats.
+        """
         F, S = self.unstack_suff_stats(u_x)

         if self.update_mu:
@@ -187,15 +237,22 @@ def Mstep(self, N, u_x):
         N0 = N < self.min_N
         if np.any(N0):
             N[N0] = 0
-            mu[N0] = 0
-            S[N0] = 1
+            self.mu[N0] = 0
+            self.Lambda[N0] = np.eye(self.x_dim)

         self.pi = N / np.sum(N)
         self._log_pi = None

         self._compute_nat_params()

     def split_comp(self, K=2):
+        """Creates a new GMM with K x num_components components.
+
+        Args:
+            K: multiplier for the number of components.
+
+        Returns:
+            GMM object.
+        """
         num_comp = self.num_comp * K
         pi = np.repeat(self.pi, K) / K
         Lambda = np.repeat(self.Lambda, K, axis=0) * (K ** 2)
@@ -218,6 +275,15 @@ def split_comp(self, K=2):
         return GMM(pi=pi, mu=mu, Lambda=Lambda, **config)

     def log_prob_std(self, x):
+        """log p(x) of each data sample computed using the
+        standard parameters of the distribution.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
         r0 = self.log_pi + 0.5 * self.logLambda - 0.5 * self.x_dim * np.log(2 * np.pi)
         llk_k = np.zeros((x.shape[0], self.num_comp), dtype=float_cpu())
         for k in range(self.num_comp):
@@ -226,11 +292,25 @@ def log_prob_std(self, x):

         return logsumexp(llk_k, axis=-1)

-    def sample(self, num_samples, rng=None, seed=1024):
+    def sample(self, num_samples, rng=None, seed=1024, r=None):
+        """Draws samples from the data distribution.
+
+        Args:
+            num_samples: number of samples.
+            rng: random number generator.
+            seed: random seed used if rng is None.
+            r: precomputed component assignments with shape (num_samples, num_comp) (optional).
+
+        Returns:
+            Generated samples with shape (num_samples, x_dim).
+        """
         if rng is None:
             rng = np.random.RandomState(seed)

-        r = rng.multinomial(1, self.pi, size=(num_samples,))
+        if r is None:
+            r = rng.multinomial(1, self.pi, size=(num_samples,))
+        else:
+            num_samples = len(r)
+
         x = np.zeros((num_samples, self.x_dim), dtype=float_cpu())
         for k in range(self.num_comp):
             index = r[:, k] == 1
@@ -244,6 +324,7 @@ def sample(self, num_samples, rng=None, seed=1024, r=None):
         return x

     def get_config(self):
+        """Returns the model configuration dict."""
         config = {
             "var_floor": self.var_floor,
             "update_mu": self.update_mu,
@@ -253,11 +334,26 @@ def get_config(self):
         return dict(list(base_config.items()) + list(config.items()))

     def save_params(self, f):
+        """Saves the model parameters into the file.
+
+        Args:
+            f: file handle.
+        """
         params = {"pi": self.pi, "mu": self.mu, "Lambda": self.Lambda}
         self._save_params_from_dict(f, params)

     @classmethod
     def load_params(cls, f, config):
+        """Initializes the model from the configuration and loads the model
+        parameters from the file.
+
+        Args:
+            f: file handle.
+            config: configuration dictionary.
+
+        Returns:
+            Model object.
+        """
         param_list = ["pi", "mu", "Lambda"]
         params = cls._load_params_to_dict(f, config["name"], param_list)
         return cls(
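An end-to-end usage sketch for the class above; the import path is an assumption based on the new package layout, and the constructor call mirrors the documented attributes:

    import numpy as np
    from hyperion.np.pdfs.mixtures import GMM  # assumed export path

    x = np.random.randn(5000, 10).astype("float32")
    gmm = GMM(num_comp=8)            # k-means init happens inside initialize/fit
    elbo, elbo_norm = gmm.fit(x, epochs=20)
    llk = gmm.log_prob(x)            # per-frame log-likelihoods, shape (5000,)
    x_gen = gmm.sample(100)          # draw new samples from the model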
@@ -275,6 +371,14 @@ def load_params(cls, f, config):

     @classmethod
     def load_from_kaldi(cls, file_path):
+        """Loads a GMM from a Kaldi file.
+
+        Args:
+            file_path: Kaldi file path.
+
+        Returns:
+            Model object.
+        """
         pi = None
         eta1 = None
         eta2 = None
@@ -337,6 +441,7 @@ def _validate_eta(self):
         assert self.eta.shape[1] == (self.x_dim ** 2 + 3 * self.x_dim) / 2

     def validate(self):
+        """Validates the parameters of the distribution."""
         if self.pi is not None:
             self._validate_pi()
@@ -349,6 +454,7 @@ def validate(self):

     @staticmethod
     def compute_eta(mu, Lambda):
+        """Computes the natural parameters from the mean and precision."""
         x_dim = mu.shape[-1]
         eta_dim = int((x_dim ** 2 + 3 * x_dim) / 2)
         eta = np.zeros((mu.shape[0], eta_dim), dtype=float_cpu())
@@ -359,6 +465,7 @@ def compute_eta(mu, Lambda):

     @staticmethod
     def compute_std(eta):
+        """Computes the standard parameters from the natural parameters."""
         x_dim = Normal.compute_x_dim_from_eta(eta)
         mu = np.zeros((eta.shape[0], x_dim), dtype=float_cpu())
         Lambda = np.zeros((eta.shape[0], x_dim, x_dim), dtype="float32")
@@ -369,6 +476,7 @@ def compute_std(eta):

     @staticmethod
     def compute_A_nat(eta):
+        """Computes A from the natural parameters."""
         A = np.zeros((eta.shape[0],), dtype=float_cpu())
         for k in range(eta.shape[0]):
             A[k] = Normal.compute_A_nat(eta[k])
@@ -377,6 +485,7 @@ def compute_A_nat(eta):

     @staticmethod
     def compute_A_std(mu, Lambda):
+        """Computes A from the standard parameters."""
         A = np.zeros((mu.shape[0],), dtype=float_cpu())
         for k in range(mu.shape[0]):
             A[k] = Normal.compute_A_std(mu[k], Lambda[k])
@@ -395,6 +504,14 @@ def _compute_std_params(self):

     @staticmethod
     def compute_suff_stats(x):
+        """Computes the sufficient stats for each sample.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+
+        Returns:
+            Sufficient stats for each data sample with shape (num_samples, u_dim).
+        """
         d = x.shape[1]
         u = np.zeros((x.shape[0], int(d + d * (d + 1) / 2)), dtype=float_cpu())
         u[:, :d] = x
@@ -406,12 +523,28 @@ def compute_suff_stats(x):
         return u

     def plot1D(self, feat_idx=0, num_sigmas=2, num_pts=100, **kwargs):
+        """Plots one slice of each GMM component in 1d.
+
+        Args:
+            feat_idx: feature index.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         for k in range(mu.shape[0]):
             C = invert_pdmat(self.Lambda[k], return_inv=True)[-1][feat_idx, feat_idx]
             plot_gaussian_1D(mu[k], C, num_sigmas, num_pts, **kwargs)

     def plot2D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 2 dimensions of each GMM component in 2d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         j, i = np.meshgrid(feat_idx, feat_idx)
         for k in range(mu.shape[0]):
@@ -419,6 +552,14 @@ def plot2D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
             plot_gaussian_ellipsoid_2D(mu[k], C_k, num_sigmas, num_pts, **kwargs)

     def plot3D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 2 dimensions of each GMM component in 3d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         j, i = np.meshgrid(feat_idx, feat_idx)
         for k in range(mu.shape[0]):
@@ -426,6 +567,14 @@ def plot3D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
             plot_gaussian_3D(mu[k], C_k, num_sigmas, num_pts, **kwargs)

     def plot3D_ellipsoid(self, feat_idx=[0, 1, 2], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 3 dimensions of each GMM component in 3d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         j, i = np.meshgrid(feat_idx, feat_idx)
         for k in range(mu.shape[0]):
diff --git a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
index 46a30f81..4a0ba27d 100644
--- a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
+++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
@@ -21,8 +21,23 @@

 class GMMDiagCov(ExpFamilyMixture):
+    """Class for GMM with diagonal covariance.
+
+    Attributes:
+      num_comp: number of components of the mixture (inferred from pi).
+      pi: weights of the components.
+      mu: means with shape (num_comp, x_dim) or None.
+      Lambda: precisions with shape (num_comp, x_dim) or None.
+      var_floor: variance floor.
+      update_mu: whether or not to update mu when optimizing.
+      update_Lambda: whether or not to update Lambda when optimizing.
+      x_dim: data dimension (inferred from mu if present).
+    """
+
     def __init__(
         self,
+        num_comp=1,
+        pi=None,
         mu=None,
         Lambda=None,
         var_floor=1e-3,
@@ -30,7 +45,11 @@ def __init__(
         update_Lambda=True,
         **kwargs
     ):
-        super().__init__(**kwargs)
+        if mu is not None:
+            assert mu.ndim == 2
+            kwargs["x_dim"] = mu.shape[1]
+
+        super().__init__(num_comp=num_comp, pi=pi, **kwargs)
         self.mu = mu
         self.Lambda = Lambda
         self.var_floor = var_floor
@@ -55,23 +74,27 @@ def _compute_gmm_nat_std(self):

     @property
     def logLambda(self):
+        """log determinants of the precisions."""
         if self._logLambda is None:
             self._logLambda = np.sum(np.log(self.Lambda), axis=-1)
         return self._logLambda

     @property
     def cholLambda(self):
+        """Cholesky decomp. (square root) of the precisions."""
         if self._cholLambda is None:
             self._cholLambda = np.sqrt(self.Lambda)
         return self._cholLambda

     @property
     def Sigma(self):
+        """Covariances."""
         if self._Sigma is None:
             self._Sigma = 1.0 / self.Lambda
         return self._Sigma

     def initialize(self, x=None):
+        """Initializes the distribution."""
         if x is None and self.mu is None and self.eta is None:
             assert self.num_comp == 1
             self._initialize_stdnormal()
@@ -81,11 +104,18 @@ def initialize(self, x=None):
         self._compute_gmm_nat_std()

     def _initialize_stdnormal(self):
+        """Initializes a single component GMM with a standard Normal."""
         self.pi = np.array([1], dtype=float_cpu())
         self.mu = np.zeros((1, self.x_dim), dtype=float_cpu())
         self.Lambda = np.ones((1, self.x_dim), dtype=float_cpu())

     def _initialize_kmeans(self, num_comp, x):
+        """Initializes the GMM with k-means.
+
+        Args:
+            num_comp: number of components.
+            x: initialization data with shape (num_samples, x_dim).
+        """
         if num_comp == 1:
             self.pi = np.array([1], dtype=float_cpu())
             self.mu = np.mean(x, axis=0, keepdims=True)
@@ -104,17 +134,30 @@ def _initialize_kmeans(self, num_comp, x):
             self.Lambda[k] = 1 / np.std(x[r], axis=0) ** 2

     def stack_suff_stats(self, F, S=None):
+        """Stacks the F and S sufficient stats into a single vector."""
         if S is None:
             return F
         return np.hstack((F, S))

     def unstack_suff_stats(self, stats):
+        """Decomposes the sufficient stats vector into F and S."""
         F = stats[:, : self.x_dim]
         S = stats[:, self.x_dim :]
         return F, S

     def norm_suff_stats(self, N, u_x, return_order2=False):
-        F, S = self.unstack_suff_stats(acc_u_x)
+        """Normalizes the accumulated sufficient statistics with the
+        mean and covariance of the distribution.
+
+        Args:
+            N: zero order sufficient stats.
+            u_x: 1st and 2nd order stats.
+            return_order2: whether or not to return the normalized 2nd order stats.
+
+        Returns:
+            Normalized N, F or N, [F, S].
+        """
+        F, S = self.unstack_suff_stats(u_x)
         F_norm = self.cholLambda * (F - N[:, None] * self.mu)
         if return_order2:
             S = S - 2 * self.mu * F + N * self.mu ** 2
@@ -124,7 +167,13 @@ def norm_suff_stats(self, N, u_x, return_order2=False):
         return N, F_norm

     def Mstep(self, N, u_x):
+        """Maximization step.
+
+        Args:
+            N: zero order stats.
+            u_x: accumulated higher order stats.
+        """
         F, S = self.unstack_suff_stats(u_x)

         if self.update_mu:
@@ -143,15 +192,23 @@ def Mstep(self, N, u_x):
         N0 = N < self.min_N
         if np.any(N0):
             N[N0] = 0
-            mu[N0] = 0
-            S[N0] = 1
+            self.mu[N0] = 0
+            self._Sigma[N0] = 1
+            self.Lambda[N0] = 1

         self.pi = N / np.sum(N)
         self._log_pi = None

         self._compute_nat_params()

     def split_comp(self, K=2):
+        """Creates a new GMM with K x num_components components.
+
+        Args:
+            K: multiplier for the number of components.
+
+        Returns:
+            GMMDiagCov object.
+        """
         std_dev = 1 / self.cholLambda

         num_comp = self.num_comp * K
@@ -171,6 +228,15 @@ def split_comp(self, K=2):
         return GMMDiagCov(pi=pi, mu=mu, Lambda=Lambda, **config)

     def log_prob_std(self, x):
+        """log p(x) of each data sample computed using the
+        standard parameters of the distribution.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
         r0 = self.log_pi + 0.5 * self.logLambda - 0.5 * self.x_dim * np.log(2 * np.pi)
         llk_k = np.zeros((x.shape[0], self.num_comp), dtype=float_cpu())
         for k in range(self.num_comp):
@@ -179,6 +245,7 @@ def log_prob_std(self, x):
         return logsumexp(llk_k, axis=-1)

     def log_cdf(self, x):
+        """Log cumulative distribution function."""
         llk_k = np.zeros((x.shape[0], self.num_comp), dtype=float_cpu())
         for k in range(self.num_comp):
             delta = (x - self.mu[k]) * self.cholLambda[k]
@@ -187,11 +254,24 @@ def log_cdf(self, x):

         return logsumexp(llk_k)

-    def sample(self, num_samples, rng=None, seed=1024):
+    def sample(self, num_samples=1, rng=None, seed=1024, r=None):
+        """Draws samples from the data distribution.
+
+        Args:
+            num_samples: number of samples.
+            rng: random number generator.
+            seed: random seed used if rng is None.
+            r: precomputed component assignments with shape (num_samples, num_comp) (optional).
+
+        Returns:
+            Generated samples with shape (num_samples, x_dim).
+        """
         if rng is None:
             rng = np.random.RandomState(seed)

-        r = rng.multinomial(1, self.pi, size=(num_samples,))
+        if r is None:
+            r = rng.multinomial(1, self.pi, size=(num_samples,))
+        else:
+            num_samples = len(r)

         x = rng.normal(size=(num_samples, self.x_dim)).astype(float_cpu())
         for k in range(self.num_comp):
@@ -201,6 +281,7 @@ def sample(self, num_samples=1, rng=None, seed=1024, r=None):
         return x

     def get_config(self):
+        """Returns the model configuration dict."""
         config = {
             "var_floor": self.var_floor,
             "update_mu": self.update_mu,
@@ -210,13 +291,28 @@ def get_config(self):
         return dict(list(base_config.items()) + list(config.items()))

     def save_params(self, f):
+        """Saves the model parameters into the file.
+
+        Args:
+            f: file handle.
+        """
         params = {"pi": self.pi, "mu": self.mu, "Lambda": self.Lambda}
         self._save_params_from_dict(f, params)
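A save/restore sketch for the pair of methods above. The exact handle type is defined by the NPModel serialization helpers (`_save_params_from_dict` / `_load_params_to_dict`); an h5py-style handle is assumed here and may not match the real API:

    import h5py

    with h5py.File("gmm.h5", "w") as f:    # assumed handle type
        gmm.save_params(f)

    config = gmm.get_config()              # in practice stored alongside the params
    with h5py.File("gmm.h5", "r") as f:
        gmm2 = GMMDiagCov.load_params(f, config)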
     @classmethod
     def load_params(cls, f, config):
+        """Initializes the model from the configuration and loads the model
+        parameters from the file.
+
+        Args:
+            f: file handle.
+            config: configuration dictionary.
+
+        Returns:
+            Model object.
+        """
         param_list = ["pi", "mu", "Lambda"]
-        params = self._load_params_to_dict(f, config["name"], param_list)
+        params = cls._load_params_to_dict(f, config["name"], param_list)
         return cls(
             x_dim=config["x_dim"],
             pi=params["pi"],
@@ -232,6 +328,14 @@ def load_params(cls, f, config):

     @classmethod
     def load_from_kaldi(cls, file_path):
+        """Loads a GMM from a Kaldi file.
+
+        Args:
+            file_path: Kaldi file path.
+
+        Returns:
+            Model object.
+        """
         pi = None
         eta1 = None
         eta2 = None
@@ -284,6 +388,7 @@ def _validate_eta(self):
         assert self.eta.shape[1] == self.x_dim * 2

     def validate(self):
+        """Validates the parameters of the distribution."""
         if self.pi is not None:
             self._validate_pi()
@@ -296,12 +401,14 @@ def validate(self):

     @staticmethod
     def compute_eta(mu, Lambda):
+        """Computes the natural parameters from the mean and precision."""
         Lmu = Lambda * mu
         eta = np.hstack((Lmu, -0.5 * Lambda))
         return eta

     @staticmethod
     def compute_std(eta):
+        """Computes the standard parameters from the natural parameters."""
         x_dim = int(eta.shape[-1] / 2)
         eta1 = eta[:, :x_dim]
         eta2 = eta[:, x_dim:]
@@ -311,6 +418,7 @@ def compute_std(eta):

     @staticmethod
     def compute_A_nat(eta):
+        """Computes A from the natural parameters."""
         x_dim = int(eta.shape[-1] / 2)
         eta1 = eta[:, :x_dim]
         eta2 = eta[:, x_dim:]
@@ -321,6 +429,7 @@ def compute_A_nat(eta):

     @staticmethod
     def compute_A_std(mu, Lambda):
+        """Computes A from the standard parameters."""
         x_dim = mu.shape[1]
         r1 = 0.5 * x_dim * np.log(2 * np.pi)
         r2 = -0.5 * np.sum(np.log(Lambda), axis=-1)
@@ -339,6 +448,14 @@ def _compute_std_params(self):

     @staticmethod
     def compute_suff_stats(x):
+        """Computes the sufficient stats for each sample.
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+
+        Returns:
+            Sufficient stats for each data sample with shape (num_samples, u_dim).
+        """
         d = x.shape[-1]
         u = np.zeros((x.shape[0], 2 * d), dtype=float_cpu())
         u[:, :d] = x
@@ -346,12 +463,28 @@ def compute_suff_stats(x):
         return u

     def plot1D(self, feat_idx=0, num_sigmas=2, num_pts=100, **kwargs):
+        """Plots one slice of each GMM component in 1d.
+
+        Args:
+            feat_idx: feature index.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         C = 1 / self.Lambda[:, feat_idx]
         for k in range(mu.shape[0]):
             plot_gaussian_1D(mu[k], C[k], num_sigmas, num_pts, **kwargs)

     def plot2D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 2 dimensions of each GMM component in 2d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         C = 1 / self.Lambda[:, feat_idx]
         for k in range(mu.shape[0]):
@@ -359,6 +492,14 @@ def plot2D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
             plot_gaussian_ellipsoid_2D(mu[k], C_k, num_sigmas, num_pts, **kwargs)

     def plot3D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 2 dimensions of each GMM component in 3d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         C = 1 / self.Lambda[:, feat_idx]
         for k in range(mu.shape[0]):
@@ -366,6 +507,14 @@ def plot3D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
             plot_gaussian_3D(mu[k], C_k, num_sigmas, num_pts, **kwargs)

     def plot3D_ellipsoid(self, feat_idx=[0, 1, 2], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 3 dimensions of each GMM component in 3d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         C = 1 / self.Lambda[:, feat_idx]
         for k in range(mu.shape[0]):
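`split_comp` supports the classical binary-splitting UBM recipe: train a small model, split every component, retrain, and repeat. A sketch, assuming `x` is a training matrix and GMMDiagCov is exported as above:

    gmm = GMMDiagCov(num_comp=1)
    gmm.fit(x, epochs=10)          # start from a single Gaussian
    while gmm.num_comp < 512:      # target number of components
        gmm = gmm.split_comp(2)    # perturb each component into two
        gmm.fit(x, epochs=4)       # a few EM steps after each split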
diff --git a/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py
index 87043cc4..ff02ec62 100644
--- a/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py
+++ b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py
@@ -20,8 +20,23 @@

 class GMMTiedDiagCov(GMMDiagCov):
+    """Class for GMM with diagonal covariance tied across components.
+
+    Attributes:
+      num_comp: number of components of the mixture (inferred from pi).
+      pi: weights of the components.
+      mu: means with shape (num_comp, x_dim) or None.
+      Lambda: shared precision with shape (x_dim,) or None.
+      var_floor: variance floor.
+      update_mu: whether or not to update mu when optimizing.
+      update_Lambda: whether or not to update Lambda when optimizing.
+      x_dim: data dimension (inferred from mu if present).
+    """
+
     def __init__(
         self,
+        num_comp=1,
+        pi=None,
         mu=None,
         Lambda=None,
         var_floor=1e-3,
@@ -30,6 +45,8 @@ def __init__(
         **kwargs
     ):
         super().__init__(
+            num_comp=num_comp,
+            pi=pi,
             mu=mu,
             Lambda=Lambda,
             var_floor=var_floor,
@@ -49,11 +66,18 @@ def _compute_gmm_nat_std(self):
             self._compute_std_params()

     def _initialize_stdnormal(self):
+        """Initializes a single component GMM with a standard Normal."""
         self.pi = np.array([1], dtype=float_cpu())
         self.mu = np.zeros((1, self.x_dim), dtype=float_cpu())
         self.Lambda = np.ones((self.x_dim,), dtype=float_cpu())

     def _initialize_kmeans(self, num_comp, x):
+        """Initializes the GMM with k-means.
+
+        Args:
+            num_comp: number of components.
+            x: initialization data with shape (num_samples, x_dim).
+        """
         if num_comp == 1:
             self.pi = np.array([1], dtype=float_cpu())
             self.mu = np.mean(x, axis=0, keepdims=True)
@@ -75,7 +99,13 @@ def _initialize_kmeans(self, num_comp, x):
             self.Lambda = x.shape[0] / C

     def Mstep(self, N, u_x):
+        """Maximization step.
+
+        Args:
+            N: zero order stats.
+            u_x: accumulated higher order stats.
+        """
         F, S = self.unstack_suff_stats(u_x)

         if self.update_mu:
@@ -95,15 +125,22 @@ def Mstep(self, N, u_x):
         N0 = N < self.min_N
         if np.any(N0):
             N[N0] = 0
-            mu[N0] = 0
-            S[N0] = 1
+            self.mu[N0] = 0

         self.pi = N / np.sum(N)
         self._log_pi = None

         self._compute_nat_params()

     def split_comp(self, K=2):
+        """Creates a new GMM with K x num_components components.
+
+        Args:
+            K: multiplier for the number of components.
+
+        Returns:
+            GMMTiedDiagCov object.
+        """
         std_dev = 1 / self.cholLambda

         num_comp = self.num_comp * K
@@ -122,6 +159,15 @@ def split_comp(self, K=2):
-        return DiagGMMTiedCov(pi=pi, mu=mu, Lambda=self.Lambda, **config)
+        return GMMTiedDiagCov(pi=pi, mu=mu, Lambda=self.Lambda, **config)

     def log_prob_std(self, x):
+        """log p(x) of each data sample computed using the
+        standard parameters of the distribution.
+
+        Args:
+            x: input data with shape (num_samples, x_dim).
+
+        Returns:
+            log p(x) with shape (num_samples,).
+        """
         r0 = self.log_pi + 0.5 * self.logLambda - 0.5 * self.x_dim * np.log(2 * np.pi)
         llk_k = np.zeros((x.shape[0], self.num_comp), dtype=float_cpu())
         for k in range(self.num_comp):
@@ -130,6 +176,7 @@ def log_prob_std(self, x):
         return logsumexp(llk_k, axis=-1)

     def log_cdf(self, x):
+        """Log cumulative distribution function."""
         llk_k = np.zeros((x.shape[0], self.num_comp), dtype=float_cpu())
         for k in range(self.num_comp):
             delta = (x - self.mu[k]) * self.cholLambda
@@ -138,11 +185,24 @@ def log_cdf(self, x):

         return logsumexp(llk_k)

-    def sample(self, num_samples, rng=None, seed=1024):
+    def sample(self, num_samples=1, rng=None, seed=1024, r=None):
+        """Draws samples from the data distribution.
+
+        Args:
+            num_samples: number of samples.
+            rng: random number generator.
+            seed: random seed used if rng is None.
+            r: precomputed component assignments with shape (num_samples, num_comp) (optional).
+
+        Returns:
+            Generated samples with shape (num_samples, x_dim).
+        """
         if rng is None:
             rng = np.random.RandomState(seed)

-        r = rng.multinomial(1, self.pi, size=(num_samples,))
+        if r is None:
+            r = rng.multinomial(1, self.pi, size=(num_samples,))
+        else:
+            num_samples = len(r)

         x = rng.normal(size=(num_samples, self.x_dim)).astype(float_cpu())
         for k in range(self.num_comp):
@@ -157,12 +217,14 @@ def _validate_Lambda(self):

     @staticmethod
     def compute_eta(mu, Lambda):
+        """Computes the natural parameters from the mean and precision."""
         Lmu = Lambda * mu
         eta = np.hstack((Lmu, -0.5 * np.tile(Lambda, (mu.shape[0], 1))))
         return eta

     @staticmethod
     def compute_std(eta):
+        """Computes the standard parameters from the natural parameters."""
         x_dim = int(eta.shape[-1] / 2)
         eta1 = eta[:, :x_dim]
         eta2 = eta[:, x_dim:]
@@ -171,24 +233,56 @@ def compute_std(eta):
         return mu, Lambda

     def plot1D(self, feat_idx=0, num_sigmas=2, num_pts=100, **kwargs):
+        """Plots one slice of each GMM component in 1d.
+
+        Args:
+            feat_idx: feature index.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         C = 1 / self.Lambda[feat_idx]
         for k in range(mu.shape[0]):
             plot_gaussian_1D(mu[k], C, num_sigmas, num_pts, **kwargs)

     def plot2D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 2 dimensions of each GMM component in 2d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         C = np.diag(1 / self.Lambda[feat_idx])
         for k in range(mu.shape[0]):
             plot_gaussian_ellipsoid_2D(mu[k], C, num_sigmas, num_pts, **kwargs)

     def plot3D(self, feat_idx=[0, 1], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 2 dimensions of each GMM component in 3d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         C = np.diag(1 / self.Lambda[feat_idx])
         for k in range(mu.shape[0]):
             plot_gaussian_3D(mu[k], C, num_sigmas, num_pts, **kwargs)

     def plot3D_ellipsoid(self, feat_idx=[0, 1, 2], num_sigmas=2, num_pts=100, **kwargs):
+        """Plots 3 dimensions of each GMM component in 3d.
+
+        Args:
+            feat_idx: feature indices.
+            num_sigmas: size of the plot in number of standard deviations.
+            num_pts: number of points in the graph.
+            **kwargs: pyplot options.
+        """
         mu = self.mu[:, feat_idx]
         C = np.diag(1 / self.Lambda[feat_idx])
         for k in range(mu.shape[0]):
diff --git a/hyperion/np/pdfs/plda/frplda.py b/hyperion/np/pdfs/plda/frplda.py
index 137276c7..183725a7 100644
--- a/hyperion/np/pdfs/plda/frplda.py
+++ b/hyperion/np/pdfs/plda/frplda.py
@@ -12,6 +12,21 @@

 class FRPLDA(PLDABase):
+    """Class for Full-rank PLDA (a.k.a. Two-Covariance Model) where
+    .. math::
+        \mathbf{x}_{ij} = \mathbf{y}_i + \boldsymbol{\varepsilon}_{ij}
+
+    Attributes:
+      mu: class-independent mean.
+      B: between-class precision.
+      W: within-class precision.
+      update_mu: whether to update mu or not when training the model.
+      update_B: whether to update B or not when training the model.
+      update_W: whether to update W or not when training the model.
+      x_dim: data dimension.
+    """
+
     def __init__(
         self,
         mu=None,
         B=None,
         W=None,
         update_mu=True,
         update_B=True,
         update_W=True,
         **kwargs
     ):
-        super(FRPLDA, self).__init__(mu=mu, update_mu=update_mu, **kwargs)
+        super().__init__(mu=mu, update_mu=update_mu, **kwargs)
         if mu is not None:
             self.y_dim = mu.shape[0]
         self.B = B
@@ -33,6 +48,7 @@ def __init__(
         self.update_W = update_W

     def validate(self):
+        """Validates the model parameters."""
         assert self.mu.shape[0] == self.B.shape[0]
         assert self.mu.shape[0] == self.B.shape[1]
         assert self.mu.shape[0] == self.W.shape[0]
@@ -40,6 +56,7 @@ def validate(self):

     @property
     def is_init(self):
+        """Returns True if the model has been initialized."""
         if self._is_init:
             return True
         if self.mu is not None and self.B is not None and self.W is not None:
@@ -48,6 +65,11 @@ def is_init(self):
         return self._is_init

     def initialize(self, D):
+        """Initializes the model.
+
+        Args:
+            D: tuple of sufficient statistics (N, F, S).
+        """
         N, F, S = D
         self.x_dim = F.shape[1]
         self.y_dim = F.shape[1]
@@ -77,6 +99,21 @@ def initialize(self, D):

     def compute_py_g_x(
         self, D, return_cov=False, return_logpy_0=False, return_acc=False
     ):
+        """Computes the posterior P(y|x).
+
+        Args:
+            D: tuple of sufficient statistics (N, F, S).
+            return_cov: whether or not to return the posterior covariances.
+            return_logpy_0: whether or not to return log P(y=0|x).
+            return_acc: whether or not to return the Ry and Py accumulators.
+
+        Returns:
+            Speaker factor posterior means with shape (num_speakers, y_dim).
+            Speaker factor posterior covariances with shape (num_speakers, y_dim, y_dim).
+            log P(y=0|x) with shape (num_speakers,).
+            Ry accumulator for the ML step with shape (y_dim, y_dim).
+            Py accumulator for the MD step with shape (y_dim, y_dim).
+        """
         assert self.is_init

@@ -160,6 +197,14 @@ def compute_py_g_x(
         return r

     def Estep(self, D):
+        """Expectation step.
+
+        Args:
+            D: tuple with sufficient statistics (N, F, S).
+
+        Returns:
+            Tuple of statistics with accumulated expectations.
+        """
         N, F, S = D
         y, logpy, Ry, Py = self.compute_py_g_x(D, return_logpy_0=True, return_acc=True)

@@ -179,6 +224,14 @@ def Estep(self, D):
         return stats

     def elbo(self, stats):
+        """Computes the objective function.
+
+        Args:
+            stats: tuple of expectations computed at the Estep.
+
+        Returns:
+            log P(X).
+        """
         N, M, S, logpy_x = stats[:4]

         logW = logdet_pdmat(self.W)
@@ -201,19 +254,14 @@ def elbo(self, stats):
         elbo = logpx_y + logpy - logpy_x
         return elbo
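Spelling out the generative model implied by the attributes above, with B and W as precisions; the priors are the standard two-covariance ones, inferred from the attribute list rather than stated in this file:

.. math::
    \mathbf{y}_i \sim \mathcal{N}\left(\boldsymbol{\mu}, \mathbf{B}^{-1}\right), \qquad
    \mathbf{x}_{ij} \mid \mathbf{y}_i \sim \mathcal{N}\left(\mathbf{y}_i, \mathbf{W}^{-1}\right)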
     def MstepML(self, stats):
+        """Maximum likelihood estimation step.
+
+        Args:
+            stats: tuple of expectations computed at the Estep.
+        """
-    # N, M, sumy, yy, _, _, CW, logL = stats
-    # ymu = np.outer(sumy, mu)
-    # CB = yy - ymu -ymu.T + M*np.outer(self.mu, self.mu.T)
-    # logW = logdet_pdmat(self.W)
-    # logB = logdet_pdmat(self.B)
-    # elbo = 0.5*(-logL - N*self.x_dim*np.log(2*np.pi)
-    #             +N*logW - np.inner(self.W.ravel(), CW.ravel())
-    #             +M*logB - np.inner(self.B.ravel(), CB.ravel()))
-    # return elbo
-
-    def MstepML(self, stats):
         N, M, S, _, y_acc, Ry, Cy, Py = stats
         ybar = y_acc / M
         if self.update_mu:
@@ -236,6 +284,7 @@ def MstepMD(self, stats):
         pass

     def get_config(self):
+        """Returns the model configuration dict."""
         config = {
             "update_W": self.update_W,
             "update_B": self.update_B,
@@ -245,18 +294,42 @@ def get_config(self):
         return dict(list(base_config.items()) + list(config.items()))

     def save_params(self, f):
+        """Saves the model parameters into the file.
+
+        Args:
+            f: file handle.
+        """
         params = {"mu": self.mu, "B": self.B, "W": self.W}
         self._save_params_from_dict(f, params)

     @classmethod
     def load_params(cls, f, config):
+        """Initializes the model from the configuration and loads the model
+        parameters from the file.
+
+        Args:
+            f: file handle.
+            config: configuration dictionary.
+
+        Returns:
+            Model object.
+        """
         param_list = ["mu", "B", "W"]
         params = cls._load_params_to_dict(f, config["name"], param_list)
         kwargs = dict(list(config.items()) + list(params.items()))
         return cls(**kwargs)

     def llr_1vs1(self, x1, x2):
+        """log-likelihood ratio between the target and non-target hypotheses for
+        the case of one enrollment segment and one test segment.
+
+        Args:
+            x1: enrollment vectors with shape (num_enroll_segments, x_dim).
+            x2: test vectors with shape (num_test_segments, x_dim).
+
+        Returns:
+            Score matrix with shape (num_enroll_segments, num_test_segments).
+        """
         assert self.is_init

         Lnon = self.B + self.W
@@ -303,7 +376,17 @@ def llr_1vs1(self, x1, x2):
         return scores

     def llr_NvsM_book(self, D1, D2):
+        """log-likelihood ratio between the target and non-target hypotheses for
+        the case of N segments/enrollment-side and M segments/test-side,
+        evaluated with the exact formula (by the book).
+
+        Args:
+            D1: tuple of sufficient statistics for the enrollment sides (N1, F1, S1).
+            D2: tuple of sufficient statistics for the test sides (N2, F2, S2).
+
+        Returns:
+            Score matrix with shape (num_enroll_sides, num_test_sides).
+        """
         assert self.is_init

         N1, F1, _ = D1
@@ -368,7 +451,17 @@ def llr_NvsM_book(self, D1, D2):

     def sample(
         self, num_classes, num_samples_per_class, rng=None, seed=1024, return_y=False
     ):
+        """Draws samples from the PLDA model.
+
+        Args:
+            num_classes: number of classes to sample.
+            num_samples_per_class: number of samples per class.
+            rng: random number generator.
+            seed: random seed used if rng is None.
+            return_y: whether or not to also return the speaker factors.
+
+        Returns:
+            Generated samples with shape (num_samples, x_dim).
+        """
         assert self.is_init

         if rng is None:
@@ -394,7 +487,15 @@ def sample(
         return y + z

     def weighted_avg_params(self, mu, B, W, w_mu, w_B, w_W):
-        super(FRPLDA, self).weigthed_avg_params(mu, w_mu)
+        """Performs a weighted average of the model parameters
+        and some given parameters.
+
+        Args:
+            mu: other mean vector.
+            B: other between-class precision.
+            W: other within-class precision.
+            w_mu: weight of the given mean vector.
+            w_B: weight of the given between-class precision.
+            w_W: weight of the given within-class precision.
+        """
+        super().weigthed_avg_params(mu, w_mu)
         if w_B > 0:
             Sb0 = invert_pdmat(self.B, return_inv=True)[-1]
             Sb = invert_pdmat(B, return_inv=True)[-1]
@@ -407,4 +508,11 @@ def weighted_avg_params(self, mu, B, W, w_mu, w_B, w_W):
             self.W = invert_pdmat(Sw, return_inv=True)[-1]

     def weighted_avg_model(self, plda, w_mu, w_B, w_W):
+        """Performs a weighted average of the model parameters
+        and those of another model given as input.
+
+        Args:
+            plda: other PLDA model.
+            w_mu: weight of the other mean.
+            w_B: weight of the other between-class precision.
+            w_W: weight of the other within-class precision.
+        """
         self.weighted_avg_params(plda.mu, plda.B, plda.W, w_mu, w_B, w_W)
diff --git a/hyperion/np/pdfs/plda/plda.py b/hyperion/np/pdfs/plda/plda.py
index 30c21361..fd2eb9a9 100644
--- a/hyperion/np/pdfs/plda/plda.py
+++ b/hyperion/np/pdfs/plda/plda.py
@@ -12,6 +12,25 @@

 class PLDA(PLDABase):
+    """Class for Probabilistic Linear Discriminant Analysis (PLDA) where
+    .. math::
+        \mathbf{x}_{ij} = \boldsymbol{\mu} + \mathbf{V} \mathbf{y}_i + \mathbf{U} \mathbf{z}_{ij} + \boldsymbol{\varepsilon}_{ij}
+
+    Attributes:
+      y_dim: speaker factor dimension.
+      z_dim: channel factor dimension.
+      mu: class-independent mean.
+      V: speaker factor loading matrix.
+      U: channel factor loading matrix.
+      D: precision of the additional channel noise.
+      floor_iD: floor for the inverse of D.
+      update_mu: whether to update mu or not when training the model.
+      update_V: whether to update V or not when training the model.
+      update_U: whether to update U or not when training the model.
+      update_D: whether to update D or not when training the model.
+      x_dim: data dimension.
+    """
+
     def __init__(
         self,
         y_dim=None,
         z_dim=None,
         mu=None,
         V=None,
         U=None,
         D=None,
         floor_iD=1e-5,
         update_mu=True,
         update_V=True,
         update_U=True,
         update_D=True,
         **kwargs
     ):
-        super(PLDA, self).__init__(y_dim=y_dim, mu=mu, update_mu=update_mu, **kwargs)
+        super().__init__(y_dim=y_dim, mu=mu, update_mu=update_mu, **kwargs)
         self.z_dim = z_dim
         if V is not None:
             self.y_dim = V.shape[0]
@@ -52,6 +71,7 @@ def __init__(
         self._VWV = None

     def validate(self):
+        """Validates the model parameters."""
         assert self.mu.shape[0] >= self.V.shape[0]
         assert self.mu.shape[0] == self.V.shape[1]
         assert self.mu.shape[0] >= self.U.shape[0]
@@ -60,6 +80,7 @@ def validate(self):

     @property
     def is_init(self):
+        """Returns True if the model has been initialized."""
         if self._is_init:
             return True
         if (
@@ -75,6 +96,7 @@ def is_init(self):
         return self._is_init

     def compute_aux(self):
+        """Computes auxiliary variables."""
         DV = self.V * self.D
         DU = self.U * self.D
         self._DU = DU
@@ -89,6 +111,11 @@ def compute_aux(self):
         self._VWV = np.dot(self.V, self._VW)

     def initialize(self, D):
+        """Initializes the model.
+
+        Args:
+            D: tuple of sufficient statistics (N, F, S).
+        """
         N, F, S = D
         self.x_dim = F.shape[1]
         M = F.shape[0]
@@ -118,7 +145,21 @@ def initialize(self, D):

     def compute_py_g_x(
         self, D, return_cov=False, return_logpy_0=False, return_acc=False
     ):
-
+        """Computes the posterior P(y|x).
+
+        Args:
+            D: tuple of sufficient statistics (N, F, S).
+            return_cov: whether or not to return the posterior covariances.
+            return_logpy_0: whether or not to return log P(y=0|x).
+            return_acc: whether or not to return the Ry and Py accumulators.
+
+        Returns:
+            Speaker factor posterior means with shape (num_speakers, y_dim).
+            Speaker factor posterior covariances with shape (num_speakers, y_dim, y_dim).
+            log P(y=0|x) with shape (num_speakers,).
+            Ry accumulator for the ML step with shape (y_dim, y_dim).
+            Py accumulator for the MD step with shape (y_dim, y_dim).
+        """
         assert self.is_init

         N, F, S = D
@@ -203,6 +244,14 @@ def compute_py_g_x(
         return tuple(r)

     def Estep(self, D):
+        """Expectation step.
+
+        Args:
+            D: tuple with sufficient statistics (N, F, S).
+
+        Returns:
+            Tuple of statistics with accumulated expectations.
+        """
         N, F, S = D
         y, logpy, Ry, Py = self.compute_py_g_x(D, return_logpy_0=True, return_acc=True)

@@ -264,6 +313,14 @@ def Estep(self, D):
         return stats

     def elbo(self, stats):
+        """Computes the objective function.
+
+        Args:
+            stats: tuple of expectations computed at the Estep.
+
+        Returns:
+            log P(X).
+        """
         N, M, F, S, logpy_x = stats[:5]

         logD = np.sum(np.log(self.D))
@@ -281,6 +338,12 @@ def elbo(self, stats):
         return elbo

     def MstepML(self, stats):
+        """Maximum likelihood estimation step.
+
+        Args:
+            stats: tuple of expectations computed at the Estep.
+        """
         N, M, F, S, _, y_acc, Ry1, Ry, Cy, Py, Rz1, Rz, Ryz, Cz = stats

         if self.update_mu and not self.update_V and not self.update_U:
@@ -357,6 +420,12 @@ def MstepML(self, stats):
         self.compute_aux()

     def MstepMD(self, stats):
+        """Minimum divergence estimation step.
+
+        Args:
+            stats: tuple of expectations computed at the Estep.
+        """
         N, M, F, S, _, y_acc, Ry1, Ry, Cy, Py, Rz1, Rz, Ryz, Cz = stats
         mu_y = y_acc / M
         Cov_y = Py / M - np.outer(mu_y, mu_y)
@@ -384,6 +453,7 @@ def MstepMD(self, stats):
         self.compute_aux()

     def get_config(self):
+        """Returns the model configuration dict."""
         config = {
             "update_D": self.update_D,
             "update_U": self.update_U,
@@ -394,17 +464,41 @@ def get_config(self):
         return dict(list(base_config.items()) + list(config.items()))

     def save_params(self, f):
+        """Saves the model parameters into the file.
+
+        Args:
+            f: file handle.
+        """
         params = {"mu": self.mu, "V": self.V, "U": self.U, "D": self.D}
         self._save_params_from_dict(f, params)

     @classmethod
     def load_params(cls, f, config):
+        """Initializes the model from the configuration and loads the model
+        parameters from the file.
+
+        Args:
+            f: file handle.
+            config: configuration dictionary.
+
+        Returns:
+            Model object.
+        """
         param_list = ["mu", "V", "U", "D"]
         params = cls._load_params_to_dict(f, config["name"], param_list)
         kwargs = dict(list(config.items()) + list(params.items()))
         return cls(**kwargs)

     def log_probx_g_y(self, x, y):
+        """Computes log P(X|Y).
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            y: speaker factors for each sample with shape (num_samples, y_dim).
+
+        Returns:
+            log P(X|Y) array with shape (num_samples,).
+        """
         iW = np.diag(1 / self.D) + np.dot(self.U.T, self.U)
         mult_W, _, logiW = invert_pdmat(iW, return_logdet=True)
         delta = x - self.mu - np.dot(y, self.V)
@@ -417,6 +511,16 @@ def log_probx_g_y(self, x, y):
         return logp

     def log_probx_g_yz(self, x, y, z):
+        """Computes log P(X|Y,Z).
+
+        Args:
+            x: data samples with shape (num_samples, x_dim).
+            y: speaker factors for each sample with shape (num_samples, y_dim).
+            z: channel factors for each sample with shape (num_samples, z_dim).
+
+        Returns:
+            log P(X|Y,Z) array with shape (num_samples,).
+        """
         logD = np.sum(np.log(self.D))
         delta = x - self.mu - np.dot(y, self.V) - np.dot(z, self.U)
         logp = (
@@ -428,7 +532,16 @@ def log_probx_g_yz(self, x, y, z):
         return logp

     def llr_1vs1(self, x1, x2):
+        """log-likelihood ratio between the target and non-target hypotheses for
+        the case of one enrollment segment and one test segment.
+
+        Args:
+            x1: enrollment vectors with shape (num_enroll_segments, x_dim).
+            x2: test vectors with shape (num_test_segments, x_dim).
+
+        Returns:
+            Score matrix with shape (num_enroll_segments, num_test_segments).
+        """
         assert self.is_init
         WV = self._VW
         VV = self._VWV
@@ -472,7 +585,17 @@ def llr_1vs1(self, x1, x2):
         return scores
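A verification-scoring sketch for `llr_1vs1`; LNorm is imported by plda_base.py below, but its call interface here is an assumption:

    import numpy as np
    from hyperion.np.transforms import LNorm  # import used by plda_base.py

    lnorm = LNorm()
    # Length-normalize first (typical PLDA preprocessing); the exact LNorm
    # API (.predict vs. callable) depends on hyperion.np.transforms.
    x_enroll = lnorm.predict(np.random.randn(20, 400))
    x_test = lnorm.predict(np.random.randn(50, 400))
    scores = plda.llr_1vs1(x_enroll, x_test)  # (20, 50) log-likelihood ratios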
+ Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). + """ assert self.is_init N1, F1, _ = D1 @@ -539,6 +662,17 @@ def llr_NvsM_book(self, D1, D2): return scores def sample(self, num_classes, num_samples_per_class, rng=None, seed=1024): + """Draws samples from the PLDA model. + + Args: + num_classes: number of classes to sample. + num_samples_per_class: number of samples to sample per each class. + rng: random number generator. + seed: random seed used if rng is None. + + Returns: + Generated samples with shape (num_samples, x_dim). + """ if rng is None: rng = np.random.RandomState(seed=seed) @@ -562,8 +696,15 @@ def sample(self, num_classes, num_samples_per_class, rng=None, seed=1024): return y + z1 + z2 def weighted_avg_params(self, mu, V, U, D, w_mu, w_B, w_W): + """Performs weighted average of the model parameters + and some given parameters. + + Args: + mu: other mean vector + w_mu: weight of the given mean vector. - super(PLDA, self).weigthed_avg_params(mu, w_mu) + """ + super().weigthed_avg_params(mu, w_mu) if w_B > 0: Sb0 = np.dot(self.V.T, self.V) Sb = np.dot(V.T, V) @@ -582,26 +723,15 @@ def weighted_avg_params(self, mu, V, U, D, w_mu, w_B, w_W): U = U[:, -self.z_dim :] self.U = U.T iD = np.diag(Sw - np.dot(self.U.T, self.U)).copy() - # print(Sw[:10,:10]) - # print(np.dot(self.U.T, self.U)) - # print(iD[:10]) iD[iD < self.floor_iD] = self.floor_iD self.D = 1 / iD - # if w_W > 0: - # Sw0 = np.dot(self.U.T, self.U) - # Sw = np.dot(U.T, U) - # Sw = w_W*Sw + (1-w_W)*Sw0 - # w, U = sla.eigh(Sw, overwrite_a=True) - # U = np.sqrt(w)*U - # U = U[:,-self.z_dim:] - # self.U = U.T - - # if w_D > 0: - # Sd0 = 1/self.D - # Sd = 1/D - # Sd = w_D*Sd + (1-w_D)*Sd0 - # self.D = 1/Sd - def weighted_avg_model(self, plda, w_mu, w_B, w_W): + """Performs weighted average of the model parameters + and those of another model given as input. + + Args: + plda: other PLDA model. + + """ self.weighted_avg_params(plda.mu, plda.V, plda.U, plda.D, w_mu, w_B, w_W) diff --git a/hyperion/np/pdfs/plda/plda_base.py b/hyperion/np/pdfs/plda/plda_base.py index 1d5d758c..72503965 100644 --- a/hyperion/np/pdfs/plda/plda_base.py +++ b/hyperion/np/pdfs/plda/plda_base.py @@ -5,30 +5,44 @@ import numpy as np -from abc import ABCMeta, abstractmethod - from ....hyp_defs import float_cpu from ..core.pdf import PDF from ...transforms import LNorm class PLDABase(PDF): - __metaclass__ = ABCMeta + """Abstract Base class for different versions of + Probabilistic Linear Discriminant Analysis (PLDA) models. + + Attributes: + y_dim: speaker factor dimension. + mu: class-independent mean. + update_mu: whether to update mu or not when training the model. + x_dim: data dimension. + """ def __init__(self, y_dim=None, mu=None, update_mu=True, **kwargs): - super(PLDABase, self).__init__(**kwargs) + super().__init__(**kwargs) self.mu = mu self.y_dim = y_dim self.update_mu = update_mu if mu is not None: self.x_dim = mu.shape[0] - @abstractmethod def initialize(self, D): + """initializes the model. + + Args: + D: tuple of sufficient statistics (N, F, S) + """ pass - @abstractmethod def compute_py_g_x(self, D): + """Computes the posterior P(y|x) + + Args: + D: tuple of sufficient statistics (N, F, S) + """ pass def fit( @@ -45,22 +59,47 @@ def fit( ml_md="ml+md", md_epochs=None, ): + """Trains the model. + + Args: + x: train data matrix with shape (num_samples, x_dim). + class_ids: class identifiers [0, num_clases-1] for training data. 
+ ptheta: probability of belonging to a class with shape (num_samples, num_classes) for training data. + sample_weight: weight of each sample in the training loss shape (num_samples,). + x_val: validation data matrix with shape (num_val_samples, x_dim). + class_ids_val: class identifiers [0, num_clases-1] for val data. + ptheta_val: probability of belonging to a class with shape (num_samples, num_classes) for val. data. + sample_weight_val: weight of each sample in the val. loss. + epochs: number of EM steps. + ml_md: whether to do maximum likelihood estimation ("ml"), minimum divergence ("md") or both ("ml+md"). + md_epochs: in which epochs to do MD estimation, if None, MD is done in all epochs. + + Returns: + log p(X) of the training data. + log p(x) per sample. + log p(X) of the val. data, if present. + log p(x) of the val. data per sample, if present. + """ use_ml = False if ml_md == "md" else True use_md = False if ml_md == "ml" else True assert not (class_ids is None and ptheta is None) if class_ids is None: - D = self.compute_stats_soft(x, ptheta) + D = self.compute_stats_soft(x, ptheta, sample_weight=sample_weight) else: - D = self.compute_stats_hard(x, class_ids) + D = self.compute_stats_hard(x, class_ids, sample_weight=sample_weight) if x_val is not None: assert not (class_ids_val is None and ptheta_val is None) if class_ids_val is None: - D_val = self.compute_stats_soft(x_val, ptheta_val) + D_val = self.compute_stats_soft( + x_val, ptheta_val, sample_weight=sample_weight_val + ) else: - D_val = self.compute_stats_hard(x_val, class_ids_val) + D_val = self.compute_stats_hard( + x_val, class_ids_val, sample_weight=sample_weight_val + ) if not self.is_init: self.initialize(D) @@ -87,24 +126,16 @@ def fit( elbo_val_norm = elbo_val / np.sum(D_val[0]) return elbo, elbo_norm, elbo_val, elbo_val_norm - @abstractmethod def Estep(self, x): + """Expectation step.""" pass - @abstractmethod def MstepML(self, x): + """Maximum likelihood step.""" pass - @abstractmethod def MstepMD(self, x): - pass - - @abstractmethod - def llr_1vs1(self, x1, x2): - pass - - @abstractmethod - def llr_NvsM_book(self, D1, D2): + """Minimum Divergence step.""" pass def fit_adapt_weighted_avg_model( @@ -125,6 +156,31 @@ def fit_adapt_weighted_avg_model( w_B=0.5, w_W=0.5, ): + """Adapts a PLDA model to new data. The adapted model is weighted averaged with the prior after each epoch. + + Args: + x: train data matrix with shape (num_samples, x_dim). + class_ids: class identifiers [0, num_clases-1] for training data. + ptheta: probability of belonging to a class with shape (num_samples, num_classes) for training data. + sample_weight: weight of each sample in the training loss shape (num_samples,). + x_val: validation data matrix with shape (num_val_samples, x_dim). + class_ids_val: class identifiers [0, num_clases-1] for val data. + ptheta_val: probability of belonging to a class with shape (num_samples, num_classes) for val. data. + sample_weight_val: weight of each sample in the val. loss. + epochs: number of EM steps. + ml_md: whether to do maximum likelihood estimation ("ml"), minimum divergence ("md") or both ("ml+md"). + md_epochs: in which epochs to do MD estimation, if None, MD is done in all epochs. + plda0: prior model. + w_mu: weigth of the prior on the mean. + w_B: weight of the prior on the between-class precision. + w_W: weight of the prior on the within-class precision. + + Returns: + log p(X) of the training data. + log p(x) per sample. + log p(X) of the val. data, if present. + log p(x) of the val. 
data per sample, if present. + """ assert self.is_init use_ml = False if ml_md == "md" else True @@ -132,16 +188,20 @@ def fit_adapt_weighted_avg_model( assert not (class_ids is None and ptheta is None) if class_ids is None: - D = self.compute_stats_soft(x, ptheta) + D = self.compute_stats_soft(x, ptheta, sample_weight=sample_weight) else: - D = self.compute_stats_hard(x, class_ids) + D = self.compute_stats_hard(x, class_ids, sample_weight=sample_weight) if x_val is not None: assert not (class_ids_val is None and ptheta_val is None) if class_ids_val is None: - D_val = self.compute_stats_soft(x_val, ptheta_val) + D_val = self.compute_stats_soft( + x_val, ptheta_val, sample_weight=sample_weight_val + ) else: - D_val = self.compute_stats_hard(x_val, class_ids_val) + D_val = self.compute_stats_hard( + x_val, class_ids_val, sample_weight=sample_weight_val + ) elbo = np.zeros((epochs,), dtype=float_cpu()) elbo_val = np.zeros((epochs,), dtype=float_cpu()) @@ -167,74 +227,21 @@ def fit_adapt_weighted_avg_model( elbo_val_norm = elbo_val / np.sum(D_val[0]) return elbo, elbo_norm, elbo_val, elbo_val_norm - def fit_adapt( - self, - x, - class_ids=None, - ptheta=None, - sample_weight=None, - x0=None, - class_ids0=None, - ptheta0=None, - sample_weight0=None, - x_val=None, - class_ids_val=None, - ptheta_val=None, - sample_weight_val=None, - epochs=20, - ml_md="ml+md", - md_epochs=None, - ): - - assert self.is_init - use_ml = False if ml_md == "md" else True - use_md = False if ml_md == "ml" else True - - assert not (class_ids is None and ptheta is None) - if class_ids is None: - D = self.compute_stats_soft(x, ptheta) - else: - D = self.compute_stats_hard(x, class_ids) - - if x0 is not None: - assert not (class_ids0 is None and ptheta0 is None) - if class_ids0 is None: - D0 = self.compute_stats_soft(x0, ptheta0) - else: - D0 = self.compute_stats_hard(x0, class_ids0) - - if x_val is not None: - assert not (class_ids_val is None and ptheta_val is None) - if class_ids_val is None: - D_val = self.compute_stats_soft(x_val, ptheta_val) - else: - D_val = self.compute_stats_hard(x_val, class_ids_val) - - elbo = np.zeros((epochs,), dtype=float_cpu()) - elbo_val = np.zeros((epochs,), dtype=float_cpu()) - for epoch in range(epochs): - - stats = self.Estep(D) - stats0 = self.Estep(D0) - elbo[epoch] = self.elbo(stats) - if x_val is not None: - stats_val = self.Estep(D_val) - elbo_val[epoch] = self.elbo(stats_val) - - if use_ml: - self.MstepML(stats) - if use_md and (md_epochs is None or epoch in md_epochs): - self.MstepMD(stats) - - elbo_norm = elbo / np.sum(D[0]) - if x_val is None: - return elbo, elbo_norm - else: - elbo_val_norm = elbo_val / np.sum(D_val[0]) - return elbo, elbo_norm, elbo_val, elbo_val_norm - @staticmethod def compute_stats_soft(x, p_theta, sample_weight=None, scal_factor=None): + """Computes sufficient statistics need by PLDA model using soft class assigments. 
+ + Args: + x: input data with shape (num_samples, x_dim) + p_theta: soft class assigments with shape (num_samples, num_classes) + sample_weight: weight of each sample for training with shape (num_samples, ) + scal_factor: scaling factor for sufficient statistics (Themos factor) + + Returns: + N: zero order stats with shape (num_classes,) + F: first order stats with shape (num_classes, x_dim) + S: Accumulated second order stats with sahpe (x_dim, x_dim) + """ if sample_weight is not None: p_theta = sample_weight[:, None] * p_theta if scal_factor is not None: @@ -247,6 +254,19 @@ def compute_stats_soft(x, p_theta, sample_weight=None, scal_factor=None): @staticmethod def compute_stats_hard(x, class_ids, sample_weight=None, scale_factor=None): + """Computes sufficient statistics need by PLDA model using soft class assigments. + + Args: + x: input data with shape (num_samples, x_dim) + class_ids: integer [0, num_classes-1] vector indicating the class of each sample. + sample_weight: weight of each sample for training with shape (num_samples, ) + scal_factor: scaling factor for sufficient statistics (Themos factor) + + Returns: + N: zero order stats with shape (num_classes,) + F: first order stats with shape (num_classes, x_dim) + S: Accumulated second order stats with sahpe (x_dim, x_dim) + """ x_dim = x.shape[1] num_classes = np.max(class_ids) + 1 N = np.zeros((num_classes,), dtype=float_cpu()) @@ -283,13 +303,66 @@ def compute_stats_hard_v0(x, class_ids, sample_weight=None, scal_factor=None): @staticmethod def center_stats(D, mu): + """Centers the sufficient statistics by the PLDA mean. + + Args: + D: tupe with sufficient stats (N, F, S). + mu: mean vector. + + Returns: + Centered N, F, S + """ N, F, S = D Fc = F - np.outer(N, mu) Fmu = np.outer(np.sum(F, axis=0), mu) Sc = S - Fmu - Fmu.T + np.sum(N) * np.outer(mu, mu) return N, Fc, Sc + def llr_1vs1(self, x1, x2): + """log-likelihood ratio between target and non-target hypothesis for + the case of one enrollment and one test segments. + + Args: + x1: enrollment vectors with shape (num_enroll_segmens, x_dim). + x2: test vectors with shape (num_enroll_segmens, x_dim). + + Returns: + Score matrix with shape (num_enrollment_segments, num_test_segments). + """ + pass + + def llr_NvsM_book(self, D1, D2): + """log-likelihood ratio between target and non-target hypothesis for + the case of N segments/enrollment-side and M segments/test-side + evaluated with the exact formula (by the book). + + Args: + D1: tuple of sufficient statistics for the enrollment sides (N1, F1, S1). + D2: tuple of sufficient statistics for the test sides (N2, F2, S2). + + Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). + """ + pass + def llr_NvsM(self, x1, x2, ids1=None, ids2=None, method="vavg-lnorm"): + """log-likelihood ratio between target and non-target hypothesis for + the case of N segments/enrollment-side and M segments/test-side + + Args: + x1: enrollment vectors with shape (num_enroll_segmens, x_dim). + x2: test vectors with shape (num_enroll_segmens, x_dim). + ids1: integer array mapping from segments to + enrollment-sides in [0, num_enroll_sides-1] + ids2: integer array mapping from segments to + test-sides in [0, num_test_sides-1] + method: evaluation method in ["book" (exact formula), + "vavg" (vector averaging), "vavg-lnorm" (vector averagin + lnorm), + "savg" (score averaging)] + + Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). 
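The hard-assignment statistics computed by compute_stats_hard above reduce to per-class counts, per-class sums, and one accumulated outer product. A minimal NumPy sketch (not part of the patch, and not the library's own implementation):

```python
import numpy as np

x = np.arange(12, dtype=float).reshape(6, 2)  # 6 samples, x_dim = 2
class_ids = np.array([0, 0, 1, 1, 1, 2])

num_classes = class_ids.max() + 1
N = np.bincount(class_ids, minlength=num_classes).astype(float)  # zero-order stats
F = np.zeros((num_classes, x.shape[1]))
np.add.at(F, class_ids, x)         # first-order stats: per-class sums
S = x.T @ x                        # accumulated second-order stats
print(N)                           # [2. 3. 1.]
print(F.shape)                     # (3, 2)
```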
+ """ if method == "savg": return self.llr_NvsM_savg(x1, ids1, x2, ids2) @@ -304,6 +377,18 @@ def llr_NvsM(self, x1, x2, ids1=None, ids2=None, method="vavg-lnorm"): return self.llr_NvsM_vavg(D1, D2, do_lnorm=True) def llr_NvsM_vavg(self, D1, D2, do_lnorm=True): + """log-likelihood ratio between target and non-target hypothesis for + the case of N segments/enrollment-side and M segments/test-side + evaluated with vector averaging. + + Args: + D1: tuple of sufficient statistics for the enrollment sides (N1, F1, S1). + D2: tuple of sufficient statistics for the test sides (N2, F2, S2). + do_lnorm: whether or not to do length norm. after vector averaging. + + Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). + """ x1 = D1[1] / np.expand_dims(D1[0], axis=-1) x2 = D2[1] / np.expand_dims(D2[0], axis=-1) if do_lnorm: @@ -314,6 +399,20 @@ def llr_NvsM_vavg(self, D1, D2, do_lnorm=True): return self.llr_1vs1(x1, x2) def llr_NvsM_savg(self, x1, ids1, x2, ids2): + """log-likelihood ratio between target and non-target hypothesis for + the case of N segments/enrollment-side and M segments/test-side + + Args: + x1: enrollment vectors with shape (num_enroll_segmens, x_dim). + x2: test vectors with shape (num_enroll_segmens, x_dim). + ids1: integer array mapping from segments to + enrollment-sides in [0, num_enroll_sides-1] + ids2: integer array mapping from segments to + test-sides in [0, num_test_sides-1] + + Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). + """ scores_1vs1 = self.llr_1vs1(x1, x2) N, F, _ = self.compute_stats_hard(scores_1vs1, ids1) scores_Nvs1 = F / N[:, None] @@ -322,6 +421,21 @@ def llr_NvsM_savg(self, x1, ids1, x2, ids2): return scores def llr_Nvs1(self, x1, x2, ids1=None, method="vavg-lnorm"): + """log-likelihood ratio between target and non-target hypothesis for + the case of N segments/enrollment-side and M segments/test-side + + Args: + x1: enrollment vectors with shape (num_enroll_segmens, x_dim). + x2: test vectors with shape (num_test_segmens, x_dim). + ids1: integer array mapping from segments to + enrollment-sides in [0, num_enroll_sides-1] + method: evaluation method in ["book" (exact formula), + "vavg" (vector averaging), "vavg-lnorm" (vector averagin + lnorm), + "savg" (score averaging)] + + Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). + """ if method == "savg": return self.llr_Nvs1_savg(x1, ids1, x2) @@ -336,6 +450,18 @@ def llr_Nvs1(self, x1, x2, ids1=None, method="vavg-lnorm"): return self.llr_Nvs1_vavg(D1, x2, do_lnorm=True) def llr_Nvs1_vavg(self, D1, x2, do_lnorm=True): + """log-likelihood ratio between target and non-target hypothesis for + the case of N segments/enrollment-side and M segments/test-side + evaluated with vector averaging. + + Args: + D1: tuple of sufficient statistics for the enrollment sides (N1, F1, S1). + x2: test vectors with shape (num_test_segmens, x_dim). + do_lnorm: whether or not to do length norm. after vector averaging. + + Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). + """ x1 = D1[1] / np.expand_dims(D1[0], axis=-1) if do_lnorm: lnorm = LNorm() @@ -345,23 +471,60 @@ def llr_Nvs1_vavg(self, D1, x2, do_lnorm=True): return self.llr_1vs1(x1, x2) def llr_Nvs1_savg(self, x1, ids1, x2): + """log-likelihood ratio between target and non-target hypothesis for + the case of N segments/enrollment-side and M segments/test-side + + Args: + x1: enrollment vectors with shape (num_enroll_segmens, x_dim). 
+ x2: test vectors with shape (num_enroll_segmens, x_dim). + ids1: integer array mapping from segments to + enrollment-sides in [0, num_enroll_sides-1] + + Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). + """ scores_1vs1 = self.llr_1vs1(x1, x2) N, F, _ = self.compute_stats_hard(scores_1vs1, ids1) scores = F / N[:, None] return scores - @abstractmethod def sample(self, num_classes, num_samples_per_class, rng=None, seed=1024): + """Draws samples from the PLDA model. + + Args: + num_classes: number of classes to sample. + num_samples_per_class: number of samples to sample per each class. + rng: random number generator. + seed: random seed used if rng is None. + + Returns: + Generated samples with shape (num_samples, x_dim). + """ pass def get_config(self): + """Returns the model configuration dict.""" config = {"y_dim": self.y_dim, "update_mu": self.update_mu} - base_config = super(PLDABase, self).get_config() + base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) def weigthed_avg_params(self, mu, w_mu): + """Performs weighted average of the model parameters + and some given parameters. + + Args: + mu: other mean vector + w_mu: weight of the given mean vector. + + """ self.mu = w_mu * mu + (1 - w_mu) * self.mu - @abstractmethod def weigthed_avg_model(self, plda): + """Performs weighted average of the model parameters + and those of another model given as input. + + Args: + plda: other PLDA model. + + """ pass diff --git a/hyperion/np/pdfs/plda/splda.py b/hyperion/np/pdfs/plda/splda.py index f10759cf..f9322d26 100644 --- a/hyperion/np/pdfs/plda/splda.py +++ b/hyperion/np/pdfs/plda/splda.py @@ -11,6 +11,22 @@ class SPLDA(PLDABase): + """Class for Simplied Probabilistic Discriminant Analysis (SPLDA). + .. math:: + \mathbf{x}_{ij} = \mu + \mathbf{V} \mathbf{y}_i + \varepsilon_{ij} + + Attributes: + y_dim: speaker factor dimension. + mu: class-independent mean. + V: speaker factor loading matrix. + W: within-class precision. + fullcov_W: whether W is full-precision matrix or not. + update_mu: whether to update mu or not when training the model. + update_V: whether to update V or not when training the model. + update_W: whether to update W or not when training the model. + x_dim: data dimension. + """ + def __init__( self, y_dim=None, @@ -33,6 +49,7 @@ def __init__( self.update_W = update_W def validate(self): + """Validates the model parameters.""" assert self.mu.shape[0] >= self.V.shape[0] assert self.mu.shape[0] == self.V.shape[1] assert self.mu.shape[0] == self.W.shape[0] @@ -40,6 +57,7 @@ def validate(self): @property def is_init(self): + """Returns True if the model has been initialized.""" if self._is_init: return True if self.mu is not None and self.V is not None and self.W is not None: @@ -48,6 +66,11 @@ def is_init(self): return self._is_init def initialize(self, D): + """initializes the model. + + Args: + D: tuple of sufficient statistics (N, F, S) + """ N, F, S = D self.x_dim = F.shape[1] M = F.shape[0] @@ -73,6 +96,21 @@ def initialize(self, D): def compute_py_g_x( self, D, return_cov=False, return_logpy_0=False, return_acc=False ): + """Computes the posterior P(y|x) + + Args: + D: tuple of sufficient statistics (N, F, S) + return_cov: whether or not to return the posterior covariances. + return_logpy_0: whether or not to return log P(y=0|x). + return_acc: whether or not to return Ry and Py accumulators. 
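A generative sketch (not part of the patch) of the Simplified PLDA equation above, with hypothetical toy dimensions; isotropic noise stands in for the full within-class precision W:

```python
import numpy as np

rng = np.random.default_rng(0)
x_dim, y_dim = 10, 3
mu = rng.normal(size=x_dim)
V = rng.normal(size=(y_dim, x_dim))  # rows are factors, matching validate() above

y = rng.normal(size=y_dim)           # one speaker factor
num_utts = 5
eps = 0.1 * rng.normal(size=(num_utts, x_dim))  # channel noise, stand-in for W^{-1}
x = mu + y @ V + eps                 # num_utts observations of one speaker
```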
+ + Returns: + Speaker factor posterior means with shape (num_speakers, y_dim) + Speaker factor posterior convariances with shape (num_speakers, y_dim, y_dim) + log P(y=0|x) with shape (num_spakers,) + Ry accumlator for ML step with shape (y_dim, y_dim) + Py accumlator for MD step with shape (y_dim, y_dim) + """ N, F, S = D Fc = F - self.mu @@ -158,6 +196,14 @@ def compute_py_g_x( return tuple(r) def Estep(self, D): + """Expectation step. + + Args: + D: tuple with sufficient statistics (N, F, S) + + Returns: + Tuple of statistics with accumlated expectations. + """ N, F, S = D y, logpy, Ry, Py = self.compute_py_g_x(D, return_logpy_0=True, return_acc=True) @@ -179,6 +225,14 @@ def Estep(self, D): return stats def elbo(self, stats): + """Computes the objective function. + + Args: + stats: tuple of expectations computed at the Estep. + + Returns: + log P(X) + """ N, M, F, S, logpy_x = stats[:5] logW = logdet_pdmat(self.W) @@ -196,6 +250,12 @@ def elbo(self, stats): return elbo def MstepML(self, stats): + """Maximum likelihood estimation step. + + Args: + stats: tuple of expectations computed at the Estep. + + """ N, M, F, S, _, y_acc, Ry1, Ry, Cy, Py = stats a = np.hstack((Ry, Ry1[:, None])) @@ -230,6 +290,12 @@ def MstepML(self, stats): self.W = np.diag(1 / np.diag(iW)) def MstepMD(self, stats): + """Minimum divergence estimation step. + + Args: + stats: tuple of expectations computed at the Estep. + + """ N, M, F, S, _, y_acc, Ry1, Ry, Cy, Py = stats mu_y = y_acc / M @@ -242,26 +308,51 @@ def MstepMD(self, stats): self.V = np.dot(chol_Cov_y, self.V) def get_config(self): + """Returns the model configuration dict.""" config = { "update_W": self.update_W, "update_V": self.update_V, "fullcov_W": self.fullcov_W, } - base_config = super(SPLDA, self).get_config() + base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) def save_params(self, f): + """Saves the model paramters into the file. + + Args: + f: file handle. + """ params = {"mu": self.mu, "V": self.V, "W": self.W} self._save_params_from_dict(f, params) @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. + config: configuration dictionary. + + Returns: + Model object. + """ param_list = ["mu", "V", "W"] params = cls._load_params_to_dict(f, config["name"], param_list) kwargs = dict(list(config.items()) + list(params.items())) return cls(**kwargs) def log_probx_g_y(self, x, y): + """Computes logP(X|Y) + + Args: + x: data samples with shape (num_samples, x_dim). + y: speaker factors for each sample with shape (num_samples, y_dim). + + Returns: + log P(X|Y) array with shape (num_samples,) + """ logW = logdet_pdmat(self.W) delta = x - self.mu - np.dot(y, self.V) logp = ( @@ -273,7 +364,16 @@ def log_probx_g_y(self, x, y): return logp def llr_1vs1(self, x1, x2): + """log-likelihood ratio between target and non-target hypothesis for + the case of one enrollment and one test segments. + Args: + x1: enrollment vectors with shape (num_enroll_segmens, x_dim). + x2: test vectors with shape (num_enroll_segmens, x_dim). + + Returns: + Score matrix with shape (num_enrollment_segments, num_test_segments). 
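An end-to-end usage sketch (not part of the patch). The import path assumes SPLDA is re-exported under hyperion.np.pdfs after the np/ move; the fit and llr_1vs1 signatures follow the docstrings in this patch:

```python
import numpy as np
import hyperion.np as hnp  # assumed: SPLDA exported like the other pdfs

rng = np.random.default_rng(0)
x = rng.normal(size=(200, 10))            # toy training x-vectors
class_ids = np.repeat(np.arange(20), 10)  # 20 speakers, 10 utterances each

plda = hnp.pdfs.SPLDA(y_dim=5)
elbo, elbo_norm = plda.fit(x, class_ids=class_ids, epochs=10)  # EM training

x_e = rng.normal(size=(3, 10))            # enrollment vectors
x_t = rng.normal(size=(4, 10))            # test vectors
scores = plda.llr_1vs1(x_e, x_t)          # (3, 4) log-likelihood-ratio matrix
```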
+ """ WV = np.dot(self.W, self.V.T) VV = np.dot(self.V, WV) I = np.eye(self.y_dim, dtype=float_cpu()) @@ -316,6 +416,17 @@ def llr_1vs1(self, x1, x2): return scores def llr_NvsM_book(self, D1, D2): + """log-likelihood ratio between target and non-target hypothesis for + the case of N segments/enrollment-side and M segments/test-side + evaluated with the exact formula (by the book). + + Args: + D1: tuple of sufficient statistics for the enrollment sides (N1, F1, S1). + D2: tuple of sufficient statistics for the test sides (N2, F2, S2). + + Returns: + Score matrix with shape (num_enrollment_sides, num_test_sides). + """ N1, F1, _ = D1 N2, F2, _ = D2 @@ -379,6 +490,17 @@ def llr_NvsM_book(self, D1, D2): return scores def sample(self, num_classes, num_samples_per_class, rng=None, seed=1024): + """Draws samples from the PLDA model. + + Args: + num_classes: number of classes to sample. + num_samples_per_class: number of samples to sample per each class. + rng: random number generator. + seed: random seed used if rng is None. + + Returns: + Generated samples with shape (num_samples, x_dim). + """ if rng is None: rng = np.random.RandomState(seed=seed) @@ -399,7 +521,15 @@ def sample(self, num_classes, num_samples_per_class, rng=None, seed=1024): return y + z def weighted_avg_params(self, mu, V, W, w_mu, w_B, w_W): - super(SPLDA, self).weigthed_avg_params(mu, w_mu) + """Performs weighted average of the model parameters + and some given parameters. + + Args: + mu: other mean vector + w_mu: weight of the given mean vector. + + """ + super().weigthed_avg_params(mu, w_mu) if w_B > 0: Sb0 = np.dot(self.V.T, self.V) Sb = np.dot(V.T, V) @@ -416,9 +546,26 @@ def weighted_avg_params(self, mu, V, W, w_mu, w_B, w_W): self.W = invert_pdmat(Sw, return_inv=True)[-1] def weighted_avg_model(self, plda, w_mu, w_B, w_W): + """Performs weighted average of the model parameters + and those of another model given as input. + + Args: + plda: other PLDA model. + + """ self.weighted_avg_params(plda.mu, plda.V, plda.W, w_mu, w_B, w_W) def project(self, T, delta_mu=None): + """Transforms the PLDA parameters given an affine transformation + of the data. + + Args: + T: data projection matrix. + delta_mu: data shift vector. + + Returns: + Projected PLDA model. + """ mu = self.mu if mu is not None: mu -= delta_mu diff --git a/hyperion/np/score_norm/adapt_s_norm.py b/hyperion/np/score_norm/adapt_s_norm.py index 3f1a47c7..b213d653 100644 --- a/hyperion/np/score_norm/adapt_s_norm.py +++ b/hyperion/np/score_norm/adapt_s_norm.py @@ -11,10 +11,18 @@ class AdaptSNorm(ScoreNorm): - """Class for adaptive S-Norm""" + """Class for adaptive S-Norm. + + Attributes: + nbest: number of samples selected to compute the statistics for each trial + by the adaptive algorith + nbest_discard: discard the nbest trials with higher scores, which could + be actual target trials. + std_floor: floor for standard deviations. + """ def __init__(self, nbest=100, nbest_discard=0, **kwargs): - super(AdaptSNorm, self).__init__(*kwargs) + super().__init__(*kwargs) self.nbest = nbest self.nbest_discard = nbest_discard @@ -26,6 +34,18 @@ def predict( mask_coh_test=None, mask_enr_coh=None, ): + """Normalizes the scores. + + Args: + scores: score matrix enroll vs. test. + scores_coh_test: score matrix cohort vs. test. + scores_enr_coh: score matrix enroll vs cohort. + mask_coh_test: binary matrix to mask out target trials + from cohort vs test matrix. + mask_enr_coh: binary matrix to mask out target trials + from enroll vs. cohort matrix. 
+ + """ assert scores_enr_coh.shape[1] == scores_coh_test.shape[0] assert self.nbest_discard < scores_enr_coh.shape[1] diff --git a/hyperion/np/score_norm/s_norm.py b/hyperion/np/score_norm/s_norm.py index ee00a7e8..2cf81ffc 100644 --- a/hyperion/np/score_norm/s_norm.py +++ b/hyperion/np/score_norm/s_norm.py @@ -15,7 +15,7 @@ class SNorm(ScoreNorm): """Class for S-Norm, symmetric score normalization.""" def __init__(self, **kwargs): - super(SNorm, self).__init__(*kwargs) + super().__init__(*kwargs) self.t_norm = TNorm(**kwargs) self.z_norm = ZNorm(**kwargs) @@ -27,6 +27,18 @@ def predict( mask_coh_test=None, mask_enr_coh=None, ): + """Normalizes the scores. + + Args: + scores: score matrix enroll vs. test. + scores_coh_test: score matrix cohort vs. test. + scores_enr_coh: score matrix enroll vs cohort. + mask_coh_test: binary matrix to mask out target trials + from cohort vs test matrix. + mask_enr_coh: binary matrix to mask out target trials + from enroll vs. cohort matrix. + + """ scores_z_norm = self.z_norm.predict(scores, scores_enr_coh, mask_enr_coh) scores_t_norm = self.t_norm.predict(scores, scores_coh_test, mask_coh_test) diff --git a/hyperion/np/score_norm/score_norm.py b/hyperion/np/score_norm/score_norm.py index 45df0323..e2fa1814 100644 --- a/hyperion/np/score_norm/score_norm.py +++ b/hyperion/np/score_norm/score_norm.py @@ -9,10 +9,20 @@ class ScoreNorm(NPModel): - """ - Base class for score normalization + """Base class for score normalization + + Attributes: + std_floor: floor for standard deviations. """ def __init__(self, std_floor=1e-5, **kwargs): - super(ScoreNorm, self).__init__(*kwargs) + super().__init__(*kwargs) self.std_floor = std_floor + + def forward(self, **kwargs): + """Overloads predict function.""" + return self.predict(**kwargs) + + def __call__(self, *kwargs): + """Overloads predict function.""" + return self.predict(**kwargs) diff --git a/hyperion/np/score_norm/t_norm.py b/hyperion/np/score_norm/t_norm.py index 3fb92548..ac87c8ac 100644 --- a/hyperion/np/score_norm/t_norm.py +++ b/hyperion/np/score_norm/t_norm.py @@ -13,7 +13,15 @@ class TNorm(ScoreNorm): """Class for T-Norm score normalization.""" def predict(self, scores, scores_coh_test, mask=None): + """Normalizes the scores. + Args: + scores: score matrix enroll vs. test. + scores_coh_test: score matrix cohort vs. test. + mask: binary matrix to mask out target trials + from cohort vs test matrix. + + """ if mask is None: mu_t = np.mean(scores_coh_test, axis=0, keepdims=True) s_t = np.std(scores_coh_test, axis=0, keepdims=True) diff --git a/hyperion/np/score_norm/tz_norm.py b/hyperion/np/score_norm/tz_norm.py index d4bb1539..6127091d 100644 --- a/hyperion/np/score_norm/tz_norm.py +++ b/hyperion/np/score_norm/tz_norm.py @@ -14,7 +14,7 @@ class TZNorm(ScoreNorm): """Class for TZ-Norm score normalization.""" def __init__(self, **kwargs): - super(SNorm, self).__init__(*kwargs) + super().__init__(*kwargs) self.t_norm = TNorm(**kwargs) self.z_norm = ZNorm(**kwargs) @@ -28,6 +28,20 @@ def predict( mask_enr_coh=None, mask_coh_coh=None, ): + """Normalizes the scores. + + Args: + scores: score matrix enroll vs. test. + scores_coh_test: score matrix cohort vs. test. + scores_enr_coh: score matrix enroll vs cohort. + scores_coh_coh: score matrix cohort vs cohort. + mask_coh_test: binary matrix to mask out target trials + from cohort vs test matrix. + mask_enr_coh: binary matrix to mask out target trials + from enroll vs. cohort matrix. + mask_coh_coh: binary matrix to mask out target trials + from cohort vs. 
         scores_t_norm = self.t_norm.predict(scores, scores_coh_test, mask_coh_test)
         scores_enr_coh_t_norm = self.t_norm.predict(
diff --git a/hyperion/np/score_norm/z_norm.py b/hyperion/np/score_norm/z_norm.py
index f5350fb1..98189e06 100644
--- a/hyperion/np/score_norm/z_norm.py
+++ b/hyperion/np/score_norm/z_norm.py
@@ -14,7 +14,15 @@ class ZNorm(ScoreNorm):
     """
 
     def predict(self, scores, scores_enr_coh, mask=None):
+        """Normalizes the scores.
+
+        Args:
+          scores: score matrix enroll vs. test.
+          scores_enr_coh: score matrix enroll vs. cohort.
+          mask: binary matrix to mask out target trials
+            from the enroll vs. cohort matrix.
+
+        """
         if mask is None:
             mu_z = np.mean(scores_enr_coh, axis=1, keepdims=True)
             s_z = np.std(scores_enr_coh, axis=1, keepdims=True)
diff --git a/hyperion/np/score_norm/zt_norm.py b/hyperion/np/score_norm/zt_norm.py
index 4c5c8b5c..415ddca8 100644
--- a/hyperion/np/score_norm/zt_norm.py
+++ b/hyperion/np/score_norm/zt_norm.py
@@ -15,7 +15,7 @@ class ZTNorm(ScoreNorm):
     """Class ZT-Norm score-normalization."""
 
     def __init__(self, **kwargs):
-        super(SNorm, self).__init__(*kwargs)
+        super().__init__(**kwargs)
         self.t_norm = TNorm(**kwargs)
         self.z_norm = ZNorm(**kwargs)
 
@@ -29,10 +29,24 @@ def predict(
         mask_enr_coh=None,
         mask_coh_coh=None,
     ):
+        """Normalizes the scores.
+
+        Args:
+          scores: score matrix enroll vs. test.
+          scores_coh_test: score matrix cohort vs. test.
+          scores_enr_coh: score matrix enroll vs. cohort.
+          scores_coh_coh: score matrix cohort vs. cohort.
+          mask_coh_test: binary matrix to mask out target trials
+            from the cohort vs. test matrix.
+          mask_enr_coh: binary matrix to mask out target trials
+            from the enroll vs. cohort matrix.
+          mask_coh_coh: binary matrix to mask out target trials
+            from the cohort vs. cohort matrix.
+        """
         scores_z_norm = self.z_norm.predict(scores, scores_enr_coh, mask_enr_coh)
         scores_coh_test_z_norm = self.z_norm.predict(
-            scores_coh_test, scores_coh_coh, mask_enr_coh
+            scores_coh_test, scores_coh_coh, mask_coh_coh
         )
         scores_zt_norm = self.t_norm.predict(
             scores_z_norm, scores_coh_test_z_norm, mask_coh_test
diff --git a/hyperion/np/transforms/cent_whiten_up.py b/hyperion/np/transforms/cent_whiten_up.py
index f3793328..1200e61b 100644
--- a/hyperion/np/transforms/cent_whiten_up.py
+++ b/hyperion/np/transforms/cent_whiten_up.py
@@ -17,17 +17,17 @@ class CentWhitenUP(CentWhiten):
     """Class to do centering and whitening with uncertainty propagation."""
 
     def __init__(self, mu=None, T=None, update_mu=True, update_T=True, **kwargs):
-        super(CentWhitenUP, self).__init__(mu, T, update_mu, update_T, **kwargs)
+        super().__init__(mu, T, update_mu, update_T, **kwargs)
 
     def predict(self, x):
         x_dim = int(x.shape[-1] / 2)
         m_x = x[:, :x_dim]
         s2_x = x[:, x_dim:]
-        m_x = super(CentWhitenUP, self).predict(m_x)
+        m_x = super().predict(m_x)
         for i in range(x.shape[0]):
             s2_x[i] = np.diag(np.dot(self.T.T * s2_x[i], self.T))
         return np.hstack((m_x, s2_x))
 
     def fit(self, x, sample_weight=None):
         x = x[:, : int(x.shape[-1] / 2)]
-        super(CentWhitenUP, self).fit(x, sample_weight=sample_weight)
+        super().fit(x, sample_weight=sample_weight)
diff --git a/hyperion/np/transforms/lda.py b/hyperion/np/transforms/lda.py
index 13c74fe8..5644a2a3 100644
--- a/hyperion/np/transforms/lda.py
+++ b/hyperion/np/transforms/lda.py
@@ -85,14 +85,6 @@ def load_params(cls, f, config):
         params = cls._load_params_to_dict(f, config["name"], param_list)
         return cls(mu=params["mu"], T=params["T"], name=config["name"])
 
-    # @classmethod
-    # def load(cls, file_path):
-    #     with h5py.File(file_path, 'r') as f:
-    #
config = self.load_config_from_json(f['config']) - # param_list = ['mu', 'T'] - # params = self._load_params_to_dict(f, config['name'], param_list) - # return cls(mu=params['mu'], T=params['T'], name=config['name']) - @classmethod def load_mat(cls, file_path): with h5py.File(file_path, "r") as f: diff --git a/hyperion/torch/layers/margin_losses.py b/hyperion/torch/layers/margin_losses.py index 63da2493..0d748249 100644 --- a/hyperion/torch/layers/margin_losses.py +++ b/hyperion/torch/layers/margin_losses.py @@ -22,6 +22,9 @@ def _l2_norm(x, axis=-1): class ArcLossOutput(nn.Module): """Additive angular margin softmax (ArcFace) output layer. + It includes the option to also use InterTopK penalty: + https://arxiv.org/abs/2109.01989 + Attributes: in_feats: input feature dimension. num_classes: number of output classes. @@ -29,10 +32,19 @@ class ArcLossOutput(nn.Module): margin: angular margin. margin_warmup_epochs: number of epochs to warm up the margin from 0 to its final value. + intertop_k: adds negative angular penalty to k largest negative scores. + intertop_margin: inter-top-k penalty. """ def __init__( - self, in_feats, num_classes, cos_scale=64, margin=0.3, margin_warmup_epochs=0 + self, + in_feats, + num_classes, + cos_scale=64, + margin=0.3, + margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0, ): super().__init__() self.in_feats = in_feats @@ -40,10 +52,14 @@ def __init__( self.cos_scale = cos_scale self.margin = margin self.margin_warmup_epochs = margin_warmup_epochs + self.intertop_k = intertop_k + self.intertop_margin = intertop_margin if margin_warmup_epochs == 0: self.cur_margin = margin + self.cur_intertop_margin = intertop_margin else: self.cur_margin = 0 + self.cur_intertop_margin = 0 self._compute_aux() @@ -54,20 +70,28 @@ def __repr__(self): return self.__str__() def __str__(self): - s = "%s(in_feats=%d, num_classes=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d)" % ( + s = "%s(in_feats=%d, num_classes=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" % ( self.__class__.__name__, self.in_feats, self.num_classes, self.cos_scale, self.margin, self.margin_warmup_epochs, + self.intertop_k, + self.intertop_margin, ) return s def _compute_aux(self): - logging.info("updating arc-softmax margin=%.2f" % (self.cur_margin)) + logging.info( + "updating arc-softmax margin=%.2f intertop-margin=%.2f", + self.cur_margin, + self.cur_intertop_margin, + ) self.cos_m = math.cos(self.cur_margin) self.sin_m = math.sin(self.cur_margin) + self.intertop_cos_m = math.cos(self.cur_intertop_margin) + self.intertop_sin_m = math.sin(self.cur_intertop_margin) def update_margin(self, epoch): """Updates the value of the margin. 
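The _compute_aux and forward code above relies on the angle-addition identity cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m), which lets the layer apply the angular margin without ever computing theta. A quick numeric check (not part of the patch):

```python
import math

theta, m = 0.9, 0.3
lhs = math.cos(theta + m)
rhs = math.cos(theta) * math.cos(m) - math.sin(theta) * math.sin(m)
assert abs(lhs - rhs) < 1e-12  # identity used by cos_theta_m in forward()
```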
@@ -80,9 +104,13 @@ def update_margin(self, epoch): if epoch < self.margin_warmup_epochs: self.cur_margin = self.margin * epoch / self.margin_warmup_epochs + self.cur_intertop_margin = ( + self.intertop_margin * epoch / self.margin_warmup_epochs + ) else: if self.cur_margin != self.margin: self.cur_margin = self.margin + self.cur_intertop_margin = self.intertop_margin else: return @@ -117,7 +145,35 @@ def forward(self, x, y=None): cos_theta_m = cos_theta * self.cos_m - sin_theta * self.sin_m idx_ = torch.arange(0, batch_size, dtype=torch.long) + # if torch.distributed.get_rank() == 0: + # print("o1", output[idx_, y]) output[idx_, y] = cos_theta_m[idx_, y] + # if torch.distributed.get_rank() == 0: + # print("o2", output[idx_, y]) + if self.cur_intertop_margin > 0: + # implementation of intertop-K + # set positive scores to -inf so they don't appear in the top k + cos_aux = cos_theta * 1 + cos_aux[idx_, y] = -1e10 + # find topk indices for negative samples + topk = torch.topk(cos_aux, k=self.intertop_k, dim=-1, sorted=False) + idx_ = ( + idx_.unsqueeze(-1).expand(batch_size, self.intertop_k).flatten() + ) + topk_idx = topk.indices.flatten() + # compute cos(theta-m') + cos_theta_m = ( + cos_theta[idx_, topk_idx] * self.intertop_cos_m + + sin_theta[idx_, topk_idx] * self.intertop_sin_m + ) + # take the maximum for the cases where m' is larger than theta to get cos(max(0, theta-m')) + # if torch.distributed.get_rank() == 0: + # print("o3", output[idx_, topk_idx]) + output[idx_, topk_idx] = torch.maximum( + output[idx_, topk_idx], cos_theta_m + ) + # if torch.distributed.get_rank() == 0: + # print("o4", output[idx_, topk_idx], flush=True) output *= s # scale up in order to make softmax work return output @@ -133,10 +189,19 @@ class CosLossOutput(nn.Module): margin: angular margin. margin_warmup_epochs: number of epochs to warm up the margin from 0 to its final value. + intertop_k: adds negative angular penalty to k largest negative scores. + intertop_margin: inter-top-k penalty. """ def __init__( - self, in_feats, num_classes, cos_scale=64, margin=0.3, margin_warmup_epochs=0 + self, + in_feats, + num_classes, + cos_scale=64, + margin=0.3, + margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, ): super().__init__() self.in_feats = in_feats @@ -144,14 +209,34 @@ def __init__( self.cos_scale = cos_scale self.margin = margin self.margin_warmup_epochs = margin_warmup_epochs + self.intertop_k = intertop_k + self.intertop_margin = intertop_margin if margin_warmup_epochs == 0: self.cur_margin = margin + self.cur_intertop_margin = intertop_margin else: self.cur_margin = 0 + self.cur_intertop_margin = 0 self.kernel = nn.Parameter(torch.Tensor(in_feats, num_classes)) self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5) + def __repr__(self): + return self.__str__() + + def __str__(self): + s = "%s(in_feats=%d, num_classes=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" % ( + self.__class__.__name__, + self.in_feats, + self.num_classes, + self.cos_scale, + self.margin, + self.margin_warmup_epochs, + self.intertop_k, + self.intertop_margin, + ) + return s + def update_margin(self, epoch): """Updates the value of the margin. 
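A toy illustration (not part of the patch) of the InterTopK selection used in the forward hunks above: mask out the target column, then take the k largest remaining cosines as the hardest negatives for each sample:

```python
import torch

cos_theta = torch.tensor([[0.9, 0.2, 0.8, 0.1],
                          [0.1, 0.7, 0.3, 0.6]])
y = torch.tensor([0, 1])                 # target class per sample
k = 2

cos_aux = cos_theta.clone()
cos_aux[torch.arange(2), y] = -1e10      # exclude target scores from the top-k
topk = torch.topk(cos_aux, k=k, dim=-1)
print(topk.indices)                      # hardest negatives per sample
# tensor([[2, 1],
#         [3, 2]])
```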
@@ -163,11 +248,23 @@ def update_margin(self, epoch): if epoch < self.margin_warmup_epochs: self.cur_margin = self.margin * epoch / self.margin_warmup_epochs - logging.info("updating cos-softmax margin=%.2f" % (self.cur_margin)) + logging.info( + "updating cos-softmax margin=%.2f intertop-margin=%.2f", + self.cur_margin, + self.cur_intertop_margin, + ) + self.cur_intertop_margin = ( + self.intertop_margin * epoch / self.margin_warmup_epochs + ) else: if self.cur_margin != self.margin: self.cur_margin = self.margin - logging.info("updating cos-softmax margin=%.2f" % (self.cur_margin)) + self.cur_intertop_margin = self.intertop_margin + logging.info( + "updating cos-softmax margin=%.2f intertop-margin=%.2f", + self.cur_margin, + self.cur_intertop_margin, + ) else: return @@ -198,6 +295,21 @@ def forward(self, x, y=None): cos_theta_m = cos_theta - self.cur_margin idx_ = torch.arange(0, batch_size, dtype=torch.long) output[idx_, y] = cos_theta_m[idx_, y] + if self.cur_intertop_margin > 0: + # implementation of intertop-K + # set positive scores to -inf so they don't appear in the top k + cos_aux = cos_theta * 1 + cos_aux[idx_, y] = -1e10 + # find topk indices for negative samples + topk = torch.topk(cos_aux, k=self.intertop_k, dim=-1, sorted=False) + idx_ = ( + idx_.unsqueeze(-1).expand(batch_size, self.intertop_k).flatten() + ) + topk_idx = topk.indices.flatten() + # compute cos(theta) + m' + cos_theta_m = cos_theta[idx_, topk_idx] + self.cur_intertop_margin + # clamp so cos cannt be larger than 1. + output[idx_, topk_idx] = cos_theta_m.clamp(max=1.0) output *= s # scale up in order to make softmax work return output @@ -214,6 +326,8 @@ class SubCenterArcLossOutput(ArcLossOutput): margin: angular margin. margin_warmup_epochs: number of epochs to warm up the margin from 0 to its final value. + intertop_k: adds negative angular penalty to k largest negative scores. + intertop_margin: inter-top-k penalty. 
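For the CosFace-style variant above, the inter-top-k penalty is additive and the result is clamped, since a cosine cannot exceed 1. A small numeric sketch (not part of the patch):

```python
import torch

cos_neg = torch.tensor([0.95, 0.40])     # selected hardest-negative cosines
intertop_margin = 0.1
penalized = (cos_neg + intertop_margin).clamp(max=1.0)
print(penalized)                         # tensor([1.0000, 0.5000])
```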
""" def __init__( @@ -224,6 +338,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, ): super().__init__( in_feats, @@ -231,12 +347,14 @@ def __init__( cos_scale, margin, margin_warmup_epochs, + intertop_k, + intertop_margin, ) self.num_classes = num_classes self.num_subcenters = num_subcenters def __str__(self): - s = "%s(in_feats=%d, num_classes=%d, num_subcenters=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d)" % ( + s = "%s(in_feats=%d, num_classes=%d, num_subcenters=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" % ( self.__class__.__name__, self.in_feats, self.num_classes, @@ -244,6 +362,8 @@ def __str__(self): self.cos_scale, self.margin, self.margin_warmup_epochs, + self.intertop_k, + self.intertop_margin, ) return s @@ -283,6 +403,26 @@ def forward(self, x, y=None): idx_ = torch.arange(0, batch_size, dtype=torch.long) output[idx_, y] = cos_theta_m[idx_, y] + if self.cur_intertop_margin > 0: + # implementation of intertop-K + # set positive scores to -inf so they don't appear in the top k + cos_aux = cos_theta * 1 + cos_aux[idx_, y] = -1e10 + # find topk indices for negative samples + topk = torch.topk(cos_aux, k=self.intertop_k, dim=-1, sorted=False) + idx_ = ( + idx_.unsqueeze(-1).expand(batch_size, self.intertop_k).flatten() + ) + topk_idx = topk.indices.flatten() + # compute cos(theta-m') + cos_theta_m = ( + cos_theta[idx_, topk_idx] * self.intertop_cos_m + + sin_theta[idx_, topk_idx] * self.intertop_sin_m + ) + # take the maximum for the cases where m' is larger than theta to get cos(max(0, theta-m')) + output[idx_, topk_idx] = torch.maximum( + output[idx_, topk_idx], cos_theta_m + ) output *= s # scale up in order to make softmax work return output diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index cb8ff1d0..d79d5a26 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -36,7 +36,7 @@ def __init__( self.xvector = xvector self.feat_fusion_start = feat_fusion_start self.feat_fusion_method = feat_fusion_method - self._hf_context = contextlib.nullcontext + self._hf_context = contextlib.nullcontext() self._make_fuser() def _make_fuser(self): @@ -244,7 +244,7 @@ def set_train_mode(self, mode): logging.info("using torch.no_grad for hf_feats") self._hf_context = torch.no_grad() else: - self._hf_context = contextlib.nullcontext + self._hf_context = contextlib.nullcontext() self._train_mode = mode diff --git a/hyperion/torch/models/xvectors/efficient_net_xvector.py b/hyperion/torch/models/xvectors/efficient_net_xvector.py index 606a9554..21eb9dbe 100644 --- a/hyperion/torch/models/xvectors/efficient_net_xvector.py +++ b/hyperion/torch/models/xvectors/efficient_net_xvector.py @@ -42,6 +42,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, drop_connect_rate=0.2, dropout_rate=0, @@ -88,6 +90,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, num_subcenters=num_subcenters, norm_layer=norm_layer, head_norm_layer=head_norm_layer, diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py index 706ee4ef..e4495182 100644 --- a/hyperion/torch/models/xvectors/resnet1d_xvector.py +++ 
b/hyperion/torch/models/xvectors/resnet1d_xvector.py @@ -26,6 +26,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, dropout_rate=0, norm_layer=None, @@ -52,6 +54,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, num_subcenters=num_subcenters, norm_layer=norm_layer, head_norm_layer=head_norm_layer, @@ -62,58 +66,6 @@ def __init__( proj_feats=proj_feats, ) - # @property - # def in_channels(self): - # return self.encoder_net.in_channels - - # @property - # def conv_channels(self): - # return self.encoder_net.conv_channels - - # @property - # def base_channels(self): - # return self.encoder_net.base_channels - - # @property - # def in_kernel_size(self): - # return self.encoder_net.in_kernel_size - - # @property - # def in_stride(self): - # return self.encoder_net.in_stride - - # @property - # def zero_init_residual(self): - # return self.encoder_net.zero_init_residual - - # @property - # def groups(self): - # return self.encoder_net.groups - - # @property - # def replace_stride_with_dilation(self): - # return self.encoder_net.replace_stride_with_dilation - - # @property - # def do_maxpool(self): - # return self.encoder_net.do_maxpool - - # @property - # def in_norm(self): - # return self.encoder_net.in_norm - - # @property - # def se_r(self): - # return self.encoder_net.se_r - - # @property - # def res2net_scale(self): - # return self.encoder_net.res2net_scale - - # @property - # def res2net_width_factor(self): - # return self.encoder_net.res2net_width_factor - def get_config(self): base_config = super().get_config() diff --git a/hyperion/torch/models/xvectors/resnet_xvector.py b/hyperion/torch/models/xvectors/resnet_xvector.py index 58a34c94..99385cae 100644 --- a/hyperion/torch/models/xvectors/resnet_xvector.py +++ b/hyperion/torch/models/xvectors/resnet_xvector.py @@ -36,6 +36,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, dropout_rate=0, norm_layer=None, @@ -84,6 +86,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, num_subcenters=num_subcenters, norm_layer=norm_layer, head_norm_layer=head_norm_layer, diff --git a/hyperion/torch/models/xvectors/spinenet_xvector.py b/hyperion/torch/models/xvectors/spinenet_xvector.py index d3a22bce..676952da 100644 --- a/hyperion/torch/models/xvectors/spinenet_xvector.py +++ b/hyperion/torch/models/xvectors/spinenet_xvector.py @@ -40,6 +40,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, dropout_rate=0, norm_layer=None, @@ -92,6 +94,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, num_subcenters=num_subcenters, norm_layer=norm_layer, head_norm_layer=head_norm_layer, diff --git a/hyperion/torch/models/xvectors/tdnn_xvector.py b/hyperion/torch/models/xvectors/tdnn_xvector.py index a0211f87..7816c7ea 100644 --- a/hyperion/torch/models/xvectors/tdnn_xvector.py +++ b/hyperion/torch/models/xvectors/tdnn_xvector.py @@ -33,6 +33,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, dropout_rate=0, norm_layer=None, @@ -73,6 +75,8 @@ def __init__( 
cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, num_subcenters=num_subcenters, norm_layer=norm_layer, head_norm_layer=head_norm_layer, diff --git a/hyperion/torch/models/xvectors/transformer_xvector_v1.py b/hyperion/torch/models/xvectors/transformer_xvector_v1.py index 1eaa03b6..742fadc8 100644 --- a/hyperion/torch/models/xvectors/transformer_xvector_v1.py +++ b/hyperion/torch/models/xvectors/transformer_xvector_v1.py @@ -73,6 +73,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, dropout_rate=0.1, pos_dropout_rate=0.1, @@ -118,6 +120,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, num_subcenters=num_subcenters, norm_layer=norm_layer, head_norm_layer=head_norm_layer, diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 008f595c..8c2070b5 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -38,6 +38,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, norm_layer=None, head_norm_layer=None, @@ -120,6 +122,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, num_subcenters=num_subcenters, norm_layer=head_norm_layer, use_norm=use_norm, @@ -163,6 +167,14 @@ def margin(self): def margin_warmup_epochs(self): return self.classif_net.margin_warmup_epochs + @property + def intertop_k(self): + return self.classif_net.intertop_k + + @property + def intertop_margin(self): + return self.classif_net.intertop_margin + @property def num_subcenters(self): return self.classif_net.num_subcenters @@ -490,6 +502,8 @@ def get_config(self): "cos_scale": self.cos_scale, "margin": self.margin, "margin_warmup_epochs": self.margin_warmup_epochs, + "intertop_k": self.intertop_k, + "intertop_margin": self.intertop_margin, "num_subcenters": self.num_subcenters, "norm_layer": self.norm_layer, "head_norm_layer": self.head_norm_layer, @@ -560,6 +574,7 @@ def set_train_mode(self, mode): elif mode == "frozen": self.freeze() elif mode == "ft-embed-affine": + self.unfreeze() self.freeze_preembed_layers() else: raise ValueError(f"invalid train_mode={mode}") @@ -581,7 +596,8 @@ def _train(self, train_mode: str): else: raise ValueError(f"invalid train_mode={train_mode}") - def valid_train_modes(self): + @staticmethod + def valid_train_modes(): return ["full", "frozen", "ft-embed-affine"] @staticmethod @@ -607,6 +623,8 @@ def filter_args(**kwargs): "cos_scale", "margin", "margin_warmup_epochs", + "intertop_k", + "intertop_margin", "num_subcenters", "use_norm", "norm_before", @@ -670,6 +688,16 @@ def add_class_args(parser, prefix=None, skip=set()): help="number of epoch until we set the final margin", ) + parser.add_argument( + "--intertop-k", default=5, type=int, help="K for InterTopK penalty" + ) + parser.add_argument( + "--intertop-margin", + default=0.0, + type=float, + help="margin for InterTopK penalty", + ) + parser.add_argument( "--num-subcenters", default=2, @@ -760,9 +788,15 @@ def add_class_args(parser, prefix=None, skip=set()): @staticmethod def filter_finetune_args(**kwargs): - valid_args = ("loss_type", "cos_scale", "margin", "margin_warmup_epochs") + valid_args = ( + 
"loss_type", + "cos_scale", + "margin", + "margin_warmup_epochs", + "intertop_k", + "intertop_margin", + ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - return args @staticmethod @@ -793,6 +827,16 @@ def add_finetune_args(parser, prefix=None): help="number of epoch until we set the final margin", ) + parser.add_argument( + "--intertop-k", default=5, type=int, help="K for InterTopK penalty" + ) + parser.add_argument( + "--intertop-margin", + default=0.0, + type=float, + help="margin for InterTopK penalty", + ) + parser.add_argument( "--num-subcenters", default=2, diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index 5824cb1b..06bd988c 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -29,6 +29,8 @@ class ClassifHead(NetArch): cos_scale: scale parameter for cos-softmax and arc-softmax margin: margin parameter for cos-softmax and arc-softmax margin_warmup_epochs: number of epochs to anneal the margin from 0 to margin + intertop_k: adds negative angular penalty to k largest negative scores. + intertop_margin: inter-top-k penalty. num_subcenters: number of subcenters in subcenter losses norm_layer: norm_layer object or str indicating type norm layer, if None it uses BatchNorm1d use_norm: it True it uses layer/batch-normalization @@ -46,6 +48,8 @@ def __init__( cos_scale=64, margin=0.3, margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, norm_layer=None, use_norm=True, @@ -78,6 +82,8 @@ def __init__( self.cos_scale = cos_scale self.margin = margin self.margin_warmup_epochs = margin_warmup_epochs + self.intertop_k = intertop_k + self.intertop_margin = intertop_margin self.num_subcenters = num_subcenters prev_feats = in_feats @@ -124,6 +130,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, ) elif loss_type == "arc-softmax": self.output = ArcLossOutput( @@ -132,6 +140,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, ) elif loss_type == "subcenter-arc-softmax": self.output = SubCenterArcLossOutput( @@ -141,6 +151,8 @@ def __init__( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, ) def rebuild_output_layer( @@ -150,6 +162,8 @@ def rebuild_output_layer( cos_scale, margin, margin_warmup_epochs, + intertop_k=5, + intertop_margin=0.0, num_subcenters=2, ): @@ -159,6 +173,8 @@ def rebuild_output_layer( self.cos_scale = cos_scale self.margin = margin self.margin_warmup_epochs = margin_warmup_epochs + self.intertop_margin = intertop_margin + self.num_subcenters = num_subcenters self.num_subcenters = num_subcenters if loss_type == "softmax": @@ -170,6 +186,8 @@ def rebuild_output_layer( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, ) elif loss_type == "arc-softmax": self.output = ArcLossOutput( @@ -178,6 +196,8 @@ def rebuild_output_layer( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, ) elif loss_type == "subcenter-arc-softmax": self.output = SubCenterArcLossOutput( @@ -187,6 +207,8 @@ def rebuild_output_layer( cos_scale=cos_scale, margin=margin, margin_warmup_epochs=margin_warmup_epochs, 
+ intertop_k=intertop_k, + intertop_margin=intertop_margin, ) def set_margin(self, margin): @@ -281,6 +303,8 @@ def get_config(self): "cos_scale": self.cos_scale, "margin": self.margin, "margin_warmup_epochs": self.margin_warmup_epochs, + "intertop_k": self.intertop_k, + "intertop_margin": self.intertop_margin, "num_subcenters": self.num_subcenters, "norm_layer": self.norm_layer, "use_norm": self.use_norm, @@ -311,6 +335,8 @@ def filter_args(**kwargs): "s", "margin", "margin_warmup_epochs", + "intertop_k", + "intertop_margin", "num_subcenters", "use_norm", "norm_before", @@ -362,6 +388,16 @@ def add_class_args(parser, prefix=None): help="number of epoch until we set the final margin", ) + parser.add_argument( + "--intertop-k", default=5, type=int, help="K for InterTopK penalty" + ) + parser.add_argument( + "--intertop-margin", + default=0.0, + type=float, + help="margin for InterTopK penalty", + ) + parser.add_argument( "--num-subcenters", default=2, diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 8dfad9ce..4e29dab5 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -586,8 +586,11 @@ def load_checkpoint(self, file_path): logs = checkpoint["logs"] del checkpoint - if self.device is not None: - torch.cuda.empty_cache() + # this was added before to try to release as much GPU memory as possible + # Recently has started to cause CUDA not available devices error + # Commenting for now. + # if self.device is not None: + # torch.cuda.empty_cache() return logs diff --git a/notebooks/tutorial_jsalt22/ivectors.ipynb b/notebooks/tutorial_jsalt22/ivectors.ipynb new file mode 100644 index 00000000..46d4eb61 --- /dev/null +++ b/notebooks/tutorial_jsalt22/ivectors.ipynb @@ -0,0 +1,226 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# i-Vectors Tutorial" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hpath='/exp/jvillalba/hyperion/hyperion-persephone'\n", + "import sys\n", + "sys.path.append(hpath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import hyperion as hyp\n", + "import hyperion.np as hnp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_data(num_dims, num_spks=10, num_utts=10, num_units=10, unit_length=10, tv_dim=2):\n", + " \"\"\" Generate data following the i-vector model\n", + "\n", + " Args:\n", + " num_dims: number of dimensions of the features.\n", + " num_spks: number of speakers.\n", + " num_utts: number of utterances per speaker.\n", + " num_units: number of phonetic units per utterance.\n", + " unit_length: duration of each phonetic unit.\n", + " \"\"\"\n", + " rng = np.random.RandomState(seed=1234)\n", + " # we set the number of phonetic classes to 2^num_dim\n", + " num_comp = 2**num_dims\n", + " \n", + " # Define UBM\n", + " # Means of the GMM-UBM\n", + " ubm_means = np.zeros((num_comp, num_dims))\n", + " kernel=np.array([1.,-1.])[:,None]\n", + " ubm_means = kernel\n", + " for i in range(1,num_dims):\n", + " ubm_means = np.concatenate((np.repeat(kernel, int(2**i), axis=0), np.tile(ubm_means,(2,1))), axis=1)\n", + " \n", + " # Covariances of the GMM-UBM\n", + " ubm_cov = 0.1 * np.ones((num_comp, num_dims))\n", + " ubm_prec = 1./ubm_cov\n", + "\n", + " # Weights of 
the GMM-UBM\n", + " ubm_weights = np.ones((num_comp))/num_comp\n", + " \n", + "\n", + " # Define between and within speaker covariances\n", + " sb = 0.7\n", + " sw = 0.3\n", + "\n", + " # Define Total Variability sub-space\n", + " T = rng.randn(tv_dim, num_dims * num_comp)\n", + " T = 0.2 * T/np.max(T)\n", + " \n", + " # Sample speakers\n", + " spk_ids = np.arange(num_spks)\n", + " y = sb * rng.randn(num_spks, tv_dim)\n", + "\n", + " # Sample i-vectors\n", + " spk_ids = np.repeat(spk_ids, num_utts, axis=0)\n", + " y = np.repeat(y, num_utts, axis=0)\n", + " w = y + sw * rng.randn(num_spks*num_utts, tv_dim)\n", + "\n", + " x = []\n", + " r_idx = []\n", + " # Sample features\n", + " for i in range(w.shape[0]):\n", + " # For each utterance\n", + " # Compute the GMM mean of the utterance\n", + " means_i = ubm_means + np.dot(w[i],T).reshape(num_dims,num_comp).T\n", + "\n", + " # Create a GMM for the utterance.\n", + " gmm = hnp.pdfs.GMMDiagCov(pi=ubm_weights, mu=means_i, Lambda=ubm_prec)\n", + "\n", + " # Sample the Gaussian components\n", + " r_i = rng.multinomial(1, ubm_weights, size=(num_units,))\n", + " # Assume that we stay in the same component several time steps.\n", + " r_i = np.repeat(r_i, unit_length, axis=0)\n", + " # Draw samples from the GMM\n", + " x_i = gmm.sample(r=r_i)\n", + " x.append(x_i)\n", + " r_idx.append(r_i.argmax(axis=-1))\n", + "\n", + " return x, r_idx, spk_ids\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x, r_idx, spk_ids =generate_data(num_dims=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_cat=np.concatenate(x, axis=0)\n", + "fig = plt.figure()\n", + "ax = fig.add_subplot(projection='3d')\n", + "ax.scatter(x_cat[:,0], x_cat[:,1], x_cat[:,2], marker='o')\n", + "ax.set_xlabel('x1')\n", + "ax.set_ylabel('x2')\n", + "ax.set_zlabel('x3')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ubm_gmm = hnp.pdfs.GMMDiagCov(num_comp=8, x_dim=3)\n", + "elbo, elbo_norm = ubm_gmm.fit(x_cat, epochs=10)\n", + "fig = plt.figure()\n", + "plt.plot(elbo_norm)\n", + "plt.xlabel('iters')\n", + "plt.ylabel('log(p(x))')\n", + "plt.grid(True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ubm_gmm.mu\n", + "ubm_gmm.pi\n", + "ubm_gmm.Sigma\n", + "fig=plt.figure()\n", + "ax=fig.add_subplot(111, projection=\"3d\")\n", + "ubm_gmm.plot3D_ellipsoid(num_sigmas=1, ax=ax)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iv_model = hnp.pdfs.JFATotal(K=8, x_dim=3, y_dim=2)\n", + "N=[]\n", + "F=[]\n", + "for x_i in x:\n", + " N_i, u_x_i = ubm_gmm.accum_suff_stats(x_i)\n", + " N_i, F_i = ubm_gmm.norm_suff_stats(N_i, u_x_i)\n", + " N.append(N_i.reshape(1,-1))\n", + " F.append(F_i.reshape(1,-1))\n", + "\n", + "N = np.concatenate(N, axis=0)\n", + "F = np.concatenate(F, axis=0)\n", + "\n", + "elbo, elbo_norm = iv_model.fit(N, F)\n", + "fig = plt.figure()\n", + "plt.plot(elbo_norm)\n", + "plt.xlabel('iters')\n", + "plt.ylabel('log(p(x))')\n", + "plt.grid(True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "F.shape\n", + "\n" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "488a239b304e646027d6710c3377746db4487e56624448f35f81edd765904a6d" + }, + "kernelspec": { + 
"display_name": "Python 3.8.12 ('py38_pt101_cu112')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 25bbd0e652c1de7cf1f3fb214604028cc2176df9 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 2 Jun 2022 18:57:48 -0400 Subject: [PATCH 015/154] documented most np models --- hyperion/np/transforms/cent_whiten.py | 68 +++++++++++++- hyperion/np/transforms/cent_whiten_up.py | 44 ++++++++- hyperion/np/transforms/coral.py | 80 ++++++++++++++-- hyperion/np/transforms/gaussianizer.py | 74 +++++++++++++-- hyperion/np/transforms/lda.py | 72 ++++++++++++++- hyperion/np/transforms/lnorm.py | 9 +- hyperion/np/transforms/lnorm_up.py | 11 ++- hyperion/np/transforms/mvn.py | 60 +++++++++++- hyperion/np/transforms/nap.py | 90 ++++++++++++++---- hyperion/np/transforms/nda.py | 111 +++++++++++++++++++++-- hyperion/np/transforms/pca.py | 72 ++++++++++++++- hyperion/np/transforms/sb_sw.py | 32 ++++++- hyperion/np/transforms/skl_tsne.py | 50 ++++++++++ hyperion/np/transforms/transform_list.py | 43 ++++++++- 14 files changed, 749 insertions(+), 67 deletions(-) diff --git a/hyperion/np/transforms/cent_whiten.py b/hyperion/np/transforms/cent_whiten.py index f1cdf227..e700dbe8 100644 --- a/hyperion/np/transforms/cent_whiten.py +++ b/hyperion/np/transforms/cent_whiten.py @@ -13,7 +13,14 @@ class CentWhiten(NPModel): - """Class to do centering and whitening of i-vectors.""" + """Class to do centering and whitening of i-vectors. + + Attributes: + mu: data mean vector + T: whitening projection. + update_mu: whether or not to update the mean when training. + update_T: wheter or not to update T when training. + """ def __init__(self, mu=None, T=None, update_mu=True, update_T=True, **kwargs): super().__init__(**kwargs) @@ -22,18 +29,55 @@ def __init__(self, mu=None, T=None, update_mu=True, update_T=True, **kwargs): self.update_mu = update_mu self.update_T = update_T + def __call__(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + + def forward(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + def predict(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ if self.mu is not None: x = x - self.mu if self.T is not None: if self.T.ndim == 1: - x = x * T + x = x * self.T else: x = np.dot(x, self.T) return x def fit(self, x=None, sample_weight=None, mu=None, S=None): - + """Trains the model. + + Args: + x: training data samples with shape (num_samples, x_dim). + sample_weight: weight for each training sample. + mu: precomputed mean (used if x is None). + S: precomputed convariances (used if x is None). 
+ """ if x is not None: if x.shape[0] > x.shape[1]: gauss = Normal(x_dim=x.shape[1]) @@ -62,19 +106,35 @@ def fit(self, x=None, sample_weight=None, mu=None, S=None): self.T = V def get_config(self): + """Returns the model configuration dict.""" config = {"update_mu": self.update_mu, "update_t": self.update_T} base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) def save_params(self, f): + """Saves the model paramters into the file. + + Args: + f: file handle. + """ params = {"mu": self.mu, "T": self.T} self._save_params_from_dict(f, params) @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. + config: configuration dictionary. + + Returns: + Model object. + """ param_list = ["mu", "T"] params = cls._load_params_to_dict(f, config["name"], param_list) - return cls(mu=params["mu"], T=params["T"], name=config["name"]) + return cls(mu=params["mu"], T=params["T"], **config) @classmethod def load_mat(cls, file_path): diff --git a/hyperion/np/transforms/cent_whiten_up.py b/hyperion/np/transforms/cent_whiten_up.py index 1200e61b..9290eae6 100644 --- a/hyperion/np/transforms/cent_whiten_up.py +++ b/hyperion/np/transforms/cent_whiten_up.py @@ -14,12 +14,49 @@ class CentWhitenUP(CentWhiten): - """Class to do centering and whitening with uncertainty propagation.""" + """Class to do centering and whitening with uncertainty propagation. + + Attributes: + mu: data mean vector + T: whitening projection. + update_mu: whether or not to update the mean when training. + update_T: wheter or not to update T when training. + """ def __init__(self, mu=None, T=None, update_mu=True, update_T=True, **kwargs): super().__init__(mu, T, update_mu, update_T, **kwargs) + def __call__(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + + def forward(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + def predict(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ x_dim = int(x.shape[-1] / 2) m_x = x[:, :x_dim] s2_x = x[:, x_dim:] @@ -29,5 +66,10 @@ def predict(self, x): return np.hstack((m_x, s2_x)) def fit(self, x, sample_weight=None): + """Trains the transformation parameters. + + Args: + x: training samples with shape (num_samples, x_dim) + """ x = x[:, : int(x.shape[-1] / 2)] super().fit(x, sample_weight=sample_weight) diff --git a/hyperion/np/transforms/coral.py b/hyperion/np/transforms/coral.py index 9aee7579..54bd27bc 100644 --- a/hyperion/np/transforms/coral.py +++ b/hyperion/np/transforms/coral.py @@ -12,7 +12,19 @@ class CORAL(NPModel): - """Class to do CORAL""" + """Class to do CORAL. + + https://arxiv.org/abs/1612.01939 + + Attributes: + mu: mean shift between both domains. + T_col: recoloring projection. + T_white: whitening projection. + update_mu: whether or not to update mu when training. + update_T: wheter or not to update T_col and T_white when training. + alpha_mu: weight of the in-domain data when computing in-domain mean. + alpha_T: weight of the in-domain data when computing in-domain covariance. 
+ """ def __init__( self, @@ -25,7 +37,7 @@ def __init__( alpha_T=1, **kwargs ): - super(CORAL, self).__init__(**kwargs) + super().__init__(**kwargs) self.mu = mu self.T_col = T_col self.T_white = T_white @@ -36,19 +48,51 @@ def __init__( self.alpha_T = alpha_T def get_config(self): + """Returns the model configuration dict.""" config = { "update_mu": self.update_mu, "update_t": self.update_T, - "pca_dim": self.pca_dim, + "alpha_mu": self.alpha_mu, + "alpha_T": self.alpha_T, } - base_config = super(CORAL, self).get_config() + base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) def _compute_T(self): if self.T_col is not None and self.T_white is not None: self.T = np.dot(self.T_white, self.T_col) + def __call__(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + + def forward(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + def predict(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ if self.T is None: self._compute_T() if self.mu is not None: @@ -60,7 +104,14 @@ def predict(self, x): return x def fit(self, x, sample_weight=None, x_out=None, sample_weight_out=None): - + """Trains the model. + + Args: + x: in-domain data samples with shape (num_samples, x_dim). + sample_weight: weight for each in-domain training sample. + x_out: out-domain data samples with shape (num_samples, x_dim). + sample_weight_out: weight for each out-domain training sample. + """ if x_out is None: assert self.T_white is not None else: @@ -88,21 +139,34 @@ def fit(self, x, sample_weight=None, x_out=None, sample_weight_out=None): @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. + config: configuration dictionary. + + Returns: + Model object. + """ param_list = ["mu", "T_col", "T_white"] params = cls._load_params_to_dict(f, config["name"], param_list) return cls( mu=params["mu"], T_col=params["T_col"], T_white=params["T_white"], - name=config["name"], + **config, ) def save_params(self, f): + """Saves the model paramters into the file. + + Args: + f: file handle. + """ params = { "mu": self.mu, "T_col": self.T_col, "T_white": self.T_white, - "alpha_mu": self.alpha_mu, - "alpha_T": self.alpha_T, } self._save_params_from_dict(f, params) diff --git a/hyperion/np/transforms/gaussianizer.py b/hyperion/np/transforms/gaussianizer.py index 26294134..393364b6 100644 --- a/hyperion/np/transforms/gaussianizer.py +++ b/hyperion/np/transforms/gaussianizer.py @@ -15,18 +15,56 @@ class Gaussianizer(NPModel): - """Class to make i-vector distribution standard Normal.""" + """Class to make i-vector distribution standard Normal. + + Args: + max_vectors: maximum number of background vectors needed to + compute the Gaussianization. + r: background vector matrix obtained by fit function. + """ def __init__(self, max_vectors=None, r=None, **kwargs): - super(Gaussianizer, self).__init__(**kwargs) + super().__init__(**kwargs) self.max_vectors = max_vectors self.r = r + def __call__(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. 
+ """ + return self.predict(x) + + def forward(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + def predict(self, x): - px_cum = np.linspace(0, 1, self.r.shape[0] + 2)[1:-1] + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + # px_cum = np.linspace(0, 1, self.r.shape[0] + 2)[1:-1] + px_cum = np.linspace(0, 1, self.r.shape[0] + 3)[1:-1] y_map = erfinv(2 * px_cum - 1) * np.sqrt(2) - r = self.r[1:] + # r = self.r[1:] + r = self.r y = np.zeros_like(x) for i in range(x.shape[1]): y_index = np.searchsorted(r[:, i], x[:, i]) @@ -36,10 +74,13 @@ def predict(self, x): return y def fit(self, x): + """Trains the model. + Args: + x: training data samples with shape (num_samples, x_dim). + """ r = np.sort(x, axis=0, kind="heapsort") - - x = np.zeros((1, x.shape[-1]), dtype=float_cpu()) + # x = np.zeros((1, x.shape[-1]), dtype=float_cpu()) if r.shape[0] > self.max_vectors: index = np.round( @@ -47,20 +88,37 @@ def fit(self, x): ).astype(int) r = r[index, :] - self.r = np.vstack((x, r)) + # self.r = np.vstack((x, r)) + self.r = r def get_config(self): + """Returns the model configuration dict.""" config = {"max_vectors": self.max_vectors} - base_config = super(Gaussianizer, self).get_config() + base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) def save_params(self, f): + """Saves the model paramters into the file. + + Args: + f: file handle. + """ params = {"r": self.r} self._save_params_from_dict(f, params) @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. + config: configuration dictionary. + + Returns: + Model object. + """ param_list = ["r"] params = cls._load_params_to_dict(f, config["name"], param_list) return cls( diff --git a/hyperion/np/transforms/lda.py b/hyperion/np/transforms/lda.py index 5644a2a3..b4f5cbc8 100644 --- a/hyperion/np/transforms/lda.py +++ b/hyperion/np/transforms/lda.py @@ -13,12 +13,20 @@ class LDA(NPModel): - """Class to do linear discriminant analysis.""" + """Class to do linear discriminant analysis. + + Attributes: + mu: data mean vector + T: LDA projection. + lda_dim: LDA dimension. + update_mu: whether or not to update the mean when training. + update_T: wheter or not to update T when training. + """ def __init__( self, mu=None, T=None, lda_dim=None, update_mu=True, update_T=True, **kwargs ): - super(LDA, self).__init__(**kwargs) + super().__init__(**kwargs) self.mu = mu self.T = T if T is None: @@ -28,13 +36,51 @@ def __init__( self.update_mu = update_mu self.update_T = update_T + def __call__(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + + def forward(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + def predict(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ if self.mu is not None: x = x - self.mu return np.dot(x, self.T) def fit(self, x, y, mu=None, Sb=None, Sw=None): - + """Trains the model. + + Args: + x: training data samples with shape (num_samples, x_dim). 
+          y: training labels as integers in [0, num_classes-1] with shape (num_samples,).
+          mu: precomputed mean.
+          Sb: precomputed between-class covariance.
+          Sw: precomputed within-class covariance.
+        """
         if mu is None or Sb is None or Sw is None:
             sbsw = SbSw()
             sbsw.fit(x, y)
@@ -67,23 +113,39 @@ def fit(self, x, y, mu=None, Sb=None, Sw=None):
         self.T = V
 
     def get_config(self):
+        """Returns the model configuration dict."""
         config = {
             "lda_dim": self.lda_dim,
             "update_mu": self.update_mu,
             "update_t": self.update_T,
         }
-        base_config = super(LDA, self).get_config()
+        base_config = super().get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
     def save_params(self, f):
+        """Saves the model parameters into the file.
+
+        Args:
+          f: file handle.
+        """
         params = {"mu": self.mu, "T": self.T}
         self._save_params_from_dict(f, params)
 
     @classmethod
     def load_params(cls, f, config):
+        """Initializes the model from the configuration and loads the model
+        parameters from file.
+
+        Args:
+          f: file handle.
+          config: configuration dictionary.
+
+        Returns:
+          Model object.
+        """
         param_list = ["mu", "T"]
         params = cls._load_params_to_dict(f, config["name"], param_list)
-        return cls(mu=params["mu"], T=params["T"], name=config["name"])
+        return cls(mu=params["mu"], T=params["T"], **config)
 
     @classmethod
     def load_mat(cls, file_path):
diff --git a/hyperion/np/transforms/lnorm.py b/hyperion/np/transforms/lnorm.py
index 088748b2..9b4f36fe 100644
--- a/hyperion/np/transforms/lnorm.py
+++ b/hyperion/np/transforms/lnorm.py
@@ -9,7 +9,14 @@
 
 
 class LNorm(CentWhiten):
-    """Class to do length normalization."""
+    """Class to do length normalization.
+
+    Attributes:
+      mu: data mean vector.
+      T: whitening projection.
+      update_mu: whether or not to update the mean when training.
+      update_T: whether or not to update T when training.
+    """
 
     def predict(self, x):
         x = super().predict(x)
diff --git a/hyperion/np/transforms/lnorm_up.py b/hyperion/np/transforms/lnorm_up.py
index b6e211d5..0814f9fe 100644
--- a/hyperion/np/transforms/lnorm_up.py
+++ b/hyperion/np/transforms/lnorm_up.py
@@ -10,10 +10,17 @@
 
 
 class LNormUP(CentWhitenUP):
-    """Class to do Lenght Normalization with uncertainty propagation"""
+    """Class to do Length Normalization with uncertainty propagation.
+
+    Attributes:
+      mu: data mean vector.
+      T: whitening projection.
+      update_mu: whether or not to update the mean when training.
+      update_T: whether or not to update T when training.
+    """
 
     def predict(self, x):
-        x = super(LNormUP, self).predict(x)
+        x = super().predict(x)
         x_dim = int(x.shape[-1] / 2)
         m_x = x[:, :x_dim]
         s2_x = x[:, x_dim:]
diff --git a/hyperion/np/transforms/mvn.py b/hyperion/np/transforms/mvn.py
index 7f60206e..484a6913 100644
--- a/hyperion/np/transforms/mvn.py
+++ b/hyperion/np/transforms/mvn.py
@@ -12,14 +12,50 @@
 
 
 class MVN(NPModel):
-    """Class to do global mean and variance normalization."""
+    """Class to do global mean and variance normalization.
+
+    Attributes:
+      mu: data mean vector.
+      s: standard deviation vector.
+
+    """
 
     def __init__(self, mu=None, s=None, **kwargs):
-        super(MVN, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.mu = mu
         self.s = s
 
+    def __call__(self, x):
+        """Applies the transformation to the data.
+
+        Args:
+          x: data samples.
+
+        Returns:
+          Transformed data samples.
+        """
+        return self.predict(x)
+
+    def forward(self, x):
+        """Applies the transformation to the data.
+
+        Args:
+          x: data samples.
+
+        Returns:
+          Transformed data samples.
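+
+        Example (an illustrative sketch):
+
+            >>> import numpy as np
+            >>> x = 5.0 + 2.0 * np.random.randn(1000, 40)
+            >>> mvn = MVN()
+            >>> mvn.fit(x)
+            >>> y = mvn(x)  # approximately zero-mean, unit-variance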
+ """ + return self.predict(x) + def predict(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ if self.mu is not None: x = x - self.mu if self.s is not None: @@ -27,15 +63,35 @@ def predict(self, x): return x def fit(self, x): + """Trains the model. + + Args: + x: training data samples with shape (num_samples, x_dim). + """ self.mu = np.mean(x, axis=0) self.s = np.std(x, axis=0) def save_params(self, f): + """Saves the model paramters into the file. + + Args: + f: file handle. + """ params = {"mu": self.mu, "s": self.s} self._save_params_from_dict(f, params) @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. + config: configuration dictionary. + + Returns: + Model object. + """ param_list = ["mu", "s"] params = cls._load_params_to_dict(f, config["name"], param_list) return cls(mu=params["mu"], s=params["s"], name=config["name"]) diff --git a/hyperion/np/transforms/nap.py b/hyperion/np/transforms/nap.py index ee13e7e0..c6f8f8de 100644 --- a/hyperion/np/transforms/nap.py +++ b/hyperion/np/transforms/nap.py @@ -12,47 +12,105 @@ class NAP(NPModel): - """Class to do nussance attribute projection.""" + """Class to do nuissance attribute projection. - def __init__(self, U=None, **kwargs): - super(NAP, self).__init__(**kwargs) + Attributes: + U: NAP projection. + """ + + def __init__(self, U=None, U_dim=None, **kwargs): + super().__init__(**kwargs) self.U = U + if U is None: + self.U_dim = U_dim + else: + self.U_dim = U.shape[0] + + def __call__(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + + def forward(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) def predict(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ return x - np.dot(np.dot(x, self.U.T), self.U) - def fit(self, x, U_dim, class_ids): - x_hat = np.zeros_like(x) - u_ids = np.uniqe(class_ids) + def fit(self, x, y): + """Trains the model. + + Args: + x: training data samples with shape (num_samples, x_dim). + y: training labels as integers in [0, num_classes-1] with shape (num_samples,) + """ + u_ids = np.unique(y) M = np.sqrt(len(u_ids)) for i in u_ids: - idx = np.nonzero(i == class_ids) + idx = y == i N = np.sqrt(len(idx)) mu_i = np.mean(x[idx, :], axis=0) xx[idx, :] = (x[idx, :] - mu_i) / N xx /= M _, s, Vt = np.svd(xx, full_matrices=False, overwrite_a=True) - idx = (np.argsort(s)[::-1])[:U_dim] + idx = (np.argsort(s)[::-1])[: self.U_dim] self.U = Vt[idx, :] + def get_config(self): + """Returns the model configuration dict.""" + config = { + "U_dim": self.U_dim, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + def save_params(self, f): + """Saves the model paramters into the file. + + Args: + f: file handle. + """ params = {"U": self.U} self._save_params_from_dict(f, params) @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. + config: configuration dictionary. + + Returns: + Model object. 
+ """ param_list = ["U"] params = cls._load_params_to_dict(f, config["name"], param_list) return cls(U=params["U"], name=config["name"]) - # @classmethod - # def load(cls, file_path): - # with h5py.File(file_path, 'r') as f: - # config = self.load_config_from_json(f['config']) - # param_list = ['U'] - # params = self._load_params_to_dict(f, config['name'], param_list) - # return cls(U=params['U'], name=config['name']) - @classmethod def load_mat(cls, file_path): with h5py.File(file_path, "r") as f: diff --git a/hyperion/np/transforms/nda.py b/hyperion/np/transforms/nda.py index c84a4527..71910c92 100644 --- a/hyperion/np/transforms/nda.py +++ b/hyperion/np/transforms/nda.py @@ -10,47 +10,140 @@ from ..np_model import NPModel from ...hyp_defs import float_cpu +from .sb_sw import NSbSw class NDA(NPModel): - """Class to do nearest-neighbors discriminant analysis""" + """Class to do nearest-neighbors discriminant analysis - def __init__(self, mu=None, T=None, **kwargs): + Attributes: + mu: data mean vector + T: NDA projection. + """ + + def __init__( + self, mu=None, T=None, nda_dim=None, update_mu=True, update_T=True, **kwargs + ): super().__init__(**kwargs) self.mu = mu self.T = T + if T is None: + self.nda_dim = nda_dim + else: + self.nda_dim = T.shape[1] + self.update_mu = update_mu + self.update_T = update_T + + def __call__(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + + def forward(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) def predict(self, x): + """Applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ if self.mu is not None: x = x - self.mu return np.dot(x, self.T) - def fit(self, mu, Sb, Sw, nda_dim=None): - self.mu = mu + def fit(self, x, y, mu=None, Sb=None, Sw=None): + """Trains the model. + + Args: + x: training data samples with shape (num_samples, x_dim). + y: training labels as integers in [0, num_classes-1] with shape (num_samples,) + mu: precomputed mean. + Sb: precomputed between-class covariance. + Sw: precomputed within-class covariance. + """ + if mu is None or Sb is None or Sw is None: + sbsw = NSbSw() + sbsw.fit(x, y) + mu = sbsw.mu + Sb = sbsw.Sb + Sw = sbsw.Sw + + if self.update_mu: + self.mu = mu + + if not self.update_T: + return assert Sb.shape == Sw.shape - d, V = la.eigh(Sb, Sw) + try: + d, V = la.eigh(Sb, Sw) + except: + alpha = 1e-2 * np.max(np.diag(Sw)) + d, V = la.eigh(Sb, alpha * np.eye(Sw.shape[0]) + Sw) V = np.fliplr(V) p = V[0, :] < 0 V[:, p] *= -1 - if nda_dim is not None: - assert nda_dim <= V.shape[1] - V = V[:, :nda_dim] + if self.nda_dim is not None: + assert self.nda_dim <= V.shape[1] + V = V[:, : self.nda_dim] self.T = V + def get_config(self): + """Returns the model configuration dict.""" + config = { + "nda_dim": self.nda_dim, + "update_mu": self.update_mu, + "update_t": self.update_T, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + def save_params(self, f): + """Saves the model paramters into the file. + + Args: + f: file handle. + """ params = {"mu": self.mu, "T": self.T} self._save_params_from_dict(f, params) @classmethod def load_params(cls, f, config): + """Initializes the model from the configuration and loads the model + parameters from file. + + Args: + f: file handle. 
+          config: configuration dictionary.
+
+        Returns:
+          Model object.
+        """
         param_list = ["mu", "T"]
         params = cls._load_params_to_dict(f, config["name"], param_list)
-        return cls(mu=params["mu"], T=params["T"], name=config["name"])
+        return cls(mu=params["mu"], T=params["T"], **config)
 
     @classmethod
     def load_mat(cls, file_path):
diff --git a/hyperion/np/transforms/pca.py b/hyperion/np/transforms/pca.py
index 23477c84..6d6ff7b1 100644
--- a/hyperion/np/transforms/pca.py
+++ b/hyperion/np/transforms/pca.py
@@ -12,7 +12,18 @@
 
 
 class PCA(NPModel):
-    """Class to do principal component analysis"""
+    """Class to do principal component analysis.
+
+    Attributes:
+      mu: data mean vector.
+      T: PCA projection.
+      update_mu: whether or not to update the mean when training.
+      update_T: whether or not to update T when training.
+      pca_dim: pca dimension (optional).
+      pca_var_r: pca variance ratio to retain, overrides pca_dim (optional).
+      pca_min_dim: minimum dimension of PCA when using pca_var_r.
+      whiten: whitens the data after PCA.
+    """
 
     def __init__(
         self,
@@ -36,7 +47,37 @@ def __init__(
         self.pca_min_dim = pca_min_dim
         self.whiten = whiten
 
+    def __call__(self, x):
+        """Applies the transformation to the data.
+
+        Args:
+          x: data samples.
+
+        Returns:
+          Transformed data samples.
+        """
+        return self.predict(x)
+
+    def forward(self, x):
+        """Applies the transformation to the data.
+
+        Args:
+          x: data samples.
+
+        Returns:
+          Transformed data samples.
+        """
+        return self.predict(x)
+
     def predict(self, x):
+        """Applies the transformation to the data.
+
+        Args:
+          x: data samples.
+
+        Returns:
+          Transformed data samples.
+        """
         if self.mu is not None:
             x = x - self.mu
         return np.dot(x, self.T)
@@ -57,8 +98,15 @@ def get_pca_dim_for_var_ratio(x, var_r=1, min_dim=2):
         rank = max(min_dim, rank)
         return rank
 
-    def fit(self, x=None, sample_weight=None, mu=None, S=None):
+    def fit(self, x=None, mu=None, S=None):
+        """Trains the model.
+
+        Args:
+          x: training data samples with shape (num_samples, x_dim).
+          mu: precomputed mean.
+          S: precomputed total covariance.
+        """
         if x is not None:
             mu = np.mean(x, axis=0)
             delta = x - mu
@@ -104,28 +152,44 @@ def fit(self, x=None, mu=None, S=None):
         self.T = V
 
     def get_config(self):
+        """Returns the model configuration dict."""
         config = {
             "update_mu": self.update_mu,
             "update_t": self.update_T,
             "pca_dim": self.pca_dim,
             "pca_var_r": self.pca_var_r,
+            "pca_min_dim": self.pca_min_dim,
         }
         base_config = super().get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
     def save_params(self, f):
+        """Saves the model parameters into the file.
+
+        Args:
+          f: file handle.
+        """
         params = {"mu": self.mu, "T": self.T}
         self._save_params_from_dict(f, params)
 
     @classmethod
     def load_params(cls, f, config):
+        """Initializes the model from the configuration and loads the model
+        parameters from file.
+
+        Args:
+          f: file handle.
+          config: configuration dictionary.
+
+        Returns:
+          Model object.
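+
+        Example (an illustrative sketch; keeps the components explaining
+        90% of the training variance):
+
+            >>> import numpy as np
+            >>> x = np.random.randn(1000, 100)
+            >>> pca = PCA(pca_var_r=0.9)
+            >>> pca.fit(x)
+            >>> z = pca(x)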
+ """ param_list = ["mu", "T"] params = cls._load_params_to_dict(f, config["name"], param_list) return cls( mu=params["mu"], T=params["T"], - pca_dim=config["pca_dim"], - name=config["name"], + **config, ) @classmethod diff --git a/hyperion/np/transforms/sb_sw.py b/hyperion/np/transforms/sb_sw.py index 92cba594..6d013e55 100644 --- a/hyperion/np/transforms/sb_sw.py +++ b/hyperion/np/transforms/sb_sw.py @@ -13,7 +13,14 @@ class SbSw(NPModel): - """Class to compute between and within class matrices""" + """Class to compute between and within class covariance matrices. + + Args: + Sb: between-class cov. matrix. + Sw: within-class cov. matrix. + mu: data mean vector. + num_classes: number of classes. + """ def __init__(self, Sb=None, Sw=None, mu=None, num_classes=0, **kwargs): super(SbSw, self).__init__(**kwargs) @@ -22,7 +29,7 @@ def __init__(self, Sb=None, Sw=None, mu=None, num_classes=0, **kwargs): self.mu = None self.num_classes = num_classes - def fit(self, x, class_ids, sample_weight=None, class_weights=None, normalize=True): + def fit(self, x, class_ids, normalize=True): dim = x.shape[1] if self.Sb is None: self.Sb = np.zeros((dim, dim)) @@ -75,7 +82,7 @@ def save_params(self, f): @classmethod def load(cls, file_path): with h5py.File(file_path, "r") as f: - config = self.load_config_from_json(f["config"]) + config = cls.load_config_from_json(f["config"]) param_list = ["mu", "Sb", "Sw", "num_classes"] params = cls._load_params_to_dict(f, config["name"], param_list) kwargs = dict(list(config.items()) + list(params.items())) @@ -83,12 +90,26 @@ def load(cls, file_path): class NSbSw(SbSw): + """Class to compute nearest neighbour between and within class + covariance matrices. + https://www.isca-speech.org/archive/pdfs/interspeech_2014/sadjadi14_interspeech.pdf + + Args: + K: number of neighbours. + alpha: distance exponent that determines how fast the weight of the samples decays + when they get far from the classification boundary. + Sb: between-class cov. matrix. + Sw: within-class cov. matrix. + mu: data mean vector. + num_classes: number of classes. + """ + def __init__(self, K=10, alpha=1, **kwargs): - super(NSbSw, self).__init__(**kwargs) + super().__init__(**kwargs) self.K = K self.alpha = alpha - def fit(self, x, class_ids, sample_weight=None, class_weights=None, normalize=True): + def fit(self, x, class_ids, normalize=True): dim = x.shape[1] self.Sb = np.zeros((dim, dim), dtype=float_cpu()) self.Sw = np.zeros((dim, dim), dtype=float_cpu()) @@ -139,6 +160,7 @@ def normalize(self): self.Sw /= self.num_classes def get_config(self): + """Returns the model configuration dict.""" config = {"K": self.K, "alpha": self.alpha} base_config = super(NSbSw, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/hyperion/np/transforms/skl_tsne.py b/hyperion/np/transforms/skl_tsne.py index b5be0fac..71a3e084 100644 --- a/hyperion/np/transforms/skl_tsne.py +++ b/hyperion/np/transforms/skl_tsne.py @@ -121,10 +121,48 @@ def angle(self): def num_jobs(self): return self._tsne.n_jobs + def __call__(self, x): + """Trains and applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + + def forward(self, x): + """Trains and applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + def predict(self, x): + """Trains and applies the transformation to the data. + + Args: + x: data samples. 
+ + Returns: + Transformed data samples. + """ return self._tsne.fit_transform(x) def fit(self, x): + """Trains and applies the transformation to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ return self._tsne.fit_transform(x) def save_params(self, f): @@ -135,6 +173,7 @@ def load_params(cls, f, config): return cls(**config) def get_config(self): + """Returns the model configuration dict.""" config = { "tsne_dim": self.tsne_dim, "perplexity": self.perplexity, @@ -155,6 +194,11 @@ def get_config(self): @staticmethod def filter_args(**kwargs): + """Filters the arguments corresponding to this model from a dictionary. + + Returns + Dictionary containing valid options to initialize the model. + """ valid_args = ( "tsne_dim", "perplexity", @@ -174,6 +218,12 @@ def filter_args(**kwargs): @staticmethod def add_class_args(parser, prefix=None): + """Adds model options to parser. + + Args: + parser: parser object. + prefix: prefix str to add to the argument names. + """ if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/np/transforms/transform_list.py b/hyperion/np/transforms/transform_list.py index 62bc802e..1ddceeaa 100644 --- a/hyperion/np/transforms/transform_list.py +++ b/hyperion/np/transforms/transform_list.py @@ -23,7 +23,11 @@ class TransformList(NPModel): - """Class to perform a list of transformations""" + """Class to perform a sequence of transformations + + Attributes: + transforms: list of transformation objects. + """ def __init__(self, transforms, **kwargs): super().__init__(**kwargs) @@ -34,11 +38,46 @@ def __init__(self, transforms, **kwargs): self.update_names() def append(self, t): + """Appends a transformation to the list. + + Args: + t: transformation object. + """ self.transforms.append(t) if self.name is not None: t.name = self.name + "/" + t.name + def __call__(self, x): + """Applies the list of transformations to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + + def forward(self, x): + """Applies the list of transformations to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. + """ + return self.predict(x) + def predict(self, x): + """Applies the list of transformations to the data. + + Args: + x: data samples. + + Returns: + Transformed data samples. 
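+
+        Example (an illustrative sketch; assumes PCA and LNorm from this
+        package are importable and the transforms are already fitted):
+
+            >>> import numpy as np
+            >>> x = np.random.randn(500, 100)
+            >>> pca = PCA(pca_dim=10)
+            >>> pca.fit(x)
+            >>> lnorm = LNorm()
+            >>> lnorm.fit(pca(x))
+            >>> preproc = TransformList([pca, lnorm])
+            >>> z = preproc(x)  # PCA projection followed by length-norm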
+ """ for t in self.transforms: x = t.predict(x) return x @@ -49,7 +88,7 @@ def update_names(self): t.name = self.name + "/" + t.name def get_config(self): - config = super(TransformList, self).get_config() + config = super().get_config() config_t = {} for i in range(len(self.transforms)): config_t[i] = self.transforms[i].get_config() From 0ecebc0e6524b74628f459896ccbcb46a3011f78 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 9 Jun 2022 09:06:36 -0400 Subject: [PATCH 016/154] wavlm phase3 --- egs/voxceleb/v2/cmd.sh | 2 +- ...aseplus_ecapatdnn512x3_phase2_default.yaml | 12 + ...aseplus_ecapatdnn512x3_phase3_default.yaml | 11 + .../v2/conf/trainer_phase2_sgd_default.yaml | 18 ++ .../v2/conf/trainer_phase3_sgd_default.yaml | 18 ++ ...ig_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh | 52 ++++ ...nfig_wavlmbaseplus_ecapatdnn512x3_v1.10.sh | 16 + egs/voxceleb/v2/run_011_train_xvector.sh | 56 +++- egs/voxceleb/v2/run_030_extract_xvectors.sh | 10 + egs/voxceleb/v2/run_040_eval_be.sh | 11 + hyperion/bin/finetune_wav2vec2xvector.py | 204 +++++++++++++ ....py => finetune_xvector_dfr_from_feats.py} | 0 ...av.py => finetune_xvector_dfr_from_wav.py} | 0 ...xvec.py => finetune_xvector_from_feats.py} | 0 hyperion/bin/finetune_xvector_from_wav.py | 190 ++++++++++++ hyperion/bin/torch-finetune-xvec-from-wav.py | 287 ------------------ hyperion/bin/train_wav2vec2xvector.py | 2 +- hyperion/bin/train_xvector_from_wav.py | 7 +- hyperion/torch/layers/margin_losses.py | 8 +- .../hf_hubert2resnet1d_xvector.py | 11 + .../hf_wav2vec2resnet1d_xvector.py | 11 + .../models/wav2xvectors/hf_wav2xvector.py | 22 ++ .../wav2xvectors/hf_wavlm2resnet1d_xvector.py | 11 + .../torch/models/wav2xvectors/wav2xvector.py | 22 ++ hyperion/torch/models/xvectors/xvector.py | 6 + hyperion/torch/narchs/classif_head.py | 21 ++ notebooks/tutorial_jsalt22/ivectors.ipynb | 33 +- 27 files changed, 736 insertions(+), 305 deletions(-) create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml create mode 100644 egs/voxceleb/v2/conf/trainer_phase2_sgd_default.yaml create mode 100644 egs/voxceleb/v2/conf/trainer_phase3_sgd_default.yaml create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh create mode 100755 hyperion/bin/finetune_wav2vec2xvector.py rename hyperion/bin/{torch-finetune-xvec-dfr.py => finetune_xvector_dfr_from_feats.py} (100%) rename hyperion/bin/{torch-finetune-xvec-dfr-from-wav.py => finetune_xvector_dfr_from_wav.py} (100%) rename hyperion/bin/{torch-finetune-xvec.py => finetune_xvector_from_feats.py} (100%) create mode 100755 hyperion/bin/finetune_xvector_from_wav.py delete mode 100755 hyperion/bin/torch-finetune-xvec-from-wav.py diff --git a/egs/voxceleb/v2/cmd.sh b/egs/voxceleb/v2/cmd.sh index 040f458b..00f8d40a 100755 --- a/egs/voxceleb/v2/cmd.sh +++ b/egs/voxceleb/v2/cmd.sh @@ -17,7 +17,7 @@ if [ "$(hostname -d)" == "cm.gemini" ];then #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" - # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" else export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" diff --git 
a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml new file mode 100644 index 00000000..87b01a1f --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml @@ -0,0 +1,12 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: trainer_phase2_sgd_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml new file mode 100644 index 00000000..d13931e0 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml @@ -0,0 +1,11 @@ +data: + train: train_data_default.yaml + val: val_data_default.yaml +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0. +trainer: trainer_phase3_sgd_default.yaml + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/trainer_phase2_sgd_default.yaml b/egs/voxceleb/v2/conf/trainer_phase2_sgd_default.yaml new file mode 100644 index 00000000..ae708b62 --- /dev/null +++ b/egs/voxceleb/v2/conf/trainer_phase2_sgd_default.yaml @@ -0,0 +1,18 @@ +optim: + opt_type: sgd + lr: 5.5e-3 + momentum: 0.9 + weight_decay: 1e-4 +lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 4.4e-3 + warmup_steps: 6000 + update_lr_on_opt_step: true +use_amp: true +log_interval: 1000 +epochs: 7 +eff_batch_size: 512 +train_mode: full diff --git a/egs/voxceleb/v2/conf/trainer_phase3_sgd_default.yaml b/egs/voxceleb/v2/conf/trainer_phase3_sgd_default.yaml new file mode 100644 index 00000000..2529e25a --- /dev/null +++ b/egs/voxceleb/v2/conf/trainer_phase3_sgd_default.yaml @@ -0,0 +1,18 @@ +optim: + opt_type: sgd + lr: 2.3e-4 + momentum: 0.9 + weight_decay: 1e-4 +lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 2e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true +use_amp: true +log_interval: 1000 +epochs: 7 +eff_batch_size: 192 +train_mode: full diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh new file mode 100644 index 00000000..b580508a --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh @@ -0,0 +1,52 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +xvec_train_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml +xvec_train_args="--model.xvector.margin-warmup-epochs 5 --trainer.lrsched.decay-steps 4200 --trainer.lrsched.warmup-steps 1500 --trainer.lrsched.hold-steps 1500 --trainer.epochs 60 --model.feat-fusion-method weighted-avg --model.feat-fusion-start 2 --model.xvector.intertop-margin 0.1" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.10 + +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0060.pth + +xvec_train_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml +xvec_train_s2_args="--trainer.epochs 20" +nnet_name_s2=${nnet_name}.s2 
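+# Sketch of the recipe flow: stage 2 fine-tunes the whole model
+# (train_mode: full) with margin 0.2 and inter-top-k margin 0.1 for 20
+# epochs; stage 3 below refines it on 6 s chunks with margin 0.4 and a
+# roughly 20x smaller learning rate (2.3e-4 vs 5.5e-3).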
+nnet_s2_dir=exp/xvector_nnets/$nnet_name_s2 +nnet_s2=$nnet_s2_dir/model_ep0007.pth +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +xvec_train_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml +xvec_train_s3_args="--trainer.epochs 10 --data.train.dataset.min-chunk-length 6 --data.train.dataset.max-chunk-length 6 --model.xvector.intertop-margin 0.1" +nnet_name_s3=${nnet_name}.s3.4 +nnet_s3_dir=exp/xvector_nnets/$nnet_name_s3 +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0006.pth +#nnet_s3=$nnet_s3_dir/model_ep0010.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh index 47af1f43..b84c1f15 100644 --- a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh @@ -21,6 +21,22 @@ nnet_name=${hf_model_name}_ecapatdnn512x3_v1.10 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0060.pth +xvec_train_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml +xvec_train_s2_args="--trainer.epochs 20" +nnet_name_s2=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_name_s2 +nnet_s2=$nnet_s2_dir/model_ep0007.pth +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +xvec_train_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml +xvec_train_s3_args="--trainer.epochs 10 --data.train.dataset.min-chunk-length 6 --data.train.dataset.max-chunk-length 6" +nnet_name_s3=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_name_s3 +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0006.pth +nnet_s3=$nnet_s3_dir/model_ep0010.pth + + # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=0 diff --git a/egs/voxceleb/v2/run_011_train_xvector.sh b/egs/voxceleb/v2/run_011_train_xvector.sh index 0b9a092e..b959936f 100755 --- a/egs/voxceleb/v2/run_011_train_xvector.sh +++ b/egs/voxceleb/v2/run_011_train_xvector.sh @@ -28,17 +28,17 @@ fi if [ "$use_tb" == "true" ];then extra_args="$extra_args --trainer.use-tensorboard" fi -if [ "$use_wandb" == "true" ];then - extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)" -fi if [ "$interactive" == "true" ];then export cuda_cmd=run.pl fi +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.2 --trainer.wandb.name $nnet_name.$(date -Iminutes)" +fi + # Network Training if [ $stage -le 1 ]; then - mkdir -p $nnet_dir/log $cuda_cmd \ @@ -53,6 +53,54 @@ if [ $stage -le 1 ]; then --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_dir $args \ + --num-gpus $ngpu + +fi + +if [ $stage -le 2 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_name_s2.$(date -Iminutes)" + fi + + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2xvector.py $nnet_type --cfg $xvec_train_s2_base_cfg $xvec_train_s2_args $extra_args \ + 
--data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --in-model-file $nnet \ + --trainer.exp-path $nnet_s2_dir $args \ + --num-gpus $ngpu \ + +fi + +if [ $stage -le 3 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_name_s3.$(date -Iminutes)" + fi + + mkdir -p $nnet_s3_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s3_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2xvector.py $nnet_type --cfg $xvec_train_s3_base_cfg $xvec_train_s3_args $extra_args \ + --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --in-model-file $nnet_s2 \ + --trainer.exp-path $nnet_s3_dir $args \ --num-gpus $ngpu \ fi diff --git a/egs/voxceleb/v2/run_030_extract_xvectors.sh b/egs/voxceleb/v2/run_030_extract_xvectors.sh index 90186a42..77c46672 100755 --- a/egs/voxceleb/v2/run_030_extract_xvectors.sh +++ b/egs/voxceleb/v2/run_030_extract_xvectors.sh @@ -10,6 +10,7 @@ set -e stage=1 config_file=default_config.sh use_gpu=false +nnet_stage=1 xvec_chunk_length=120 #seconds . parse_options.sh || exit 1; . $config_file @@ -21,6 +22,15 @@ else xvec_cmd="$train_cmd --mem 12G" fi + +if [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_name_s2 +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_name_s3 +fi + xvector_dir=exp/xvectors/$nnet_name if [ $stage -le 1 ]; then diff --git a/egs/voxceleb/v2/run_040_eval_be.sh b/egs/voxceleb/v2/run_040_eval_be.sh index cd168180..d9c03bba 100755 --- a/egs/voxceleb/v2/run_040_eval_be.sh +++ b/egs/voxceleb/v2/run_040_eval_be.sh @@ -9,14 +9,25 @@ set -e stage=1 config_file=default_config.sh +nnet_stage=1 . parse_options.sh || exit 1; . $config_file . 
datapath.sh +if [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_name_s2 +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_name_s3 +fi + + plda_label=${plda_type}y${plda_y_dim}_v1 be_name=lda${lda_dim}_${plda_label}_${plda_data} + xvector_dir=exp/xvectors/$nnet_name be_dir=exp/be/$nnet_name/$be_name score_dir=exp/scores/$nnet_name/${be_name} diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py new file mode 100755 index 00000000..fda819ad --- /dev/null +++ b/hyperion/bin/finetune_wav2vec2xvector.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from pathlib import Path +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging +import multiprocessing + +import numpy as np + +import torch +import torch.nn as nn + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.utils import ddp +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import ( + HFWav2Vec2ResNet1dXVector, + HFHubert2ResNet1dXVector, + HFWavLM2ResNet1dXVector, +) +from hyperion.torch import TorchModelLoader as TML + +model_dict = { + "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, + "hf_hubert2resnet1d": HFHubert2ResNet1dXVector, + "hf_wavlm2resnet1d": HFWavLM2ResNet1dXVector, +} + + +def init_data(partition, rank, num_gpus, **kwargs): + + kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**kwargs["dataset"]) + sampler_args = Sampler.filter_args(**kwargs["sampler"]) + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + ad_args["is_val"] = partition == "val" + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = Sampler(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_model(num_classes, in_model_file, rank, **kwargs): + xvec_args = kwargs["model"]["xvector"] + if rank == 0: + logging.info("xvector network ft args={}".format(xvec_args)) + xvec_args["num_classes"] = num_classes + model = TML.load(in_model_file) + model.rebuild_output_layer(**xvec_args) + if rank == 0: + logging.info("model={}".format(model)) + return model + + +def train_model(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + model = 
init_model(train_loader.dataset.num_classes, **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(model_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + train_parser = ArgumentParser(prog="") + AD.add_class_args(train_parser, prefix="dataset", skip={}) + Sampler.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + Sampler.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + "data.train.dataset.class_file", "data.val.dataset.class_file" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + parser.link_arguments( + "data.train.sampler.batch_size", "data.val.sampler.batch_size" + ) + + parser.add_argument("--in-model-file", required=True) + model_class.add_finetune_args(parser, prefix="model") + Trainer.add_class_args( + parser, prefix="trainer", train_modes=model_class.valid_train_modes() + ) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Finetunes Wav2Vec2XVector model from audio files" + ) + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + + for k, v in model_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + model_type = args.subcommand + args_sc = vars(args)[model_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.model_class = model_dict[model_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_model(gpu_id, args_sc) diff --git a/hyperion/bin/torch-finetune-xvec-dfr.py b/hyperion/bin/finetune_xvector_dfr_from_feats.py similarity index 100% rename from hyperion/bin/torch-finetune-xvec-dfr.py rename to hyperion/bin/finetune_xvector_dfr_from_feats.py diff --git a/hyperion/bin/torch-finetune-xvec-dfr-from-wav.py b/hyperion/bin/finetune_xvector_dfr_from_wav.py similarity index 100% rename from hyperion/bin/torch-finetune-xvec-dfr-from-wav.py rename to hyperion/bin/finetune_xvector_dfr_from_wav.py diff --git a/hyperion/bin/torch-finetune-xvec.py b/hyperion/bin/finetune_xvector_from_feats.py 
similarity index 100% rename from hyperion/bin/torch-finetune-xvec.py rename to hyperion/bin/finetune_xvector_from_feats.py diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py new file mode 100755 index 00000000..5ddc4d82 --- /dev/null +++ b/hyperion/bin/finetune_xvector_from_wav.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from pathlib import Path +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging +import multiprocessing + +import torch + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.utils import ddp +from hyperion.torch.models import XVector as XVec +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch import TorchModelLoader as TML + + +def init_data(partition, rank, num_gpus, **kwargs): + + kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**kwargs["dataset"]) + sampler_args = Sampler.filter_args(**kwargs["sampler"]) + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + ad_args["is_val"] = partition == "val" + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = Sampler(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_feats(rank, **kwargs): + feat_args = AF.filter_args(**kwargs["feats"]) + if rank == 0: + logging.info("feat args={}".format(feat_args)) + logging.info("initializing feature extractor") + feat_extractor = AF(trans=True, **feat_args) + if rank == 0: + logging.info("feat-extractor={}".format(feat_extractor)) + return feat_extractor + + +def init_xvector(num_classes, in_model_path, rank, **kwargs): + xvec_args = XVec.filter_finetune_args(**kwargs["model"]) + if rank == 0: + logging.info("xvector network ft args={}".format(xvec_args)) + xvec_args["num_classes"] = num_classes + model = TML.load(in_model_path) + model.rebuild_output_layer(**xvec_args) + # if train_mode == "ft-embed-affine": + # model.freeze_preembed_layers() + if rank == 0: + logging.info("x-vector-model={}".format(model)) + return model + + +def train_xvec(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + feat_extractor = init_feats(**kwargs) + model = 
init_xvector(train_loader.dataset.num_classes, **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, + feat_extractor, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Fine-tune x-vector model from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + train_parser = ArgumentParser(prog="") + AD.add_class_args(train_parser, prefix="dataset", skip={}) + Sampler.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + Sampler.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + "data.train.dataset.class_file", "data.val.dataset.class_file" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + parser.link_arguments( + "data.train.sampler.batch_size", "data.val.sampler.batch_size" + ) + + AF.add_class_args(parser, prefix="feats") + parser.add_argument("--in-model-path", required=True) + + XVec.add_finetune_args(parser, prefix="model") + Trainer.add_class_args( + parser, prefix="trainer", train_modes=XVec.valid_train_modes() + ) + ddp.add_ddp_args(parser) + + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + parser.add_argument("--local_rank", default=0, type=int) + + args = parser.parse_args() + gpu_id = args.local_rank + del args.local_rank + + if gpu_id == 0: + try: + config_file = Path(args.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_xvec(gpu_id, args) diff --git a/hyperion/bin/torch-finetune-xvec-from-wav.py b/hyperion/bin/torch-finetune-xvec-from-wav.py deleted file mode 100755 index e33d9b8e..00000000 --- a/hyperion/bin/torch-finetune-xvec-from-wav.py +++ /dev/null @@ -1,287 +0,0 @@ -#!/usr/bin/env python -""" - Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time -import logging -import multiprocessing - -import numpy as np - -import torch -import torch.nn as nn - -from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device, ddp -from hyperion.torch.models import XVector as XVec -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.data import 
AudioDataset as AD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler -from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch.narchs import AudioFeatsMVN as AF -from hyperion.torch import TorchModelLoader as TML - - -def init_data( - audio_path, - train_list, - val_list, - train_aug_cfg, - val_aug_cfg, - num_workers, - num_gpus, - rank, - **kwargs -): - - ad_args = AD.filter_args(**kwargs) - sampler_args = Sampler.filter_args(**kwargs) - if rank == 0: - logging.info("audio dataset args={}".format(ad_args)) - logging.info("sampler args={}".format(sampler_args)) - logging.info("init datasets") - - train_data = AD(audio_path, train_list, aug_cfg=train_aug_cfg, **ad_args) - val_data = AD(audio_path, val_list, aug_cfg=val_aug_cfg, is_val=True, **ad_args) - - if rank == 0: - logging.info("init samplers") - train_sampler = Sampler(train_data, **sampler_args) - val_sampler = Sampler(val_data, **sampler_args) - - num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) - largs = ( - {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} - ) - - train_loader = torch.utils.data.DataLoader( - train_data, batch_sampler=train_sampler, **largs - ) - - test_loader = torch.utils.data.DataLoader( - val_data, batch_sampler=val_sampler, **largs - ) - - return train_loader, test_loader - - -def init_feats(rank, **kwargs): - feat_args = AF.filter_args(**kwargs["feats"]) - if rank == 0: - logging.info("feat args={}".format(feat_args)) - logging.info("initializing feature extractor") - feat_extractor = AF(trans=True, **feat_args) - if rank == 0: - logging.info("feat-extractor={}".format(feat_extractor)) - return feat_extractor - - -def init_xvector(num_classes, in_model_path, rank, train_mode, **kwargs): - xvec_args = XVec.filter_finetune_args(**kwargs) - if rank == 0: - logging.info("xvector network ft args={}".format(xvec_args)) - xvec_args["num_classes"] = num_classes - model = TML.load(in_model_path) - model.rebuild_output_layer(**xvec_args) - if train_mode == "ft-embed-affine": - model.freeze_preembed_layers() - if rank == 0: - logging.info("x-vector-model={}".format(model)) - return model - - -def train_xvec(gpu_id, args): - - config_logger(args.verbose) - del args.verbose - logging.debug(args) - - kwargs = namespace_to_dict(args) - torch.manual_seed(args.seed) - set_float_cpu("float32") - - train_mode = kwargs["train_mode"] - - ddp_args = ddp.filter_ddp_args(**kwargs) - device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) - kwargs["rank"] = rank - - train_loader, test_loader = init_data(**kwargs) - feat_extractor = init_feats(**kwargs) - model = init_xvector(train_loader.dataset.num_classes, **kwargs) - - trn_args = Trainer.filter_args(**kwargs) - if rank == 0: - logging.info("trainer args={}".format(trn_args)) - metrics = {"acc": CategoricalAccuracy()} - trainer = Trainer( - model, - feat_extractor, - device=device, - metrics=metrics, - ddp=world_size > 1, - train_mode=train_mode, - **trn_args - ) - if args.resume: - trainer.load_last_checkpoint() - trainer.fit(train_loader, test_loader) - - ddp.ddp_cleanup() - - -# (audio_path, train_list, val_list, -# train_aug_cfg, val_aug_cfg, -# in_model_path, num_gpus, resume, num_workers, -# train_mode, **kwargs): - -# set_float_cpu('float32') -# logging.info('initializing devices num_gpus={}'.format(num_gpus)) -# device = open_device(num_gpus=num_gpus) - -# ad_args = AD.filter_args(**kwargs) -# sampler_args = Sampler.filter_args(**kwargs) -# feat_args = AFF.filter_args(prefix='feats', 
**kwargs) -# mvn_args = MVN.filter_args(prefix='mvn', **kwargs) -# xvec_args = XVec.filter_finetune_args(**kwargs) -# opt_args = OF.filter_args(prefix='opt', **kwargs) -# lrsch_args = LRSF.filter_args(prefix='lrsch', **kwargs) -# trn_args = Trainer.filter_args(**kwargs) -# logging.info('audio dataset args={}'.format(ad_args)) -# logging.info('sampler args={}'.format(sampler_args)) -# logging.info('feat args={}'.format(feat_args)) -# logging.info('mvn args={}'.format(mvn_args)) -# logging.info('xvector finetune args={}'.format(xvec_args)) -# logging.info('optimizer args={}'.format(opt_args)) -# logging.info('lr scheduler args={}'.format(lrsch_args)) -# logging.info('trainer args={}'.format(trn_args)) - -# logging.info('initializing feature extractor args={}'.format(feat_args)) -# feat_extractor = AFF.create(**feat_args) -# mvn = None -# if mvn_args['norm_mean'] or mvn_args['norm_var']: -# logging.info('initializing short-time mvn') -# mvn = MVN(**mvn_args) - -# feat_extractor = FeatExtractor(feat_extractor, mvn) - -# logging.info('init datasets') -# train_data = AD(audio_path, train_list, aug_cfg=train_aug_cfg, **ad_args) -# val_data = AD(audio_path, val_list, aug_cfg=val_aug_cfg, is_val=True, **ad_args) - -# logging.info('init samplers') -# train_sampler = Sampler(train_data, **sampler_args) -# val_sampler = Sampler(val_data, **sampler_args) - -# largs = {'num_workers': num_workers, 'pin_memory': True} if num_gpus>0 else {} - -# train_loader = torch.utils.data.DataLoader( -# train_data, batch_sampler = train_sampler, **largs) - -# test_loader = torch.utils.data.DataLoader( -# val_data, batch_sampler = val_sampler, **largs) - -# xvec_args['num_classes'] = train_data.num_classes -# model = TML.load(in_model_path) -# model.rebuild_output_layer(**xvec_args) -# if train_mode == 'ft-embed-affine': -# model.freeze_preembed_layers() - -# logging.info('feat-extractor={}'.format(feat_extractor)) -# logging.info('x-vector-model={}'.format(model)) - -# optimizer = OF.create(model.parameters(), **opt_args) -# lr_sch = LRSF.create(optimizer, **lrsch_args) -# metrics = { 'acc': CategoricalAccuracy() } - -# trainer = Trainer(model, feat_extractor, optimizer, -# device=device, metrics=metrics, lr_scheduler=lr_sch, -# data_parallel=(num_gpus>1), train_mode=train_mode, -# **trn_args) -# if resume: -# trainer.load_last_checkpoint() -# trainer.fit(train_loader, test_loader) - - -if __name__ == "__main__": - - parser = ArgumentParser(description="Fine-tune x-vector model from audio files") - - parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--audio-path", required=True) - parser.add_argument("--train-list", required=True) - parser.add_argument("--val-list", required=True) - - AD.add_argparse_args(parser) - Sampler.add_argparse_args(parser) - - parser.add_argument("--train-aug-cfg", default=None) - parser.add_argument("--val-aug-cfg", default=None) - parser.add_argument( - "--num-workers", type=int, default=5, help="num_workers of data loader" - ) - - AF.add_class_args(parser, prefix="feats") - parser.add_argument("--in-model-path", required=True) - - XVec.add_finetune_args(parser) - Trainer.add_class_args(parser) - ddp.add_ddp_args(parser) - - # parser.add_argument('--num-gpus', type=int, default=1, - # help='number of gpus, if 0 it uses cpu') - parser.add_argument("--seed", type=int, default=1123581321, help="random seed") - parser.add_argument( - "--resume", - action="store_true", - default=False, - help="resume training from checkpoint", - ) - parser.add_argument( - 
"--train-mode", - default="ft-embed-affine", - choices=["ft-full", "ft-embed-affine"], - help=( - "ft-full: adapt full x-vector network" - "ft-embed-affine: adapt affine transform before embedding" - ), - ) - parser.add_argument( - "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int - ) - parser.add_argument("--local_rank", default=0, type=int) - - args = parser.parse_args() - gpu_id = args.local_rank - del args.local_rank - - if gpu_id == 0: - try: - config_file = Path(args.exp_path) / "config.yaml" - parser.save(args, str(config_file), format="yaml", overwrite=True) - except: - pass - - # torch docs recommend using forkserver - multiprocessing.set_start_method("forkserver") - train_xvec(gpu_id, args) - - # args = parser.parse_args() - # config_logger(args.verbose) - # del args.verbose - # logging.debug(args) - - # torch.manual_seed(args.seed) - # del args.seed - - # train_xvec(**vars(args)) diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index c673f5c9..e92b9a1a 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -1,6 +1,6 @@ #!/usr/bin/env python """ - Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import sys diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py index 39b037ba..5eb871db 100755 --- a/hyperion/bin/train_xvector_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -16,10 +16,7 @@ import logging import multiprocessing -import numpy as np - import torch -import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.utils import ddp @@ -223,7 +220,9 @@ def make_parser(xvec_class): AF.add_class_args(parser, prefix="feats") xvec_class.add_class_args(parser, prefix="model") - Trainer.add_class_args(parser, prefix="trainer") + Trainer.add_class_args( + parser, prefix="trainer", train_modes=xvec_class.valid_train_modes() + ) ddp.add_ddp_args(parser) parser.add_argument("--seed", type=int, default=1123581321, help="random seed") # parser.add_argument( diff --git a/hyperion/torch/layers/margin_losses.py b/hyperion/torch/layers/margin_losses.py index 0d748249..d7a086d1 100644 --- a/hyperion/torch/layers/margin_losses.py +++ b/hyperion/torch/layers/margin_losses.py @@ -99,8 +99,8 @@ def update_margin(self, epoch): Args: epoch: value of current epoch. """ - if self.margin_warmup_epochs == 0: - return + # if self.margin_warmup_epochs == 0: + # return if epoch < self.margin_warmup_epochs: self.cur_margin = self.margin * epoch / self.margin_warmup_epochs @@ -243,8 +243,8 @@ def update_margin(self, epoch): Args: epoch: value of current epoch. 
""" - if self.margin_warmup_epochs == 0: - return + # if self.margin_warmup_epochs == 0: + # return if epoch < self.margin_warmup_epochs: self.cur_margin = self.margin * epoch / self.margin_warmup_epochs diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py index d585567f..bf0552dc 100644 --- a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py @@ -72,3 +72,14 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ResNet1dXVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py index 3b44c53f..3cabb1d5 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py @@ -76,3 +76,14 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ResNet1dXVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index d79d5a26..85944fb9 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -92,6 +92,28 @@ def update_loss_margin(self, epoch): """ self.xvector.update_loss_margin(epoch) + def rebuild_output_layer( + self, + num_classes=None, + loss_type="arc-softmax", + cos_scale=64, + margin=0.3, + margin_warmup_epochs=10, + intertop_k=5, + intertop_margin=0.0, + num_subcenters=2, + ): + self.xvector.rebuild_output_layer( + num_classes=num_classes, + loss_type=loss_type, + cos_scale=cos_scale, + margin=margin, + margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, + ) + def forward_feats(self, x, x_lengths, return_feat_layers=None): return_hid_states = ( False diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py index 89e7120e..efac4e50 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py @@ -72,3 +72,14 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ResNet1dXVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git 
a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index 27268e44..83c95222 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -35,6 +35,28 @@ def __init__(self, feats, xvector): self.feats = feats self.xvector = xvector + def rebuild_output_layer( + self, + num_classes=None, + loss_type="arc-softmax", + cos_scale=64, + margin=0.3, + margin_warmup_epochs=10, + intertop_k=5, + intertop_margin=0.0, + num_subcenters=2, + ): + self.xvector.rebuild_output_layer( + num_classes=num_classes, + loss_type=loss_type, + cos_scale=cos_scale, + margin=margin, + margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, + ) + def forward( self, x, diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 8c2070b5..c35e6a4a 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -538,6 +538,9 @@ def rebuild_output_layer( cos_scale=64, margin=0.3, margin_warmup_epochs=10, + intertop_k=5, + intertop_margin=0.0, + num_subcenters=2, ): if (self.num_classes is not None and self.num_classes != num_classes) or ( self.loss_type != loss_type @@ -553,6 +556,9 @@ def rebuild_output_layer( self.classif_net.set_margin(margin) self.classif_net.set_margin_warmup_epochs(margin_warmup_epochs) self.classif_net.set_cos_scale(cos_scale) + self.classif_net.set_intertop_k(intertop_k) + self.classif_net.set_intertop_margin(intertop_margin) + self.classif_net.set_num_subcenters(num_subcenters) def freeze_preembed_layers(self): self.encoder_net.freeze() diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index 06bd988c..6a886e44 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -232,6 +232,27 @@ def set_cos_scale(self, cos_scale): self.cos_scale = cos_scale self.output.cos_scale = cos_scale + def set_intertop_k(self, intertop_k): + if self.loss_type == "softmax": + return + + self.intertop_k = intertop_k + self.output.intertop_k = intertop_k + + def set_intertop_margin(self, intertop_margin): + if self.loss_type == "softmax": + return + + self.intertop_margin = intertop_margin + self.output.intertop_margin = intertop_margin + + def set_num_subcenters(self, num_subcenters): + if not self.loss_type == "subcenter-arc-softmax": + return + + self.num_subcenters = num_subcenters + self.output.num_subcenters = num_subcenters + def update_margin(self, epoch): if hasattr(self.output, "update_margin"): self.output.update_margin(epoch) diff --git a/notebooks/tutorial_jsalt22/ivectors.ipynb b/notebooks/tutorial_jsalt22/ivectors.ipynb index 46d4eb61..91548ea8 100644 --- a/notebooks/tutorial_jsalt22/ivectors.ipynb +++ b/notebooks/tutorial_jsalt22/ivectors.ipynb @@ -113,7 +113,8 @@ "metadata": {}, "outputs": [], "source": [ - "x, r_idx, spk_ids =generate_data(num_dims=3)" + "x_dim=3\n", + "x, r_idx, spk_ids=generate_data(num_dims=x_dim)" ] }, { @@ -138,7 +139,17 @@ "metadata": {}, "outputs": [], "source": [ - "ubm_gmm = hnp.pdfs.GMMDiagCov(num_comp=8, x_dim=3)\n", + "num_comp=8\n", + "y_dim=2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ubm_gmm = hnp.pdfs.GMMDiagCov(num_comp=num_comp, x_dim=x_dim)\n", "elbo, elbo_norm = ubm_gmm.fit(x_cat, epochs=10)\n", "fig = plt.figure()\n", 
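
The rebuild_output_layer() additions above follow a standard fine-tuning pattern: keep the trained encoder, re-create the classification head for the new number of classes, and re-apply the large-margin settings (cos_scale, margin, intertop_k, intertop_margin, num_subcenters). A hedged, minimal sketch of the head-rebuild step using a toy module (not the hyperion classes):

    import torch.nn as nn

    class TinyXVector(nn.Module):
        def __init__(self, feat_dim=80, embed_dim=192, num_classes=5994):
            super().__init__()
            self.encoder = nn.Sequential(nn.Linear(feat_dim, embed_dim), nn.ReLU())
            self.output = nn.Linear(embed_dim, num_classes)

        def rebuild_output_layer(self, num_classes):
            # encoder weights are kept; only the classifier is re-created,
            # mirroring how XVector.rebuild_output_layer also resets the margin config
            self.output = nn.Linear(self.output.in_features, num_classes)

    model = TinyXVector(num_classes=5994)       # e.g., pre-trained on VoxCeleb2
    model.rebuild_output_layer(num_classes=100) # adapt to a 100-speaker set
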
"plt.plot(elbo_norm)\n", @@ -167,7 +178,7 @@ "metadata": {}, "outputs": [], "source": [ - "iv_model = hnp.pdfs.JFATotal(K=8, x_dim=3, y_dim=2)\n", + "iv_model = hnp.pdfs.JFATotal(K=num_comp, x_dim=x_dim, y_dim=y_dim)\n", "N=[]\n", "F=[]\n", "for x_i in x:\n", @@ -193,7 +204,21 @@ "metadata": {}, "outputs": [], "source": [ - "F.shape\n", + "num_utts=100\n", + "w = np.randn(num_utts, 2)\n", + "M = ubm_gmm.mu.ravel() + ubm_gmm.cholLambda.ravel() * np.dot(w, iv_model.T)\n", + "M = M.reshape(num_utts, num_comp, x_dim)\n", + "fig = plt.figure()\n", + "ax = fig.add_subplot(projection='3d')\n", + "colors = ['b', 'g','r','c','m','y','k','b']\n", + "for i in range(num_comp):\n", + " ax.plot_surface(M[:,i,0], M[:,i,1], M[:,i,2], alpha=0.2, color=colors[i])\n", + " ax.scatter(M[:,i,0], M[:,i,1], M[:,i,2], marker='o', color=colors[i])\n", + "ax.set_xlabel('x1')\n", + "ax.set_ylabel('x2')\n", + "ax.set_zlabel('x3')\n", + "plt.show()\n", + "\n", "\n" ] } From 81d4a7f88e944c17db63842d39f0eb3a2bab1c7f Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Sun, 12 Jun 2022 17:44:15 -0400 Subject: [PATCH 017/154] added default config to voxceleb/v2 --- ...lmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml | 45 + ...lmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml | 49 + ...lmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml | 50 + .../v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml | 6 +- egs/voxceleb/v2/default_config.sh | 2 +- ...onfig_wavlmbaseplus_ecapatdnn512x3_v1.0.sh | 49 + egs/voxceleb/v2/run_011_train_xvector.sh | 26 +- .../models/wav2xvectors/hf_wav2xvector.py | 4 +- hyperion/torch/trainers/ae_trainer.py | 2 +- hyperion/torch/trainers/dvae_trainer.py | 2 +- hyperion/torch/trainers/vae_trainer.py | 2 +- hyperion/torch/trainers/vq_dvae_trainer.py | 2 +- hyperion/torch/trainers/vq_vae_trainer.py | 2 +- .../torch/trainers/xvector_adv_trainer.py | 2 +- .../trainers/xvector_adv_trainer_from_wav.py | 2 +- hyperion/torch/trainers/xvector_trainer.py | 2 +- .../trainers/xvector_trainer_deep_feat_reg.py | 2 +- .../xvector_trainer_deep_feat_reg_from_wav.py | 2 +- .../trainers/xvector_trainer_from_wav.py | 2 +- notebooks/tutorial_jsalt22/ivectors.ipynb | 4 +- notebooks/tutorial_jsalt22/xvector.ipynb | 3760 +++++++++++++++++ 21 files changed, 3988 insertions(+), 29 deletions(-) create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.0.sh create mode 100644 notebooks/tutorial_jsalt22/xvector.ipynb diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..f62b2e14 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmbaseplus_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + 
lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml new file mode 100644 index 00000000..1298a056 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml @@ -0,0 +1,49 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5.5e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 4.4e-3 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 20 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml new file mode 100644 index 00000000..1721e337 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml @@ -0,0 +1,50 @@ +data: + train: + dataset: + max_chunk_length: 6.0 + min_chunk_length: 6.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2.3e-4 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 2e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 2 + eff_batch_size: 192 + train_mode: full + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml index 787e3718..69b85d8d 100644 --- a/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml +++ b/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3.yaml @@ -33,6 +33,8 @@ xvector: embed_dim: 192 cos_scale: 32.0 margin: 0.2 - margin_warmup_epochs: 3 + margin_warmup_epochs: 5 + intertop_margin: 0.1 dropout_rate: 0.0 -feat_fusion_method: last +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/default_config.sh b/egs/voxceleb/v2/default_config.sh index 65108e89..abcc2a2e 120000 --- a/egs/voxceleb/v2/default_config.sh +++ b/egs/voxceleb/v2/default_config.sh @@ -1 +1 @@ -global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh \ No newline at end of file +global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.0.sh \ No 
newline at end of file diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..7d39995d --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/run_011_train_xvector.sh b/egs/voxceleb/v2/run_011_train_xvector.sh index b959936f..dc4e1dee 100755 --- a/egs/voxceleb/v2/run_011_train_xvector.sh +++ b/egs/voxceleb/v2/run_011_train_xvector.sh @@ -34,17 +34,19 @@ if [ "$interactive" == "true" ];then fi if [ "$use_wandb" == "true" ];then - extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.2 --trainer.wandb.name $nnet_name.$(date -Iminutes)" + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v2 --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" fi + # Network Training if [ $stage -le 1 ]; then - - mkdir -p $nnet_dir/log + + mkdir -p $nnet_s1_dir/log $cuda_cmd \ - --gpu $ngpu $nnet_dir/log/train.log \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - train_wav2vec2xvector.py $nnet_type --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ + train_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ @@ -52,7 +54,7 @@ if [ $stage -le 1 ]; then --data.val.dataset.audio-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ - --trainer.exp-path $nnet_dir $args \ + --trainer.exp-path $nnet_s1_dir $args \ --num-gpus $ngpu fi @@ -60,14 +62,15 @@ fi if [ $stage -le 2 ]; then if [ "$use_wandb" == "true" ];then - extra_args="$extra_args --trainer.wandb.name $nnet_name_s2.$(date -Iminutes)" + extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" fi mkdir -p $nnet_s2_dir/log $cuda_cmd \ --gpu $ngpu $nnet_s2_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - finetune_wav2vec2xvector.py $nnet_type --cfg $xvec_train_s2_base_cfg 
$xvec_train_s2_args $extra_args \ + finetune_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ @@ -75,7 +78,7 @@ if [ $stage -le 2 ]; then --data.val.dataset.audio-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ - --in-model-file $nnet \ + --in-model-file $nnet_s1 \ --trainer.exp-path $nnet_s2_dir $args \ --num-gpus $ngpu \ @@ -84,14 +87,15 @@ fi if [ $stage -le 3 ]; then if [ "$use_wandb" == "true" ];then - extra_args="$extra_args --trainer.wandb.name $nnet_name_s3.$(date -Iminutes)" + extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)" fi mkdir -p $nnet_s3_dir/log $cuda_cmd \ --gpu $ngpu $nnet_s3_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - finetune_wav2vec2xvector.py $nnet_type --cfg $xvec_train_s3_base_cfg $xvec_train_s3_args $extra_args \ + finetune_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index 85944fb9..d75a257b 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -50,7 +50,7 @@ def _make_fuser(self): self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) elif self.feat_fusion_method == "linear": self.feat_fuser = nn.Linear(num_layers, 1, bias=False) - self.feat_fuser.weights.data = torch.ones(num_layers) / num_layers + self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers elif self.feat_fusion_method == "cat": self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) @@ -74,7 +74,7 @@ def _fuse_hid_feats(self, hid_feats): feats = torch.sum(hid_feats * norm_weights, dim=-1) elif self.feat_fusion_method == "linear": hid_feats = torch.stack(hid_feats, dim=-1) - feats = self.feat_fuser(hid_feats) + feats = self.feat_fuser(hid_feats).squeeze(dim=-1) elif self.feat_fusion_method == "cat": hid_feats = torch.cat(hid_feats, dim=-1) feats = self.feat_fuser(hid_feats) diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index c1debdb6..21d53d32 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -63,7 +63,7 @@ def __init__( loggers=None, ddp=False, ddp_type="ddp", - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index 02c4fb6e..b75a94ab 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -61,7 +61,7 @@ def __init__( loggers=None, ddp=False, ddp_type="ddp", - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index ae193209..284f07d0 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -61,7 +61,7 @@ def __init__( loggers=None, ddp=False, 
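
The hf_wav2xvector.py hunk above fixes two real bugs in the "linear" fusion path: nn.Linear stores its matrix in .weight with shape (out_features, in_features) = (1, num_layers), not in a flat .weights vector, and its output keeps a trailing singleton dimension that must be squeezed. A shape-only sketch of both fusion modes over the transformer hidden states (dimensions here are illustrative):

    import torch
    import torch.nn as nn

    L, B, T, D = 12, 2, 50, 768                       # layers, batch, time, feat dim
    hid_feats = [torch.randn(B, T, D) for _ in range(L)]
    stacked = torch.stack(hid_feats, dim=-1)          # (B, T, D, L)

    # weighted-avg: one learnable scalar per layer, softmax-normalized
    w = nn.Parameter(torch.zeros(L))
    feats_wavg = torch.sum(stacked * torch.softmax(w, dim=0), dim=-1)

    # linear: Linear over the layer axis, initialized to a uniform average
    fuser = nn.Linear(L, 1, bias=False)
    fuser.weight.data = torch.ones(1, L) / L          # (out=1, in=L), per the fix
    feats_lin = fuser(stacked).squeeze(dim=-1)        # drop the trailing dim

    assert feats_lin.shape == (B, T, D)
    assert torch.allclose(feats_wavg, feats_lin, atol=1e-5)  # both start as the mean

At initialization both modes reduce to a plain average over layers; training then lets the weights pick out the layers most useful for speaker discrimination.
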
ddp_type="ddp", - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index 840d70d6..30d2d3b3 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -61,7 +61,7 @@ def __init__( loggers=None, ddp=False, ddp_type="ddp", - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index cb09ba00..c484b5c7 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -61,7 +61,7 @@ def __init__( loggers=None, ddp=False, ddp_type="ddp", - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 91c75823..961597e5 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -69,7 +69,7 @@ def __init__( ddp=False, ddp_type="ddp", loss=None, - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 2797e678..036ee46e 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -71,7 +71,7 @@ def __init__( ddp=False, ddp_type="ddp", loss=None, - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 8c39a345..4cc4bc8c 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -63,7 +63,7 @@ def __init__( ddp=False, ddp_type="ddp", loss=None, - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 58235961..1c0c26b7 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -74,7 +74,7 @@ def __init__( ddp_type="ddp", loss=None, reg_loss=None, - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index da9d064a..1ad4d24a 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -75,7 +75,7 @@ def __init__( ddp_type="ddp", loss=None, reg_loss=None, - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, use_tensorboard=False, diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index d75936d8..64a1d187 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -63,7 +63,7 @@ def __init__( ddp=False, ddp_type="ddp", loss=None, - train_mode="train", + train_mode="full", use_amp=False, log_interval=10, 
use_tensorboard=False, diff --git a/notebooks/tutorial_jsalt22/ivectors.ipynb b/notebooks/tutorial_jsalt22/ivectors.ipynb index 91548ea8..7f2780d9 100644 --- a/notebooks/tutorial_jsalt22/ivectors.ipynb +++ b/notebooks/tutorial_jsalt22/ivectors.ipynb @@ -76,12 +76,12 @@ " \n", " # Sample speakers\n", " spk_ids = np.arange(num_spks)\n", - " y = sb * rng.randn(num_spks, tv_dim)\n", + " y = np.sqrt(sb) * rng.randn(num_spks, tv_dim)\n", "\n", " # Sample i-vectors\n", " spk_ids = np.repeat(spk_ids, num_utts, axis=0)\n", " y = np.repeat(y, num_utts, axis=0)\n", - " w = y + sw * rng.randn(num_spks*num_utts, tv_dim)\n", + " w = y + np.sqrt(sw) * rng.randn(num_spks*num_utts, tv_dim)\n", "\n", " x = []\n", " r_idx = []\n", diff --git a/notebooks/tutorial_jsalt22/xvector.ipynb b/notebooks/tutorial_jsalt22/xvector.ipynb new file mode 100644 index 00000000..70f01057 --- /dev/null +++ b/notebooks/tutorial_jsalt22/xvector.ipynb @@ -0,0 +1,3760 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "hpath='/exp/jvillalba/hyperion/hyperion-persephone'\n", + "import sys\n", + "sys.path.append(hpath)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import deepcopy\n", + "import logging\n", + "import numpy as np\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "from torch.utils.data import Dataset, DataLoader\n", + "\n", + "import hyperion as hyp\n", + "import hyperion.np as hnp\n", + "import hyperion.torch as ht\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "class IVDataset(Dataset):\n", + " \"\"\"Datasets that generates utterances following the i-vector model.\n", + " \n", + " Attributes:\n", + " num_samples: num. of utterances in the dataset.\n", + " num_spks: num. 
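
The ivectors.ipynb hunk above (sb * randn changed to np.sqrt(sb) * randn) fixes a classic sampling slip: randn draws from N(0, 1), so scaling by sb would give variance sb squared rather than sb; the scale factor must be the standard deviation. A quick standalone check, using the same RandomState style as the notebook:

    import numpy as np

    rng = np.random.RandomState(1234)
    sb = 0.7
    y = np.sqrt(sb) * rng.randn(200000)   # std = sqrt(variance)
    assert abs(y.var() - sb) < 1e-2       # empirical variance is close to sb
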
of speakers in the dataset.\n", + " x_dim: feature dimension.\n", + " num_gauss: number of Gaussian components in UBM-GMM\n", + " w_dim: i-vector dimension.\n", + " sb: isotropic between-class cov.\n", + " sw: isotropic within-class cov.\n", + " \n", + " \"\"\"\n", + " \n", + " def __init__(self, num_samples=10000, num_spks=100, x_dim=16, num_gauss=32, w_dim=16, sb=0.7, sw=0.3, utt_length=200, unit_length=25, seed=1234):\n", + " self.rng = np.random.RandomState(seed=seed)\n", + " self.num_samples = num_samples\n", + " self.num_spks = num_spks\n", + " self.x_dim = x_dim\n", + " self.w_dim = w_dim\n", + " self.num_gauss = num_gauss\n", + " self.utt_length = utt_length\n", + " self.unit_length = unit_length\n", + " self.sb = sb\n", + " self.sw = sw\n", + " self.y = self._make_spks(num_spks, w_dim, sb, self.rng)\n", + " self.gmm_ubm = self._make_ubm(x_dim, num_gauss, self.rng)\n", + " self.T = self._make_ivector(x_dim, num_gauss, w_dim, self.rng)\n", + "\n", + " def __len__(self):\n", + " return self.num_samples\n", + "\n", + " def __getitem__(self, idx):\n", + " spk_idx = idx % self.y.shape[0]\n", + " x = self.sample_utterance(spk_idx)\n", + " x = x.astype('float32')\n", + " return x, spk_idx\n", + "\n", + "\n", + " @staticmethod\n", + " def _make_spks(num_spks, w_dim, sb, rng):\n", + " \"\"\"Creates the speaker identity vectors\"\"\"\n", + " return np.sqrt(sb) * rng.randn(num_spks, w_dim)\n", + "\n", + " @staticmethod\n", + " def _make_ubm(x_dim, num_gauss, rng):\n", + " \"\"\"Creates the UBM GMM\"\"\"\n", + " # Define UBM\n", + " # Means of the GMM-UBM\n", + " ubm_means = rng.randn(num_gauss, x_dim)\n", + " ubm_means = np.sqrt(x_dim) * ubm_means/np.linalg.norm(ubm_means, axis=-1, keepdims=True)\n", + "\n", + " # Covariances of the GMM-UBM\n", + " ubm_cov = 0.1 * np.ones((num_gauss, x_dim))\n", + " ubm_prec = 1./ubm_cov\n", + "\n", + " # Weights of the GMM-UBM\n", + " ubm_weights = np.ones((num_gauss))/num_gauss\n", + "\n", + " return hnp.pdfs.GMMDiagCov(pi=ubm_weights, mu=ubm_means, Lambda=ubm_prec)\n", + "\n", + " @staticmethod\n", + " def _make_ivector(x_dim, num_gauss, w_dim, rng):\n", + " # Define Total Variability sub-space\n", + " T = rng.randn(w_dim, x_dim * num_gauss)\n", + " T = 0.2 * T/np.max(T)\n", + " return T\n", + "\n", + " def sample_utterance(self, spk_idx):\n", + " # generate i-vector\n", + " y = self.y[spk_idx] # spk factor\n", + " w = y + np.sqrt(self.sw) * self.rng.randn(self.w_dim)\n", + "\n", + " # For each utterance\n", + " # Compute the GMM mean of the utterance\n", + " means_i = self.gmm_ubm.mu + np.dot(w, self.T).reshape(self.x_dim, self.num_gauss).T\n", + "\n", + " # Create a GMM for the utterance.\n", + " gmm = self.gmm_ubm.copy()\n", + " gmm.mu = means_i\n", + " \n", + " # Sample the Gaussian components\n", + " num_units = self.utt_length // self.unit_length\n", + " r = self.rng.multinomial(1, gmm.pi, size=(num_units,))\n", + " # Assume that we stay in the same component several time steps.\n", + " r = np.repeat(r, self.unit_length, axis=0)\n", + " # Draw samples from the GMM\n", + " x = gmm.sample(r=r)\n", + " return x\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the Training dataset\n", + "train_data = IVDataset()\n", + "# The Validation dataset is a copy of the training data but with less samples\n", + "val_data = deepcopy(train_data)\n", + "val_data.num_samples = 100\n", + "\n", + "# Create data loaders\n", + "train_loader = DataLoader(train_data, batch_size=32, 
shuffle=True)\n", + "val_loader = DataLoader(val_data, batch_size=32, shuffle=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "class StemBlock(nn.Module):\n", + " \"\"\"Build block input layer of x-vector model\n", + "\n", + " Args:\n", + " in_channels: input channels.\n", + " out_channels: output channels.\n", + " kernel_size: kernels size for the convolution.\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " in_channels,\n", + " out_channels,\n", + " kernel_size,\n", + " ):\n", + "\n", + " super().__init__()\n", + "\n", + " self.activation = nn.ReLU()\n", + " padding = int((kernel_size - 1) // 2)\n", + " self.bn = nn.BatchNorm1d(out_channels)\n", + " self.conv = nn.Conv1d(\n", + " in_channels,\n", + " out_channels,\n", + " bias=False,\n", + " kernel_size=kernel_size,\n", + " padding=padding,\n", + " )\n", + "\n", + " def forward(self, x, x_mask=None):\n", + " \"\"\"Forward function.\n", + "\n", + " Args:\n", + " x: input tensor with shape = (batch, in_channels, in_time).\n", + " x_mask: unused.\n", + "\n", + " Returns:\n", + " Tensor with shape = (batch, out_channels, out_time).\n", + " \"\"\"\n", + "\n", + " x = self.conv(x)\n", + " x = self.bn(x)\n", + " x = self.activation(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "class TDNNBlock(nn.Module):\n", + " \"\"\"TDNN Layer block.\n", + "\n", + " Attributes:\n", + " in_channels: input channels.\n", + " out_channels: output channels.\n", + " kernel_size: kernel size.\n", + " dilation: dilation factor of the conv. kernels.\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " in_channels,\n", + " out_channels,\n", + " kernel_size=3,\n", + " dilation=1,\n", + " ):\n", + "\n", + " super().__init__()\n", + " self.in_channels = in_channels\n", + " self.out_channels = out_channels\n", + " self.activation = nn.ReLU()\n", + " self.bn = nn.BatchNorm1d(out_channels)\n", + "\n", + " padding = int(dilation * (kernel_size - 1) // 2)\n", + " self.conv = nn.Conv1d(\n", + " in_channels,\n", + " out_channels,\n", + " bias=False,\n", + " kernel_size=kernel_size,\n", + " dilation=dilation,\n", + " padding=padding,\n", + " )\n", + "\n", + "\n", + " def forward(self, x):\n", + " \"\"\"Forward function.\n", + "\n", + " Args:\n", + " x: input tensor with shape = (batch, in_channels, time).\n", + " x_mask: unused.\n", + "\n", + " Returns:\n", + " Tensor with shape = (batch, out_channels, time).\n", + " \"\"\"\n", + " x = self.conv(x)\n", + " x = self.bn(x)\n", + " x = self.activation(x)\n", + " return x\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "class StatsPooling(nn.Module):\n", + " \"\"\"mean + stddev pooling layer.\"\"\"\n", + "\n", + " def forward(self, x):\n", + " x_mean = torch.mean(x, dim=-1)\n", + " x2_mean = torch.mean(x**2, dim=-1)\n", + " x_std = torch.sqrt((x2_mean-x_mean**2).clamp(min=1e-5))\n", + " return torch.cat((x_mean, x_std), dim=-1)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "class TDNNXVec(ht.TorchModel):\n", + "\n", + " def __init__(self, feat_dim, num_layers, layer_dim, embed_dim, num_classes):\n", + " super().__init__()\n", + " self.in_block = StemBlock(feat_dim, layer_dim, kernel_size=5)\n", + " tdnn_layers = []\n", + " for i in range(num_layers):\n", + " layer_i = TDNNBlock(layer_dim, layer_dim, 
kernel_size=3, dilation=i+2)\n", + " tdnn_layers.append(layer_i)\n", + "\n", + " self.tdnn_layers = nn.ModuleList(tdnn_layers)\n", + " self.pooling = StatsPooling()\n", + " self.projection = nn.Linear(2*layer_dim, embed_dim)\n", + " self.output = nn.Linear(embed_dim, num_classes)\n", + "\n", + " def update_loss_margin(self, epoch):\n", + " pass\n", + "\n", + " def forward(self, x, y=None, infer=False):\n", + " x = x.transpose(1,2)\n", + " x = self.in_block(x)\n", + " for i, layer in enumerate(self.tdnn_layers):\n", + " x = layer(x)\n", + "\n", + " x = self.pooling(x)\n", + " z = self.projection(x)\n", + " if infer:\n", + " return z\n", + "\n", + " return self.output(z)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "model = TDNNXVec(16, 2, 32, 16, 100)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:optimizer args={'opt_type': 'sgd', 'lr': 0.01, 'momentum': 0.9, 'oss': False}\n", + "INFO:root:lr scheduler args={'lrsch_type': 'exp_lr', 'decay_rate': 0.5, 'decay_steps': 4000, 'hold_steps': 2000, 'warmup_steps': 1000, 'update_lr_on_opt_step': True}\n" + ] + } + ], + "source": [ + "from hyperion.torch.trainers import XVectorTrainer\n", + "from hyperion.torch.metrics import CategoricalAccuracy\n", + "\n", + "optim = {\"opt_type\": \"sgd\", \"lr\": 0.01, \"momentum\": 0.9}\n", + "lrsched = {\"lrsch_type\": \"exp_lr\", \"decay_rate\": 0.5, \"decay_steps\": 4000, \"hold_steps\": 2000, \"warmup_steps\": 1000, \"update_lr_on_opt_step\": True}\n", + "metrics = {\"acc\": CategoricalAccuracy()}\n", + "trainer = XVectorTrainer(model, optim=optim, lrsched=lrsched, exp_path='./tdnn_xvec', device=torch.device('cpu'), train_mode=\"full\", metrics=metrics )" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:epoch: 1/100 starts\n", + "INFO:root:epoch: 1/100 et: 0s eta: 14s batches: 10/313(3%) samples: 320 loss: 4.601476 acc: 0.018750 lr: 0.000090\n", + "INFO:root:epoch: 1/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 4.598933 acc: 0.018750 lr: 0.000190\n", + "INFO:root:epoch: 1/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 4.598896 acc: 0.018750 lr: 0.000290\n", + "INFO:root:epoch: 1/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 4.598870 acc: 0.016406 lr: 0.000390\n", + "INFO:root:epoch: 1/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 4.598446 acc: 0.016250 lr: 0.000490\n", + "INFO:root:epoch: 1/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 4.596868 acc: 0.015625 lr: 0.000590\n", + "INFO:root:epoch: 1/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 4.596662 acc: 0.016071 lr: 0.000690\n", + "INFO:root:epoch: 1/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 4.596169 acc: 0.016797 lr: 0.000790\n", + "INFO:root:epoch: 1/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 4.596366 acc: 0.017361 lr: 0.000890\n", + "INFO:root:epoch: 1/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 4.598048 acc: 0.016562 lr: 0.000990\n", + "INFO:root:epoch: 1/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 4.597743 acc: 0.017330 lr: 0.001090\n", + "INFO:root:epoch: 1/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 4.597656 acc: 0.017969 lr: 0.001190\n", + "INFO:root:epoch: 1/100 et: 
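
One detail of the StatsPooling cell above deserves a note: the clamp(min=1e-5) on the variance is what keeps the sqrt differentiable in practice, since a constant (or silent) input drives the variance to zero, where the gradient of sqrt blows up. A standalone check with a toy tensor (the threshold value is the notebook's; without the clamp this backward pass produces non-finite gradients):

    import torch

    x = torch.full((1, 4, 10), 3.0, requires_grad=True)  # constant signal -> zero variance
    x_mean = x.mean(dim=-1)
    x2_mean = (x ** 2).mean(dim=-1)
    x_std = torch.sqrt((x2_mean - x_mean ** 2).clamp(min=1e-5))
    x_std.sum().backward()
    assert torch.isfinite(x.grad).all()
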
5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 4.597299 acc: 0.018510 lr: 0.001290\n",
+ "[... per-batch progress lines omitted; end-of-epoch summaries for epochs 1-19 follow ...]\n",
+ "INFO:root:epoch: 1/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 4.597106 acc: 0.018548 lr: 0.003090\n",
+ "INFO:root:epoch: 2/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 4.576722 acc: 0.024798 lr: 0.006220\n",
+ "INFO:root:epoch: 3/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 4.431607 acc: 0.033165 lr: 0.009350\n",
+ "INFO:root:epoch: 4/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 3.937246 acc: 0.076512 lr: 0.010000\n",
+ "INFO:root:epoch: 5/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 3.254701 acc: 0.165625 lr: 0.010000\n",
+ "INFO:root:epoch: 6/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 2.823910 acc: 0.238810 lr: 0.010000\n",
+ "INFO:root:epoch: 7/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 2.570780 acc: 0.290524 lr: 0.009681\n",
+ "INFO:root:epoch: 8/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 2.345128 acc: 0.336492 lr: 0.009170\n",
+ "INFO:root:epoch: 9/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 2.178883 acc: 0.372177 lr: 0.008686\n",
+ "INFO:root:epoch: 10/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 2.013963 acc: 0.417036 lr: 0.008227\n",
+ "INFO:root:epoch: 11/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.895415 acc: 0.441230 lr: 0.007793\n",
+ "INFO:root:epoch: 12/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.773571 acc: 0.480645 lr: 0.007382\n",
+ "INFO:root:epoch: 13/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.700123 acc: 0.495968 lr: 0.006992\n",
+ "INFO:root:epoch: 14/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.618532 acc: 0.527319 lr: 0.006623\n",
+ "INFO:root:epoch: 15/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.560062 acc: 0.536895 lr: 0.006273\n",
+ "INFO:root:epoch: 16/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.491041 acc: 0.555141 lr: 0.005942\n",
+ "INFO:root:epoch: 17/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.412640 acc: 0.577520 lr: 0.005628\n",
+ "INFO:root:epoch: 18/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.379968 acc: 0.586996 lr: 0.005331\n",
+ "INFO:root:epoch: 19/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.325314 acc: 0.598690 lr: 0.005050\n",
+ "INFO:root:epoch: 20/100 starts\n",
+ "INFO:root:epoch: 20/100 et: 9s eta: 4s 
batches: 210/313(67%) samples: 6720 loss: 1.274672 acc: 0.618006 lr: 0.004867\n", + "INFO:root:epoch: 20/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.278945 acc: 0.617045 lr: 0.004858\n", + "INFO:root:epoch: 20/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.284810 acc: 0.616168 lr: 0.004850\n", + "INFO:root:epoch: 20/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.287501 acc: 0.615755 lr: 0.004841\n", + "INFO:root:epoch: 20/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.283725 acc: 0.616625 lr: 0.004833\n", + "INFO:root:epoch: 20/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.285955 acc: 0.615505 lr: 0.004825\n", + "INFO:root:epoch: 20/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.286474 acc: 0.614583 lr: 0.004816\n", + "INFO:root:epoch: 20/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.281536 acc: 0.615737 lr: 0.004808\n", + "INFO:root:epoch: 20/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.279781 acc: 0.616272 lr: 0.004800\n", + "INFO:root:epoch: 20/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.278089 acc: 0.617500 lr: 0.004791\n", + "INFO:root:epoch: 20/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.275513 acc: 0.617641 lr: 0.004783\n", + "INFO:root:epoch: 21/100 starts\n", + "INFO:root:epoch: 21/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.225873 acc: 0.621875 lr: 0.004772\n", + "INFO:root:epoch: 21/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 1.342575 acc: 0.593750 lr: 0.004764\n", + "INFO:root:epoch: 21/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 1.354508 acc: 0.590625 lr: 0.004756\n", + "INFO:root:epoch: 21/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.306069 acc: 0.599219 lr: 0.004748\n", + "INFO:root:epoch: 21/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.305087 acc: 0.601875 lr: 0.004739\n", + "INFO:root:epoch: 21/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.281034 acc: 0.611458 lr: 0.004731\n", + "INFO:root:epoch: 21/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.252286 acc: 0.620536 lr: 0.004723\n", + "INFO:root:epoch: 21/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.254262 acc: 0.621094 lr: 0.004715\n", + "INFO:root:epoch: 21/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.265275 acc: 0.620833 lr: 0.004707\n", + "INFO:root:epoch: 21/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.276074 acc: 0.618125 lr: 0.004698\n", + "INFO:root:epoch: 21/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.272756 acc: 0.617045 lr: 0.004690\n", + "INFO:root:epoch: 21/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.277829 acc: 0.615104 lr: 0.004682\n", + "INFO:root:epoch: 21/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.274355 acc: 0.615625 lr: 0.004674\n", + "INFO:root:epoch: 21/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 1.280641 acc: 0.613616 lr: 0.004666\n", + "INFO:root:epoch: 21/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.284520 acc: 0.613333 lr: 0.004658\n", + "INFO:root:epoch: 21/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.276668 acc: 0.614648 lr: 0.004650\n", + "INFO:root:epoch: 21/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.276837 acc: 0.612500 lr: 0.004642\n", + "INFO:root:epoch: 21/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 
loss: 1.281698 acc: 0.612674 lr: 0.004634\n", + "INFO:root:epoch: 21/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.273948 acc: 0.613816 lr: 0.004626\n", + "INFO:root:epoch: 21/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.263855 acc: 0.616250 lr: 0.004618\n", + "INFO:root:epoch: 21/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.266876 acc: 0.616815 lr: 0.004610\n", + "INFO:root:epoch: 21/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.269532 acc: 0.615625 lr: 0.004602\n", + "INFO:root:epoch: 21/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.269190 acc: 0.616576 lr: 0.004594\n", + "INFO:root:epoch: 21/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.270316 acc: 0.614844 lr: 0.004586\n", + "INFO:root:epoch: 21/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.269163 acc: 0.614125 lr: 0.004578\n", + "INFO:root:epoch: 21/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.272235 acc: 0.613702 lr: 0.004570\n", + "INFO:root:epoch: 21/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.272532 acc: 0.614583 lr: 0.004562\n", + "INFO:root:epoch: 21/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.270373 acc: 0.614955 lr: 0.004554\n", + "INFO:root:epoch: 21/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.268165 acc: 0.614978 lr: 0.004546\n", + "INFO:root:epoch: 21/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.272549 acc: 0.614479 lr: 0.004538\n", + "INFO:root:epoch: 21/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.274493 acc: 0.613609 lr: 0.004531\n", + "INFO:root:epoch: 22/100 starts\n", + "INFO:root:epoch: 22/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.269285 acc: 0.631250 lr: 0.004520\n", + "INFO:root:epoch: 22/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 1.262039 acc: 0.620312 lr: 0.004513\n", + "INFO:root:epoch: 22/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 1.256257 acc: 0.618750 lr: 0.004505\n", + "INFO:root:epoch: 22/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.254888 acc: 0.620312 lr: 0.004497\n", + "INFO:root:epoch: 22/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.241882 acc: 0.625000 lr: 0.004489\n", + "INFO:root:epoch: 22/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.238519 acc: 0.621875 lr: 0.004481\n", + "INFO:root:epoch: 22/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.222724 acc: 0.625446 lr: 0.004474\n", + "INFO:root:epoch: 22/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.206227 acc: 0.632812 lr: 0.004466\n", + "INFO:root:epoch: 22/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.200030 acc: 0.632292 lr: 0.004458\n", + "INFO:root:epoch: 22/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.195600 acc: 0.635313 lr: 0.004450\n", + "INFO:root:epoch: 22/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.191617 acc: 0.640341 lr: 0.004443\n", + "INFO:root:epoch: 22/100 et: 5s eta: 9s batches: 120/313(38%) samples: 3840 loss: 1.192565 acc: 0.638802 lr: 0.004435\n", + "INFO:root:epoch: 22/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.197187 acc: 0.639183 lr: 0.004427\n", + "INFO:root:epoch: 22/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 1.200389 acc: 0.637946 lr: 0.004420\n", + "INFO:root:epoch: 22/100 et: 7s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.200150 acc: 0.636042 lr: 
0.004412\n", + "INFO:root:epoch: 22/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.198456 acc: 0.636719 lr: 0.004404\n", + "INFO:root:epoch: 22/100 et: 8s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.194059 acc: 0.638419 lr: 0.004397\n", + "INFO:root:epoch: 22/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.188364 acc: 0.640625 lr: 0.004389\n", + "INFO:root:epoch: 22/100 et: 9s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.187410 acc: 0.640296 lr: 0.004382\n", + "INFO:root:epoch: 22/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.189615 acc: 0.640469 lr: 0.004374\n", + "INFO:root:epoch: 22/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.187115 acc: 0.642411 lr: 0.004366\n", + "INFO:root:epoch: 22/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.193034 acc: 0.640767 lr: 0.004359\n", + "INFO:root:epoch: 22/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.191524 acc: 0.640897 lr: 0.004351\n", + "INFO:root:epoch: 22/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.191986 acc: 0.641536 lr: 0.004344\n", + "INFO:root:epoch: 22/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.194074 acc: 0.641000 lr: 0.004336\n", + "INFO:root:epoch: 22/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.187428 acc: 0.643990 lr: 0.004329\n", + "INFO:root:epoch: 22/100 et: 12s eta: 2s batches: 270/313(86%) samples: 8640 loss: 1.182578 acc: 0.645949 lr: 0.004321\n", + "INFO:root:epoch: 22/100 et: 13s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.183534 acc: 0.646652 lr: 0.004314\n", + "INFO:root:epoch: 22/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.182079 acc: 0.647306 lr: 0.004306\n", + "INFO:root:epoch: 22/100 et: 14s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.181875 acc: 0.646979 lr: 0.004299\n", + "INFO:root:epoch: 22/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.180883 acc: 0.647379 lr: 0.004291\n", + "INFO:root:epoch: 23/100 starts\n", + "INFO:root:epoch: 23/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.335497 acc: 0.603125 lr: 0.004282\n", + "INFO:root:epoch: 23/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 1.256130 acc: 0.623437 lr: 0.004274\n", + "INFO:root:epoch: 23/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 1.220576 acc: 0.625000 lr: 0.004267\n", + "INFO:root:epoch: 23/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.215305 acc: 0.626563 lr: 0.004259\n", + "INFO:root:epoch: 23/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.196881 acc: 0.626250 lr: 0.004252\n", + "INFO:root:epoch: 23/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.220484 acc: 0.626042 lr: 0.004245\n", + "INFO:root:epoch: 23/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.205832 acc: 0.625893 lr: 0.004237\n", + "INFO:root:epoch: 23/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.209833 acc: 0.628125 lr: 0.004230\n", + "INFO:root:epoch: 23/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.212374 acc: 0.626736 lr: 0.004223\n", + "INFO:root:epoch: 23/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.205179 acc: 0.629688 lr: 0.004215\n", + "INFO:root:epoch: 23/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.206868 acc: 0.629261 lr: 0.004208\n", + "INFO:root:epoch: 23/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.201353 acc: 0.630990 lr: 0.004201\n", + "INFO:root:epoch: 23/100 
et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.207954 acc: 0.630769 lr: 0.004194\n", + "INFO:root:epoch: 23/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 1.211173 acc: 0.630134 lr: 0.004186\n", + "INFO:root:epoch: 23/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.209848 acc: 0.632083 lr: 0.004179\n", + "INFO:root:epoch: 23/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.210798 acc: 0.630469 lr: 0.004172\n", + "INFO:root:epoch: 23/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.209039 acc: 0.630882 lr: 0.004165\n", + "INFO:root:epoch: 23/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.211289 acc: 0.631771 lr: 0.004157\n", + "INFO:root:epoch: 23/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.219773 acc: 0.628783 lr: 0.004150\n", + "INFO:root:epoch: 23/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.214747 acc: 0.629375 lr: 0.004143\n", + "INFO:root:epoch: 23/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.218350 acc: 0.628571 lr: 0.004136\n", + "INFO:root:epoch: 23/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.219696 acc: 0.629545 lr: 0.004129\n", + "INFO:root:epoch: 23/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.218164 acc: 0.629755 lr: 0.004122\n", + "INFO:root:epoch: 23/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.224656 acc: 0.627734 lr: 0.004114\n", + "INFO:root:epoch: 23/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.228053 acc: 0.625875 lr: 0.004107\n", + "INFO:root:epoch: 23/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.226239 acc: 0.625841 lr: 0.004100\n", + "INFO:root:epoch: 23/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.223572 acc: 0.625810 lr: 0.004093\n", + "INFO:root:epoch: 23/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.221086 acc: 0.626786 lr: 0.004086\n", + "INFO:root:epoch: 23/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.223270 acc: 0.626401 lr: 0.004079\n", + "INFO:root:epoch: 23/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.221303 acc: 0.626979 lr: 0.004072\n", + "INFO:root:epoch: 23/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.218277 acc: 0.627419 lr: 0.004065\n", + "INFO:root:epoch: 24/100 starts\n", + "INFO:root:epoch: 24/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.220275 acc: 0.634375 lr: 0.004056\n", + "INFO:root:epoch: 24/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 1.152032 acc: 0.654688 lr: 0.004049\n", + "INFO:root:epoch: 24/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 1.201902 acc: 0.634375 lr: 0.004042\n", + "INFO:root:epoch: 24/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.187698 acc: 0.639063 lr: 0.004035\n", + "INFO:root:epoch: 24/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.199120 acc: 0.638750 lr: 0.004028\n", + "INFO:root:epoch: 24/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.171957 acc: 0.648438 lr: 0.004021\n", + "INFO:root:epoch: 24/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.179553 acc: 0.645536 lr: 0.004014\n", + "INFO:root:epoch: 24/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.193809 acc: 0.642578 lr: 0.004007\n", + "INFO:root:epoch: 24/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.189958 acc: 0.641667 lr: 0.004000\n", + "INFO:root:epoch: 24/100 et: 4s eta: 9s batches: 100/313(31%) 
samples: 3200 loss: 1.191840 acc: 0.638438 lr: 0.003993\n", + "INFO:root:epoch: 24/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.175894 acc: 0.644602 lr: 0.003986\n", + "INFO:root:epoch: 24/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.177396 acc: 0.642708 lr: 0.003979\n", + "INFO:root:epoch: 24/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.178631 acc: 0.640865 lr: 0.003972\n", + "INFO:root:epoch: 24/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 1.181614 acc: 0.639732 lr: 0.003965\n", + "INFO:root:epoch: 24/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.179924 acc: 0.639375 lr: 0.003958\n", + "INFO:root:epoch: 24/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.183492 acc: 0.637305 lr: 0.003952\n", + "INFO:root:epoch: 24/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.179016 acc: 0.638419 lr: 0.003945\n", + "INFO:root:epoch: 24/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.174863 acc: 0.640278 lr: 0.003938\n", + "INFO:root:epoch: 24/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.176255 acc: 0.641118 lr: 0.003931\n", + "INFO:root:epoch: 24/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.176602 acc: 0.640625 lr: 0.003924\n", + "INFO:root:epoch: 24/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.174983 acc: 0.641220 lr: 0.003917\n", + "INFO:root:epoch: 24/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.178939 acc: 0.640199 lr: 0.003911\n", + "INFO:root:epoch: 24/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.180348 acc: 0.639946 lr: 0.003904\n", + "INFO:root:epoch: 24/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.179358 acc: 0.641536 lr: 0.003897\n", + "INFO:root:epoch: 24/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.172712 acc: 0.643625 lr: 0.003890\n", + "INFO:root:epoch: 24/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.169369 acc: 0.642668 lr: 0.003884\n", + "INFO:root:epoch: 24/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.169085 acc: 0.642708 lr: 0.003877\n", + "INFO:root:epoch: 24/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.167223 acc: 0.643527 lr: 0.003870\n", + "INFO:root:epoch: 24/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.164962 acc: 0.643750 lr: 0.003864\n", + "INFO:root:epoch: 24/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.163042 acc: 0.643958 lr: 0.003857\n", + "INFO:root:epoch: 24/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.163734 acc: 0.643750 lr: 0.003850\n", + "INFO:root:epoch: 25/100 starts\n", + "INFO:root:epoch: 25/100 et: 0s eta: 14s batches: 10/313(3%) samples: 320 loss: 1.140937 acc: 0.643750 lr: 0.003842\n", + "INFO:root:epoch: 25/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 1.089915 acc: 0.660937 lr: 0.003835\n", + "INFO:root:epoch: 25/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 1.104070 acc: 0.659375 lr: 0.003828\n", + "INFO:root:epoch: 25/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.111904 acc: 0.656250 lr: 0.003822\n", + "INFO:root:epoch: 25/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.104860 acc: 0.661250 lr: 0.003815\n", + "INFO:root:epoch: 25/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.106434 acc: 0.659896 lr: 0.003808\n", + "INFO:root:epoch: 25/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.104390 acc: 
0.660714 lr: 0.003802\n", + "INFO:root:epoch: 25/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.096111 acc: 0.663281 lr: 0.003795\n", + "INFO:root:epoch: 25/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.104070 acc: 0.662847 lr: 0.003789\n", + "INFO:root:epoch: 25/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.088546 acc: 0.667812 lr: 0.003782\n", + "INFO:root:epoch: 25/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.088519 acc: 0.669886 lr: 0.003776\n", + "INFO:root:epoch: 25/100 et: 5s eta: 9s batches: 120/313(38%) samples: 3840 loss: 1.094578 acc: 0.669010 lr: 0.003769\n", + "INFO:root:epoch: 25/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.099351 acc: 0.667548 lr: 0.003762\n", + "INFO:root:epoch: 25/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 1.101559 acc: 0.664509 lr: 0.003756\n", + "INFO:root:epoch: 25/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.098943 acc: 0.666458 lr: 0.003749\n", + "INFO:root:epoch: 25/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.095799 acc: 0.667578 lr: 0.003743\n", + "INFO:root:epoch: 25/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.085671 acc: 0.671324 lr: 0.003736\n", + "INFO:root:epoch: 25/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.087780 acc: 0.671354 lr: 0.003730\n", + "INFO:root:epoch: 25/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.086485 acc: 0.671217 lr: 0.003724\n", + "INFO:root:epoch: 25/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.092644 acc: 0.669219 lr: 0.003717\n", + "INFO:root:epoch: 25/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.095882 acc: 0.669643 lr: 0.003711\n", + "INFO:root:epoch: 25/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.095610 acc: 0.670312 lr: 0.003704\n", + "INFO:root:epoch: 25/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.094495 acc: 0.670516 lr: 0.003698\n", + "INFO:root:epoch: 25/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.093734 acc: 0.671484 lr: 0.003691\n", + "INFO:root:epoch: 25/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.091036 acc: 0.672125 lr: 0.003685\n", + "INFO:root:epoch: 25/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.096078 acc: 0.670673 lr: 0.003679\n", + "INFO:root:epoch: 25/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.102640 acc: 0.668287 lr: 0.003672\n", + "INFO:root:epoch: 25/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.105674 acc: 0.666853 lr: 0.003666\n", + "INFO:root:epoch: 25/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.107556 acc: 0.666272 lr: 0.003660\n", + "INFO:root:epoch: 25/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.108547 acc: 0.665000 lr: 0.003653\n", + "INFO:root:epoch: 25/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.110510 acc: 0.663810 lr: 0.003647\n", + "INFO:root:epoch: 26/100 starts\n", + "INFO:root:epoch: 26/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.032128 acc: 0.675000 lr: 0.003639\n", + "INFO:root:epoch: 26/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 1.131578 acc: 0.660937 lr: 0.003632\n", + "INFO:root:epoch: 26/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 1.152281 acc: 0.651042 lr: 0.003626\n", + "INFO:root:epoch: 26/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.123092 acc: 0.660156 lr: 0.003620\n", + 
"INFO:root:epoch: 26/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.104351 acc: 0.666250 lr: 0.003614\n", + "INFO:root:epoch: 26/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.095404 acc: 0.663542 lr: 0.003607\n", + "INFO:root:epoch: 26/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.109545 acc: 0.657589 lr: 0.003601\n", + "INFO:root:epoch: 26/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.099196 acc: 0.661719 lr: 0.003595\n", + "INFO:root:epoch: 26/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.082922 acc: 0.669444 lr: 0.003589\n", + "INFO:root:epoch: 26/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.089567 acc: 0.666250 lr: 0.003582\n", + "INFO:root:epoch: 26/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.100713 acc: 0.662500 lr: 0.003576\n", + "INFO:root:epoch: 26/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.100876 acc: 0.664844 lr: 0.003570\n", + "INFO:root:epoch: 26/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.099064 acc: 0.666106 lr: 0.003564\n", + "INFO:root:epoch: 26/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 1.102101 acc: 0.665402 lr: 0.003558\n", + "INFO:root:epoch: 26/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.097139 acc: 0.667083 lr: 0.003551\n", + "INFO:root:epoch: 26/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 1.098766 acc: 0.666406 lr: 0.003545\n", + "INFO:root:epoch: 26/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.102775 acc: 0.664890 lr: 0.003539\n", + "INFO:root:epoch: 26/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.102847 acc: 0.665104 lr: 0.003533\n", + "INFO:root:epoch: 26/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.104444 acc: 0.663158 lr: 0.003527\n", + "INFO:root:epoch: 26/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.108503 acc: 0.662656 lr: 0.003521\n", + "INFO:root:epoch: 26/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.100149 acc: 0.663542 lr: 0.003515\n", + "INFO:root:epoch: 26/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.092307 acc: 0.666619 lr: 0.003509\n", + "INFO:root:epoch: 26/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.098929 acc: 0.663859 lr: 0.003503\n", + "INFO:root:epoch: 26/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.098367 acc: 0.664453 lr: 0.003497\n", + "INFO:root:epoch: 26/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.101024 acc: 0.664125 lr: 0.003490\n", + "INFO:root:epoch: 26/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.100228 acc: 0.664303 lr: 0.003484\n", + "INFO:root:epoch: 26/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.100727 acc: 0.663426 lr: 0.003478\n", + "INFO:root:epoch: 26/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.097767 acc: 0.663504 lr: 0.003472\n", + "INFO:root:epoch: 26/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.094813 acc: 0.664547 lr: 0.003466\n", + "INFO:root:epoch: 26/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.095614 acc: 0.665312 lr: 0.003460\n", + "INFO:root:epoch: 26/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.097389 acc: 0.665020 lr: 0.003454\n", + "INFO:root:epoch: 27/100 starts\n", + "INFO:root:epoch: 27/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.184505 acc: 0.662500 lr: 0.003447\n", + "INFO:root:epoch: 27/100 et: 0s eta: 
13s batches: 20/313(6%) samples: 640 loss: 1.138632 acc: 0.659375 lr: 0.003441\n", + "INFO:root:epoch: 27/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 1.120402 acc: 0.669792 lr: 0.003435\n", + "INFO:root:epoch: 27/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.113013 acc: 0.670312 lr: 0.003429\n", + "INFO:root:epoch: 27/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.081761 acc: 0.670625 lr: 0.003423\n", + "INFO:root:epoch: 27/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.101537 acc: 0.664062 lr: 0.003417\n", + "INFO:root:epoch: 27/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.086233 acc: 0.666518 lr: 0.003411\n", + "INFO:root:epoch: 27/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.086780 acc: 0.663281 lr: 0.003405\n", + "INFO:root:epoch: 27/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.085530 acc: 0.664931 lr: 0.003399\n", + "INFO:root:epoch: 27/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.083584 acc: 0.665000 lr: 0.003393\n", + "INFO:root:epoch: 27/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.077050 acc: 0.667898 lr: 0.003387\n", + "INFO:root:epoch: 27/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.086782 acc: 0.665885 lr: 0.003382\n", + "INFO:root:epoch: 27/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.092208 acc: 0.663462 lr: 0.003376\n", + "INFO:root:epoch: 27/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 1.100480 acc: 0.661830 lr: 0.003370\n", + "INFO:root:epoch: 27/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.097543 acc: 0.662708 lr: 0.003364\n", + "INFO:root:epoch: 27/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.103326 acc: 0.661914 lr: 0.003358\n", + "INFO:root:epoch: 27/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.094556 acc: 0.664154 lr: 0.003352\n", + "INFO:root:epoch: 27/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.097146 acc: 0.662674 lr: 0.003347\n", + "INFO:root:epoch: 27/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.097222 acc: 0.662829 lr: 0.003341\n", + "INFO:root:epoch: 27/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.095615 acc: 0.663281 lr: 0.003335\n", + "INFO:root:epoch: 27/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.095298 acc: 0.662649 lr: 0.003329\n", + "INFO:root:epoch: 27/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.093007 acc: 0.662358 lr: 0.003323\n", + "INFO:root:epoch: 27/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.097329 acc: 0.660462 lr: 0.003318\n", + "INFO:root:epoch: 27/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.101940 acc: 0.659635 lr: 0.003312\n", + "INFO:root:epoch: 27/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.101342 acc: 0.658500 lr: 0.003306\n", + "INFO:root:epoch: 27/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.098830 acc: 0.660096 lr: 0.003300\n", + "INFO:root:epoch: 27/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.104528 acc: 0.659838 lr: 0.003295\n", + "INFO:root:epoch: 27/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.100905 acc: 0.661272 lr: 0.003289\n", + "INFO:root:epoch: 27/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.102857 acc: 0.660560 lr: 0.003283\n", + "INFO:root:epoch: 27/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.105049 acc: 0.659792 lr: 
0.003278\n", + "INFO:root:epoch: 27/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.103576 acc: 0.660081 lr: 0.003272\n", + "INFO:root:epoch: 28/100 starts\n", + "INFO:root:epoch: 28/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.227883 acc: 0.637500 lr: 0.003265\n", + "INFO:root:epoch: 28/100 et: 0s eta: 14s batches: 20/313(6%) samples: 640 loss: 1.145105 acc: 0.670313 lr: 0.003259\n", + "INFO:root:epoch: 28/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 1.129914 acc: 0.670833 lr: 0.003253\n", + "INFO:root:epoch: 28/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.166356 acc: 0.654687 lr: 0.003248\n", + "INFO:root:epoch: 28/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.128786 acc: 0.663750 lr: 0.003242\n", + "INFO:root:epoch: 28/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.114269 acc: 0.670833 lr: 0.003236\n", + "INFO:root:epoch: 28/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.103913 acc: 0.672321 lr: 0.003231\n", + "INFO:root:epoch: 28/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.088877 acc: 0.677344 lr: 0.003225\n", + "INFO:root:epoch: 28/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.089763 acc: 0.677083 lr: 0.003220\n", + "INFO:root:epoch: 28/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.081860 acc: 0.675000 lr: 0.003214\n", + "INFO:root:epoch: 28/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.081396 acc: 0.676705 lr: 0.003209\n", + "INFO:root:epoch: 28/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.083598 acc: 0.672656 lr: 0.003203\n", + "INFO:root:epoch: 28/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.074400 acc: 0.675000 lr: 0.003197\n", + "INFO:root:epoch: 28/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 1.066627 acc: 0.675223 lr: 0.003192\n", + "INFO:root:epoch: 28/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.074734 acc: 0.672083 lr: 0.003186\n", + "INFO:root:epoch: 28/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.073389 acc: 0.674219 lr: 0.003181\n", + "INFO:root:epoch: 28/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.073848 acc: 0.675184 lr: 0.003175\n", + "INFO:root:epoch: 28/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.065460 acc: 0.676562 lr: 0.003170\n", + "INFO:root:epoch: 28/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.056416 acc: 0.679605 lr: 0.003164\n", + "INFO:root:epoch: 28/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.055903 acc: 0.677656 lr: 0.003159\n", + "INFO:root:epoch: 28/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.056110 acc: 0.676637 lr: 0.003153\n", + "INFO:root:epoch: 28/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.054593 acc: 0.677131 lr: 0.003148\n", + "INFO:root:epoch: 28/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.055392 acc: 0.675815 lr: 0.003143\n", + "INFO:root:epoch: 28/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.053244 acc: 0.675911 lr: 0.003137\n", + "INFO:root:epoch: 28/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.051967 acc: 0.675625 lr: 0.003132\n", + "INFO:root:epoch: 28/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.052468 acc: 0.675361 lr: 0.003126\n", + "INFO:root:epoch: 28/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.052359 acc: 0.676042 lr: 0.003121\n", + "INFO:root:epoch: 28/100 et: 
12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.051918 acc: 0.676451 lr: 0.003115\n", + "INFO:root:epoch: 28/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.052795 acc: 0.675754 lr: 0.003110\n", + "INFO:root:epoch: 28/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.060497 acc: 0.674167 lr: 0.003105\n", + "INFO:root:epoch: 28/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.061997 acc: 0.673286 lr: 0.003099\n", + "INFO:root:epoch: 29/100 starts\n", + "INFO:root:epoch: 29/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.970789 acc: 0.700000 lr: 0.003092\n", + "INFO:root:epoch: 29/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.958573 acc: 0.709375 lr: 0.003087\n", + "INFO:root:epoch: 29/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.977617 acc: 0.702083 lr: 0.003082\n", + "INFO:root:epoch: 29/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.991939 acc: 0.691406 lr: 0.003076\n", + "INFO:root:epoch: 29/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.050411 acc: 0.686250 lr: 0.003071\n", + "INFO:root:epoch: 29/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.055734 acc: 0.684375 lr: 0.003066\n", + "INFO:root:epoch: 29/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.057889 acc: 0.683929 lr: 0.003060\n", + "INFO:root:epoch: 29/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.046052 acc: 0.686719 lr: 0.003055\n", + "INFO:root:epoch: 29/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.056212 acc: 0.683681 lr: 0.003050\n", + "INFO:root:epoch: 29/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.045250 acc: 0.687813 lr: 0.003044\n", + "INFO:root:epoch: 29/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.047811 acc: 0.685795 lr: 0.003039\n", + "INFO:root:epoch: 29/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.047982 acc: 0.685156 lr: 0.003034\n", + "INFO:root:epoch: 29/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.045031 acc: 0.684135 lr: 0.003029\n", + "INFO:root:epoch: 29/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 1.062456 acc: 0.680134 lr: 0.003023\n", + "INFO:root:epoch: 29/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.055318 acc: 0.682292 lr: 0.003018\n", + "INFO:root:epoch: 29/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.055131 acc: 0.683398 lr: 0.003013\n", + "INFO:root:epoch: 29/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.054986 acc: 0.682353 lr: 0.003008\n", + "INFO:root:epoch: 29/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.056818 acc: 0.681771 lr: 0.003003\n", + "INFO:root:epoch: 29/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.054681 acc: 0.681414 lr: 0.002997\n", + "INFO:root:epoch: 29/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.049460 acc: 0.682969 lr: 0.002992\n", + "INFO:root:epoch: 29/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.051103 acc: 0.682887 lr: 0.002987\n", + "INFO:root:epoch: 29/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.052097 acc: 0.682386 lr: 0.002982\n", + "INFO:root:epoch: 29/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.051775 acc: 0.682065 lr: 0.002977\n", + "INFO:root:epoch: 29/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.051963 acc: 0.681641 lr: 0.002971\n", + "INFO:root:epoch: 29/100 et: 11s eta: 2s batches: 250/313(79%) samples: 
8000 loss: 1.051166 acc: 0.682250 lr: 0.002966\n", + "INFO:root:epoch: 29/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.053132 acc: 0.681611 lr: 0.002961\n", + "INFO:root:epoch: 29/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.048926 acc: 0.683218 lr: 0.002956\n", + "INFO:root:epoch: 29/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.055068 acc: 0.680246 lr: 0.002951\n", + "INFO:root:epoch: 29/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.052462 acc: 0.681034 lr: 0.002946\n", + "INFO:root:epoch: 29/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.053347 acc: 0.680313 lr: 0.002941\n", + "INFO:root:epoch: 29/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.051773 acc: 0.679335 lr: 0.002936\n", + "INFO:root:epoch: 30/100 starts\n", + "INFO:root:epoch: 30/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.058173 acc: 0.687500 lr: 0.002929\n", + "INFO:root:epoch: 30/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 1.028388 acc: 0.709375 lr: 0.002924\n", + "INFO:root:epoch: 30/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 1.005509 acc: 0.704167 lr: 0.002919\n", + "INFO:root:epoch: 30/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.011474 acc: 0.703906 lr: 0.002914\n", + "INFO:root:epoch: 30/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.029843 acc: 0.694375 lr: 0.002909\n", + "INFO:root:epoch: 30/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.032557 acc: 0.692188 lr: 0.002904\n", + "INFO:root:epoch: 30/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.029639 acc: 0.687946 lr: 0.002899\n", + "INFO:root:epoch: 30/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.019300 acc: 0.691797 lr: 0.002894\n", + "INFO:root:epoch: 30/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.022530 acc: 0.690278 lr: 0.002889\n", + "INFO:root:epoch: 30/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.038184 acc: 0.687188 lr: 0.002884\n", + "INFO:root:epoch: 30/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.032987 acc: 0.690341 lr: 0.002879\n", + "INFO:root:epoch: 30/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.038354 acc: 0.688021 lr: 0.002874\n", + "INFO:root:epoch: 30/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.044560 acc: 0.686779 lr: 0.002869\n", + "INFO:root:epoch: 30/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 1.046210 acc: 0.685268 lr: 0.002864\n", + "INFO:root:epoch: 30/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.041039 acc: 0.684792 lr: 0.002859\n", + "INFO:root:epoch: 30/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.040980 acc: 0.683008 lr: 0.002854\n", + "INFO:root:epoch: 30/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.038761 acc: 0.684559 lr: 0.002849\n", + "INFO:root:epoch: 30/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.037539 acc: 0.683333 lr: 0.002844\n", + "INFO:root:epoch: 30/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.029771 acc: 0.684868 lr: 0.002839\n", + "INFO:root:epoch: 30/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.030799 acc: 0.685156 lr: 0.002834\n", + "INFO:root:epoch: 30/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.030693 acc: 0.686012 lr: 0.002829\n", + "INFO:root:epoch: 30/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.032144 acc: 0.685511 lr: 
0.002824\n", + "INFO:root:epoch: 30/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.032008 acc: 0.683967 lr: 0.002819\n", + "INFO:root:epoch: 30/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.028640 acc: 0.684896 lr: 0.002815\n", + "INFO:root:epoch: 30/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.025480 acc: 0.686500 lr: 0.002810\n", + "INFO:root:epoch: 30/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.027657 acc: 0.684976 lr: 0.002805\n", + "INFO:root:epoch: 30/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 1.031818 acc: 0.683565 lr: 0.002800\n", + "INFO:root:epoch: 30/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.026295 acc: 0.684933 lr: 0.002795\n", + "INFO:root:epoch: 30/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.026661 acc: 0.685453 lr: 0.002790\n", + "INFO:root:epoch: 30/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.025083 acc: 0.685625 lr: 0.002785\n", + "INFO:root:epoch: 30/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.023058 acc: 0.686794 lr: 0.002781\n", + "INFO:root:epoch: 31/100 starts\n", + "INFO:root:epoch: 31/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.878643 acc: 0.725000 lr: 0.002774\n", + "INFO:root:epoch: 31/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.947811 acc: 0.707812 lr: 0.002770\n", + "INFO:root:epoch: 31/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.990170 acc: 0.684375 lr: 0.002765\n", + "INFO:root:epoch: 31/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.010024 acc: 0.678906 lr: 0.002760\n", + "INFO:root:epoch: 31/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.987055 acc: 0.692500 lr: 0.002755\n", + "INFO:root:epoch: 31/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.997396 acc: 0.686979 lr: 0.002750\n", + "INFO:root:epoch: 31/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.007594 acc: 0.682143 lr: 0.002746\n", + "INFO:root:epoch: 31/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.005843 acc: 0.683594 lr: 0.002741\n", + "INFO:root:epoch: 31/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.002664 acc: 0.683681 lr: 0.002736\n", + "INFO:root:epoch: 31/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.002633 acc: 0.682812 lr: 0.002731\n", + "INFO:root:epoch: 31/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.006251 acc: 0.684091 lr: 0.002727\n", + "INFO:root:epoch: 31/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.006203 acc: 0.684635 lr: 0.002722\n", + "INFO:root:epoch: 31/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.008185 acc: 0.685096 lr: 0.002717\n", + "INFO:root:epoch: 31/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 1.006810 acc: 0.686607 lr: 0.002713\n", + "INFO:root:epoch: 31/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.003456 acc: 0.688333 lr: 0.002708\n", + "INFO:root:epoch: 31/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.008638 acc: 0.688477 lr: 0.002703\n", + "INFO:root:epoch: 31/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 1.011245 acc: 0.688787 lr: 0.002699\n", + "INFO:root:epoch: 31/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 1.007533 acc: 0.688889 lr: 0.002694\n", + "INFO:root:epoch: 31/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 1.008936 acc: 0.688322 lr: 0.002689\n", + "INFO:root:epoch: 31/100 
et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 1.008792 acc: 0.687969 lr: 0.002685\n", + "INFO:root:epoch: 31/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 1.008772 acc: 0.688542 lr: 0.002680\n", + "INFO:root:epoch: 31/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 1.004743 acc: 0.689773 lr: 0.002675\n", + "INFO:root:epoch: 31/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 1.006531 acc: 0.689946 lr: 0.002671\n", + "INFO:root:epoch: 31/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 1.007611 acc: 0.690495 lr: 0.002666\n", + "INFO:root:epoch: 31/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 1.009190 acc: 0.689000 lr: 0.002661\n", + "INFO:root:epoch: 31/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 1.009587 acc: 0.688101 lr: 0.002657\n", + "INFO:root:epoch: 31/100 et: 12s eta: 2s batches: 270/313(86%) samples: 8640 loss: 1.005554 acc: 0.689236 lr: 0.002652\n", + "INFO:root:epoch: 31/100 et: 13s eta: 1s batches: 280/313(89%) samples: 8960 loss: 1.007945 acc: 0.687612 lr: 0.002648\n", + "INFO:root:epoch: 31/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 1.008952 acc: 0.688470 lr: 0.002643\n", + "INFO:root:epoch: 31/100 et: 14s eta: 0s batches: 300/313(95%) samples: 9600 loss: 1.002400 acc: 0.690521 lr: 0.002638\n", + "INFO:root:epoch: 31/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 1.000607 acc: 0.691331 lr: 0.002634\n", + "INFO:root:epoch: 32/100 starts\n", + "INFO:root:epoch: 32/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 1.120256 acc: 0.646875 lr: 0.002628\n", + "INFO:root:epoch: 32/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 1.032334 acc: 0.671875 lr: 0.002623\n", + "INFO:root:epoch: 32/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 1.055711 acc: 0.673958 lr: 0.002619\n", + "INFO:root:epoch: 32/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 1.031622 acc: 0.691406 lr: 0.002614\n", + "INFO:root:epoch: 32/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 1.043867 acc: 0.685000 lr: 0.002610\n", + "INFO:root:epoch: 32/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 1.012254 acc: 0.692188 lr: 0.002605\n", + "INFO:root:epoch: 32/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 1.005487 acc: 0.685714 lr: 0.002601\n", + "INFO:root:epoch: 32/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 1.022546 acc: 0.682422 lr: 0.002596\n", + "INFO:root:epoch: 32/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 1.046185 acc: 0.678125 lr: 0.002592\n", + "INFO:root:epoch: 32/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 1.050502 acc: 0.675938 lr: 0.002587\n", + "INFO:root:epoch: 32/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 1.039648 acc: 0.680114 lr: 0.002583\n", + "INFO:root:epoch: 32/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 1.029496 acc: 0.682552 lr: 0.002578\n", + "INFO:root:epoch: 32/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 1.033686 acc: 0.681971 lr: 0.002574\n", + "INFO:root:epoch: 32/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 1.024202 acc: 0.685714 lr: 0.002569\n", + "INFO:root:epoch: 32/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 1.021629 acc: 0.686875 lr: 0.002565\n", + "INFO:root:epoch: 32/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 1.012940 acc: 0.689844 lr: 0.002561\n", + "INFO:root:epoch: 32/100 et: 7s eta: 6s batches: 170/313(54%) 
+    "[... ~600 per-batch INFO log lines omitted (epochs 32-51 of 100, 313 batches/epoch): loss decreases from ~1.00 to ~0.86, accuracy rises from ~0.69 to ~0.74, lr decays from ~0.00256 to ~0.00090 ...]\n",
batches: 230/313(73%) samples: 7360 loss: 0.857090 acc: 0.736957 lr: 0.000903\n", + "INFO:root:epoch: 51/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.855965 acc: 0.737109 lr: 0.000901\n", + "INFO:root:epoch: 51/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.856065 acc: 0.736125 lr: 0.000899\n", + "INFO:root:epoch: 51/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.858335 acc: 0.735577 lr: 0.000898\n", + "INFO:root:epoch: 51/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.863159 acc: 0.734722 lr: 0.000896\n", + "INFO:root:epoch: 51/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.863976 acc: 0.734263 lr: 0.000895\n", + "INFO:root:epoch: 51/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.863092 acc: 0.734698 lr: 0.000893\n", + "INFO:root:epoch: 51/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.863980 acc: 0.734375 lr: 0.000892\n", + "INFO:root:epoch: 51/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.867559 acc: 0.732863 lr: 0.000890\n", + "INFO:root:epoch: 52/100 starts\n", + "INFO:root:epoch: 52/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.743803 acc: 0.771875 lr: 0.000888\n", + "INFO:root:epoch: 52/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.762042 acc: 0.759375 lr: 0.000887\n", + "INFO:root:epoch: 52/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.825565 acc: 0.748958 lr: 0.000885\n", + "INFO:root:epoch: 52/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.841342 acc: 0.739062 lr: 0.000884\n", + "INFO:root:epoch: 52/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.845629 acc: 0.740625 lr: 0.000882\n", + "INFO:root:epoch: 52/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.834545 acc: 0.738542 lr: 0.000881\n", + "INFO:root:epoch: 52/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.843534 acc: 0.736607 lr: 0.000879\n", + "INFO:root:epoch: 52/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.842049 acc: 0.736328 lr: 0.000877\n", + "INFO:root:epoch: 52/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.827880 acc: 0.740625 lr: 0.000876\n", + "INFO:root:epoch: 52/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.820315 acc: 0.742812 lr: 0.000874\n", + "INFO:root:epoch: 52/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.825259 acc: 0.743182 lr: 0.000873\n", + "INFO:root:epoch: 52/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.830174 acc: 0.741146 lr: 0.000871\n", + "INFO:root:epoch: 52/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.833646 acc: 0.739423 lr: 0.000870\n", + "INFO:root:epoch: 52/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.833089 acc: 0.739955 lr: 0.000868\n", + "INFO:root:epoch: 52/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.829194 acc: 0.741042 lr: 0.000867\n", + "INFO:root:epoch: 52/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.834378 acc: 0.740820 lr: 0.000865\n", + "INFO:root:epoch: 52/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.832258 acc: 0.740993 lr: 0.000864\n", + "INFO:root:epoch: 52/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.829481 acc: 0.740625 lr: 0.000862\n", + "INFO:root:epoch: 52/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.831247 acc: 0.740461 lr: 0.000861\n", + "INFO:root:epoch: 52/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 
0.829492 acc: 0.740781 lr: 0.000859\n", + "INFO:root:epoch: 52/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.826394 acc: 0.742262 lr: 0.000858\n", + "INFO:root:epoch: 52/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.829801 acc: 0.741051 lr: 0.000856\n", + "INFO:root:epoch: 52/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.826304 acc: 0.741168 lr: 0.000855\n", + "INFO:root:epoch: 52/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.826644 acc: 0.740625 lr: 0.000853\n", + "INFO:root:epoch: 52/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.828600 acc: 0.740000 lr: 0.000852\n", + "INFO:root:epoch: 52/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.828024 acc: 0.740625 lr: 0.000851\n", + "INFO:root:epoch: 52/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.828098 acc: 0.740394 lr: 0.000849\n", + "INFO:root:epoch: 52/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.832659 acc: 0.738281 lr: 0.000848\n", + "INFO:root:epoch: 52/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.832256 acc: 0.738578 lr: 0.000846\n", + "INFO:root:epoch: 52/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.828255 acc: 0.740208 lr: 0.000845\n", + "INFO:root:epoch: 52/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.828766 acc: 0.740524 lr: 0.000843\n", + "INFO:root:epoch: 53/100 starts\n", + "INFO:root:epoch: 53/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.894693 acc: 0.715625 lr: 0.000841\n", + "INFO:root:epoch: 53/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.920465 acc: 0.707812 lr: 0.000840\n", + "INFO:root:epoch: 53/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.854437 acc: 0.734375 lr: 0.000838\n", + "INFO:root:epoch: 53/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.836487 acc: 0.739063 lr: 0.000837\n", + "INFO:root:epoch: 53/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.837251 acc: 0.740625 lr: 0.000835\n", + "INFO:root:epoch: 53/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.835642 acc: 0.744271 lr: 0.000834\n", + "INFO:root:epoch: 53/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.848960 acc: 0.740179 lr: 0.000833\n", + "INFO:root:epoch: 53/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.836181 acc: 0.744141 lr: 0.000831\n", + "INFO:root:epoch: 53/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.841505 acc: 0.745139 lr: 0.000830\n", + "INFO:root:epoch: 53/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.843772 acc: 0.744688 lr: 0.000828\n", + "INFO:root:epoch: 53/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.843461 acc: 0.743182 lr: 0.000827\n", + "INFO:root:epoch: 53/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.847543 acc: 0.742708 lr: 0.000825\n", + "INFO:root:epoch: 53/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.850061 acc: 0.740385 lr: 0.000824\n", + "INFO:root:epoch: 53/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.852352 acc: 0.736384 lr: 0.000823\n", + "INFO:root:epoch: 53/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.850986 acc: 0.738333 lr: 0.000821\n", + "INFO:root:epoch: 53/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 0.856137 acc: 0.736133 lr: 0.000820\n", + "INFO:root:epoch: 53/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.855595 acc: 0.736397 lr: 0.000818\n", 
+ "INFO:root:epoch: 53/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.850728 acc: 0.736632 lr: 0.000817\n", + "INFO:root:epoch: 53/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.847100 acc: 0.737500 lr: 0.000815\n", + "INFO:root:epoch: 53/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.846957 acc: 0.736719 lr: 0.000814\n", + "INFO:root:epoch: 53/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.851465 acc: 0.734226 lr: 0.000813\n", + "INFO:root:epoch: 53/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.858108 acc: 0.731250 lr: 0.000811\n", + "INFO:root:epoch: 53/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.851864 acc: 0.733288 lr: 0.000810\n", + "INFO:root:epoch: 53/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.851618 acc: 0.733594 lr: 0.000808\n", + "INFO:root:epoch: 53/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.853848 acc: 0.733375 lr: 0.000807\n", + "INFO:root:epoch: 53/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.857994 acc: 0.731851 lr: 0.000806\n", + "INFO:root:epoch: 53/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.855323 acc: 0.733218 lr: 0.000804\n", + "INFO:root:epoch: 53/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.856908 acc: 0.732812 lr: 0.000803\n", + "INFO:root:epoch: 53/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.855603 acc: 0.732543 lr: 0.000801\n", + "INFO:root:epoch: 53/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.856527 acc: 0.731979 lr: 0.000800\n", + "INFO:root:epoch: 53/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.855646 acc: 0.733367 lr: 0.000799\n", + "INFO:root:epoch: 54/100 starts\n", + "INFO:root:epoch: 54/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.839867 acc: 0.753125 lr: 0.000797\n", + "INFO:root:epoch: 54/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.853545 acc: 0.742188 lr: 0.000795\n", + "INFO:root:epoch: 54/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.918257 acc: 0.725000 lr: 0.000794\n", + "INFO:root:epoch: 54/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.896112 acc: 0.725781 lr: 0.000793\n", + "INFO:root:epoch: 54/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.889089 acc: 0.729375 lr: 0.000791\n", + "INFO:root:epoch: 54/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.864582 acc: 0.732812 lr: 0.000790\n", + "INFO:root:epoch: 54/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.863641 acc: 0.731250 lr: 0.000789\n", + "INFO:root:epoch: 54/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.870458 acc: 0.729687 lr: 0.000787\n", + "INFO:root:epoch: 54/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.856251 acc: 0.734375 lr: 0.000786\n", + "INFO:root:epoch: 54/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.855672 acc: 0.733750 lr: 0.000785\n", + "INFO:root:epoch: 54/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.848062 acc: 0.737216 lr: 0.000783\n", + "INFO:root:epoch: 54/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.847672 acc: 0.737500 lr: 0.000782\n", + "INFO:root:epoch: 54/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.857222 acc: 0.735817 lr: 0.000780\n", + "INFO:root:epoch: 54/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.853165 acc: 0.737054 lr: 0.000779\n", + "INFO:root:epoch: 54/100 et: 6s eta: 
7s batches: 150/313(47%) samples: 4800 loss: 0.851349 acc: 0.737292 lr: 0.000778\n", + "INFO:root:epoch: 54/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.848894 acc: 0.740820 lr: 0.000776\n", + "INFO:root:epoch: 54/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.851145 acc: 0.739522 lr: 0.000775\n", + "INFO:root:epoch: 54/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.848070 acc: 0.740625 lr: 0.000774\n", + "INFO:root:epoch: 54/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.847512 acc: 0.740625 lr: 0.000772\n", + "INFO:root:epoch: 54/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.850677 acc: 0.739375 lr: 0.000771\n", + "INFO:root:epoch: 54/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.847481 acc: 0.739286 lr: 0.000770\n", + "INFO:root:epoch: 54/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.845264 acc: 0.739347 lr: 0.000768\n", + "INFO:root:epoch: 54/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.844385 acc: 0.738043 lr: 0.000767\n", + "INFO:root:epoch: 54/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.847182 acc: 0.736979 lr: 0.000766\n", + "INFO:root:epoch: 54/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.844987 acc: 0.737250 lr: 0.000764\n", + "INFO:root:epoch: 54/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.842700 acc: 0.738582 lr: 0.000763\n", + "INFO:root:epoch: 54/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.841688 acc: 0.738310 lr: 0.000762\n", + "INFO:root:epoch: 54/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.845397 acc: 0.737612 lr: 0.000760\n", + "INFO:root:epoch: 54/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.845304 acc: 0.737716 lr: 0.000759\n", + "INFO:root:epoch: 54/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.847718 acc: 0.738333 lr: 0.000758\n", + "INFO:root:epoch: 54/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.846684 acc: 0.738810 lr: 0.000757\n", + "INFO:root:epoch: 55/100 starts\n", + "INFO:root:epoch: 55/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.825940 acc: 0.775000 lr: 0.000755\n", + "INFO:root:epoch: 55/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.822556 acc: 0.745313 lr: 0.000753\n", + "INFO:root:epoch: 55/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.832248 acc: 0.738542 lr: 0.000752\n", + "INFO:root:epoch: 55/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.839558 acc: 0.739844 lr: 0.000751\n", + "INFO:root:epoch: 55/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.862434 acc: 0.735000 lr: 0.000750\n", + "INFO:root:epoch: 55/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.850984 acc: 0.738021 lr: 0.000748\n", + "INFO:root:epoch: 55/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.866064 acc: 0.730804 lr: 0.000747\n", + "INFO:root:epoch: 55/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.864999 acc: 0.731250 lr: 0.000746\n", + "INFO:root:epoch: 55/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.890002 acc: 0.724306 lr: 0.000744\n", + "INFO:root:epoch: 55/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.880109 acc: 0.726875 lr: 0.000743\n", + "INFO:root:epoch: 55/100 et: 4s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.875342 acc: 0.729261 lr: 0.000742\n", + "INFO:root:epoch: 55/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 
loss: 0.877625 acc: 0.729167 lr: 0.000741\n", + "INFO:root:epoch: 55/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.867685 acc: 0.732692 lr: 0.000739\n", + "INFO:root:epoch: 55/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.870108 acc: 0.731473 lr: 0.000738\n", + "INFO:root:epoch: 55/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.865714 acc: 0.732708 lr: 0.000737\n", + "INFO:root:epoch: 55/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 0.858313 acc: 0.735352 lr: 0.000735\n", + "INFO:root:epoch: 55/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.852047 acc: 0.736213 lr: 0.000734\n", + "INFO:root:epoch: 55/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.851131 acc: 0.736458 lr: 0.000733\n", + "INFO:root:epoch: 55/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.852184 acc: 0.735526 lr: 0.000732\n", + "INFO:root:epoch: 55/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.853389 acc: 0.735313 lr: 0.000730\n", + "INFO:root:epoch: 55/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.855622 acc: 0.734226 lr: 0.000729\n", + "INFO:root:epoch: 55/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.857671 acc: 0.733523 lr: 0.000728\n", + "INFO:root:epoch: 55/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.856719 acc: 0.734103 lr: 0.000727\n", + "INFO:root:epoch: 55/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.852812 acc: 0.735547 lr: 0.000725\n", + "INFO:root:epoch: 55/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.848211 acc: 0.737875 lr: 0.000724\n", + "INFO:root:epoch: 55/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.849574 acc: 0.738101 lr: 0.000723\n", + "INFO:root:epoch: 55/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.843678 acc: 0.740046 lr: 0.000722\n", + "INFO:root:epoch: 55/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.842221 acc: 0.740067 lr: 0.000720\n", + "INFO:root:epoch: 55/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.841687 acc: 0.740086 lr: 0.000719\n", + "INFO:root:epoch: 55/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.843619 acc: 0.738854 lr: 0.000718\n", + "INFO:root:epoch: 55/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.843388 acc: 0.738710 lr: 0.000717\n", + "INFO:root:epoch: 56/100 starts\n", + "INFO:root:epoch: 56/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.878755 acc: 0.743750 lr: 0.000715\n", + "INFO:root:epoch: 56/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.891985 acc: 0.732812 lr: 0.000714\n", + "INFO:root:epoch: 56/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.860375 acc: 0.737500 lr: 0.000712\n", + "INFO:root:epoch: 56/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.871171 acc: 0.731250 lr: 0.000711\n", + "INFO:root:epoch: 56/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.875896 acc: 0.728125 lr: 0.000710\n", + "INFO:root:epoch: 56/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.884268 acc: 0.722396 lr: 0.000709\n", + "INFO:root:epoch: 56/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.893236 acc: 0.718304 lr: 0.000708\n", + "INFO:root:epoch: 56/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.881897 acc: 0.721094 lr: 0.000706\n", + "INFO:root:epoch: 56/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.868662 acc: 0.729514 lr: 
0.000705\n", + "INFO:root:epoch: 56/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.859089 acc: 0.730000 lr: 0.000704\n", + "INFO:root:epoch: 56/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.851870 acc: 0.731534 lr: 0.000703\n", + "INFO:root:epoch: 56/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.856896 acc: 0.731250 lr: 0.000701\n", + "INFO:root:epoch: 56/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.850384 acc: 0.733894 lr: 0.000700\n", + "INFO:root:epoch: 56/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.849512 acc: 0.734375 lr: 0.000699\n", + "INFO:root:epoch: 56/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.844825 acc: 0.736458 lr: 0.000698\n", + "INFO:root:epoch: 56/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.841548 acc: 0.737891 lr: 0.000697\n", + "INFO:root:epoch: 56/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.841932 acc: 0.736765 lr: 0.000695\n", + "INFO:root:epoch: 56/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.841608 acc: 0.735243 lr: 0.000694\n", + "INFO:root:epoch: 56/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.836072 acc: 0.737336 lr: 0.000693\n", + "INFO:root:epoch: 56/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.837611 acc: 0.737813 lr: 0.000692\n", + "INFO:root:epoch: 56/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.838714 acc: 0.736905 lr: 0.000691\n", + "INFO:root:epoch: 56/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.838372 acc: 0.737500 lr: 0.000689\n", + "INFO:root:epoch: 56/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.836598 acc: 0.737772 lr: 0.000688\n", + "INFO:root:epoch: 56/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.841651 acc: 0.737109 lr: 0.000687\n", + "INFO:root:epoch: 56/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.838858 acc: 0.737125 lr: 0.000686\n", + "INFO:root:epoch: 56/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.833190 acc: 0.739183 lr: 0.000685\n", + "INFO:root:epoch: 56/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.832718 acc: 0.739699 lr: 0.000683\n", + "INFO:root:epoch: 56/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.832299 acc: 0.739844 lr: 0.000682\n", + "INFO:root:epoch: 56/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.830403 acc: 0.740841 lr: 0.000681\n", + "INFO:root:epoch: 56/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.832268 acc: 0.740208 lr: 0.000680\n", + "INFO:root:epoch: 56/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.835169 acc: 0.738710 lr: 0.000679\n", + "INFO:root:epoch: 57/100 starts\n", + "INFO:root:epoch: 57/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.909517 acc: 0.725000 lr: 0.000677\n", + "INFO:root:epoch: 57/100 et: 0s eta: 14s batches: 20/313(6%) samples: 640 loss: 0.867697 acc: 0.728125 lr: 0.000676\n", + "INFO:root:epoch: 57/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.840410 acc: 0.731250 lr: 0.000675\n", + "INFO:root:epoch: 57/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.805389 acc: 0.741406 lr: 0.000674\n", + "INFO:root:epoch: 57/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.814985 acc: 0.741250 lr: 0.000673\n", + "INFO:root:epoch: 57/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.809866 acc: 0.744271 lr: 0.000671\n", + "INFO:root:epoch: 57/100 
et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.817273 acc: 0.741964 lr: 0.000670\n", + "INFO:root:epoch: 57/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.812742 acc: 0.741016 lr: 0.000669\n", + "INFO:root:epoch: 57/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.815324 acc: 0.743056 lr: 0.000668\n", + "INFO:root:epoch: 57/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.821292 acc: 0.738437 lr: 0.000667\n", + "INFO:root:epoch: 57/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.816058 acc: 0.739773 lr: 0.000666\n", + "INFO:root:epoch: 57/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.817590 acc: 0.739583 lr: 0.000664\n", + "INFO:root:epoch: 57/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.818817 acc: 0.738221 lr: 0.000663\n", + "INFO:root:epoch: 57/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.816559 acc: 0.736607 lr: 0.000662\n", + "INFO:root:epoch: 57/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.825244 acc: 0.737083 lr: 0.000661\n", + "INFO:root:epoch: 57/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.823392 acc: 0.738281 lr: 0.000660\n", + "INFO:root:epoch: 57/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.823858 acc: 0.739154 lr: 0.000659\n", + "INFO:root:epoch: 57/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.822548 acc: 0.739757 lr: 0.000658\n", + "INFO:root:epoch: 57/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.820885 acc: 0.741283 lr: 0.000656\n", + "INFO:root:epoch: 57/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.820076 acc: 0.742500 lr: 0.000655\n", + "INFO:root:epoch: 57/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.824961 acc: 0.741220 lr: 0.000654\n", + "INFO:root:epoch: 57/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.825085 acc: 0.741335 lr: 0.000653\n", + "INFO:root:epoch: 57/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.827870 acc: 0.739810 lr: 0.000652\n", + "INFO:root:epoch: 57/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.828434 acc: 0.739844 lr: 0.000651\n", + "INFO:root:epoch: 57/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.828785 acc: 0.739250 lr: 0.000650\n", + "INFO:root:epoch: 57/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.827633 acc: 0.739543 lr: 0.000648\n", + "INFO:root:epoch: 57/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.825659 acc: 0.739699 lr: 0.000647\n", + "INFO:root:epoch: 57/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.823095 acc: 0.739732 lr: 0.000646\n", + "INFO:root:epoch: 57/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.823756 acc: 0.739009 lr: 0.000645\n", + "INFO:root:epoch: 57/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.825304 acc: 0.738854 lr: 0.000644\n", + "INFO:root:epoch: 57/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.827320 acc: 0.738206 lr: 0.000643\n", + "INFO:root:epoch: 58/100 starts\n", + "INFO:root:epoch: 58/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.798118 acc: 0.734375 lr: 0.000641\n", + "INFO:root:epoch: 58/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.830979 acc: 0.731250 lr: 0.000640\n", + "INFO:root:epoch: 58/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.848662 acc: 0.725000 lr: 0.000639\n", + "INFO:root:epoch: 58/100 et: 1s eta: 12s batches: 40/313(12%) 
samples: 1280 loss: 0.838207 acc: 0.728906 lr: 0.000638\n", + "INFO:root:epoch: 58/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.838796 acc: 0.736875 lr: 0.000637\n", + "INFO:root:epoch: 58/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.818739 acc: 0.743229 lr: 0.000636\n", + "INFO:root:epoch: 58/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.809720 acc: 0.745536 lr: 0.000635\n", + "INFO:root:epoch: 58/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.822597 acc: 0.743750 lr: 0.000634\n", + "INFO:root:epoch: 58/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.830851 acc: 0.738889 lr: 0.000633\n", + "INFO:root:epoch: 58/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.834010 acc: 0.738750 lr: 0.000632\n", + "INFO:root:epoch: 58/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.829917 acc: 0.738352 lr: 0.000630\n", + "INFO:root:epoch: 58/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.823273 acc: 0.741667 lr: 0.000629\n", + "INFO:root:epoch: 58/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.812068 acc: 0.746154 lr: 0.000628\n", + "INFO:root:epoch: 58/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.811988 acc: 0.746875 lr: 0.000627\n", + "INFO:root:epoch: 58/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.805599 acc: 0.748542 lr: 0.000626\n", + "INFO:root:epoch: 58/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.806976 acc: 0.748242 lr: 0.000625\n", + "INFO:root:epoch: 58/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.811154 acc: 0.746507 lr: 0.000624\n", + "INFO:root:epoch: 58/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.809796 acc: 0.746181 lr: 0.000623\n", + "INFO:root:epoch: 58/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.809086 acc: 0.748520 lr: 0.000622\n", + "INFO:root:epoch: 58/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.810409 acc: 0.746719 lr: 0.000621\n", + "INFO:root:epoch: 58/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.812442 acc: 0.745833 lr: 0.000620\n", + "INFO:root:epoch: 58/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.811211 acc: 0.746591 lr: 0.000619\n", + "INFO:root:epoch: 58/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.812541 acc: 0.746875 lr: 0.000617\n", + "INFO:root:epoch: 58/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.816855 acc: 0.744531 lr: 0.000616\n", + "INFO:root:epoch: 58/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.819121 acc: 0.744000 lr: 0.000615\n", + "INFO:root:epoch: 58/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.819055 acc: 0.742668 lr: 0.000614\n", + "INFO:root:epoch: 58/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.820233 acc: 0.742940 lr: 0.000613\n", + "INFO:root:epoch: 58/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.818864 acc: 0.743973 lr: 0.000612\n", + "INFO:root:epoch: 58/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.821589 acc: 0.743211 lr: 0.000611\n", + "INFO:root:epoch: 58/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.821042 acc: 0.743229 lr: 0.000610\n", + "INFO:root:epoch: 58/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.823857 acc: 0.743145 lr: 0.000609\n", + "INFO:root:epoch: 59/100 starts\n", + "INFO:root:epoch: 59/100 et: 0s eta: 14s batches: 10/313(3%) samples: 320 loss: 0.789549 acc: 
0.765625 lr: 0.000608\n", + "INFO:root:epoch: 59/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.790341 acc: 0.760938 lr: 0.000607\n", + "INFO:root:epoch: 59/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.806979 acc: 0.755208 lr: 0.000605\n", + "INFO:root:epoch: 59/100 et: 1s eta: 13s batches: 40/313(12%) samples: 1280 loss: 0.815923 acc: 0.753906 lr: 0.000604\n", + "INFO:root:epoch: 59/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.822824 acc: 0.753750 lr: 0.000603\n", + "INFO:root:epoch: 59/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.822709 acc: 0.751563 lr: 0.000602\n", + "INFO:root:epoch: 59/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.832924 acc: 0.747321 lr: 0.000601\n", + "INFO:root:epoch: 59/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.847297 acc: 0.740234 lr: 0.000600\n", + "INFO:root:epoch: 59/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.860101 acc: 0.734375 lr: 0.000599\n", + "INFO:root:epoch: 59/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.860811 acc: 0.734062 lr: 0.000598\n", + "INFO:root:epoch: 59/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.854995 acc: 0.737500 lr: 0.000597\n", + "INFO:root:epoch: 59/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.848008 acc: 0.739844 lr: 0.000596\n", + "INFO:root:epoch: 59/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.834319 acc: 0.743029 lr: 0.000595\n", + "INFO:root:epoch: 59/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.833186 acc: 0.741518 lr: 0.000594\n", + "INFO:root:epoch: 59/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.835036 acc: 0.741458 lr: 0.000593\n", + "INFO:root:epoch: 59/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.833203 acc: 0.741602 lr: 0.000592\n", + "INFO:root:epoch: 59/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.833846 acc: 0.740625 lr: 0.000591\n", + "INFO:root:epoch: 59/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.831953 acc: 0.741493 lr: 0.000590\n", + "INFO:root:epoch: 59/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.831190 acc: 0.741941 lr: 0.000589\n", + "INFO:root:epoch: 59/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.839709 acc: 0.739688 lr: 0.000588\n", + "INFO:root:epoch: 59/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.841757 acc: 0.737500 lr: 0.000587\n", + "INFO:root:epoch: 59/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.842198 acc: 0.737642 lr: 0.000586\n", + "INFO:root:epoch: 59/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.841420 acc: 0.736821 lr: 0.000585\n", + "INFO:root:epoch: 59/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.838895 acc: 0.738021 lr: 0.000584\n", + "INFO:root:epoch: 59/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.834055 acc: 0.739500 lr: 0.000583\n", + "INFO:root:epoch: 59/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.834251 acc: 0.739904 lr: 0.000582\n", + "INFO:root:epoch: 59/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.834817 acc: 0.739005 lr: 0.000581\n", + "INFO:root:epoch: 59/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.835571 acc: 0.738616 lr: 0.000580\n", + "INFO:root:epoch: 59/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.833812 acc: 0.740409 lr: 0.000579\n", + "INFO:root:epoch: 59/100 et: 13s eta: 0s batches: 
300/313(95%) samples: 9600 loss: 0.830910 acc: 0.741979 lr: 0.000578\n", + "INFO:root:epoch: 59/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.831648 acc: 0.742036 lr: 0.000577\n", + "INFO:root:epoch: 60/100 starts\n", + "INFO:root:epoch: 60/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.885892 acc: 0.775000 lr: 0.000576\n", + "INFO:root:epoch: 60/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.879384 acc: 0.754688 lr: 0.000575\n", + "INFO:root:epoch: 60/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.839739 acc: 0.762500 lr: 0.000574\n", + "INFO:root:epoch: 60/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.829774 acc: 0.758594 lr: 0.000573\n", + "INFO:root:epoch: 60/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.843364 acc: 0.753125 lr: 0.000572\n", + "INFO:root:epoch: 60/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.826290 acc: 0.756250 lr: 0.000571\n", + "INFO:root:epoch: 60/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.831055 acc: 0.752232 lr: 0.000570\n", + "INFO:root:epoch: 60/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.820983 acc: 0.754297 lr: 0.000569\n", + "INFO:root:epoch: 60/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.812713 acc: 0.754167 lr: 0.000568\n", + "INFO:root:epoch: 60/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.808644 acc: 0.753750 lr: 0.000567\n", + "INFO:root:epoch: 60/100 et: 4s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.812561 acc: 0.750284 lr: 0.000566\n", + "INFO:root:epoch: 60/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.824937 acc: 0.746615 lr: 0.000565\n", + "INFO:root:epoch: 60/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.827743 acc: 0.744471 lr: 0.000564\n", + "INFO:root:epoch: 60/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.827845 acc: 0.742188 lr: 0.000563\n", + "INFO:root:epoch: 60/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.824984 acc: 0.742500 lr: 0.000562\n", + "INFO:root:epoch: 60/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.820623 acc: 0.742773 lr: 0.000561\n", + "INFO:root:epoch: 60/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.824389 acc: 0.740441 lr: 0.000560\n", + "INFO:root:epoch: 60/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.819917 acc: 0.741840 lr: 0.000559\n", + "INFO:root:epoch: 60/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.821452 acc: 0.741612 lr: 0.000558\n", + "INFO:root:epoch: 60/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.821996 acc: 0.740781 lr: 0.000557\n", + "INFO:root:epoch: 60/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.825038 acc: 0.741071 lr: 0.000556\n", + "INFO:root:epoch: 60/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.833056 acc: 0.739489 lr: 0.000555\n", + "INFO:root:epoch: 60/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.834126 acc: 0.740625 lr: 0.000554\n", + "INFO:root:epoch: 60/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.838261 acc: 0.739193 lr: 0.000553\n", + "INFO:root:epoch: 60/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.838391 acc: 0.738250 lr: 0.000552\n", + "INFO:root:epoch: 60/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.837515 acc: 0.738462 lr: 0.000551\n", + "INFO:root:epoch: 60/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.840794 
acc: 0.738079 lr: 0.000550\n", + "INFO:root:epoch: 60/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.841568 acc: 0.738058 lr: 0.000549\n", + "INFO:root:epoch: 60/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.842523 acc: 0.738362 lr: 0.000548\n", + "INFO:root:epoch: 60/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.844872 acc: 0.737187 lr: 0.000547\n", + "INFO:root:epoch: 60/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.842277 acc: 0.736794 lr: 0.000546\n", + "INFO:root:epoch: 61/100 starts\n", + "INFO:root:epoch: 61/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.805544 acc: 0.753125 lr: 0.000545\n", + "INFO:root:epoch: 61/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.833780 acc: 0.742188 lr: 0.000544\n", + "INFO:root:epoch: 61/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.839274 acc: 0.740625 lr: 0.000543\n", + "INFO:root:epoch: 61/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.833520 acc: 0.744531 lr: 0.000542\n", + "INFO:root:epoch: 61/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.835566 acc: 0.743125 lr: 0.000541\n", + "INFO:root:epoch: 61/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.820548 acc: 0.746875 lr: 0.000540\n", + "INFO:root:epoch: 61/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.813290 acc: 0.748214 lr: 0.000539\n", + "INFO:root:epoch: 61/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.806995 acc: 0.748437 lr: 0.000539\n", + "INFO:root:epoch: 61/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.822883 acc: 0.743403 lr: 0.000538\n", + "INFO:root:epoch: 61/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.815852 acc: 0.746250 lr: 0.000537\n", + "INFO:root:epoch: 61/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.810577 acc: 0.747159 lr: 0.000536\n", + "INFO:root:epoch: 61/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.816468 acc: 0.744010 lr: 0.000535\n", + "INFO:root:epoch: 61/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.810262 acc: 0.745913 lr: 0.000534\n", + "INFO:root:epoch: 61/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.818650 acc: 0.743973 lr: 0.000533\n", + "INFO:root:epoch: 61/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.823645 acc: 0.742083 lr: 0.000532\n", + "INFO:root:epoch: 61/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.829649 acc: 0.740039 lr: 0.000531\n", + "INFO:root:epoch: 61/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.826165 acc: 0.740257 lr: 0.000530\n", + "INFO:root:epoch: 61/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.823962 acc: 0.740972 lr: 0.000529\n", + "INFO:root:epoch: 61/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.824045 acc: 0.741447 lr: 0.000528\n", + "INFO:root:epoch: 61/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.822660 acc: 0.741875 lr: 0.000527\n", + "INFO:root:epoch: 61/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.822528 acc: 0.741815 lr: 0.000527\n", + "INFO:root:epoch: 61/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.820247 acc: 0.741477 lr: 0.000526\n", + "INFO:root:epoch: 61/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.824795 acc: 0.739810 lr: 0.000525\n", + "INFO:root:epoch: 61/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.825198 acc: 0.739193 lr: 0.000524\n", + 
"INFO:root:epoch: 61/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.820621 acc: 0.741750 lr: 0.000523\n", + "INFO:root:epoch: 61/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.822385 acc: 0.740986 lr: 0.000522\n", + "INFO:root:epoch: 61/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.825277 acc: 0.739815 lr: 0.000521\n", + "INFO:root:epoch: 61/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.827828 acc: 0.738728 lr: 0.000520\n", + "INFO:root:epoch: 61/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.824658 acc: 0.739547 lr: 0.000519\n", + "INFO:root:epoch: 61/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.828213 acc: 0.737813 lr: 0.000518\n", + "INFO:root:epoch: 61/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.828741 acc: 0.738206 lr: 0.000518\n", + "INFO:root:epoch: 62/100 starts\n", + "INFO:root:epoch: 62/100 et: 0s eta: 14s batches: 10/313(3%) samples: 320 loss: 0.844680 acc: 0.765625 lr: 0.000516\n", + "INFO:root:epoch: 62/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.862761 acc: 0.767188 lr: 0.000515\n", + "INFO:root:epoch: 62/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.863700 acc: 0.759375 lr: 0.000515\n", + "INFO:root:epoch: 62/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.832684 acc: 0.765625 lr: 0.000514\n", + "INFO:root:epoch: 62/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.837586 acc: 0.765625 lr: 0.000513\n", + "INFO:root:epoch: 62/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.814643 acc: 0.770313 lr: 0.000512\n", + "INFO:root:epoch: 62/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.811375 acc: 0.763839 lr: 0.000511\n", + "INFO:root:epoch: 62/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.820167 acc: 0.757422 lr: 0.000510\n", + "INFO:root:epoch: 62/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.824017 acc: 0.757986 lr: 0.000509\n", + "INFO:root:epoch: 62/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.825212 acc: 0.756563 lr: 0.000508\n", + "INFO:root:epoch: 62/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.821173 acc: 0.757102 lr: 0.000507\n", + "INFO:root:epoch: 62/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.820457 acc: 0.756250 lr: 0.000507\n", + "INFO:root:epoch: 62/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.819386 acc: 0.756010 lr: 0.000506\n", + "INFO:root:epoch: 62/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.821911 acc: 0.753571 lr: 0.000505\n", + "INFO:root:epoch: 62/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.819128 acc: 0.752708 lr: 0.000504\n", + "INFO:root:epoch: 62/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.816960 acc: 0.753320 lr: 0.000503\n", + "INFO:root:epoch: 62/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.815645 acc: 0.753493 lr: 0.000502\n", + "INFO:root:epoch: 62/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.819335 acc: 0.750868 lr: 0.000501\n", + "INFO:root:epoch: 62/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.815293 acc: 0.750658 lr: 0.000500\n", + "INFO:root:epoch: 62/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.813565 acc: 0.750312 lr: 0.000500\n", + "INFO:root:epoch: 62/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.819647 acc: 0.748065 lr: 0.000499\n", + "INFO:root:epoch: 62/100 et: 10s eta: 4s 
batches: 220/313(70%) samples: 7040 loss: 0.821927 acc: 0.747585 lr: 0.000498\n", + "INFO:root:epoch: 62/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.819023 acc: 0.748505 lr: 0.000497\n", + "INFO:root:epoch: 62/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.820804 acc: 0.747266 lr: 0.000496\n", + "INFO:root:epoch: 62/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.820380 acc: 0.747250 lr: 0.000495\n", + "INFO:root:epoch: 62/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.814690 acc: 0.748558 lr: 0.000494\n", + "INFO:root:epoch: 62/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.814140 acc: 0.750000 lr: 0.000494\n", + "INFO:root:epoch: 62/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.812839 acc: 0.750781 lr: 0.000493\n", + "INFO:root:epoch: 62/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.812621 acc: 0.750216 lr: 0.000492\n", + "INFO:root:epoch: 62/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.813526 acc: 0.748958 lr: 0.000491\n", + "INFO:root:epoch: 62/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.815095 acc: 0.747379 lr: 0.000490\n", + "INFO:root:epoch: 63/100 starts\n", + "INFO:root:epoch: 63/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.796004 acc: 0.759375 lr: 0.000489\n", + "INFO:root:epoch: 63/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.827343 acc: 0.740625 lr: 0.000488\n", + "INFO:root:epoch: 63/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.810678 acc: 0.753125 lr: 0.000487\n", + "INFO:root:epoch: 63/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.817466 acc: 0.749219 lr: 0.000487\n", + "INFO:root:epoch: 63/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.814195 acc: 0.745625 lr: 0.000486\n", + "INFO:root:epoch: 63/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.837556 acc: 0.738021 lr: 0.000485\n", + "INFO:root:epoch: 63/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.830012 acc: 0.738839 lr: 0.000484\n", + "INFO:root:epoch: 63/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.845837 acc: 0.737891 lr: 0.000483\n", + "INFO:root:epoch: 63/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.837978 acc: 0.742014 lr: 0.000482\n", + "INFO:root:epoch: 63/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.839345 acc: 0.739063 lr: 0.000482\n", + "INFO:root:epoch: 63/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.840577 acc: 0.738068 lr: 0.000481\n", + "INFO:root:epoch: 63/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.837437 acc: 0.739323 lr: 0.000480\n", + "INFO:root:epoch: 63/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.833851 acc: 0.740865 lr: 0.000479\n", + "INFO:root:epoch: 63/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.831390 acc: 0.739286 lr: 0.000478\n", + "INFO:root:epoch: 63/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.825265 acc: 0.740417 lr: 0.000477\n", + "INFO:root:epoch: 63/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.823050 acc: 0.742773 lr: 0.000477\n", + "INFO:root:epoch: 63/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.817195 acc: 0.744485 lr: 0.000476\n", + "INFO:root:epoch: 63/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.818153 acc: 0.743750 lr: 0.000475\n", + "INFO:root:epoch: 63/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 
0.819347 acc: 0.743914 lr: 0.000474\n",
+ "[... per-batch training log truncated for readability: epochs 63-81 of 100, each with 313 batches of 32 samples (~10,000 samples, ~14 s per epoch); the running loss drifts from ~0.82 down to ~0.79, accuracy climbs from ~0.742 to ~0.756, and the learning rate decays smoothly from 0.000474 to 0.000176 ...]\n",
+ "INFO:root:epoch: 81/100 et: 
12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.790074 acc: 0.756473 lr: 0.000176\n", + "INFO:root:epoch: 81/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.794264 acc: 0.754526 lr: 0.000176\n", + "INFO:root:epoch: 81/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.789935 acc: 0.756146 lr: 0.000175\n", + "INFO:root:epoch: 81/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.791139 acc: 0.755847 lr: 0.000175\n", + "INFO:root:epoch: 82/100 starts\n", + "INFO:root:epoch: 82/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.628804 acc: 0.803125 lr: 0.000175\n", + "INFO:root:epoch: 82/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.695736 acc: 0.801562 lr: 0.000174\n", + "INFO:root:epoch: 82/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.726282 acc: 0.792708 lr: 0.000174\n", + "INFO:root:epoch: 82/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.762066 acc: 0.775000 lr: 0.000174\n", + "INFO:root:epoch: 82/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.771014 acc: 0.766875 lr: 0.000173\n", + "INFO:root:epoch: 82/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.781545 acc: 0.765625 lr: 0.000173\n", + "INFO:root:epoch: 82/100 et: 3s eta: 10s batches: 70/313(22%) samples: 2240 loss: 0.781009 acc: 0.764286 lr: 0.000173\n", + "INFO:root:epoch: 82/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.782196 acc: 0.762891 lr: 0.000172\n", + "INFO:root:epoch: 82/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.782277 acc: 0.759722 lr: 0.000172\n", + "INFO:root:epoch: 82/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.778186 acc: 0.758750 lr: 0.000172\n", + "INFO:root:epoch: 82/100 et: 4s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.779406 acc: 0.757102 lr: 0.000172\n", + "INFO:root:epoch: 82/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.785728 acc: 0.752604 lr: 0.000171\n", + "INFO:root:epoch: 82/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.792901 acc: 0.752163 lr: 0.000171\n", + "INFO:root:epoch: 82/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.792623 acc: 0.751786 lr: 0.000171\n", + "INFO:root:epoch: 82/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.787286 acc: 0.752708 lr: 0.000170\n", + "INFO:root:epoch: 82/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 0.788793 acc: 0.753320 lr: 0.000170\n", + "INFO:root:epoch: 82/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.784648 acc: 0.755331 lr: 0.000170\n", + "INFO:root:epoch: 82/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.784689 acc: 0.754688 lr: 0.000169\n", + "INFO:root:epoch: 82/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.783967 acc: 0.755263 lr: 0.000169\n", + "INFO:root:epoch: 82/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.788794 acc: 0.754375 lr: 0.000169\n", + "INFO:root:epoch: 82/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.794243 acc: 0.751786 lr: 0.000169\n", + "INFO:root:epoch: 82/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.796243 acc: 0.751847 lr: 0.000168\n", + "INFO:root:epoch: 82/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.794753 acc: 0.751087 lr: 0.000168\n", + "INFO:root:epoch: 82/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.793552 acc: 0.750911 lr: 0.000168\n", + "INFO:root:epoch: 82/100 et: 11s eta: 2s batches: 250/313(79%) samples: 
8000 loss: 0.792333 acc: 0.751000 lr: 0.000167\n", + "INFO:root:epoch: 82/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.793941 acc: 0.750841 lr: 0.000167\n", + "INFO:root:epoch: 82/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.791701 acc: 0.751273 lr: 0.000167\n", + "INFO:root:epoch: 82/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.796419 acc: 0.749888 lr: 0.000167\n", + "INFO:root:epoch: 82/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.798194 acc: 0.749138 lr: 0.000166\n", + "INFO:root:epoch: 82/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.796819 acc: 0.749479 lr: 0.000166\n", + "INFO:root:epoch: 82/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.797615 acc: 0.748286 lr: 0.000166\n", + "INFO:root:epoch: 83/100 starts\n", + "INFO:root:epoch: 83/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.761741 acc: 0.775000 lr: 0.000165\n", + "INFO:root:epoch: 83/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.751758 acc: 0.771875 lr: 0.000165\n", + "INFO:root:epoch: 83/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.760744 acc: 0.757292 lr: 0.000165\n", + "INFO:root:epoch: 83/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.801879 acc: 0.746875 lr: 0.000164\n", + "INFO:root:epoch: 83/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.812679 acc: 0.746250 lr: 0.000164\n", + "INFO:root:epoch: 83/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.812477 acc: 0.746354 lr: 0.000164\n", + "INFO:root:epoch: 83/100 et: 3s eta: 10s batches: 70/313(22%) samples: 2240 loss: 0.800193 acc: 0.747321 lr: 0.000164\n", + "INFO:root:epoch: 83/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.804929 acc: 0.750000 lr: 0.000163\n", + "INFO:root:epoch: 83/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.809009 acc: 0.746875 lr: 0.000163\n", + "INFO:root:epoch: 83/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.805895 acc: 0.747187 lr: 0.000163\n", + "INFO:root:epoch: 83/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.799392 acc: 0.750000 lr: 0.000162\n", + "INFO:root:epoch: 83/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.793033 acc: 0.749219 lr: 0.000162\n", + "INFO:root:epoch: 83/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.794914 acc: 0.748558 lr: 0.000162\n", + "INFO:root:epoch: 83/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.796939 acc: 0.749330 lr: 0.000162\n", + "INFO:root:epoch: 83/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.802187 acc: 0.746875 lr: 0.000161\n", + "INFO:root:epoch: 83/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 0.796101 acc: 0.749023 lr: 0.000161\n", + "INFO:root:epoch: 83/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.798109 acc: 0.749816 lr: 0.000161\n", + "INFO:root:epoch: 83/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.795528 acc: 0.749479 lr: 0.000161\n", + "INFO:root:epoch: 83/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.794467 acc: 0.750164 lr: 0.000160\n", + "INFO:root:epoch: 83/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.793782 acc: 0.751094 lr: 0.000160\n", + "INFO:root:epoch: 83/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.793767 acc: 0.750446 lr: 0.000160\n", + "INFO:root:epoch: 83/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.793982 acc: 0.750142 lr: 
0.000159\n", + "INFO:root:epoch: 83/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.792620 acc: 0.750136 lr: 0.000159\n", + "INFO:root:epoch: 83/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.793667 acc: 0.750781 lr: 0.000159\n", + "INFO:root:epoch: 83/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.792085 acc: 0.751000 lr: 0.000159\n", + "INFO:root:epoch: 83/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.791104 acc: 0.752163 lr: 0.000158\n", + "INFO:root:epoch: 83/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.789927 acc: 0.752315 lr: 0.000158\n", + "INFO:root:epoch: 83/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.790392 acc: 0.752121 lr: 0.000158\n", + "INFO:root:epoch: 83/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.795518 acc: 0.750539 lr: 0.000157\n", + "INFO:root:epoch: 83/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.796847 acc: 0.750417 lr: 0.000157\n", + "INFO:root:epoch: 83/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.794402 acc: 0.750806 lr: 0.000157\n", + "INFO:root:epoch: 84/100 starts\n", + "INFO:root:epoch: 84/100 et: 0s eta: 14s batches: 10/313(3%) samples: 320 loss: 0.820571 acc: 0.759375 lr: 0.000157\n", + "INFO:root:epoch: 84/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.880304 acc: 0.737500 lr: 0.000156\n", + "INFO:root:epoch: 84/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.824604 acc: 0.750000 lr: 0.000156\n", + "INFO:root:epoch: 84/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.815394 acc: 0.754687 lr: 0.000156\n", + "INFO:root:epoch: 84/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.787376 acc: 0.764375 lr: 0.000155\n", + "INFO:root:epoch: 84/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.791501 acc: 0.762500 lr: 0.000155\n", + "INFO:root:epoch: 84/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.801442 acc: 0.758482 lr: 0.000155\n", + "INFO:root:epoch: 84/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.811390 acc: 0.753516 lr: 0.000155\n", + "INFO:root:epoch: 84/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.810934 acc: 0.751736 lr: 0.000154\n", + "INFO:root:epoch: 84/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.801340 acc: 0.754688 lr: 0.000154\n", + "INFO:root:epoch: 84/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.809829 acc: 0.751989 lr: 0.000154\n", + "INFO:root:epoch: 84/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.806949 acc: 0.753385 lr: 0.000154\n", + "INFO:root:epoch: 84/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.808518 acc: 0.751202 lr: 0.000153\n", + "INFO:root:epoch: 84/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.811593 acc: 0.750893 lr: 0.000153\n", + "INFO:root:epoch: 84/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.807069 acc: 0.752292 lr: 0.000153\n", + "INFO:root:epoch: 84/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 0.807367 acc: 0.750586 lr: 0.000153\n", + "INFO:root:epoch: 84/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.807674 acc: 0.749816 lr: 0.000152\n", + "INFO:root:epoch: 84/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.810116 acc: 0.749653 lr: 0.000152\n", + "INFO:root:epoch: 84/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.803725 acc: 0.752796 lr: 0.000152\n", + "INFO:root:epoch: 84/100 
et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.802151 acc: 0.752969 lr: 0.000152\n", + "INFO:root:epoch: 84/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.795957 acc: 0.754018 lr: 0.000151\n", + "INFO:root:epoch: 84/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.792063 acc: 0.755682 lr: 0.000151\n", + "INFO:root:epoch: 84/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.788154 acc: 0.756386 lr: 0.000151\n", + "INFO:root:epoch: 84/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.786676 acc: 0.755599 lr: 0.000150\n", + "INFO:root:epoch: 84/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.791370 acc: 0.755500 lr: 0.000150\n", + "INFO:root:epoch: 84/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.795353 acc: 0.752644 lr: 0.000150\n", + "INFO:root:epoch: 84/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.792128 acc: 0.753009 lr: 0.000150\n", + "INFO:root:epoch: 84/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.793074 acc: 0.752567 lr: 0.000149\n", + "INFO:root:epoch: 84/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.790604 acc: 0.752909 lr: 0.000149\n", + "INFO:root:epoch: 84/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.792560 acc: 0.752396 lr: 0.000149\n", + "INFO:root:epoch: 84/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.792698 acc: 0.751915 lr: 0.000149\n", + "INFO:root:epoch: 85/100 starts\n", + "INFO:root:epoch: 85/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.766288 acc: 0.796875 lr: 0.000148\n", + "INFO:root:epoch: 85/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.747180 acc: 0.784375 lr: 0.000148\n", + "INFO:root:epoch: 85/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.769042 acc: 0.769792 lr: 0.000148\n", + "INFO:root:epoch: 85/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.766711 acc: 0.764063 lr: 0.000148\n", + "INFO:root:epoch: 85/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.766502 acc: 0.764375 lr: 0.000147\n", + "INFO:root:epoch: 85/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.761401 acc: 0.764583 lr: 0.000147\n", + "INFO:root:epoch: 85/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.755251 acc: 0.766964 lr: 0.000147\n", + "INFO:root:epoch: 85/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.761742 acc: 0.763672 lr: 0.000147\n", + "INFO:root:epoch: 85/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.764124 acc: 0.761111 lr: 0.000146\n", + "INFO:root:epoch: 85/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.773257 acc: 0.760000 lr: 0.000146\n", + "INFO:root:epoch: 85/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.786567 acc: 0.758239 lr: 0.000146\n", + "INFO:root:epoch: 85/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.796150 acc: 0.754167 lr: 0.000146\n", + "INFO:root:epoch: 85/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.802659 acc: 0.752163 lr: 0.000145\n", + "INFO:root:epoch: 85/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.798765 acc: 0.754018 lr: 0.000145\n", + "INFO:root:epoch: 85/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.793041 acc: 0.754792 lr: 0.000145\n", + "INFO:root:epoch: 85/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.798035 acc: 0.754297 lr: 0.000145\n", + "INFO:root:epoch: 85/100 et: 7s eta: 6s batches: 170/313(54%) 
samples: 5440 loss: 0.799259 acc: 0.755147 lr: 0.000144\n", + "INFO:root:epoch: 85/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.801317 acc: 0.754861 lr: 0.000144\n", + "INFO:root:epoch: 85/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.794634 acc: 0.758388 lr: 0.000144\n", + "INFO:root:epoch: 85/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.793190 acc: 0.758906 lr: 0.000144\n", + "INFO:root:epoch: 85/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.791191 acc: 0.759673 lr: 0.000143\n", + "INFO:root:epoch: 85/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.789623 acc: 0.759943 lr: 0.000143\n", + "INFO:root:epoch: 85/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.789784 acc: 0.759783 lr: 0.000143\n", + "INFO:root:epoch: 85/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.790267 acc: 0.759375 lr: 0.000143\n", + "INFO:root:epoch: 85/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.794921 acc: 0.757875 lr: 0.000142\n", + "INFO:root:epoch: 85/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.795896 acc: 0.757813 lr: 0.000142\n", + "INFO:root:epoch: 85/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.793987 acc: 0.758796 lr: 0.000142\n", + "INFO:root:epoch: 85/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.794202 acc: 0.758482 lr: 0.000142\n", + "INFO:root:epoch: 85/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.795893 acc: 0.757543 lr: 0.000141\n", + "INFO:root:epoch: 85/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.796696 acc: 0.756563 lr: 0.000141\n", + "INFO:root:epoch: 85/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.798361 acc: 0.756048 lr: 0.000141\n", + "INFO:root:epoch: 86/100 starts\n", + "INFO:root:epoch: 86/100 et: 0s eta: 14s batches: 10/313(3%) samples: 320 loss: 0.789228 acc: 0.743750 lr: 0.000140\n", + "INFO:root:epoch: 86/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.786835 acc: 0.756250 lr: 0.000140\n", + "INFO:root:epoch: 86/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.767423 acc: 0.758333 lr: 0.000140\n", + "INFO:root:epoch: 86/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.768584 acc: 0.751563 lr: 0.000140\n", + "INFO:root:epoch: 86/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.781230 acc: 0.746875 lr: 0.000140\n", + "INFO:root:epoch: 86/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.784208 acc: 0.745313 lr: 0.000139\n", + "INFO:root:epoch: 86/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.804742 acc: 0.743750 lr: 0.000139\n", + "INFO:root:epoch: 86/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.805145 acc: 0.742187 lr: 0.000139\n", + "INFO:root:epoch: 86/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.799827 acc: 0.743056 lr: 0.000139\n", + "INFO:root:epoch: 86/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.811971 acc: 0.741875 lr: 0.000138\n", + "INFO:root:epoch: 86/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.803488 acc: 0.744318 lr: 0.000138\n", + "INFO:root:epoch: 86/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.795298 acc: 0.747396 lr: 0.000138\n", + "INFO:root:epoch: 86/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.790926 acc: 0.749279 lr: 0.000138\n", + "INFO:root:epoch: 86/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.783147 acc: 
0.749777 lr: 0.000137\n", + "INFO:root:epoch: 86/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.789888 acc: 0.747500 lr: 0.000137\n", + "INFO:root:epoch: 86/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.791250 acc: 0.747070 lr: 0.000137\n", + "INFO:root:epoch: 86/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.788609 acc: 0.747610 lr: 0.000137\n", + "INFO:root:epoch: 86/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.791464 acc: 0.746875 lr: 0.000136\n", + "INFO:root:epoch: 86/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.795199 acc: 0.746217 lr: 0.000136\n", + "INFO:root:epoch: 86/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.796702 acc: 0.745469 lr: 0.000136\n", + "INFO:root:epoch: 86/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.799329 acc: 0.745238 lr: 0.000136\n", + "INFO:root:epoch: 86/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.795992 acc: 0.746875 lr: 0.000135\n", + "INFO:root:epoch: 86/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.793393 acc: 0.749049 lr: 0.000135\n", + "INFO:root:epoch: 86/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.793658 acc: 0.748828 lr: 0.000135\n", + "INFO:root:epoch: 86/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.793849 acc: 0.748000 lr: 0.000135\n", + "INFO:root:epoch: 86/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.792766 acc: 0.747716 lr: 0.000135\n", + "INFO:root:epoch: 86/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.791771 acc: 0.747106 lr: 0.000134\n", + "INFO:root:epoch: 86/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.796318 acc: 0.745759 lr: 0.000134\n", + "INFO:root:epoch: 86/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.798938 acc: 0.745690 lr: 0.000134\n", + "INFO:root:epoch: 86/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.799687 acc: 0.746458 lr: 0.000134\n", + "INFO:root:epoch: 86/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.796616 acc: 0.748488 lr: 0.000133\n", + "INFO:root:epoch: 87/100 starts\n", + "INFO:root:epoch: 87/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.850964 acc: 0.740625 lr: 0.000133\n", + "INFO:root:epoch: 87/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.802344 acc: 0.746875 lr: 0.000133\n", + "INFO:root:epoch: 87/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.815467 acc: 0.739583 lr: 0.000133\n", + "INFO:root:epoch: 87/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.809854 acc: 0.742969 lr: 0.000132\n", + "INFO:root:epoch: 87/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.780924 acc: 0.751875 lr: 0.000132\n", + "INFO:root:epoch: 87/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.791136 acc: 0.755729 lr: 0.000132\n", + "INFO:root:epoch: 87/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.790528 acc: 0.755357 lr: 0.000132\n", + "INFO:root:epoch: 87/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.790394 acc: 0.755469 lr: 0.000131\n", + "INFO:root:epoch: 87/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.788743 acc: 0.752083 lr: 0.000131\n", + "INFO:root:epoch: 87/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.789852 acc: 0.753125 lr: 0.000131\n", + "INFO:root:epoch: 87/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.784777 acc: 0.753977 lr: 0.000131\n", + 
"INFO:root:epoch: 87/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.776484 acc: 0.757031 lr: 0.000131\n", + "INFO:root:epoch: 87/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.775763 acc: 0.756250 lr: 0.000130\n", + "INFO:root:epoch: 87/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.784094 acc: 0.754464 lr: 0.000130\n", + "INFO:root:epoch: 87/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.781750 acc: 0.754792 lr: 0.000130\n", + "INFO:root:epoch: 87/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.783062 acc: 0.754883 lr: 0.000130\n", + "INFO:root:epoch: 87/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.779886 acc: 0.756985 lr: 0.000129\n", + "INFO:root:epoch: 87/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.782445 acc: 0.756944 lr: 0.000129\n", + "INFO:root:epoch: 87/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.790414 acc: 0.756414 lr: 0.000129\n", + "INFO:root:epoch: 87/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.790827 acc: 0.755937 lr: 0.000129\n", + "INFO:root:epoch: 87/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.792944 acc: 0.755357 lr: 0.000129\n", + "INFO:root:epoch: 87/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.793317 acc: 0.754119 lr: 0.000128\n", + "INFO:root:epoch: 87/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.785357 acc: 0.756386 lr: 0.000128\n", + "INFO:root:epoch: 87/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.787292 acc: 0.754948 lr: 0.000128\n", + "INFO:root:epoch: 87/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.785757 acc: 0.755000 lr: 0.000128\n", + "INFO:root:epoch: 87/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.786238 acc: 0.755048 lr: 0.000127\n", + "INFO:root:epoch: 87/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.784898 acc: 0.755671 lr: 0.000127\n", + "INFO:root:epoch: 87/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.783881 acc: 0.755915 lr: 0.000127\n", + "INFO:root:epoch: 87/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.786642 acc: 0.755388 lr: 0.000127\n", + "INFO:root:epoch: 87/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.784731 acc: 0.755937 lr: 0.000127\n", + "INFO:root:epoch: 87/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.781761 acc: 0.756552 lr: 0.000126\n", + "INFO:root:epoch: 88/100 starts\n", + "INFO:root:epoch: 88/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.747271 acc: 0.737500 lr: 0.000126\n", + "INFO:root:epoch: 88/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.740076 acc: 0.756250 lr: 0.000126\n", + "INFO:root:epoch: 88/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.748918 acc: 0.761458 lr: 0.000126\n", + "INFO:root:epoch: 88/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.732172 acc: 0.767969 lr: 0.000125\n", + "INFO:root:epoch: 88/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.747654 acc: 0.769375 lr: 0.000125\n", + "INFO:root:epoch: 88/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.764433 acc: 0.760937 lr: 0.000125\n", + "INFO:root:epoch: 88/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.770631 acc: 0.760268 lr: 0.000125\n", + "INFO:root:epoch: 88/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.765169 acc: 0.762109 lr: 0.000125\n", + "INFO:root:epoch: 88/100 et: 4s eta: 10s 
batches: 90/313(28%) samples: 2880 loss: 0.775203 acc: 0.760417 lr: 0.000124\n", + "INFO:root:epoch: 88/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.774713 acc: 0.759062 lr: 0.000124\n", + "INFO:root:epoch: 88/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.773177 acc: 0.760795 lr: 0.000124\n", + "INFO:root:epoch: 88/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.773849 acc: 0.760156 lr: 0.000124\n", + "INFO:root:epoch: 88/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.770292 acc: 0.761058 lr: 0.000123\n", + "INFO:root:epoch: 88/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.771836 acc: 0.759375 lr: 0.000123\n", + "INFO:root:epoch: 88/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.770684 acc: 0.760625 lr: 0.000123\n", + "INFO:root:epoch: 88/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.766468 acc: 0.762500 lr: 0.000123\n", + "INFO:root:epoch: 88/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.770944 acc: 0.760662 lr: 0.000123\n", + "INFO:root:epoch: 88/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.769675 acc: 0.760937 lr: 0.000122\n", + "INFO:root:epoch: 88/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.766954 acc: 0.762171 lr: 0.000122\n", + "INFO:root:epoch: 88/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.769263 acc: 0.760937 lr: 0.000122\n", + "INFO:root:epoch: 88/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.777239 acc: 0.757589 lr: 0.000122\n", + "INFO:root:epoch: 88/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.776881 acc: 0.757812 lr: 0.000122\n", + "INFO:root:epoch: 88/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.777944 acc: 0.756522 lr: 0.000121\n", + "INFO:root:epoch: 88/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.777051 acc: 0.756510 lr: 0.000121\n", + "INFO:root:epoch: 88/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.778403 acc: 0.756375 lr: 0.000121\n", + "INFO:root:epoch: 88/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.778209 acc: 0.756971 lr: 0.000121\n", + "INFO:root:epoch: 88/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.775398 acc: 0.757292 lr: 0.000120\n", + "INFO:root:epoch: 88/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.775242 acc: 0.757143 lr: 0.000120\n", + "INFO:root:epoch: 88/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.775968 acc: 0.757543 lr: 0.000120\n", + "INFO:root:epoch: 88/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.775583 acc: 0.757083 lr: 0.000120\n", + "INFO:root:epoch: 88/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.773207 acc: 0.757258 lr: 0.000120\n", + "INFO:root:epoch: 89/100 starts\n", + "INFO:root:epoch: 89/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.734807 acc: 0.753125 lr: 0.000119\n", + "INFO:root:epoch: 89/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.776515 acc: 0.748438 lr: 0.000119\n", + "INFO:root:epoch: 89/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.785510 acc: 0.744792 lr: 0.000119\n", + "INFO:root:epoch: 89/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.772257 acc: 0.761719 lr: 0.000119\n", + "INFO:root:epoch: 89/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.781800 acc: 0.763750 lr: 0.000119\n", + "INFO:root:epoch: 89/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 
0.783931 acc: 0.761458 lr: 0.000118\n", + "INFO:root:epoch: 89/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.777696 acc: 0.765179 lr: 0.000118\n", + "INFO:root:epoch: 89/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.776828 acc: 0.763672 lr: 0.000118\n", + "INFO:root:epoch: 89/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.778528 acc: 0.764931 lr: 0.000118\n", + "INFO:root:epoch: 89/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.782296 acc: 0.764375 lr: 0.000118\n", + "INFO:root:epoch: 89/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.779648 acc: 0.763068 lr: 0.000117\n", + "INFO:root:epoch: 89/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.781904 acc: 0.761719 lr: 0.000117\n", + "INFO:root:epoch: 89/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.783871 acc: 0.761538 lr: 0.000117\n", + "INFO:root:epoch: 89/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.780222 acc: 0.762277 lr: 0.000117\n", + "INFO:root:epoch: 89/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.790571 acc: 0.760000 lr: 0.000117\n", + "INFO:root:epoch: 89/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.788741 acc: 0.760352 lr: 0.000116\n", + "INFO:root:epoch: 89/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.786166 acc: 0.759743 lr: 0.000116\n", + "INFO:root:epoch: 89/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.783236 acc: 0.761806 lr: 0.000116\n", + "INFO:root:epoch: 89/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.789045 acc: 0.760855 lr: 0.000116\n", + "INFO:root:epoch: 89/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.788909 acc: 0.761719 lr: 0.000116\n", + "INFO:root:epoch: 89/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.787145 acc: 0.762798 lr: 0.000115\n", + "INFO:root:epoch: 89/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.786296 acc: 0.762926 lr: 0.000115\n", + "INFO:root:epoch: 89/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.787380 acc: 0.762500 lr: 0.000115\n", + "INFO:root:epoch: 89/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.791102 acc: 0.761198 lr: 0.000115\n", + "INFO:root:epoch: 89/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.787385 acc: 0.762750 lr: 0.000115\n", + "INFO:root:epoch: 89/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.787774 acc: 0.761058 lr: 0.000114\n", + "INFO:root:epoch: 89/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.787381 acc: 0.760301 lr: 0.000114\n", + "INFO:root:epoch: 89/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.784104 acc: 0.761272 lr: 0.000114\n", + "INFO:root:epoch: 89/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.783240 acc: 0.760560 lr: 0.000114\n", + "INFO:root:epoch: 89/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.784704 acc: 0.760313 lr: 0.000114\n", + "INFO:root:epoch: 89/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.783017 acc: 0.761391 lr: 0.000113\n", + "INFO:root:epoch: 90/100 starts\n", + "INFO:root:epoch: 90/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.997723 acc: 0.671875 lr: 0.000113\n", + "INFO:root:epoch: 90/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.891037 acc: 0.723437 lr: 0.000113\n", + "INFO:root:epoch: 90/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.856110 acc: 0.741667 lr: 0.000113\n", 
+ "INFO:root:epoch: 90/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.829870 acc: 0.750781 lr: 0.000112\n", + "INFO:root:epoch: 90/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.825318 acc: 0.754375 lr: 0.000112\n", + "INFO:root:epoch: 90/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.819115 acc: 0.751562 lr: 0.000112\n", + "INFO:root:epoch: 90/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.821462 acc: 0.746875 lr: 0.000112\n", + "INFO:root:epoch: 90/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.810009 acc: 0.751563 lr: 0.000112\n", + "INFO:root:epoch: 90/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.805757 acc: 0.753125 lr: 0.000112\n", + "INFO:root:epoch: 90/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.792173 acc: 0.755937 lr: 0.000111\n", + "INFO:root:epoch: 90/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.797606 acc: 0.755398 lr: 0.000111\n", + "INFO:root:epoch: 90/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.788231 acc: 0.758073 lr: 0.000111\n", + "INFO:root:epoch: 90/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.790362 acc: 0.757692 lr: 0.000111\n", + "INFO:root:epoch: 90/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.787044 acc: 0.758482 lr: 0.000111\n", + "INFO:root:epoch: 90/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.785833 acc: 0.758333 lr: 0.000110\n", + "INFO:root:epoch: 90/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.786919 acc: 0.758594 lr: 0.000110\n", + "INFO:root:epoch: 90/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.795069 acc: 0.755882 lr: 0.000110\n", + "INFO:root:epoch: 90/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.801482 acc: 0.752431 lr: 0.000110\n", + "INFO:root:epoch: 90/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.799135 acc: 0.753618 lr: 0.000110\n", + "INFO:root:epoch: 90/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.802517 acc: 0.752344 lr: 0.000109\n", + "INFO:root:epoch: 90/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.802409 acc: 0.753423 lr: 0.000109\n", + "INFO:root:epoch: 90/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.799748 acc: 0.754119 lr: 0.000109\n", + "INFO:root:epoch: 90/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.799636 acc: 0.754348 lr: 0.000109\n", + "INFO:root:epoch: 90/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.797934 acc: 0.754036 lr: 0.000109\n", + "INFO:root:epoch: 90/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.796901 acc: 0.754625 lr: 0.000108\n", + "INFO:root:epoch: 90/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.796197 acc: 0.753966 lr: 0.000108\n", + "INFO:root:epoch: 90/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.795277 acc: 0.754514 lr: 0.000108\n", + "INFO:root:epoch: 90/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.794153 acc: 0.754911 lr: 0.000108\n", + "INFO:root:epoch: 90/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.797511 acc: 0.754741 lr: 0.000108\n", + "INFO:root:epoch: 90/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.798095 acc: 0.755729 lr: 0.000108\n", + "INFO:root:epoch: 90/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.797423 acc: 0.755645 lr: 0.000107\n", + "INFO:root:epoch: 91/100 starts\n", + "INFO:root:epoch: 91/100 et: 0s 
eta: 13s batches: 10/313(3%) samples: 320 loss: 0.880258 acc: 0.734375 lr: 0.000107\n", + "INFO:root:epoch: 91/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.863308 acc: 0.737500 lr: 0.000107\n", + "INFO:root:epoch: 91/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.802331 acc: 0.753125 lr: 0.000107\n", + "INFO:root:epoch: 91/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.790554 acc: 0.753906 lr: 0.000107\n", + "INFO:root:epoch: 91/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.782695 acc: 0.761250 lr: 0.000106\n", + "INFO:root:epoch: 91/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.794129 acc: 0.754687 lr: 0.000106\n", + "INFO:root:epoch: 91/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.807174 acc: 0.752679 lr: 0.000106\n", + "INFO:root:epoch: 91/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.798740 acc: 0.752344 lr: 0.000106\n", + "INFO:root:epoch: 91/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.797484 acc: 0.755208 lr: 0.000106\n", + "INFO:root:epoch: 91/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.804903 acc: 0.753438 lr: 0.000105\n", + "INFO:root:epoch: 91/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.810278 acc: 0.753125 lr: 0.000105\n", + "INFO:root:epoch: 91/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.802937 acc: 0.754688 lr: 0.000105\n", + "INFO:root:epoch: 91/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.805282 acc: 0.751923 lr: 0.000105\n", + "INFO:root:epoch: 91/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.802206 acc: 0.753348 lr: 0.000105\n", + "INFO:root:epoch: 91/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.811627 acc: 0.749583 lr: 0.000105\n", + "INFO:root:epoch: 91/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 0.806510 acc: 0.751367 lr: 0.000104\n", + "INFO:root:epoch: 91/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.804635 acc: 0.752390 lr: 0.000104\n", + "INFO:root:epoch: 91/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.801146 acc: 0.753993 lr: 0.000104\n", + "INFO:root:epoch: 91/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.805238 acc: 0.753454 lr: 0.000104\n", + "INFO:root:epoch: 91/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.803522 acc: 0.752500 lr: 0.000104\n", + "INFO:root:epoch: 91/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.798943 acc: 0.753720 lr: 0.000103\n", + "INFO:root:epoch: 91/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.803901 acc: 0.752131 lr: 0.000103\n", + "INFO:root:epoch: 91/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.803263 acc: 0.752717 lr: 0.000103\n", + "INFO:root:epoch: 91/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.800978 acc: 0.753255 lr: 0.000103\n", + "INFO:root:epoch: 91/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.800123 acc: 0.754125 lr: 0.000103\n", + "INFO:root:epoch: 91/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.799911 acc: 0.754687 lr: 0.000103\n", + "INFO:root:epoch: 91/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.800873 acc: 0.754745 lr: 0.000102\n", + "INFO:root:epoch: 91/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.797758 acc: 0.755246 lr: 0.000102\n", + "INFO:root:epoch: 91/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.795045 acc: 0.755603 lr: 
0.000102\n", + "INFO:root:epoch: 91/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.794688 acc: 0.756354 lr: 0.000102\n", + "INFO:root:epoch: 91/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.795100 acc: 0.755948 lr: 0.000102\n", + "INFO:root:epoch: 92/100 starts\n", + "INFO:root:epoch: 92/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.690164 acc: 0.806250 lr: 0.000101\n", + "INFO:root:epoch: 92/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.703724 acc: 0.804688 lr: 0.000101\n", + "INFO:root:epoch: 92/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.741244 acc: 0.785417 lr: 0.000101\n", + "INFO:root:epoch: 92/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.774958 acc: 0.772656 lr: 0.000101\n", + "INFO:root:epoch: 92/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.798213 acc: 0.758750 lr: 0.000101\n", + "INFO:root:epoch: 92/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.797358 acc: 0.758333 lr: 0.000101\n", + "INFO:root:epoch: 92/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.792055 acc: 0.756696 lr: 0.000100\n", + "INFO:root:epoch: 92/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.790635 acc: 0.756250 lr: 0.000100\n", + "INFO:root:epoch: 92/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.781314 acc: 0.757986 lr: 0.000100\n", + "INFO:root:epoch: 92/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.786877 acc: 0.753437 lr: 0.000100\n", + "INFO:root:epoch: 92/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.793163 acc: 0.749432 lr: 0.000100\n", + "INFO:root:epoch: 92/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.791653 acc: 0.748958 lr: 0.000100\n", + "INFO:root:epoch: 92/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.784631 acc: 0.750000 lr: 0.000099\n", + "INFO:root:epoch: 92/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.792362 acc: 0.748661 lr: 0.000099\n", + "INFO:root:epoch: 92/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.794085 acc: 0.748125 lr: 0.000099\n", + "INFO:root:epoch: 92/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.801355 acc: 0.746680 lr: 0.000099\n", + "INFO:root:epoch: 92/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.794676 acc: 0.748346 lr: 0.000099\n", + "INFO:root:epoch: 92/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.794705 acc: 0.748785 lr: 0.000099\n", + "INFO:root:epoch: 92/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.794772 acc: 0.748684 lr: 0.000098\n", + "INFO:root:epoch: 92/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.795025 acc: 0.749063 lr: 0.000098\n", + "INFO:root:epoch: 92/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.792504 acc: 0.749554 lr: 0.000098\n", + "INFO:root:epoch: 92/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.789176 acc: 0.750852 lr: 0.000098\n", + "INFO:root:epoch: 92/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.787420 acc: 0.752717 lr: 0.000098\n", + "INFO:root:epoch: 92/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.790530 acc: 0.751693 lr: 0.000097\n", + "INFO:root:epoch: 92/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.789574 acc: 0.751500 lr: 0.000097\n", + "INFO:root:epoch: 92/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.790968 acc: 0.752404 lr: 0.000097\n", + "INFO:root:epoch: 92/100 et: 
12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.791330 acc: 0.752662 lr: 0.000097\n", + "INFO:root:epoch: 92/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.788709 acc: 0.752902 lr: 0.000097\n", + "INFO:root:epoch: 92/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.788136 acc: 0.753556 lr: 0.000097\n", + "INFO:root:epoch: 92/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.783615 acc: 0.754583 lr: 0.000096\n", + "INFO:root:epoch: 92/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.785232 acc: 0.753831 lr: 0.000096\n", + "INFO:root:epoch: 93/100 starts\n", + "INFO:root:epoch: 93/100 et: 0s eta: 14s batches: 10/313(3%) samples: 320 loss: 0.731577 acc: 0.753125 lr: 0.000096\n", + "INFO:root:epoch: 93/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.806961 acc: 0.728125 lr: 0.000096\n", + "INFO:root:epoch: 93/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.780273 acc: 0.742708 lr: 0.000096\n", + "INFO:root:epoch: 93/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.794445 acc: 0.740625 lr: 0.000096\n", + "INFO:root:epoch: 93/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.785886 acc: 0.748750 lr: 0.000095\n", + "INFO:root:epoch: 93/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.808928 acc: 0.743750 lr: 0.000095\n", + "INFO:root:epoch: 93/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.807731 acc: 0.746429 lr: 0.000095\n", + "INFO:root:epoch: 93/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.809243 acc: 0.746094 lr: 0.000095\n", + "INFO:root:epoch: 93/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.801031 acc: 0.748264 lr: 0.000095\n", + "INFO:root:epoch: 93/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.806838 acc: 0.747187 lr: 0.000095\n", + "INFO:root:epoch: 93/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.808934 acc: 0.746023 lr: 0.000094\n", + "INFO:root:epoch: 93/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.813034 acc: 0.742187 lr: 0.000094\n", + "INFO:root:epoch: 93/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.805116 acc: 0.744471 lr: 0.000094\n", + "INFO:root:epoch: 93/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.810833 acc: 0.743304 lr: 0.000094\n", + "INFO:root:epoch: 93/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.805423 acc: 0.745000 lr: 0.000094\n", + "INFO:root:epoch: 93/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 0.805258 acc: 0.746484 lr: 0.000094\n", + "INFO:root:epoch: 93/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.799348 acc: 0.748529 lr: 0.000093\n", + "INFO:root:epoch: 93/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.798730 acc: 0.748958 lr: 0.000093\n", + "INFO:root:epoch: 93/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.799672 acc: 0.749507 lr: 0.000093\n", + "INFO:root:epoch: 93/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.808083 acc: 0.748437 lr: 0.000093\n", + "INFO:root:epoch: 93/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.808896 acc: 0.748214 lr: 0.000093\n", + "INFO:root:epoch: 93/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.801298 acc: 0.750284 lr: 0.000093\n", + "INFO:root:epoch: 93/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.797310 acc: 0.752446 lr: 0.000093\n", + "INFO:root:epoch: 93/100 et: 11s eta: 3s batches: 240/313(76%) samples: 
7680 loss: 0.798673 acc: 0.751823 lr: 0.000092\n", + "INFO:root:epoch: 93/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.801596 acc: 0.751750 lr: 0.000092\n", + "INFO:root:epoch: 93/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.799826 acc: 0.752163 lr: 0.000092\n", + "INFO:root:epoch: 93/100 et: 12s eta: 2s batches: 270/313(86%) samples: 8640 loss: 0.796523 acc: 0.751968 lr: 0.000092\n", + "INFO:root:epoch: 93/100 et: 13s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.796352 acc: 0.751228 lr: 0.000092\n", + "INFO:root:epoch: 93/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.799463 acc: 0.750108 lr: 0.000092\n", + "INFO:root:epoch: 93/100 et: 14s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.800424 acc: 0.749479 lr: 0.000091\n", + "INFO:root:epoch: 93/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.797798 acc: 0.750706 lr: 0.000091\n", + "INFO:root:epoch: 94/100 starts\n", + "INFO:root:epoch: 94/100 et: 0s eta: 14s batches: 10/313(3%) samples: 320 loss: 0.803447 acc: 0.762500 lr: 0.000091\n", + "INFO:root:epoch: 94/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.833519 acc: 0.745312 lr: 0.000091\n", + "INFO:root:epoch: 94/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.840047 acc: 0.743750 lr: 0.000091\n", + "INFO:root:epoch: 94/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.841335 acc: 0.739844 lr: 0.000091\n", + "INFO:root:epoch: 94/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.821914 acc: 0.746250 lr: 0.000090\n", + "INFO:root:epoch: 94/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.825954 acc: 0.744792 lr: 0.000090\n", + "INFO:root:epoch: 94/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.844646 acc: 0.739286 lr: 0.000090\n", + "INFO:root:epoch: 94/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.839375 acc: 0.737109 lr: 0.000090\n", + "INFO:root:epoch: 94/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.827256 acc: 0.740972 lr: 0.000090\n", + "INFO:root:epoch: 94/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.824656 acc: 0.745937 lr: 0.000090\n", + "INFO:root:epoch: 94/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.822932 acc: 0.746023 lr: 0.000089\n", + "INFO:root:epoch: 94/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.821938 acc: 0.747135 lr: 0.000089\n", + "INFO:root:epoch: 94/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.809640 acc: 0.751683 lr: 0.000089\n", + "INFO:root:epoch: 94/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.803667 acc: 0.754018 lr: 0.000089\n", + "INFO:root:epoch: 94/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.806155 acc: 0.752917 lr: 0.000089\n", + "INFO:root:epoch: 94/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.808421 acc: 0.752930 lr: 0.000089\n", + "INFO:root:epoch: 94/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.803617 acc: 0.754779 lr: 0.000089\n", + "INFO:root:epoch: 94/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.801626 acc: 0.754514 lr: 0.000088\n", + "INFO:root:epoch: 94/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.798918 acc: 0.754112 lr: 0.000088\n", + "INFO:root:epoch: 94/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.797969 acc: 0.755000 lr: 0.000088\n", + "INFO:root:epoch: 94/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.797693 acc: 0.754762 lr: 
0.000088\n", + "INFO:root:epoch: 94/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.800783 acc: 0.753267 lr: 0.000088\n", + "INFO:root:epoch: 94/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.798359 acc: 0.754891 lr: 0.000088\n", + "INFO:root:epoch: 94/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.800326 acc: 0.754167 lr: 0.000087\n", + "INFO:root:epoch: 94/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.789442 acc: 0.758000 lr: 0.000087\n", + "INFO:root:epoch: 94/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.787871 acc: 0.758413 lr: 0.000087\n", + "INFO:root:epoch: 94/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.784470 acc: 0.759722 lr: 0.000087\n", + "INFO:root:epoch: 94/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.783457 acc: 0.759710 lr: 0.000087\n", + "INFO:root:epoch: 94/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.782562 acc: 0.759698 lr: 0.000087\n", + "INFO:root:epoch: 94/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.782705 acc: 0.759375 lr: 0.000087\n", + "INFO:root:epoch: 94/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.778999 acc: 0.760282 lr: 0.000086\n", + "INFO:root:epoch: 95/100 starts\n", + "INFO:root:epoch: 95/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.766231 acc: 0.762500 lr: 0.000086\n", + "INFO:root:epoch: 95/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.751542 acc: 0.765625 lr: 0.000086\n", + "INFO:root:epoch: 95/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.773591 acc: 0.756250 lr: 0.000086\n", + "INFO:root:epoch: 95/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.763280 acc: 0.757031 lr: 0.000086\n", + "INFO:root:epoch: 95/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.771654 acc: 0.757500 lr: 0.000086\n", + "INFO:root:epoch: 95/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.772644 acc: 0.754687 lr: 0.000085\n", + "INFO:root:epoch: 95/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.768126 acc: 0.758036 lr: 0.000085\n", + "INFO:root:epoch: 95/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.781850 acc: 0.756641 lr: 0.000085\n", + "INFO:root:epoch: 95/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.798895 acc: 0.751389 lr: 0.000085\n", + "INFO:root:epoch: 95/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.796311 acc: 0.752812 lr: 0.000085\n", + "INFO:root:epoch: 95/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.796165 acc: 0.752841 lr: 0.000085\n", + "INFO:root:epoch: 95/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.786548 acc: 0.754687 lr: 0.000085\n", + "INFO:root:epoch: 95/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.788553 acc: 0.756250 lr: 0.000084\n", + "INFO:root:epoch: 95/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.786640 acc: 0.757589 lr: 0.000084\n", + "INFO:root:epoch: 95/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.790195 acc: 0.756875 lr: 0.000084\n", + "INFO:root:epoch: 95/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.790926 acc: 0.757227 lr: 0.000084\n", + "INFO:root:epoch: 95/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.792668 acc: 0.755699 lr: 0.000084\n", + "INFO:root:epoch: 95/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.796034 acc: 0.754167 lr: 0.000084\n", + "INFO:root:epoch: 95/100 
et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.796901 acc: 0.753289 lr: 0.000084\n", + "INFO:root:epoch: 95/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.793658 acc: 0.754219 lr: 0.000083\n", + "INFO:root:epoch: 95/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.790697 acc: 0.755357 lr: 0.000083\n", + "INFO:root:epoch: 95/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.791174 acc: 0.755540 lr: 0.000083\n", + "INFO:root:epoch: 95/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.790981 acc: 0.755842 lr: 0.000083\n", + "INFO:root:epoch: 95/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.792551 acc: 0.754948 lr: 0.000083\n", + "INFO:root:epoch: 95/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.790318 acc: 0.755750 lr: 0.000083\n", + "INFO:root:epoch: 95/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.788822 acc: 0.756731 lr: 0.000083\n", + "INFO:root:epoch: 95/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.785852 acc: 0.757755 lr: 0.000082\n", + "INFO:root:epoch: 95/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.785526 acc: 0.757701 lr: 0.000082\n", + "INFO:root:epoch: 95/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.788618 acc: 0.757112 lr: 0.000082\n", + "INFO:root:epoch: 95/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.787243 acc: 0.757708 lr: 0.000082\n", + "INFO:root:epoch: 95/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.786788 acc: 0.758165 lr: 0.000082\n", + "INFO:root:epoch: 96/100 starts\n", + "INFO:root:epoch: 96/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.900184 acc: 0.721875 lr: 0.000082\n", + "INFO:root:epoch: 96/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.817050 acc: 0.726562 lr: 0.000082\n", + "INFO:root:epoch: 96/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.787611 acc: 0.742708 lr: 0.000081\n", + "INFO:root:epoch: 96/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.786342 acc: 0.742187 lr: 0.000081\n", + "INFO:root:epoch: 96/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.785539 acc: 0.748125 lr: 0.000081\n", + "INFO:root:epoch: 96/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.809411 acc: 0.743229 lr: 0.000081\n", + "INFO:root:epoch: 96/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.815168 acc: 0.744643 lr: 0.000081\n", + "INFO:root:epoch: 96/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.808906 acc: 0.747656 lr: 0.000081\n", + "INFO:root:epoch: 96/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.799573 acc: 0.748958 lr: 0.000081\n", + "INFO:root:epoch: 96/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.809905 acc: 0.745937 lr: 0.000080\n", + "INFO:root:epoch: 96/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.815977 acc: 0.744886 lr: 0.000080\n", + "INFO:root:epoch: 96/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.809168 acc: 0.747656 lr: 0.000080\n", + "INFO:root:epoch: 96/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.803039 acc: 0.749038 lr: 0.000080\n", + "INFO:root:epoch: 96/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.803802 acc: 0.750670 lr: 0.000080\n", + "INFO:root:epoch: 96/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.800406 acc: 0.751458 lr: 0.000080\n", + "INFO:root:epoch: 96/100 et: 7s eta: 7s batches: 160/313(51%) 
samples: 5120 loss: 0.801153 acc: 0.751367 lr: 0.000080\n", + "INFO:root:epoch: 96/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.795543 acc: 0.753493 lr: 0.000079\n", + "INFO:root:epoch: 96/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.792251 acc: 0.754340 lr: 0.000079\n", + "INFO:root:epoch: 96/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.795225 acc: 0.754276 lr: 0.000079\n", + "INFO:root:epoch: 96/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.794892 acc: 0.755156 lr: 0.000079\n", + "INFO:root:epoch: 96/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.801911 acc: 0.753571 lr: 0.000079\n", + "INFO:root:epoch: 96/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.804700 acc: 0.753125 lr: 0.000079\n", + "INFO:root:epoch: 96/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.806657 acc: 0.752446 lr: 0.000079\n", + "INFO:root:epoch: 96/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.804139 acc: 0.752604 lr: 0.000078\n", + "INFO:root:epoch: 96/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.802955 acc: 0.753250 lr: 0.000078\n", + "INFO:root:epoch: 96/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.800339 acc: 0.753726 lr: 0.000078\n", + "INFO:root:epoch: 96/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.800201 acc: 0.752894 lr: 0.000078\n", + "INFO:root:epoch: 96/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.800504 acc: 0.751563 lr: 0.000078\n", + "INFO:root:epoch: 96/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.798371 acc: 0.752155 lr: 0.000078\n", + "INFO:root:epoch: 96/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.798875 acc: 0.751354 lr: 0.000078\n", + "INFO:root:epoch: 96/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.798440 acc: 0.751915 lr: 0.000078\n", + "INFO:root:epoch: 97/100 starts\n", + "INFO:root:epoch: 97/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.719486 acc: 0.768750 lr: 0.000077\n", + "INFO:root:epoch: 97/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.778803 acc: 0.753125 lr: 0.000077\n", + "INFO:root:epoch: 97/100 et: 1s eta: 13s batches: 30/313(9%) samples: 960 loss: 0.755296 acc: 0.759375 lr: 0.000077\n", + "INFO:root:epoch: 97/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.797087 acc: 0.750781 lr: 0.000077\n", + "INFO:root:epoch: 97/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.783176 acc: 0.753750 lr: 0.000077\n", + "INFO:root:epoch: 97/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.789088 acc: 0.750000 lr: 0.000077\n", + "INFO:root:epoch: 97/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.783504 acc: 0.753125 lr: 0.000077\n", + "INFO:root:epoch: 97/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.795049 acc: 0.750391 lr: 0.000076\n", + "INFO:root:epoch: 97/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.797903 acc: 0.750347 lr: 0.000076\n", + "INFO:root:epoch: 97/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.795908 acc: 0.749375 lr: 0.000076\n", + "INFO:root:epoch: 97/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.791749 acc: 0.748295 lr: 0.000076\n", + "INFO:root:epoch: 97/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.793694 acc: 0.749219 lr: 0.000076\n", + "INFO:root:epoch: 97/100 et: 6s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.790152 acc: 
0.750962 lr: 0.000076\n", + "INFO:root:epoch: 97/100 et: 6s eta: 8s batches: 140/313(44%) samples: 4480 loss: 0.781105 acc: 0.752232 lr: 0.000076\n", + "INFO:root:epoch: 97/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.780573 acc: 0.753333 lr: 0.000076\n", + "INFO:root:epoch: 97/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.778776 acc: 0.753125 lr: 0.000075\n", + "INFO:root:epoch: 97/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.777386 acc: 0.753493 lr: 0.000075\n", + "INFO:root:epoch: 97/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.778101 acc: 0.753646 lr: 0.000075\n", + "INFO:root:epoch: 97/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.782027 acc: 0.752467 lr: 0.000075\n", + "INFO:root:epoch: 97/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.775025 acc: 0.755156 lr: 0.000075\n", + "INFO:root:epoch: 97/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.772303 acc: 0.756101 lr: 0.000075\n", + "INFO:root:epoch: 97/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.768858 acc: 0.757670 lr: 0.000075\n", + "INFO:root:epoch: 97/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.770851 acc: 0.756522 lr: 0.000074\n", + "INFO:root:epoch: 97/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.770891 acc: 0.755990 lr: 0.000074\n", + "INFO:root:epoch: 97/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.773270 acc: 0.755750 lr: 0.000074\n", + "INFO:root:epoch: 97/100 et: 12s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.771640 acc: 0.756611 lr: 0.000074\n", + "INFO:root:epoch: 97/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.771903 acc: 0.756019 lr: 0.000074\n", + "INFO:root:epoch: 97/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.777557 acc: 0.754911 lr: 0.000074\n", + "INFO:root:epoch: 97/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.780185 acc: 0.753664 lr: 0.000074\n", + "INFO:root:epoch: 97/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.779205 acc: 0.755417 lr: 0.000074\n", + "INFO:root:epoch: 97/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.780044 acc: 0.756048 lr: 0.000073\n", + "INFO:root:epoch: 98/100 starts\n", + "INFO:root:epoch: 98/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.707994 acc: 0.771875 lr: 0.000073\n", + "INFO:root:epoch: 98/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.712420 acc: 0.775000 lr: 0.000073\n", + "INFO:root:epoch: 98/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.744193 acc: 0.769792 lr: 0.000073\n", + "INFO:root:epoch: 98/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.754451 acc: 0.761719 lr: 0.000073\n", + "INFO:root:epoch: 98/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 loss: 0.754195 acc: 0.758125 lr: 0.000073\n", + "INFO:root:epoch: 98/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.764942 acc: 0.757813 lr: 0.000073\n", + "INFO:root:epoch: 98/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.758779 acc: 0.757589 lr: 0.000073\n", + "INFO:root:epoch: 98/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.766650 acc: 0.755078 lr: 0.000072\n", + "INFO:root:epoch: 98/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.757546 acc: 0.757639 lr: 0.000072\n", + "INFO:root:epoch: 98/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.761171 acc: 0.758125 lr: 0.000072\n", + 
"INFO:root:epoch: 98/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.762360 acc: 0.759943 lr: 0.000072\n", + "INFO:root:epoch: 98/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.770996 acc: 0.758594 lr: 0.000072\n", + "INFO:root:epoch: 98/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.768122 acc: 0.759615 lr: 0.000072\n", + "INFO:root:epoch: 98/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.772493 acc: 0.756250 lr: 0.000072\n", + "INFO:root:epoch: 98/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.770627 acc: 0.757500 lr: 0.000072\n", + "INFO:root:epoch: 98/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.768439 acc: 0.757227 lr: 0.000071\n", + "INFO:root:epoch: 98/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.781094 acc: 0.754228 lr: 0.000071\n", + "INFO:root:epoch: 98/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.785662 acc: 0.754340 lr: 0.000071\n", + "INFO:root:epoch: 98/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.783487 acc: 0.755099 lr: 0.000071\n", + "INFO:root:epoch: 98/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.780362 acc: 0.755156 lr: 0.000071\n", + "INFO:root:epoch: 98/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.784881 acc: 0.754613 lr: 0.000071\n", + "INFO:root:epoch: 98/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.782945 acc: 0.756108 lr: 0.000071\n", + "INFO:root:epoch: 98/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.784274 acc: 0.756522 lr: 0.000071\n", + "INFO:root:epoch: 98/100 et: 11s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.781904 acc: 0.757292 lr: 0.000070\n", + "INFO:root:epoch: 98/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.778922 acc: 0.758000 lr: 0.000070\n", + "INFO:root:epoch: 98/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.776607 acc: 0.758053 lr: 0.000070\n", + "INFO:root:epoch: 98/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.776517 acc: 0.758102 lr: 0.000070\n", + "INFO:root:epoch: 98/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.776057 acc: 0.758147 lr: 0.000070\n", + "INFO:root:epoch: 98/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.775508 acc: 0.757759 lr: 0.000070\n", + "INFO:root:epoch: 98/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.774023 acc: 0.758854 lr: 0.000070\n", + "INFO:root:epoch: 98/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.771974 acc: 0.759173 lr: 0.000070\n", + "INFO:root:epoch: 99/100 starts\n", + "INFO:root:epoch: 99/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.837768 acc: 0.746875 lr: 0.000069\n", + "INFO:root:epoch: 99/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.823928 acc: 0.743750 lr: 0.000069\n", + "INFO:root:epoch: 99/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.777027 acc: 0.758333 lr: 0.000069\n", + "INFO:root:epoch: 99/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.774453 acc: 0.763281 lr: 0.000069\n", + "INFO:root:epoch: 99/100 et: 2s eta: 12s batches: 50/313(15%) samples: 1600 loss: 0.784160 acc: 0.758125 lr: 0.000069\n", + "INFO:root:epoch: 99/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.773204 acc: 0.759896 lr: 0.000069\n", + "INFO:root:epoch: 99/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.767902 acc: 0.762500 lr: 0.000069\n", + "INFO:root:epoch: 99/100 et: 3s eta: 10s 
batches: 80/313(25%) samples: 2560 loss: 0.773605 acc: 0.759766 lr: 0.000069\n", + "INFO:root:epoch: 99/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.792748 acc: 0.755903 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.793041 acc: 0.755625 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 5s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.783605 acc: 0.757955 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.784180 acc: 0.757292 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.787015 acc: 0.759135 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.784868 acc: 0.758929 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.783041 acc: 0.760000 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 7s eta: 7s batches: 160/313(51%) samples: 5120 loss: 0.787532 acc: 0.759961 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.780208 acc: 0.763235 lr: 0.000068\n", + "INFO:root:epoch: 99/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.782567 acc: 0.761632 lr: 0.000067\n", + "INFO:root:epoch: 99/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.782641 acc: 0.760197 lr: 0.000067\n", + "INFO:root:epoch: 99/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.780030 acc: 0.760781 lr: 0.000067\n", + "INFO:root:epoch: 99/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.777667 acc: 0.761310 lr: 0.000067\n", + "INFO:root:epoch: 99/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.778561 acc: 0.760938 lr: 0.000067\n", + "INFO:root:epoch: 99/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.784298 acc: 0.758967 lr: 0.000067\n", + "INFO:root:epoch: 99/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.782546 acc: 0.758854 lr: 0.000067\n", + "INFO:root:epoch: 99/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.782698 acc: 0.758625 lr: 0.000067\n", + "INFO:root:epoch: 99/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.781098 acc: 0.758413 lr: 0.000066\n", + "INFO:root:epoch: 99/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.781811 acc: 0.758796 lr: 0.000066\n", + "INFO:root:epoch: 99/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.778903 acc: 0.760156 lr: 0.000066\n", + "INFO:root:epoch: 99/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.774765 acc: 0.761422 lr: 0.000066\n", + "INFO:root:epoch: 99/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.775532 acc: 0.760313 lr: 0.000066\n", + "INFO:root:epoch: 99/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.778129 acc: 0.759173 lr: 0.000066\n", + "INFO:root:epoch: 100/100 starts\n", + "INFO:root:epoch: 100/100 et: 0s eta: 13s batches: 10/313(3%) samples: 320 loss: 0.728071 acc: 0.762500 lr: 0.000066\n", + "INFO:root:epoch: 100/100 et: 0s eta: 13s batches: 20/313(6%) samples: 640 loss: 0.806044 acc: 0.764063 lr: 0.000066\n", + "INFO:root:epoch: 100/100 et: 1s eta: 12s batches: 30/313(9%) samples: 960 loss: 0.840349 acc: 0.759375 lr: 0.000066\n", + "INFO:root:epoch: 100/100 et: 1s eta: 12s batches: 40/313(12%) samples: 1280 loss: 0.832785 acc: 0.755469 lr: 0.000065\n", + "INFO:root:epoch: 100/100 et: 2s eta: 11s batches: 50/313(15%) samples: 1600 
loss: 0.830045 acc: 0.754375 lr: 0.000065\n", + "INFO:root:epoch: 100/100 et: 2s eta: 11s batches: 60/313(19%) samples: 1920 loss: 0.827730 acc: 0.751042 lr: 0.000065\n", + "INFO:root:epoch: 100/100 et: 3s eta: 11s batches: 70/313(22%) samples: 2240 loss: 0.814045 acc: 0.758482 lr: 0.000065\n", + "INFO:root:epoch: 100/100 et: 3s eta: 10s batches: 80/313(25%) samples: 2560 loss: 0.798234 acc: 0.760547 lr: 0.000065\n", + "INFO:root:epoch: 100/100 et: 4s eta: 10s batches: 90/313(28%) samples: 2880 loss: 0.800798 acc: 0.757986 lr: 0.000065\n", + "INFO:root:epoch: 100/100 et: 4s eta: 9s batches: 100/313(31%) samples: 3200 loss: 0.792712 acc: 0.757500 lr: 0.000065\n", + "INFO:root:epoch: 100/100 et: 4s eta: 9s batches: 110/313(35%) samples: 3520 loss: 0.783494 acc: 0.760227 lr: 0.000065\n", + "INFO:root:epoch: 100/100 et: 5s eta: 8s batches: 120/313(38%) samples: 3840 loss: 0.777760 acc: 0.761719 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 5s eta: 8s batches: 130/313(41%) samples: 4160 loss: 0.774788 acc: 0.762981 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 6s eta: 7s batches: 140/313(44%) samples: 4480 loss: 0.766845 acc: 0.765848 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 6s eta: 7s batches: 150/313(47%) samples: 4800 loss: 0.765367 acc: 0.767292 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 7s eta: 6s batches: 160/313(51%) samples: 5120 loss: 0.762996 acc: 0.768945 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 7s eta: 6s batches: 170/313(54%) samples: 5440 loss: 0.766414 acc: 0.768382 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 8s eta: 6s batches: 180/313(57%) samples: 5760 loss: 0.768261 acc: 0.766319 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 8s eta: 5s batches: 190/313(60%) samples: 6080 loss: 0.768353 acc: 0.765954 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 9s eta: 5s batches: 200/313(63%) samples: 6400 loss: 0.769359 acc: 0.765156 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 9s eta: 4s batches: 210/313(67%) samples: 6720 loss: 0.771308 acc: 0.765030 lr: 0.000064\n", + "INFO:root:epoch: 100/100 et: 10s eta: 4s batches: 220/313(70%) samples: 7040 loss: 0.771582 acc: 0.764631 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 10s eta: 3s batches: 230/313(73%) samples: 7360 loss: 0.776534 acc: 0.762500 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 10s eta: 3s batches: 240/313(76%) samples: 7680 loss: 0.782496 acc: 0.759635 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 11s eta: 2s batches: 250/313(79%) samples: 8000 loss: 0.782019 acc: 0.760250 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 11s eta: 2s batches: 260/313(83%) samples: 8320 loss: 0.780009 acc: 0.761058 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 12s eta: 1s batches: 270/313(86%) samples: 8640 loss: 0.776966 acc: 0.760880 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 12s eta: 1s batches: 280/313(89%) samples: 8960 loss: 0.776915 acc: 0.761607 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 13s eta: 1s batches: 290/313(92%) samples: 9280 loss: 0.773110 acc: 0.763901 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 13s eta: 0s batches: 300/313(95%) samples: 9600 loss: 0.776147 acc: 0.762917 lr: 0.000063\n", + "INFO:root:epoch: 100/100 et: 14s eta: 0s batches: 310/313(99%) samples: 9920 loss: 0.777149 acc: 0.762601 lr: 0.000062\n" + ] + } + ], + "source": [ + "logger = logging.getLogger()\n", + "logger.setLevel(0)\n", + "trainer.fit(train_loader, val_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", 
+ "output_type": "stream", + "text": [ + " epoch train_loss train_acc lr val_loss val_acc\n", + "21 1 4.597061 0.0185 0.003120 4.604409 0.03\n", + "22 2 4.576668 0.0248 0.006250 4.550752 0.03\n", + "23 3 4.429581 0.0333 0.009380 4.132248 0.03\n", + "24 4 3.932376 0.0777 0.010000 3.541716 0.11\n", + "25 5 3.252122 0.1659 0.010000 3.084885 0.17\n", + ".. ... ... ... ... ... ...\n", + "116 96 0.799725 0.7515 0.000077 0.671766 0.78\n", + "117 97 0.781655 0.7553 0.000073 0.714923 0.77\n", + "118 98 0.771329 0.7589 0.000070 0.513621 0.84\n", + "119 99 0.778845 0.7587 0.000066 0.385253 0.89\n", + "120 100 0.777107 0.7624 0.000062 0.483041 0.85\n", + "\n", + "[100 rows x 6 columns]\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "train_result = pd.read_csv('./tdnn_xvec/train.log')\n", + "train_result = train_result[train_result.index>20]\n", + "print(train_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA68AAALYCAYAAACTyMQkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAACcVUlEQVR4nOzddZiU1RvG8fvdpbthF2nEAgQFlA4BAUVFRAzs7u6un4otJraioihiIIqigIiKhGIhioAg3V275/fHw7DBxszuzLwT3891zTUweXZ3dmfu9zznOZ5zTgAAAAAAxLIUvwcAAAAAAEBhCK8AAAAAgJhHeAUAAAAAxDzCKwAAAAAg5hFeAQAAAAAxj/AKAAAAAIh5hFcAAAAAQMwjvAIAkprneYd5nveq53nzPM/bsvv0p+d5wz3POzzb7Sp7nrfI87xNnuc1yuexnvE8z3med3W2y87cfVlBpzuDGGfD3bf9JCxfOAAAcaaE3wMAAMAPnuelSnpU0uWSdkiaIOkDSZmS9pV0sqTzPM873Tn3hnNuved550v6VNJLnucd4Zxz2R6vp6QLJX0r6fE8nvILSVPzGc7EsHxRAAAkMMIrACBZ/U8WXGdIOsE5tyD7lZ7nVZZ0o6Qqgcucc+M8z3tZ0tmSLpH01O7bVpL0kqStks5yzmXm8XzjnXMPh//LAAAgOVA2DABIOp7n7SvpWkmrJPXNHVwlyTm33jl3k6Thua66WtJiSQ94ntd492WPSaov6Wbn3F8RG3gIPM/r6nne557nrfU8b6vneb96nne953kl87jtIM/zvvE8b6Xneds8z/vP87xPPc/rlet2PT3PG+953lLP87Z7nrfM87yvPc87KXpfGQAgWRFeAQDJ6EzZe+DzzrmVBd3QObc91//XSzpPUnlJr3ied7RsJvYbSU9GZLQh2h0mv5LUXtJ7koZJ8iQ9KGm053lettteIuldSWmS3pEF8S8lNZN0ZLbbHS1pvKTmkj6S9IishLq6pBMi/kUBAJIeZcMAgGTUYff5V0W5s3PuM8/zXpJ0jqTDJG2RlQu7Au7W2/O8Cvlc95xzbllRxpLb7nLn5yVtk3SYc+6P3ZffImmcpKMlnS7ptd13OUfSEkktnXNbcj1W9Wz/PVvSTkmtnHMrCrgdAAARQXgFACSjOrvP/yvGY9wm6SxJpSXd4pybV8jte+0+5WWMpLCEV0nHSaok6clAcJUk59xOz/NulPSjpDOUFV4la1iVkfuBnHOrc120c/epsNsBABB2lA0DAFA0NynrffSY3d2LC3Kdc87L5/RTGMd18O7zSbmvcM5Nl7Qp220kKxVuKOlXz/Pu8TzvCM/zyuXxuO/ISqV/9TzvYc/z+u2e5QUAICoIrwCAZBSY5axblDt7ntdV0qWSvpf0sqx0+KrwDK3YKu0+X57P9cuz3UaShko6X9IGSbfK1ruu8TxvhOd5tQM3cs69I2mgpPmyr3WspFWe533seV7T8H4JAADsjfAKAEhGgf1We4R6R8/zyssC63ZZ2fBVkhZJusfzvGZhG2HRbdh9Xjuf62tnu42cecE5d6ikWpIGyRoznSpr5KRstx3tnOskqZps7exbu8/HeZ5XKqxfBQAAuRBeAQDJ6FVJmZLO9zyvRkE39DyvdK6LHpTUWNLtzrk5zrkNsu7DZWTdh/1+b/1p93mX3Fd4nneIpArZbpODc26lc+4959wxu2/TJa/S4N3bCI11zp0h6UNJTSUdEJbRAwCQD7/fYAEAiLrde7E+LKmmpLGe5zXIfRvP8yp6nnevrKQ2cFl3SRfLyoUfzfZ4n8tmYztIujyyoy/Uh7KZ1XOzzwR7nldC0gO7//t6tsu75n4Az/PKSqosaZd2N3LyPK9z7nW9u4N6IPxvC+PXAADAXryCu/oDAJCYdgexxyRdJuu2+6Wk32Uzsk1knYErSTrNOTdi9zY3v8g6Fbd2zs3J9XiVJf0qK6ltGeg+7HnemZJekfSFssqVc5vjnBtZyHgbytabLpY0IZ+bTXHOveh53smSRsiaM70jaa2koyQdJOkTSccEtvXxPG+dpHWSfpC0UDaD3Fc2m/qUc+6y3bf7affX/q2kBbJ9Y7tLaiXpE+dc/4LGDwBAcRFeAQBJzfO8wyVdKKmzpDRZVdIiSZMlPe+cm7b7ds9IukjS9c65h/J5rL6SPt19327OOZctvBbkQ+fccYWMs6EsvBbkNefcmbtv303WEfkwWSCdJ5txfdQ5t2e7G8/zLpKF1ZaycLpR0p+yvWJHZAu5g2UNmw6VfZ+2SfpHtuXO8865HYWMDQCAYiG8AgAAAABiHmteAQAAAAAxj/AKAAAAAIh5hFcAAAAAQMwjvAIAAAAAYl4JvwcQiho1ariGDRv6OobNmzerfPnyvo4BCOD1iFjC6xGxhNcjYgmvR8SSWH89zpgxY5VzrmZe18VVeG3YsKGmT5/u6xgmTpyobt26+ToGIIDXI2IJr0fEEl6PiCW8HhFLYv316Hnewvyuo2wYAAAAABDzCK8AAAAAgJhHeAUAAAAAxDzCKwAAAAAg
4YAyZWwfw08+KfxD6ahRFrLjZT10+/Y2S1qxYuizZG3aWNOl337LuuyLL2xWPJjwumNHaNtiLF9unV9POim0fUOLo1Qp+x7lDq/z5lkzoQsvtAMbuZ1zjq1tfeedva+bO1faZ5/Q11+H4uijbc1rqNuO3H239QC4/PKiN9SaM0dlVqwI/zZGQ4bYrPZtt2W9Boq6D3OkPPGEzQJfeGFo1SEzZtgSl3CVwvftaweRVqywv1uDB9vBpk8/tYMrsaxfP/vd+eYbv0cSEwivAJBsSpSwhhJ5bZkzdap9cA/1A0PDhsqMtbVCyaJECSsX/PtvK9MrTGCG9thji/e8gb13C+poumOHPd+xx0Z2C5dwGzHCPtSGWuYcKHHMvu513DjbIqiwtXudOtnvXSilw6NG2QxxtPc+7tLFZpjXr8+67P777bWY31rLww6zg2J5lQ5HqtNwdr17288zlNLhrVvtdyo93fZLbdcu54GJYAXW2oZ7RtTzrMS9Vi2btb/9dqlevfA+R3Glp1tX9K++kt54I7j7hKtZU3aBZmk33GCz061a2XtgxYrhe45I6d7dDgCMHev3SGIC4RUAklHfvrbFyuzZWZdt2GAzcqGUDCM29O1rH4zvvrvg/R8lKxk++GCpUaPiPWedOhaaXnkl/y6uEyZYwImXkuGAJk0sTIaqaVOragise83MtOBy5JGF7/dYsaLNBIUSXt9+W2re3E7R1KWLHfgKdJJduFB67TXr2JqWlvd9PM9mX7//3tZBBjhna2XDUR5akEqVrBvzxx8Hf5+337bfp8DBjOXL7QDFs8+GNos4fry21KsXvu6/2VWrZp2HL73USt5j0QUX2Gz91VcHt/drOJs1BRxyiIX8V1+135fPP7ff1XhQvrwFWNa9SiK8AkByymvLnB9+sA9khNf49Mgjtp7wrLOkZ56xhi4ffGBrpX74wWaMZs+2wFGckuHsrrjCPmTm14hn1CgLDeHcOiKWeZ6Fm8DM66xZtua1sJLhgO7drSPqpk2F33bhQquUiPasq2TVGSVKZJUOP/igfe3XX1/w/U47zWY/s79eVq+2QBPpmVfJSof/+MNKnAvjnFUVtGxpHbb79rXfn65dpYsvtu1jVq3K//47d9rv3qWXShMmaE04Gg/l5/DDbWY4VqsbUlJsBnv1atviqzDhbNaUfQznnmtr/cePz795Wqzq188qFIq7/joBEF4BIBmlp9vsW/bw+t139gE0HN0wEX0HHWQlcR9/bN1ezzhDOv54C46HH26zDQcfbLOB4QqvrVvbh/lhw/ZuhLNzp83yHnNM7G0/EUlt21rI2bbNyhKl4Nc6du9u38dg9sYM7PF50klFG2dxlCtnX+fkydJ//1kYPeuswktWa9a018Prr2etz450p+HsAt21gym/nDTJfo6XX561nrh2bZv9euwx+9t58ME5O25v2mR7rA4ZYrN8vXrZvp/9+mmRHwcZYknr1rYv+OOPW9VPQcLZrCm7++6z12w8dsUPlD2zZQ7hFQCSVt++9iE5sG5t6lSpRQubKUN8uu8+a+yxbJk1Upo1yz6sjR0rjRwpvfCCdXxt1Sp8z3nVVbZn7JgxOS//+msrJ463kuHiatPGAujs2RZeDz007wZGeenQwWYmCysdHjfOSsSPOso6DfuhSxcrj77zTlujeOONwd3vnHNsxjJQvhvN8NqkiTU3C6Z0+IknLOScckrOy1NSbKuo77+3v5U9e1rTtKOPtrXNgwbZz33AAPudWLVK+uADbS9qZ+9Ecs899rtx110F32769PA2a0oETZva7wilw4RXAEha2bfMycy0mVdKhuNfuXI2Q9S0qX0A7NzZSs4GD7ayuUGDwvt8Rx9tASr3tjmjRlkXz1jZtiNa2ra18y++sN+pwIxJMMqXt6ZABYXX776zhjMtWkhvvVW8sRZHly4WRF580cqBg11D3bu3VLduVunw3LlWghyJ9aB5Ofpom1XdsCH/28yfb3sTX3BB/nt/tm5tIeu88+yg0G+/WTnxpEl28Ojll61RWaxuweKHxo3te/rSS7bOOS8ZGdYMLJzrXRNFv372t2HzZr9H4ivCKwAkq/btbeZg3DhroLJhA+EVoUtNtdLKb7/NalS0a5ett+3f37bVSSb16ll57BNP2AfxYNe7BnTvbmWTeYWr336z2da6de331s8qiY4drZw2JUW6+ebg75eaKp15pjXMWbzYwmuTJuHb/7Qw/ftbSfsXX+R/m6eftq/toosKfqzy5aXnn7c1u//8Iz36qIX6aH0t8ei22+yAwC235H19oFlTJNcIx6t+/Wyv11CauiUgwisAJKuSJbO2zAmssSO8oijOOsu65Qa2zZk0yZqzJFvJsGShp21ba9RUuXLoa8i7d7fQm3tPxwULbNayTBkLXrVrh23IRVK5ss0qX3CB7YMairPPtmqPV1+NzjY52bVvb8168isd3rTJZpNPOMH2ng1G5crR22c33tWqJV1zjXVInjZt7+sDzZqYed1bly52wCTJS4cJrwCQzPr2tYYrw4fbhwq/1s8hvlWqZGsZ33nHXk+jRtmHrFBKZhNJYNaoV6/QZ+Hat7eusdlnV1assOC6ZYt1So1WiW1hxo61ztahatzYQvrLL9va7GiG1xIl7HX56ad2kCC311+3PgBXXBG9MSWba66x6oQbbth7y6Hp0yPTrCkRlC5ta6zHjg1tq6YEQ3gFgGQWKGmcOdNmXZk9QFFddpmFgWHDpNGjrbw1v/WCiS6w7jXUkmHJvmft22eF1w0bLGwtXmwfWqO9p2uknHOOrS3dti264VWy0uGVK7PK3AMyM6Unn7Sf3+GHR3dMyaRiRSsfnjjRysezmzGDZk0FGTjQGuQVVPae4AivAJDM6ta1fQwlSoZRPI0b2xY8jzxiwSAZS4YDjjxSeuop6dRTi3b/7t2tU/SyZfY9/fln24IlkX5Hjz/eym2l6IfXI4+0cJS7dPiLL6yRUPbtcRAZF1xgTb5uvNEOGkhZzZpY75q/E0+U0tKkhx/2eyS+IbwCQLILzA4l0gdj+OPKK61ZU9my1lwkWZUsaXvtFrVZVffuVhbYpYvNwL76auJ9P8uWzQr30Q6vVatKnTpJn3yS8/InnpDq1LGAgMgqVUq69147MPP223ZZoFkT613zV7q0HVz54gsL+kmI8AoAye6SS6Trrgu9sQyQW+fOFgoGD7Z1ayiaww6z4PvXX9Jjj0lDhvg9osi45x5bH52eHv3n7t/f9uL991/7/59/Wgfniy6yYIXIO+kkKxG+7TZpxw5b7yoRXgtz4YW2DdkjjxTt/i+/rBKbNoV3TFFEeAWAZFe/vjR0KNs7oPg8z9axvfyy3yOJb6VLS3feaXvnXnmlz4OJoGrV/CsvP/poOx871s6HDbPQesEF/ownGaWkSPffb2ufn3/e1rvSrKlwVarYnt0jR0qLFoV23y+/lM45R2n5dduOA4RXAAAQPqmprBcMhxtuoONtJDVrJjVtaute162z0uyTT/Z/C6Jkc+SRViZ/993S5MlS69Y0awrGlVfa0oInnwz+Pjt
2WGO9Jk3038CBERtapBFeAQAAkFw8z2Zfv/rKmmtt3mxrCRFdnic98IC0apWtf6VkODgNGtja7Oeft62dgvHEE7au+MknlRnHpfGEVwAAACSf/v2l7dtt1q9TJ+mQQ/weUXJq1y6rfJzwGrxrrpE2bpReeKHw2/73n3TXXfaaj/Pmb4RXAAAAJJ9OnaRKlaSdOynR9tuDD9re0Ece6fdI4sehh1rJ9eOPW0lwQa691jrBP/54NEYWUYRXAAAAJJ9SpaRjj83aoxj+adzYti6qVcvvkcSXa6+1WdV33sn/NhMnWnOnG2+073OcI7wCAAAgOQW63NJtHfGob1/pwAOlhx+2Bk657dwpXXqp1LChNYFLAIRXAAAAJKeyZW3rESAeeZ7Nvs6ebdvg5PbUU9Jvv1m5cNmyUR9eJBBeAQAAACAenXKKlJZms6/ZLV0q3XGHzc4ec4w/Y4sAwisAAAAAxKPSpW2bp/HjbbuhgBtusG7aTzyRUHtvE14BAAAAIF5dcIFUvrz0yCP2/ylTpDfesJLifff1d2xhRngFAAAAgHhVtap07rnS229LCxZIl1wi1asn3Xyz3yMLO8IrAAAAAMSzK6+0jsO9e1sDp0cftdnYBEN4BQAAAIB41rChNGiQ9NdfUs+e0sCBfo8oItjUCgAAAADi3S23SH/+aVvkJFCTpuwIrwAAAAAQ75o3l2bO9HsUEUXZMADg/+3de7BdZXnH8e+PhGsLknApJRQEtbYwEEcptRUJ4AVhkFDBCrUFSqHKVKu1rZQWq0VRvAwohRZQLLQRYbi0AaejJAGiY8MUSk0lRSo2QWOhgoSr3PP0j7XOsGezk3NqTvZeOef7mTmzzlnrXTvPyTyTnd9+37WWJElS5xleJUmSJEmdZ3iVJEmSJHWe4VWSJEmS1HmGV0mSJElS5xleJUmSJEmdZ3iVJEmSJHWe4VWSJEmS1HmGV0mSJElS5xleJUmSJEmdZ3iVJEmSJHWe4VWSJEmS1HmGV0mSJElS5xleJUmSJEmdZ3iVJEmSJHWe4VWSJEmS1HmGV0mSJElS5xleJUmSJEmdZ3iVJEmSJHWe4VWSJEmS1HmGV0mSJElS5xleJUmSJEmdZ3iVJEmSJHWe4VWSJEmS1HmGV0mSJElS5xleJUmSJEmdZ3iVJEmSJHWe4VWSJEmS1HmpqlHXMGFJHgDuHXEZOwIPjrgGaYz9qC6xH9Ul9qO6xH5Ul3S9H/eoqp0GHdikwmsXJLm9qvYfdR0S2I/qFvtRXWI/qkvsR3XJptyPLhuWJEmSJHWe4VWSJEmS1HmG1/+/S0ZdgNTDflSX2I/qEvtRXWI/qks22X70mldJkiRJUuc58ypJkiRJ6jzDqyRJkiSp8wyvE5DkF5Jck+SRJI8muS7J7qOuS1NbkmOTXJvk3iRPJrk7ySeSbNs3blaSLyR5MMkTSRYn2XdUdWt6SPLVJJXkY3377UcNTZIjknw9yePt+/PtSQ7tOW4/aiiSvC7JjUl+lOSxJHckOblvzFZJPp3kvvZ9fVmSg0ZVszZ9SXZL8tdtL/2kfV9+6YBxE+q9JJslOSPJqiRPJVme5Jih/DITZHgdR5JtgJuAXwJOBH4HeAVwc5KfGWVtmvL+BHge+HPgLcDfAqcBi5JsBpAkwA3t8fcCxwCb0/TnbqMoWlNfkuOBuQP2248amiTvAhYC/wb8BvB24Gpgm/a4/aihSLIfsJimv04F3gbcBlya5LSeoZe2x/8SOBK4D/haklcNtWBNJS8HfhNYA3xjPeMm2nsfBT4CXAAcDtwKXJ3kiEmtegN4w6ZxJHkfcC7wyqq6p923J/Bd4INVde4o69PUlWSnqnqgb98JwOXAG6rqpiTzgX8CDq2qm9sxLwFWAguq6g+HXLamuCSzgLuAPwKuAM6uqjPbY/ajhqKdWbgLOKOqPruOMfajhiLJx2k+cJ5dVY/37F8GUFW/lmQu8C3g5Kr6u/b4TGAFcHdVHTX0wrXJS7JZVa1tvz8F+DywZ1Wt6hkzod5LsjPwA+Ccqvpwz/lLgJ2qar+h/FLjcOZ1fEcBt44FV4CqWgl8E5g/sqo05fUH19Zt7XZOuz0K+J+x/5i15z1CM9tgf2pj+CRwZ1V9ecAx+1HDcjKwFrhoPWPsRw3LFsCzwJN9+x/hhf9rH9WOuWrsYFU9B1wJHJZkyyHUqSlmLLiOY6K9dxhNLy/oO38BsG87eTdyhtfx7QPcOWD/CmDvIdcizWu3d7Xb9fXn7kl+dihVaVpIciBwAvAH6xhiP2pYDgS+AxyX5HtJnktyT5Le3rQfNSyXtdvzk+yaZPskpwJvAM5rj+0DrKyqn/Sdu4ImMLx8KJVqOppo7+0DPA3cM2AcdCT3GF7HN5tmHXm/h4BZQ65F01iSOcBZwOKqur3dvb7+BHtUkyTJFsDFwGeq6u51DLMfNSy70tx/4tPAOcCbgUXABe3lPmA/akiq6k7gYJoZ/R/S9N2FwLur6sp22Hj9OHsjl6npa6K9Nxt4uF58TWmnenTmqAuQNL52hmAh8BzwuyMuR9PTB4GtgbNHXYhE8+H7tsBJVXVdu++m9lrYM5KcP7LKNO0keQVwLc0M1btplg/PBy5K8lRVfWmU9UlTieF1fGsY/Onsuj7FkCZVkq1prtHaC5hXVat7Dq+vP8eOSxskzaPB/gI4Bdiy79qsLZNsDzyG/ajh+THNzOuivv030txd+OexHzU8H6e5pvDIqnq23bckyQ7A55J8mabf9hhw7lg/PjTgmDQZJtp7a4Dtk6Rv9rVTPeqy4fGtoFkD3m9v4D+HXIummSSbA9cA+wNHVNW3+4asrz+/33vXQ2kD7AVsRXPThjU9X9DcYXMNsC/2o4ZnxTjH12I/anj2BZb3BNcx/wrsAOxM0497to9g7LU38Awvvs5QmiwT7b0VwJbAywaMg47kHsPr+K4HXptkr7Ed7bKk17XHpI2ifZbrl4BDgaOr6tYBw64H5iSZ13PedsBbsT81eb4FHDLgC5pAewjNm5/9qGH5x3Z7WN/+twCrq+p+7EcNz/3Aq9p7A/T6VeApmhmrG2ieA/v2sYPt40reAdxYVU8PqVZNPxPtva/SrCB4Z9/5v03zlIGVQ6h1XC4bHt/ngfcAC5OcCRTNA3x/QHPzEmljuZDmH5qzgSeSvLbn2Op2+fD1wDJgQZI/pZkBOwMI8Kkh16spqqoeBm7p358E4N6quqX92X7UsPwzcDNwcZIdgf+m+ffyzbxwXwD7UcNyAXA1cEOSv6G55vUo4HjgvKp6Bvj3JFcBn21XVa0ETgP25MVhQZqwJMe2376m3R6e5AHggapaWlUT6r2q+lGSc2nuG/AYcAdNwD2Upp87IS++oZT6tdd7nQe8ieZNbwnw/t4HAEuTLckqBl+jAPBXVfWRdtxs4DPA0TRLO5cBH6iq5Ru/Sk1nSQo4u6rO7NlnP2oo2lnUTwDH0lzb+h3gnKq6omeM/aihSHI4cDrNUvWtgO8BlwAXV9Xz7Zixm979FrA9sBw4fewDQOmn0b4XD7K0qg5ux0yo95LMoPmQ71RgF+
Bu4KyqumZj1P7TMLxKkiRJkjrPa14lSZIkSZ1neJUkSZIkdZ7hVZIkSZLUeYZXSZIkSVLnGV4lSZIkSZ1neJUkSZIkdZ7hVZKkKSrJqiQLRl2HJEmTwfAqSZIkSeo8w6skSZIkqfMMr5IkTYIkc5Ncn2RNkieTfDPJ63uOX5ZkdZJfT3JbkqfaZb3vHfBaByRZnOTxJE8kWZLkgAHj5iVZlOSRdtzyJL83YNxxSe5qx9ye5MDJ/xuQJGnjMrxKkrSBkrwa+BdgNnAqcAzwY2Bxktf0DN0OuAq4HDgauAU4P8lJPa+1H7AUmAWcBJzQnrc0ydyecfOBJcAWwLuA+cAXgT36yns98MfAh4B3ADOAryTZfgN/bUmShipVNeoaJEnapCVZAuwKzK2qZ9p9M4A7gbur6ugklwEnAsdX1ZU95y4CfhF4aVVVkmuAN7Y/P9yO2Q5YBdxSVW9LEmAl8CBwQFWtXUddq4CXAHtV1Zp23/7AbcA7q+qKSf2LkCRpI3LmVZKkDZBka2AecDWwNsnMJDOBAIuBg3qGPw9c2/cSVwK7A3Panw8CvjIWXAGq6lHg+vbPAXglzQzrF9YVXHssGwuurW+3293H/+0kSeoOw6skSRtmNs1S3A8Bz/Z9vQeYlWTs/XZNVT3bd/7/ttux8DobuG/An3M/zVJigB3a7eoJ1PdQ7w9V9XT77VYTOFeSpM6YOeoCJEnaxD0MrAUuBP5+0ICqWtus9GVWks37AuzPtdsfttuHgF0GvMwuwNgM6oPtds6AcZIkTUmGV0mSNkBVPZHkG8Bc4I5xlvHOoLmZ05U9+44Dvs8L4XUpcESSbavqMYAk2wJvpbnBE8B/0VwDe0qSS8obWEiSpgHDqyRJG+4DwNeBryW5lGbZ747Aq4EZVfVn7bjHgE8l2RH4LnA8zc2ZTuoJoB8FjgSWJPkkUMDpwDbAWQDtjZ3eD1wH3JTkIuAB4JeBnavqwxv595Ukaei85lWSpA1UVXcAv0LzeJzzgRuBzwH70oTaMY/SzLSeCCwEDgHeV1WX97zWfwAHt2MvB/4BeByYV1XLe8YtBN7U/ngpzQ2dfp9mRlaSpCnHR+VIkjQE7aNy3lhVu426FkmSNkXOvEqSJEmSOs/wKkmSJEnqPJcNS5IkSZI6z5lXSZIkSVLnGV4lSZIkSZ1neJUkSZIkdZ7hVZIkSZLUeYZXSZIkSVLn/R8A1H9HsTQoBwAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA74AAALYCAYAAABbpC7yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAADIbUlEQVR4nOzdd3gU1dcH8O8Noffei0gRCNIEUUgEEQEh2Ht7bWDvvff+UxAVwa7YRYUIqKjgRnpRIXQIBCH0BAKB9Pv+cXbMZrNlZne2Jd/P8+TZZHd29mZ3s5kz59xzldYaRERERERERBVVXKQHQERERERERBRKDHyJiIiIiIioQmPgS0RERERERBUaA18iIiIiIiKq0Bj4EhERERERUYXGwJeIiIiIiIgqNAa+REQUMkqpj5RSWinVIdJjMSilhjjH9KSH265WSq1SSuU6t7nTeb1WSs0P81A9UkrNV0pxLUIiIiILGPgSEZElSqmTnQHtFqXUUefXBqXUVKXUwEiPL1BKqcEAPgJQHcAkAE8BWByBcTzpDLSHhPuxg6GUildK7XaOfUqkx0NEROQqPtIDICKi2KCUqgLgNQC3AygA8BuA7wGUAOgM4FIANyilrtJafxqxgfq3FEA3APvdrh/lvLxaa+0e8HYDcDTUAzPpKgC1Ij0ID0YDaA5AA7hEKXWn1vpYhMdEREQEgIEvERGZ9zwk6F0B4AKt9TbXG5VS9QE8CKBB2Edmgdb6KID1Hm5q6bzc7eE+nraPCK319kiPwYtrISdBJgK4C8AFAKL5BAgREVUiLHUmIiK/lFKdAdwLyZKOcg96AUBrfUhr/RCAqX72VV8p9aBSKtVZGluglNqulJqilGrhYfsGSqnnlFLrnXNvDymlNiql3lNKNXbZroZS6n6l1Gql1GGl1BGlVLpSappS6niX7crM8TV+BnCNc5Otztu1y308zvFVSrVUSk1USm1WSuUppfYppf5QSv2fyzbVlFK3K6XmKqV2On/fXUqpz53Pq+v+5gN4wvnjPGMcSqltrtt4muOrlGqmlHpTKZXh8hgfK6WO87DtNudXHef4M5VS+c75zRe4b++PUqo5gLMAzAfwMoBiSCDsbXu/z5vLthcopX5TSmUrpY457zNFKdXO/ffx8ljlni/XcnKl1LVKqb+c+/7BeXsrpdTTSqmlzrHlOx/3VaVU3UB+J6XUUOdjvuXl/qc6b/f590NERIFhxpeIiMz4P8jJ0ila632+NtRa5/vZVzcATwL4HcC3AI4B6AVgHIAzlVJ9tdbZAKCUUgB+AXCS83ImgKoAjgNwCYAJAA449/spJMu4AMC7kOxje0gJ85cAtngZzzbIfN5znOOYCOCgn98BSqluAOZBynvnOX+XegD6ArgDMl8YABpBSsQdAFIAHALQFcBFAEYqpfpprbc6tzXucxqAj51jg7/xKKWaAVgCoAOAuQA+dz7GlQBGK6UGe8haV4U8pw0BTIeUT18C4Gul1Eit9S/+ngMXV0GOKT7VWu9WSs0FMEIp1VFrne42VrPPG5RSbwC4DcBeAN8AyIK89hcCmAMg2Oz3AwASIa/LzwAOO69PgmStfwOwEFK+fTKAewAkKaUGaa0LLf5O8wFsAnCZUuoerXWe21iuc16+F+TvREREnmit+cUvfvGLX/zy+QU5mNcATrd4v4+c9+vgcl19AI08bHuFc9tHXa470Xnd6x62rw2gpss+SwB872G7agDquvw8xLnPJ/2N1eU2DWC+23UrnNdf6WH71i7fVwfQysM2pwEoAvCe2/VPOvc7xMtzOl/+fXsc++Nu11/rvH6e2/XbnNf/AKCay/XDnNf/ZPF1XguZA13X+fNlzv0842Fbs8/bWOd2KwDUd9uuput7yPn7bLPwfBnPcQ6AHh7u0xRAbQ/XP+q83xUB/k73O7e7zG2bOpCge5WV551f/OIXv/hl/oulzkREZIZRgrwz2B1pKYnO8nDTZ5BA5AwPt5VrkqS1ztWlzZM0AOVluwKt9WH364OhlDoZks37TXto5KW13unyfb7WOtPDNn9AAkZPv6+VsVQHcDEkK/qy280fAkgDMMS1NNjFXVrrApcx/QYgA0B/C49/CiSLP8Plef4BwBEAVyul4ly2Nf28AbjJeXmH1vqQ23bHvLyHrJqqtV7jYRz7tNa5HrZ/23n532tm8Xf6CEAhypeBXwwJft+3NHoiIjKNgS8REYWdUmqYUipFKbVHKVXknINZAikPbemy6VpI4PagUmq2UupmpVSCswT6P1rrHAA/AbhUKeVQSt2tlOqnpBN1KBiBoalyYOdYvlJK7XDOvzXmEPdE2d83EF0B1ACwRLuVz2qtNYA/nD/2crvfQV1aYu1qB6w1KDOCuP+CPi0NxKYDaAtguMu2Vp63/gCOaq3/tDAWq5Z7u0EpdaFS6lel1H6lVLHz9TLK6l1fM9O/k9Z6L+SkwOluc6+vA5APNgMjIgoZzvElIiIzdgM4AUBrABuC2ZFS6mIAX0BKO3+ClKkamdo7IaXBAACtdZFSaihkDu75KF1yaKdS6hWt9USXXV8AKUW9FMD/nNcdUEq9DSm5LYR96jsvy2Vy3SlZH/g3SGD/M4DNAHIhWer/g8xDDkY95+UeL7fvdtvOcMh9Q6cimDwxrpSqhdJss3vg9ymAqyGB8c/O60w/b85tM8yMIwgenzOl1H2Q7PleyFzinQCMkwpPwOU9Cmu/EyDN3y6ENFN73Dk/+BQAX9mUxSYiIg8Y+BIRkRkLIXNjT4c0pQrG45Agop/WerNxpTOLe7/7xlrr/QBuUUrdCqAHpMz0dgATlFI5WusPndvlAngIwENKqU7Osd4C4DFIkPmE+76DcNB52crEtg9B5hkP0lovdL3BeRIgWDnOy+Zebm/utp2dLgRQ1/lV6JaIN5ytlGrkDOoOOq8z87wdhPlseAnkOfakvpfrAXlflKGUioecQMkE0Mv5/jNua47y76ODzkszvxMgJ0G2QMrAnwSbWhERhQVLnYmIyIyPIMHFOKVUE18bOuec+nI8gLWuQa9TH0jTIo+0SNNaTwBwnvPqsV623ay1ngpgqHPcHrcLwjLn5Zkmtj0ewAEPQW9z523uip2XZsu0N0BOJAzw8twnOS//Mbk/K4wy5y8h81PdvxZBsqOXO7ez8rwtA1DLmTH3JxtAM2fQ+h+lVG0AnT3fxasmkOz4Iteg12mQl3EC5n4no/z8PQDtIEtAXQlgKyQgJiKiEGHgS0REfmmtNwF4FdLtdpZSqlx5rlKqrlLqWciyRL5sB9DZuQSPcd96kGWE3PfZwdNjoTSLmefcrqlSqoeH7ZpB/te5Lx0TFK31UgArAQxTSl3hfrtSqrXLj9sBNHKWtBq3VwPwJmRJIXdGuWtbk2PJB/AV5Dm5x20cV0M6Y8/XWge79E8ZStZGTgSw
UWt9qdb6evcvANc7N7/WOVYrz9tk5+VEpVR9t+1qKKUauVy1DPJcXu6yjQLwAqT7txV7Ie+Xvkqp/07EKKVaAnjefWOLv5PhQ0iTqymQ9+iHzoCYiIhChKXORERk1sOQjOxtADYqpX6FNJ8qgWQuh0MyZVf62c+bkCB3pVJqOiQjOApSWuo+T7I3gOlKqSUA1kHmZLaDrLlbAGCSc7vWAP5SSv0FYLVzP82c22nIOrp2uwKyVM6nSqlrACyFlPz2hgRbfZzbvQl5bhYopb6CzKE9AxKo/YPyTaeMpaOedwbzhyCNqN70MZb7IcsjPaeUSoIEYl0BnAtpyHSTj/sG6lpIJ+0PvW2gtV6rlFoKyUb30Vr/BZPPm9Y6RSk1CaXvtx8gJwXaARgJKRH+wflQb0LmzL6nlBoOYB8kKG8Az8+xV1rrEqXUZMg6vn8ppWZB1mIeA2kU1tXD3cy+F4zH2KOUSoFULpTAx3NIRET2YMaXiIhM0VoXa61vhzTi+QLS7OoWAHdAAotvAZystZ7mZ1eTANwKmXM6DsBoAN9DSkXdG1AtB/AK5P9VMoC7IeWmMwD0dykf3gZZm/Wwcz93Oy//BJCktf4mkN/ZF631OsgyNm9DAv97AFwECWxfd9luJqQBVAak2dOFAJY4f4+DHva7FhLE7YcEfc8AuNfPWPYCOBnAWwC6O7cfBGAa5HlaH/Av6oFziaKrIEGbv07ERlBnZH1NPW/ObW+HNCvb4Ly8A8AAAN9A1s41tkuDBMMrIE3OroSclDkVHp5jEx6EzOWNh7zHkyDP7WWeNrbyO7n42Hn5s9Z6RwBjJCIiCxQra4iIiIjCSyn1GICnAZyvtf4u0uMhIqroGPgSERERhZFzGahNzh/ba62LIjkeIqLKgHN8iYiIiMLA2aH6NMic9lYAbmXQS0QUHgx8iYiIiMLjDMjc4b0AXkRp52oiIgoxljoTERERERFRhVapMr5NmjTRHTp0iNjj5+bmonZtq8sJEoUG348UTfh+pGjC9yNFE74fKZrEwvtxxYoV+7XWTd2vr1SBb4cOHbB8+fKIPf78+fMxZMiQiD0+kSu+Hyma8P1I0YTvR4omfD9SNImF96NSKsPT9VzHl4iIiIiIiCo0Br5ERERERERUoTHwJSIiIiIiogqNgS8RERERERFVaAx8iYiIiIiIqEJj4EtEREREREQVGgNfIiIiIiIiqtAY+BIREREREVGFxsCXiIiIiIiIKjQGvkRERERERFShMfAlIiIiIiKiCo2BLxEREREREVVoDHyJiIiIiIioQmPgS0RERERERBUaA18iIiIiIiKq0Bj4EhERERERUYXGwJeIiIiIiIgqNAa+REREREREVKEx8CUiIiIiIqIKjYEvERERERERVWgMfImIiIiIiKhCY+BLREREREREFRoDXyIiIiIiIqrQGPgSERERERFRhcbAl4iIiIiIiCo0Br5ERERERERUoTHwJSIiIiIiogqNgS8REREREVFlpjXw6qvAxo2RHknIMPAlIiIiIiKqzLZsAe67D3jppUiPJGQY+BIREREREVVmDodczpoFlJREdiwhwsCXiIiIiIioMjMC3z17gGXLIjuWEGHgS0REREREVJmlpgJDhgBVqgApKZEeTUgw8CUiIiIiIqqsduwA0tOBs88GBg8GZs6M9IhCgoEvERERERFRZZWaKpdJSUByMrB6NZCREdkxhQADXyIiIqLK5O+/gXPOAfLyIj0SouCVlABXX106R5WsS00F6tYFevWSwBeokOXODHyJiIiIKpPnngNmzADWr4/0SIiCt3Yt8MknFbY8NywcDmDQIJnf26WLfDHwJSIiIqKYtWcP8MMP8v2OHREdCpEtjExvZmZkxxGr9u8H1qyRMmdDcjIwfz5w+HDEhhUKDHyJiIiIKosPPwSKiuR7Br5UETDwDc6ff8plYmLpdcnJQEEB8MsvkRlTiDDwJSIiIqoMSkqAd9+Vrq1VqjDwpdindWljpl27IjuWWJWaClSvDvTvX3rdoEFAw4YVrtyZgS8RERFRZfDrr7Jkyc03A61aMfCl2JeeLpneevWY8Q2UwwEMHCjBryE+HjjrLGDWLKC4OHJjsxkDXyIiIqLKYMoUoEkT4LzzgDZtGPhS7DPKnM89FzhypMLNSQ25w4eBlSvLljkbkpNl/u+SJeEfV4gw8CUiIqLoMGmSlOGS/Xbtkk7O//d/ktlh4BsbLrsMx0+eHOlRRK/UVDmZc/rp8jPLna1ZtEimQLg2tjKMHCmZ3wpU7szAl4iIiKLDV18BCxZwfdlQ+PBDKVm84Qb52Qh8tY7suMi7Y8eA6dPRYOXKSI8kejkccrKsdWv5meXO1jgcMt//lFPK31a/vgTEFWiZKAa+REREFHnHjgFLl8r3O3dGdiwVjdHUauhQWZ8TkMA3Nxc4dCiyYyPvli4FCgpQc9cunqDwZOdOYMsWCc5atZLrGPha43AAffsCdep4vj05WdZJTk8P77hChIEvERERRd6SJUBhoXzPElx7/fILsG0bMH586XVt2sgln+vo5Zy/Gp+bCxw4EOHBRCGjm3NSEtCypXzPUmfz8vLk5IqnMmdDcrJcVpByZwa+REREFHlGkxqAwZjdpkwBmjaVBkAGBr7Rz/VvooJk3GyVmiqZyl69pCy3Zk1mfK1YtgzIz/cd+B5/PNCtGwNfIiIiItukpspBFsBgzE6ZmXLQes01QLVqpdcz8I1uhYXSeGjoUPmZgW95DoesNxsfDygl5c4MfM0zTqwMGuR7u+Rk4I8/KsS0CAa+REQUnN27Of+MglNYCCxcCIwaBTRowGBs/36goMCefb3/ftmmVoaWLSVYqOzPdbT66y+Zg33VVfIzA9+yDhwA0tLKZisZ+FrjcAAJCUDjxr63GzsWKCoCfv45POMKIQa+REQUuB07gHbtKlTXR4qAlSuBo0flILayL7NTUgJ07w688krw+youBt57DzjjDKBTp7K3Va0KtGhRuZ/raGZk40aMQH6jRtLEiUotWCCXroFvy5ac42tWUZGcbPRV5mwYOFCWjKoA5c4MfImIKHArV0q2bt26SI+EYpnRpCYxkYHvnj3Avn3A4sXB7+vnn4Ht24Fx4zzfXtmf62iWmgp07gy0bIm8Vq2Y8XXncMh61P37l15nZHxZgeTf338DR47IZ64/VaoAZ50FzJ4tAXMMY+BLRESBS0uTS5aXUTAcDjnIb9GCwVhGhlwaf1vBmDIFaNYMOPtsz7dX9uc6WpWUSODrDEqOtWzJwNedwwGcfLIEv4ZWraQ8/PDhyI0rVhgVBWYCX0Dm+WZlybzzGMbAl4iIAsfAl4JVUgL8+WdpyV2bNpL1tGuOa6wxAt9t24I7gN+xA/jxR+Daa8s2tXLFwDc6rVkDZGf/9zeR16oV8O+/lfdvwt2RI1Jt5F6myyWNzDOaCbZubW77M8+U6RExXu7MwJeIiAJnBL480KBAuR3ko00bKVWsrO8pI/AFgLVrA9/P++/LSQX3plau2rSRTq3MkEUX1/Vp4cz4ai0nQ0iyjsXF5bOVrVrJJU/
E+mZUFJiZ32uoVw8YMiTm+3kw8CUiosAUFgLr18v3PNCgQLmX3BnL7Pz7b2TGE2kZGdJtGQi83NloanXmmUDHjt63M57rnTsDexwKDYdDXpsOHQAAx4yAjuXOwuGQeaennFL2ega+5qxbJ12xzZY5G5KTgQ0bUDOGq0QY+BIRUWA2bpTgt3VrNhSpDLQGZs0C8vPt3a/bQX6lX192+3ZZYqR2bWD16sD2MWeOPH/emloZKvtzHY20lr+JxMT/ToDkxXrgm5kpyzPZJTUV6NsXqFu37PWhLHXOzQXmzrV/v5FgnGy0kvEFJPAF0HjhQpsHFD4MfImIKDBGNmr4cJl7lp0d2fFQaH38MTBmjFzaRevSkjsjy1nZg7GMDDkJ0KNH4Bnf6dNlbc6xY31vV9mf62iUni6Bm0tQUtCoEVCjRuwuafTYY8CIEfacHM3Pl47nnrKV9eoBtWqFJuP79NNSQRGrr4Gr1FTJjvuqBvGkQwegZ080XLkyJMMKBwa+REQUmLQ0KTcbOlR+ZnlZxZWdDdx/v3z/xx/27XfLFjnIdz2IrVcPqFOn8gZjGRlA+/aS9Q008F2+XDreVq3qezsjk1hZn+to5Ckbp5QEKbGa8d26VZbo2r07+H0tWybBr6dspVKlSxrZKT8f+OAD+d54fWKVh4oCS2bMQNqzz9o/rjBh4EtERIFJS5MlaIyzxgx8K67HHpM5Yb17y0GTXWXt3g7yK2u34YMHgZwcoF07CXyNNX2tyM2VplgnneR/2xo1gKZNK+dzHa0cDsnWd+tW9vrjj4/dwNd4f9mxRJfxmTF4sOfbQxH4fv89sH+/fDYZjcdi1datMqffapmz4bjjoOPj7R1TGDHwJSKiwKSlycE5l5Co2FauBCZPBm6+Gbj+ejmIde08HIzUVKBJk/IH+ZU18DWeVyPjC0jXayv++Ue6tvbrZ277yvpcRytv2Tgj4xtrvRS0tjfwTU2VaQCNG3u+vWVL+/8XTZkCHHeczHGN9YyvW8fwyoaBLxERWXf0qJSpuga+zPhWPCUlEvA2aQI880zpwZJdB38Oh2Ru3A/yK2sw5inwtRosLF8ul1YC38raQTva7Nwpwa2noKRjR1m/1moFQKRlZwPHjsn3wQa+RUXAggW+gzYj42vXCYKNG4H582VZsCFD5P9eLP+vcziAhg2B7t0jPZKIYOBLRETWrVsnBxYJCdJMpEGD2D4YIM8+/BBYsgR45RV5jXv0kIMmO8r9duzwfpDfpo1kbYqKgn+cWLJ9u1y2bw+0aAE0amQ9WFixQu5rzN/1p7KeZIhGvrJxxpSSWCt3Nt5bSgUf+P7zj6w57S/wPXpUpgzYYepUID4euOaa0l4EsVzubFQUxFXOELBy/tZERBQc4wCmZ0+5DMW8KoqsrCzggQckI3vllXJdXJz8bEfG19dBfps2km22oxlOLMnIAKpXB5o1k0AhkAZXK1ZIttds45q2beW1PnrU+niD8fffwGefhfcx7bZzJ/D887Jush0cDmns1qtX+duOP14uYzXw7d9fyvZLSgLfl/GZ4Wv9WTun3uTlAR99BJxzjpxM6t1bXp9YLXfetQvYvLnSljkDDHyJiCgQaWlygG4cjIViXhVF1iOPSLOlt94qG0QlJUn5X7BBaWqq94P8tm3lsrJlIjMypLGVkY0xAl+zZZu5uVKNYaaxlcFY0mjnTmtjDdarr0r5aKzNWXX1zDPydzJ7tj37S00FBg2SDKM7Y53rWA18R46U92cw/QEcDsl8t27tfRuj0sGOE7HffSdN/Yz1sOPj5fWJ1cDXmAZxyimRHUcEMfAlIiLr0tJkjlCVKvIzM74Vy7Jl0tDl1luBE08se5uRLfjzz+Aew+HwfpBfWdeXNZYyMiQkAIcOmQ9K//7bWmMrIHLPdXq6zP3cvz+8j2uXw4dLM9ZTpgS/vwMH5HPVWzauZk35nI21dWR37JATOcOHy8+Blju7rvnti52B75QpEmgPG1Z6XWKi/A5ZWcHvP9yM597oH1AJMfAlIiLrjI7OhlatJOMby9kbEsXF0tCqeXPgqafK396nj8zrDibrceCAlD16O4itzIFvu3alP1ttcGW1sRUQ2cAXsK9DeLh98YU0mzrzTGDOnNL52YEyTiT5KuONxbV8d+yQiiDjBFqgge/69XKSxF/ga1ep8/r18hk3blzZ+bB2nfiLhLQ0+XypVy/SI4kYBr5ERGTNwYNyMOMa+LZsCRQUxOZZcCrr/fclgHr1VaB+/fK3V60KnHpqcIGvv4P8hg0lw1WZAt+8PFm31zXj26OHXJoNFlaskL9Fs42tgNKy0XA+10eOyO8KxG7gO2WK9DiYMkVO+L33XnD7S02V6SP9+3vfJhbX8t2xQ06u1Ksn7+1AA1/j88bXiQEAqFsXqF07+Izv1KnyWXfNNWWv799fXqdYLHdevbpSZ3sBBr5ERGSVsa6oe8YXYLlzrNu/H3joIclqXHaZ9+2SkoBVq+QkSCAcDt8H+UpVvm7Drh2dDY0ayd+WlYyvlWwvINn7Ro3C+1xv3Vr6fSwGvitWyPrW48fL3NuRI+WEUTBdyB0O4OSTgRo1vG/TsaOUveflBf444fbvv6VVBYE0azM4HHJSx+gr4Y1SwU+9ycsDPv4YOPdcaTTnqkYNYMCA2At8Cwsli83Al4iIyAJP84QY+FYMDz0kc0rdG1q5S0yULNeCBYE9jpmDfAa+wmywcOSIHNhaaWxlCPdz7Zq1jMXAd8oUqUi44gr5efx4+eybNSuw/R05IoG0vzLejh3l727btsAeJ9y0Lh/4rlsnQZjV/Tgc8vyY6VYebOD77bdSvTR+vOfbk5Lk9TpyJPDHCLdNm+R5Z+BLRERkwerVUk5mdN4F7F1CgiJj5Uop17zjDv8HRyefLGWAgWQ9Dh8G/vrL/0F+ZQt8jQDQU+C7dq3/JXP+/lsCBKsZXyBygW/LloHPjT18GLjgguDn1lqVkwN8/jlwySWlUwFGj5ZgK9AmV4sWyevrr4w3kCWN/v4buPhiaSQWbjk50snZNfAtLJQgzIqMDHl/+nt+DMGuMjBlCtCpEzBkiOfbk5Lk9Vq0KPDH8OT552X5pFBgYysADHyJiMgqo7GV65l3I/Blxjd2ffYZUK0a8Pjj/retWVPK/Yx1Na0we5Dfpo2UdQaz7mcsyciQJjpGkGBISJCgxbU82JNAGlsZwh34btkicz779Ak847t4MTB9OvDjj/aOzZ/PP5dgzjUbGB8PXHcd8NNPgWVjHQ7pkO9vmZmOHeXSSuD78cfA11/LV7gZ7ynXwBewXu5sdn6vwcj4BtJscc0a6UHg3tTK1SmnyOtlZ7mz1tJX4dVX7dunq7Q0+X26dQvN/mMEA18iIjJP6/IdnQEJhBo2ZOAbq7QGZs4Ehg713NDKk8REWfbo6FFrj2X2IL9NG5kzuXevtf3HqowMOWCvWrXs9T17yqW/YGHFCrm/cRLKijZt5HnOz7d+30Ckp0v2sn37wANfI/gLdM5oILSWbGCvXnLix9X118vJwECaXDkcQN++UknjS7NmMifbSuBrBG
d2LLlklXvge8IJEnwFEvg2bGg+W9mqlZwsOnTI2uMAwLvvygnA//s/79vUrSsnbQI58efNrl1AdrYE3gcO2LdfQ1oa0Lmz7+kllQADXyIiMm/PHvmn7OkAhGv5xq4NG4DNm4HkZPP3SUqSwHTxYmuPlZpq7iC/si1p5L6UkaFbNwmoVq/2ff9AGlsZjOc6XH+/6emSvWzfXuZSBjJXMhKB77JlUjo8fnz5uabt2gGjRkmTKytzWPPzgSVLzGUzlZLnzexavjk5Mt5WraTSwt97yG7ugW+NGhJ8WX3NUlOBwYO9Z2DdBTr15tgxyZCfdx7QtKnvbZOS5LPPrpNFrs9JKJZK8nTCuhJi4EtEROb5micU7LwqipyUFLkcM8b8fU49VQ5ErWQ98vLMH+RXxsDXfX4vIEuzdOzoO1g4fFhOXgTS2AoI73NdXCxl20bgCwSW9XUNfMO1fvjUqZJxvfxyz7ePHw/s3l3692TGsmUSPPmb826wsqTRwoUyVeC116SLerizvjt2lHZZNljt7Lx7N7Bxo/nnBwi82eI330inem9NrVwlJcnrtmyZtcfwxnhO4uPt7xh97Jic2GTgy8CXiIgs8BX4MuMbu1JSpHzTU+DlTf36QO/e1g7SrBzkV6bAt7hYfk9vz7+/YCGYxlZAeJ/rzExZ89sodQYCC3yNrGd2dnhOuB06BHzxBXDppTI/2ZNRo+S5nDrV/H6Nv5/Bg81t37GjBL5mgn2HQwKpMWOkEdinn8r85HDZsQNo3lxKhw0JCRKEmW22ZZxYC0fgO2UK0KULcNpp/rc1Xi+7yp3T0uS5GjTI/sB33Tp5vzDwZeBLREQWpKVJCZj72oaAHGzs2hW+7AvZ48ABWZbISpmzITFRSigLCsxtbxwkmjnIb9JEDpgrQ+C7a5eUjfsKfDdu9F5WGUxjKyC8ga+RrXTN+FrtzKy1BL6BNksKxGefyXx2X9nA+HiZ6/vLL/6bkRkcDvk9Gjc2t33HjjKOPXv8b2tMK6hdW8adkxPeJlc7dnhu1qa1BGNmpKZKlr1PH/OPG0ipc1qaZMjHjTO3ZFLjxkCPHvYFqUYpcmKidL0/fNie/Rr7Bhj4goEvEVH0ysmRZiALF0Z6JKXS0kqb7bhr2VLmtoWiMQeFzpw5Ug4ZSOCblCSZmxUr/G+rtaxz2qOHuYP8uDigdevKEfh6W8rIkJAggfHGjZ5vX7FCnqsWLQJ7/Lp1JYsZ7sC3RQtp5mU145udLZ+PY8fKz6EOfI2mVn36+C8nv+46CZzefdf/fpculcDObLdiwHxn52PHZP9GpnTwYJkvHs5yZ2+BL2D+NXM4ZFqFe9M3X+rWBerUsZbx/fBDOdF29dXm75OUJCcNi4rM38eTkhJpapWQEJqlktLS5Hfr1Mm+fcYoBr5ERNFq40aZt2f3WoGBcv3n7Emg5WUUWSkpEoAEMj/UOGA3U+43fbqcxLnxRvP7ryxr+ZoJfAHvwUIwja0M4Xqut2yRrt7t2snJjbZtrQe+RpnzgAHy3g1106YlS4BVqzw3tXLXpo2s6/vBB76bXH3+uQQ5zZsD99xjfixm1/JdulQqMYzAVynJZi5ZAvzzj/nHC4anwLdTJ5lvbCbwzc6W591KmbPB6tSbBQuk03yTJubvk5gomdlgn89t2ySLn5AQmqWS0tLkpEd8vH37jFEMfImIopVRpmWmpC0ctm+X7qsMfCuOggJZe3T0aPMdU101bSpVCf4O0o4cAe68U+YEM/Atzwj8PHV1BmTeYXy852Dh8GE5SRZoYytDuJ7r9HT5PY0MXiBLGrlmja02SwrE1KlSLnzZZea2Hz9ePrdnzCh/W0kJ8Oij0iDr5JMlQDWCWTPat5cg1l/gm5oq27lOK7jqqvA1uTpyRBpFuQe+8fEShJl5zRYskGy7lYy4wUrgW1go8+StnjyycuLPF+PETUKCZKr79rU/8GWZMwAGvkRE0cv4p717d2THYfA3T4iBb+xJTZWS0UDKnA1JSbL8RnGx922eeQbYuRN46y1rWQcjGKvo88YzMoBGjeSg15Nq1YCuXT0HC3/9FVxjK0M4A1+jXBcILvA97jj5PFqzRgLKUDh4EPjySwl6/S3BZRg5UoJ79wDzyBFpMvXcczIXeO5caxlGQJYEat3a/5JGxtzhhg1Lr2vUCLjoImDatMCWkLJi5065dA98AfMnK1JT5QTJySdbf3wrqwysXSvz562ePGrTRt7LwQapxnPRo4dcJiXJCZG8vOD2C0hTtn//ZeDrxMCXiChaRWvga/xzdmfML+SSRrEjJUUyQGecEfg+kpLk4Mrbgey6dbKcyjXXyFw9K9q0kQPS/fsDH18s2L7df0dtb8FCsI2tDG3ayN+ulTVoA+Ep8DU6PVvZR/PmcqIgIUHms5ptJmXVtGmyfzNL3BiqVJHA9tdfpYMxIMH94MGSBZ4wQbLIrt2OrTA6O3tTWCjTCjyVCI8fL1UCX30V2GOb5b6Gr6uEBAnGDh3yvQ+HQ8rZa9a0/vhGxtfMSTOjR0Egf0OJiRKgB3NyLi0N6NCh9MSKnUslrVkjlwx8ATDwJSKKXkYAGU2Bb7t23pfyqFFDMgrM+MYGrYGZM4Fhw6SMM1BGuZ+nrIfWwC23SIDy0kvW911ZljTytoavq4QECXbcl6NZsUKep+bNgxtDmzbyeoXy8+bwYWDfvvKBr9bWXuMtW0r3YTTbC0W5s9HUql8/60HRtddKAPzeexKEDhggwfmsWcAdd5jrHOyNv7V8//pL3ieeAt9TT5WTl6Eud/YX+AKlQZknublyUieQMmdAAt+8PMnY+7N8ufxfC6T5U1KSnJhbv976fQ3upciDBsmlHeXO7OhcBgNfIqJoZQSQ0TLH18w8Ia7lGzvWrpUD8WDKnAE5GdK+veeDtK++AubNA55/XuYDW1UZAl+tzQe+gLxuruxobAWE57k2gjXXOa3GvGYrSxq5Zo27d5fLUAS+ixbJfq1kew2tW8v6uZMnA0OHSmC1eLGUQQerY0f5nPW2Fq4x59RT0Gg0uVq2TALkUDHeR61bl7/NTGfnJUukW3Igja0Aa0sarVgh82oD6XNgjC/QILWgQIJm1/+tjRvLz3asEZyWJicevfUPqGQY+BIRRSsjgNy3L/jlEoJVWCglq/4CXyvzqiiyUlLkcsyY4PeVlFS+3O/wYeDuu+WActy4wPZbGQLfrCzJbpkNfF2DhZwcexpbAeENfN0zvoD5eb4FBVIma+yjTh2Z6xuKwPfTT6Ua4tJLA7v/zTfLazR4sARy3brZMy7jd/dW3u1wSPbSCP7cXXmlVOiEMuu7Y4fMX65Ro/xt7drJ6+arG7fDIYGo1ekRBrM9JwoLpStzoCePjj9epvkEGvhu2iT/393/t9q1VFJammT4AwnqKyA+C0RE0WrXLimV01qC30javFkOOJnxrThSUiQo9VSKaFViolQmbNpUet1TT8l7+O235X0ciObN5b4VOfD1t5SR4bjjZK6ja4BnZOxiLePrGvi2bSuXZgPfjAxpZOWaNQ5VZ+f584EhQ7w3HfPnzDOBlSulc
3qjRvaNy9daviUlchLKV6a0YUPg4ouBzz6TE1Sh4GkpI4NS/l8zhwPo1QuoXz+wxzcb+K5ZE1hjK4NS8lw7HIHN8/VWipyYKA3I/v47sHEBMp7Vq1nm7CLsga9Sqq1S6lul1CGlVI5S6jullKn8u1KqnVLqY6XUdqXUMaXURqXUs0qpICYnERFFoaIiCSSMMr5Iz/M1O0+oVSsJdkLVYZXssW+flHEGW+ZscC/3S0uTBj7XXx9YR1ZDlSrynqoMga+/UsQqVeTzwDVYsKuxFQA0aADUqhXa53rLFgm6XDsNV68umUmzga+n4DkhQcpFrTTI8mfvXtlnoHNMDX36lC7dZBdfa/muXSvr3/ob9/jxElh98YW9YzP4CnwBec1Wr/YcLBYUSFl4oGXOgPlS52AaWxmSkuT3tdqdHJC/5ypVpGu7KzuWStq7V+YfG/PgKbyBr1KqFoDfAZwA4GoAVwLoDGCev+DVefuvAJIAPAbgLADvAbgHwAchHDYRUfjt2SMHBH36lP4cSWlpUip1wgm+t2vZUoL2AwfCMy4KzOzZ8v6yK/Dt0gVo1qy03PmWWyRT88ILwe+7oq/lazbjC5TPkq1YIRnTZs2CH4dS8lz/+2/w+/LGvaOzwcqSRt4C36KishUHwTICjmCCr1Bp0kSy0J4CX+Pkk79xDxwoAdHUqfaPDzAX+B44IMGZuxUrZP5yMM99nTrSJdlfxnf5cvmssrKWsrtg5vmmpQGdO5cvCW/dWsYUTIMrNrYqJ9wZ3xsAdARwjtb6B631DABjAbQH4K9zwCBIkDxea/2x1nqe1vplABMBnO8MqomIKgbjn3XfvnIZDRnfTp38LyvBtXxjQ0qKvFbG+ytYSkmGwuEAPv9cLl94wfoapZ5U9MB3+3b5uzLzXCUkyN9WVpb8bFdjK0Oon2u7Al8jS2ww0yzJqtRUeV3sfH7topQ8j57W8nU4JGg67jj/+xg3ToJMI+tpl7w8yTT6C3wBz6+ZEewNHhzcOMxMvQmmsZWhRw+pYgg08PUWmBpLJQVaQcXAt5xwB75jASzWWm82rtBabwWwAMDZfu5rLHaW43b9QcjvEURfeCKiKGOUZxkZ32gIfM3882TgG/3y84Gff5amVsEsqeIuKQnYtg247Tagf3/guuvs2W/bthKMBbNOZjQzOjqbeS2MksU1a2QN1E2b7GlsZQhl4FtcLO8PT4Fvu3aSaTZzgG8sZeQaqHTtKuWidga+DgdwyimBr7Ubap6WNNJaAqXERHPvpyuukODe7iZXO3fKZTCB7wknBF/J4C/wLSgIrrGVIS5OgnSrZclHj8r72dv/1qQkyYoHulRSWpqcULOjIqSCiA/z4/UAMMPD9WsAXOjnvr8C2ATgJaXUTQC2AxgA4A4A72itc33dmYgophj/rDt1kpKtSJY6Hzsmza0uucT/toEEvunpqGkcKIXKzJm+y6+VAkaNCn4t1Fjwxx8yt8+uMmeDUe538GBwDa3ctWkjB4gHD5adGxqo1FR5P/tQMz6Iw6P8fFm3dehQc9ubWcrIYBwgr14tgSRgf8Y3M1P2bdfrZ9ixQzroeiopbd9enre9e6VDri+essbVq0u5vV2B76FD0lTo8cft2V8odOwIzJkjwa4R5Kany+tntkS4QQP5XP/8c+DVV72v0W6VrzV8Dc2byxJn7q9ZcbF0M7744uDH0bKl9DLwZs0aCX7tOHmUlCSVNLt3+38PG9atk9fPV+ALyIkAo9+HFcYJaztPcMa4cAe+jQBke7g+C4DP/2Za6zyl1GAA0yGBsuE9ALd6u59SahyAcQDQvHlzzJ8/3+KQ7XPkyJGIPj6RK74fo1uHhQvRPi4OjnXr0L9+fRxetQrrIvR61dm0CSeVlGBNXBz2+RmDKijAaQC2LlyIDJNzpnrffju65Odjvqf1Hm1QZ/NmnHTDDX63yxw9GhvvvTckY4gmnSdPRovq1bGgalWU2PmeKi7GwKZNsX/wYGw+ckQ64tqg6aFD6AFg2fffI9dTttCC+CNHcOq55yLOzxIhXbp2xXyj27BFrX74AV0mTsTyKVNwpEsXv9ufunkz9rdsiY1mni+tMahOHez9+WfkrV6N4wEsyMtDoU3PdaujR9GluBgLv/8eBXaUqbto8Ndf6A3g75wcHHQbb+OcHPQEsOK773DY1wG+1hi8aRN2d+yIzW776N6sGeouXYolNjwXjZYswYla4+969cqNNRI8/b9uVVSELnl5WPjddyho3BgA0GLOHJwAYGmNGjhqctz1+vVD3w8/xLrnn8ceO9YYBtBs7lx0B7Bk504c8zGOXm3aIG7hQvzlsk2dzZtx0qFDWNekCfYE+dx3LC5Gmx074Jg3z2Pw13LWLHQFsKSoyOc4zahbpw76AVj75pvYe8YZpu7T/Kef0A3Aktxcz4+vNU5p3BgHv/kG6/z11/Bw38H//IPdI0eW+1sJVkwfP2qtw/YFoADAix6ufxZAkZ/71gAwD8AGAFdAmlzdCyl9nmzm8fv166cjad68eRF9fCJXfD9Gueuv17plS/l+8GCthwyJ3Fg++URrQOt168xt37ix1jfdZG7bwkKta9TQRdWqaV1UFPgYfZkwQca/dKnW27Z5/hoxQuvOnUPz+NGkpETrdu20Hjs2NPs/elTr4mJ797lwobx+s2cHv68vvpB9/fCD9/fCHXfokrg4rQ8fDuwxLrxQHuPxx/1ve+SIbPvss+b3P3iw1omJWl9yibyWdkpJkfEsWWLvfrXW+r33ZN/p6eVvW7VKbvvqK9/72LtXtpswofxtTz2ltVLynAbrwQe1jo/XOjc3+H3ZwOP/6zlz5LlITS297ppr5PPXyt9gcbH8rzn//KDH+Z8XXpCx+fsbuu02revUkc8lw8SJct+MjODH8dprsq8DBzzfPn681vXrl338QBUXa92smfxdmnXvvVpXry7/B725+GKtW7e2PsZt2+R3f+cda/czIRaOHwEs1x5iwXDP8c2G58yut0ywq+sADAFwltZ6mtbaobV+FdLV+UalVC9bR0pEFEmZmaXNW1q0iOwc37Q0mefWqZO57a2s5bt2LZCXhyoFBZ47lNrB4QA6dJB5p+3be/464wyZL+lv6YtYt3q1NFOyu8zZULNmcE1iPLFzfdmUFJnzNmaM9/fCqFFQJSW+SyS90bq0wU1Kiv/tt2+XS7OlzkBpZ2e7G1sBoV3Ld8sWID6+dN1eV8bv76/BlaeOzoaEBHn+160LbpyAvIYnnSTLO0UrT0saORwy19TK32BcnPw9/PyzlJvbYccOKaP2t/5xQoJMuzD+DgCZitC+vf/lvcwwpt54+1xfsUL+huwoBY6LA0aPlvLzwkJz91m9GujWTf4uvElKkjnT27ZZGw8bW3kU7sB3DWSer7vuANb6uW9PANlaa/cWdkudl92CHBsRUfTIzCz9p928eWTn+Kal+f/n7KplS/MBpGs3UTsb0xiMZi/+5rwZtwezZmIsMIKx0aMjOw4rWrSQg8pgg7GiIjkoHT3a9/zVU0+FjosLrEPrpk3yt9qlC/DXX/7HHGjgm50t
85TtbGwFhDbwTU+X39PT50i9ehIoBRv4AsF/jhw7BixbFp3LGLkyGqIZz0lmppxcCGTcyckSgP7xhz1j87eUkcH9NTNOHAW7drLBOHns6URsQQGwapW9J4+Sk2V++J9/mtveTNNI47mw+nlkPKc9PIVdlVe4A9+ZAAYqpf77xFJKdYAsVTTTz313A2iolHJPOZzsvAxxZxQiojByDXxbtJADXbvOxltltqOzwUrGd/ny0qxKKALfDRuAffv8Hwz26QPUrl05At/+/csuBRPtqlaVv4Fgg7EFC+TvyF+2u25dHO7cObD3gnEfY/3iH3/0vb2VNXwNrn+Ldmd8GzeWRlGhCnx9zdFu165s5s/bPgDPS/Ucf7yMPdjPkSVLJGMX7YFvtWqSPTeWNApm3eFhw2QdWTNVCmaYDXyNoMx4zTZulAZndj33vpotpqXZ19jKMHy4vC5mnsfsbMnk+vvfGuhSSWlp8ho0aGDtfhVcuAPfdwFsAzBDKXW2UmospMvzvwD+66WulGqvlCpSSrm20/sIwGEAs5VSVyulhiql7gPwKoAVkCWRiIhiX2GhBGuupc5AZLK+OTlyMGo18N2929zSJCtWACedhGOtWoUm8DUOFvxlEKpWBU49NbAsX6zYswdYujR0Zc6hZMcyOykpclB65pl+Nz3UsyeweLH1k00Oh3SqPfdcCfL8HQBnZEj22ThAN8M1g2N34KtU6JY08hf4mlnLd8sW+Vz0VIJcpYp0vg32c8ThkOdh0KDg9hMOHTuWngxwOKS0uHdv6/upVUume6Sk2LNsmNnAt359Cd6N1yyY4N0TXxlfo9rIzr+hOnXkJMLMmf6fxzXOPr3+/rfGxZWu52uF1RPWlURYA18tSw6dDmAjgE8BfAZgK4DTtdZHXDZVAKq4jk9rvQ3AQAB/Q5phzQZwA4CpAIZrrQNc3ZmIKMrs2SP/NF0zvkBk5vkGMk+oVSspK92/3/d2hYWyhuJJJyG3Q4fQBb7NmwOdO/vfNjFR5lxl+2s5EaOmT5f3VWUOfIcMAerW9bvpoV69JOhdtszaYzgcctCuFDB2LPDbb0Cuj9UWMzLkd7OyfFKTJvKZ0L69fG+3UAS+hw7JcmK+Or2bCXz9Bc/G/OdgOBzAiSfGRqbMdS1fh0NO3gW6FNfYsfL8B/v8FRTI/zAzgS9Q9jVzOGTNWRPd0E2pXVvK6D1NvVm+XF7jIDvFl5OcLCdo/K29a/zOxtrcviQmWutBUVQkc90Z+JYT7owvtNbbtdbna63raa3raq3PcQa1rtts01orrfWTbtev1VpfpLVuq7WuqbXuorW+V2tdQY9SiKhSMs5Ou87xBSKT8Z00ScoH+/c3fx9fZ9ldORtboV8/5B53nJS52V3ObczvNdO8JClJAsMFFbCA6N13gTvukJLuXjHYCzLYYGzjRvkyGfQfMg5GrWRZ/v1XGtAY1QXJyfJ+/vVX7/exsoavq8svB666yvr9zAhF4Otrbq6hfXsJkA8d8r0fX/vo2VPKRwM9eVVYKE3Nor3M2dCxo5wQ3bFDAqlgxj1mjFwGW+5sfO5bCXzXrZNgzZjfa+e6s96m3tjZ2MqV2ecxLU1OwplZNs1qD4otW+Szh4FvOWEPfImIyA/jrK57qXO4M76//w58+SXw4IOlwbcZvuZVuTJKzU46SQLfoiIJTuySkSFl2mYbpQwYIKWwFancuagIuPNOYNw4KcH7/Xf7D/TCoU0bKbvPyQns/sZBqMnAt7B+fSmbtfJecC/TTEyUUk5fB8AZGYF1r331VeDpp63fz4w2bSR4NDNVwSyzgS/gPeubny8Bnr+ML1BaRmrVypXA0aOxFfgCwCefyGUwTaFatpT5rsEGvsZJEyuBb34+MG+evPZ2P/eeAt/8fPsbWxnatpVyczOBb0KCuc9jqz0o2NHZKwa+RETRxj3j26yZXIYz8C0oAG69VZrIPPCAtfuaDXyXL5cz3p06SeAL2FvubAQtZg+kataUzHZFCXwPHpTsw8SJku398cfYKN/0xDiI3hlgH8uUFMkGWsmuJiVJ9r+42Nz2DoeUVZ54ovxctSowcqQ8756CyMJC+X0CyfiGUps28vfvb6qCFXYEvtu2SUWGr3LpYDs7m+0JEC2M5/Ojj+Sk3YABwe0vOVmaewVTXRRI4AsAb78tl3Y/955WGUhLk78/u7uiG5KTgYULpbzfE62tzcGtWhU45RTz/5vS0iSg7sYFb9wx8CUiijaZmdLQwgh4q1cHGjUKb+A7caKUn73xhgSEVhgZan/zkVasAPr2BeLicLRtW5mbZmfgm5oqgZ6Vs95JSTIuX/MyY8GmTcDAgTLHdOpUYMKEwOf+RYNgltnJzpblRazObU5KAg4flnnoZqSmSkMk16WSkpMliPA0VzgzUwLiaAx8AXvLnbdskY7R9et738Zf4GsmeG7TRk4+BPo5kpoq80utVLhEknESYNMm4OSTpTNzMJKTJSibNSvwfVgNfLt1kyBt5syyJ47sYmR8XZtNLV8ul6HI+ALyPJaUALNne759zx4Jiq3+bzLbgyItDejUKbrXoY4QBr5ERNFm1y4JHl0PoMO5lu+OHcBTT0mzE2O+khXVqknTHV8ZX5fGVgCgq1aVA067M77ugYg/SUlSHrx4sX3jCLfffpOD4P37ZX7pDTdEekTBCyYYmzNHsrZjx1q7n5X1M/ftkznr7tUFo0bJ+89T2WMgSxmFQygCX39zcwHphl29uvcljcwEvkoF3uCqpMTcmt/RpFEjCRYBezKlvXvL6x9MufOOHVLJ4+skh6uaNSVIKykBBg+29nltRqtWUsGQlVV63YoVskSQp2Wx7NCvn/wP9/Y8BlKKbKUHBTs6e8XAl4go2mRmll9ntUWL8GV8775bAoUJEwLfh7+1fNeskXlWrmfc7ejIatizR9bwtXoQe+qpkm2P1XLnt98GRoyQ53/pUuC00yI9InsY5fOBBGMzZ8qJIysN2gAJAI47ztx74c8/5dL9/daokZx8YeDrP/CNi5P5zt4yvlu2SJBkVJR4Y3yOWF2WJy1NpgfESpkzIIG+8bzaEbArJSc7f/lFGg8GwuxSRq6MZnKheO6Nzw7XCqRQNbYyxMVJ1vennyTodhdI4DtggJQ8+/s8ysuTCgAGvh4x8CUiCpX582VupVWZmeXX9QxX4Dt3LvDNN8DDDwd3NtzTvCpXLo2t/tOzpxwg21Fm7C0Q8adePcl6WF0z8eef5YRBJD31FHDLLZJlXLjQ/mU6Iql6dSn9txqMFRbKwefo0XIwalVSkrwX/AVRqalSZuppzmBysjTScQ/ojJ8DaW4VSs2aSVm8XYFvUZH8rr7m5hp8LWlkBM/+gpWEBCkjtfp5abUnQLTo2FHe26eeas/+xo6VBl/z5gV2/0ACXyNIC8Vz777KQH6+lAyHqszZkJwsUyU8BappaVLhYExnMqNmTQl+Z80Cjhzxvt369XLimoGvRwx8iYhC5eWXZY6s1aU1PAW+4Sh1zs+XhlbHHw/cd19w+/KX8V2+XIJM14PhYDuyunI45EChb1/r901KkiV
NPJ2p90RraQD2+uuBZ0mCdfAg8NJLwAUXAD/8UFr+WJG0aSNLBlmRmirL4wS6dnFiopSM+1uT0+GQOdXVqpW/zXjsH38se31Ghhz4Wp1DH2pxcUDr1vYFvv/+K8GvmRMxZgJffwJtcJWaKh15oy0D78+4ccAzz5han9qUoUOlg3Cg5c6BBL6XXgrcdJP1qgwz3Jstrl4d2sZWhmHD5GSYp+cx0FLkW2+Vz6LBg71PCWBHZ58Y+BIRhUJuriwdA0jJrVmFhTJf0FOp85Ejvs/0Buv112U5oTffDL5JSqtWknHx1hHXpbHVf4LtyOrK4ZAumJ4CEX+SkiSANRqg+LN0aWkDJG8HI6E2bRpw7JgsPWX3HLloEcj6sikpki0ePjywxzQyUL7KC3NygL/+8p6t6toV6Ny5/AFwoEsZhYOda/mamZtraN9ePjfcTyBpHdrAV2t5jc2u+R1NRoyQCh271Kghfy8//mi9XLyoSCp9rAa+J5wg0zSqVrV2PzPcM76hbmxlqFULOOMM+bt3fR5LSuTkbiCB6SWXyOuydaucJFi0qPw2aWnyPHbuHPjYKzAGvkREoTB3rmRQAWtr0xrleZ5KnYHQZX23b5eswbnnyhIswWrVSoJeT0uiFBRI6af7GffjjpPsV7CB78GDEogGWjY3eLBcmi13njKl9PtIBL5aS+fmfv1CfzAXSVaDMa3loPP00yWDFYhOneRvz9d7YdEiOZj19X5LTpbS0cOHS6/bvj16s4uRDHyB8pn9vXvlZKKZcmmjhNTK58jmzfLZG0vze0MpOVleA7MdzQ27d8vfgtXAN5Rq1ZJGW8bUmxUrZO59hw6hf+zkZAlS164tvS4jQ05gB5qRHTVKmi/WrQsMGVK6hrMhLU1OJITiJEIFwMCXiCgUUlKk3DQ+3lrG130NX4MR+IZqnu9dd0mg8Prr9uzP/Sy7K0+NrQDJVHbvHnzgu3Ch/C6BBr5Nm8oSG2aaGh06BHz5ZWlG0VuZZigtXizle+PHh/+xw6lNG5k2YHYO+Pr10hAp0DJnQLJ/SUnAH394z345HPJ3PnCg9/2MHSsnfObOlZ+1jo3A12rGz5P0dDkINxMMGRlw9xNIVoJnwHqjPOPERqzN7w2V0aPlvW+13NnqUkbh4jr1JtSNrVwZqyLMnFl6nR2lyN26yXrLgwYBV18tU22M6ip2dPaJgS8Rkd1KSqQBxahRcqBmJeNrnJV2L3U21pUMRcb3p5+A774DHn3UvgNx93lVrjw1tjLY0dnZ4ZAD7ZNPDnwfSUnSIMtbqbbBKDF++mkp245E4DtlClCnjsyTq8iMg+mdO81tbxy0B7Ikl6vERDmg9/baOhxyIO0rqzxokCyfYoxp3z5530Rz4HvsmPX+BJ5s2SLZNTMl+N7W8g0k8F2zRj6LzXA4ZAm2E04wt31F17y5NFKqaIFvXl54Glu5Pm6/fmWfR+P/W48ewe27cWNpqnjjjdJP5Nxz5bMxI4OBrw8MfImI7LZsmQSoycmyNm00Z3zz84HbbpNx3nOPffv1FfguXy6lZ57KFhMSJPg/cCDwx3Y4JKiuVSvwfSQmytzN1au9b6O1BJ39+km2r1Wr8Ae+2dnAV18Bl18uwW9FZnWZnZQUoE8faVgUDCML6KncOS9P5nj7yxTGx8uJsFmz5GRKtC5lZLBzSSOzc3ONx/V0AmnLFrk0W56akCCVAWb/Hh0O+ZuPtfm9oZScLP/LfDUpdBetga+xysDq1TIPOdSNrVwlJ0tVzt698nNamnwmmV3n2JeqVYHJk6Uvx+zZpb8XA1+vGPgSEdktJUWyG6NGSWObTZvMZx4yM+W+TZuWvb5pUzkgtDvwffVVmd82aZI0AbKLkaH2tKSRr1KzYDs7Hz0qgXWwJYtmmhoZJcbjxsnPvjrS+pOVJScffvrJ2v0+/VSCr4pe5gxYC8YOHJCS92DKnA0JCUCDBp7fC0uXSgmzmbmhycmS6V2yJHYCXzs6rKenm5ubC8iBvKcTSOnp0mnabAdsKw2uduyQeZgscy5r7Fi5nDXL/H127JDXqGHD0IwpUEbGd9ky+TmcvRDGjpWTpLNny8+hKEW+5RZgzpzSpnAMfL1i4EtEZLeUFGmQ1KiRBDN5eeaXYdm1S7K77mWBRjBsZ+C7bRvw3HPA+ecDZ55p334B6abctGn5bIHR2MrbgUewnZ2XLJHO2ME2qWnbVrJLvgJf9xLjYALfVavkBMmNN0rwboaRce7fXzKbFV3r1nJpJvCdPVtONtkR+MbFyd+zp/eCwyEncIyGaL6MHCmZ35SU6F3D19C3r3SFfeKJ0iZ9gcjOli8ra0p7+juykjUGSstIzXyOcH6vZwkJ8lpYKXc2ljKKtsx5q1byf2HuXCkRDucJp9695TlJSZExrF8fmsB0+HAJ7KdNq1hruNuMgS8RkZ0yMiSIMQ64u3aVS7PzfDMzy8/vNdi9lu+dd8oBil0Nrdx5Wss3LU2CX2+Bb+vWUgIWaOBrBCKDBgV2f1dJSbI/Tw1+XEuMjfUz27WTAz9/84I9MUo5MzKAF14wd5+FC6VbaGXI9gJSut6okbnANyVF/o4CWcfZk6Qk+Rt2P/HkcAA9e5rLcDVoICdkUlKkeVOdOtGXGTNUqyZVIJs2Af/7X+D7sTo3F7An8K1XT/Zj5nPE4ZC/4V69zO+/MlBK/o/9+qvM9zYjkDV8w8GYevPLL+FrbGVQSvoM/PKLVFAUFMhnRih06iT/k8grBr5ERHYyzo67B75m5/lmZpaf32to0cK+jO+sWcCMGcDjjwc/B9IbY16VK1+NrQA5SAimwVVqqhzANmgQ2P1dJSZKaaqnkxbTppUvMW7fvnQdS6vS0yUbePHF0qhk0yb/95kyRQ7YL7nE+uPFqrZt/Qe+BQVSMj5mTNl1ooNhZAP//LP0uqIiOflgpbogOVkOfufPl/dLtGXGXI0YIdUgzz4beCVDIIGv+wmkY8ekaY/VLJbZzxGHQ06UVdT1r4ORnCzP/2+/mds+WgNf42Ty0aORWfItOVmWMHrzTfmZpcgRw8CXiMhOKSlS3tyli/zcvLkEJ9EU+OblAbffLh1M77or+P154ynju3y5BKW+DmKNA1arS6kUFFgPRHzxNs/XW4mxt460ZqSny/1ff13mWt92m+/fPysL+Ppr4IorAl+jNhaZWV/2jz9kvVw7ypwNfftKxtn1vfDXX9JAyUqJrDGmVauid36vq9dfl+A80M+JQDO+hYWln3Xbtsml2XnChoQEKSstLPS+zf79UjXBMmfPTjtNKhPMlDuXlMgJimgMfF3/p4azsZXh9NPl8+OTT+RkHLuHRwwDXyIiuxw+LJkc1wNupSTra6bUuaBADsS8lTq3aCGlzsGurfnyy3JA+uabUtIYKq1aycGra+mvmTUUExKklNhq5nTlSslO2HUQ27mznLhwD3wXLpSsnXuJcTCB75YtEhy0bClLI/38M/DDD963/+QTmXtZWcqcDWYC35QUoEYNYNgw+x63alXglFPKvheM762caOnUqfSgNxYC37ZtpSrk+++leY5VW7bIXH
9jOoAZ7n9HgQTPgHyOFBRI8z5vjAw+A1/PqleXzP+PP/r/v7N3r1RBRGPg6/o/NRIZ3xo1ZA5uYaF8Bpht0hZFdu0C3n1XCmmmTo3dOcQMfImI7PLLL3Kg5Z5pMrukkZHh8Jbxbd5cgp1DhwIfY3q6zCG9+GJ7AwNPWrWSLMC+ffJzfr7vxlaGQBtcGU1q7Mr4KiX7cl/GxigxvvjistcHm/E1DuxvvVXmgN1xh2QU3RkZ55NPrnzzEtu0kffTqlXAunWev1JSgDPOCG45K0+SkuRxDx6Un1NT5SDW24kqb4zPh1gIfAHJ9nbtKlUIRtdYs6zOzQXK/x0Z898DCXwBaWjk7b3y448S3EUiCxgrkpOlcmflSt/bGQ0cwxz45uQAX3wB/N//ySIFHlfCq1lTKo2aNIlYQ7nCkfJ3v7lGAv78Uyqfo5nWcn73+edLV+sbN06uq1OnKNLDC1h8pAdARFRhpKRIsxr3xkpdu8p/5mPHfJ/p9baGr8F1Ld9A57DecYfMJQ2mYY1ZRkCQmSljT0uTM97+Al/XjqxWuk07HHKSwVhKyQ5JScC338pBePv2pSXG115bft3c2rWlY6jVwPfQITlaMw7s4+OBt96Sx37uOTnycPXnn1LC+cEHgf9eseq44+TSX8D/yCP2P3ZiohwNLlggS5WlpgLnnGN9P+ecA7zyilQUxIJq1eT9eMYZMu7HHjN/3/R0yZRb4SnjW7s20KyZtf2ccIKM/Y47fG83dKi9S7lVNGedJeW5H3zg+7M7jGv4ZmUBM2cC06eXnm+uXx/4+GN5e15yCXDzzTIb5T8dO8r/1gjMq9+1C7jh/TH4HvH4aFUfPOdcMvqEE+QpNb769AntcuybN8u/yaIiKWKJjy/9Mn4uKZHCtZkzS8859e8vU/3HjpXzSX/8sR1AbGZ9GfgSEdmhuFgaRo0aJf89XHXpIgfMmzf77uZolPb6KnUGJPANZI5QSopkOF59tXRpmFAyAvjMTJkj6a+xlaFpUwlerWR8i4slELnwwsDG6o1RApmaKgfkn37qu8S4XTvp2GvF1q1y6TqHMTERuOoqea2uvrq0SRog2d569YCLLrL2OBXBBRdIEORriZ3q1eVg3W4nnyxHhw5H6UmQQEpkTz0VWLTI7ag8yg0bJhUOzz8v88qNExC+FBbK34LVLrPuJ5CMrLHFgKWkWg2k3JuKE+tu9T1cq4F5ZdO0qXzevfOOpPy8nXQKceC7Z4/M/pg+HZg3T4K3du0kwD3/fPmzWrMGePtt+Zj+6CP5V3PLLfLWrfntt1JyHGZLlgDnnQccPNgc815ehlvP7YSB6+Xf4fLl0jds2jTZVik51/vyy8CJJwb/2EVFcp7uxx/l37/ZViPVqsmf/H33SWlzOA4XwoWBLxGRHZYskfm5nhrquC5p5Cvw9ZfxNTKZgSxpdOyYNLTq3l0uw8E18AXkv3zDhuYOmnv2tBb4pqVJ5tTuuXoJCZJKSE2VA3h/Jcbt25tfusrgrZTz5Zel8/Ztt8mcX6UkM/ztt8D111euplaG6tUDy7LaoVYtCVZTU2WNZyDw99vAgbYNK2z+9z85gr7zTnlf+rN9u5yQCmRN0fbtS08gpadLSbkFRUXADTcAH300APHxA/D008D991tv3JyaKlP6W7aUeK51a7mMpj+9bdsk42n0UwyJZ58FvvlGokiHo0y39J075aaR83aga7VqUE2aBPwwBQXycm/YIB+jxteGDaX/9jp3Bu69V4Jd93YRPXsCkycDL70kwe/bbwPXXAPcfTdw7bXH4bTT5NyVa7bT+L5qVfmob9PGvmbwH34oS7O3bi3nuk48sTcAYEwnCSgNu3ZJILx4sYy5d28Z9zPPeD8c8CY7W6bj//ijXB48KIHskCHy8o0YIVnlwkL5OykqKvt9cbEcJliZlh9LGPgSEdkhJUX+e44cWf42o6TR3+nWzEw5Mmva1PPtrhlfq154QY6Q5s2T//Dh0Ly5HJUYmWwzja0MCQnA1KlSd2XmKMTu+b2GKlWAwYPlYO/PP2VeoK8S4/btZU6h1uYzVN6a9zRvLgect90maY4LLpBavsrY1CpaJCZKANi0qRzNGgFwjPn6azmH88gj0nDWlNatgSeflDTQjz+WPXL3xEdTqjVrgPffl6TusGHysVlms3btZEkvrWU/FqY85OcDl10GfPcd8PDDspuHH5Yg4NNPzU2tzsiQX/ObbzzfbgRIrVtLwDl+fHhXqPn3X3kNv/oKWLZMPmpuvVUS8iEplW3USE7EXXst8OmnOHL+1fjuO3k+f/tNXqam2IF6NVrj0IY4dOtmfteHDwMTJ8pHW3q6fOQbmjWT53f0aClyGjlSnmd/H6316kmQd/PN0uT97bflMczM8KlRQ/5lG4szuH41bmzuY72wUILtN9+UGQJffin39aZlS/lzGjMGuOce+difNEnud999Euj7el23b5dzUd9/L/+qiovluTv3XNnn8OEVN5C1TGtdab769eunI2nevHkRfXwiV3w/2qxHD61PP9377a1ba33VVb738X//p3WbNt5vLy7WumpVrR980NrYNm3Sulo1rS+7zNr97NCsmdbjxmmdlydjf+ABj5uVez++957WgNabN5t7nAsv1LptW61LSoIbrycvvSRjOfNMrevX1zo31/u2r70m2+7fb37/N96odePGnm8rLNS6d295/+TkaN21q9annGJp+GSd18/HWbPk9VVK60svDeuY7FBUpPVDD8mvUL26XF59tdb79pncQUGB1t27a33ccVofPep723fekQf491+ttdaHD2v9/vvy9gXk46BdO/ke0LpLF61vv13rOXO0LrzlDq3r1NF650658c03TQ3vyBGthw+Xu0yYINeVlGj98cda162rdb16Wk+b5v3+ublaP/641jVqaF2zptZPPaX1gQPyETpvntz3xRe1vvVWrc89V+v+/bWuVUseb/RorR2O0HwEzZs3T2dmav3GG1oPGlT6nPXtKx9Pt90mb8l27bT+6Sf7H19rrYsKinV2t1P0oepNdcua2RrQumNHrZ94QuuNG7Xe2TlJL4hP1FWrynvM18ek1lofO6b1669r3bRp6cfrY49p/emnWi9ZonV2tr3j37NH66VLtV64UF6n337T+uef5U96xgytp0/XesoUre+9V+vkZPmojY8vfa4BGes552j9v/9pvWyZfDx7epykJNn+nns8b2PGli1aX3SR7KdlS/mXWFQkt5WUaL1mjdbPPqt1v36l4+veXeuHH9Z68WI5XAiVWDh+BLBce4gFIx6MhvOLgS9RKb4fbbRli3ycvv66922GDtV64EDf+xkxQo6kfGnTRgJks0pKtB41So76MjPN388uvXtrPWaMHCUAWn/9tcfNyr0fFy+W7X/4wf9jlJRo3by51pdfHvx4PVm0qPTI4pZbfG87fbpst2KF+f2feabv133hQtnn0KFy+eGH5vdNAfH6+XjwoEQYgNaTJ4d1TME6eFCCM0Dr66+Xnx96SA7umzSRgMNU0DZvnuzkiSd8b3f//bqkWjW9eGGxvv56i
WMBrbt1k8Bh7155vA0btJ44UT6matSQbe6LlxNIM2+YKVfMnu13WNnZWp96qtZxcZ7/RNLT5XZAzlm4BlYlJVp/+aWcOwO0vuQSrbdvN/FcaAmMn3mmNIA75RT52LIr8PjmG6179876723Xs6cEPJs2ld1uwQKtTzhBtrnqKmvn3twdO6b1unUSFE6aJB97rVpp3RsrdRHi9O89btV//un2funYUR8791J99dUyhg4dtE5JKb/vggKtp06Vf2WA1mecIYFuNCoslKD+xx/lnObVV0uwb/w7qFNH/m0/+6wE04sWyXuoRg3fJ1isWLiw9GRRQoLWd98tJ4mMMQwcKCdjNmyw5/HMiIXjRwa+DHyJyuD70UYTJmi/2ckbb9S6YUPfR5Y9e2p99tm+H6tfPzlCNOv77/0H5aF01lkyZiP7k57ucbNy78ecHNn+2Wf9P8bGjbLtO+8EP15P8vMl/QNovWqV722XL5ftvvvO/P6PP17riy/2vc2118p+/WWcyRY+Px9795bXIi0tbOMJ1oYNEhTFx2v91ltlP4ZWrZKDZ0CypaaKLC67TFLG770nR/huX8WfTNO7jz9Fp1frqgHJil57rRzE+/oIPHpUMpbvj5ETSP/DXVoD+unL1+uNG73fb/durXv1kizy9OnetysslCC1ShUJUObP13rlSq0TE+X3791bAphA5OZKYrpDh9IA/4MP5OMjEEeOaH3NNbKvNm1y9eOPS5bPl7w8yZrGx0uxzddfe3++i4vlnO3332v99NMS1CUmSnGJa5bTeP3GjtX622+1LrzxVjm74Hpyr6REqoruu09rLc9rt25y33PO0TojQx7v88+17tSpNGD77bfAnptI27FD6y++0PrmmyUYdX2u2ra1dt7TjJISOQHSsaO8tsOHa/3221IQEQmxcPzIwJeBL1EZfD/aaNgw+S/vy+uvy0eur5rCxo21vukm3/sZPVrrPn3Mjy0xUWq2Aq23CtZ110md1vXX+wz8Pb4fO3SQ1Is/Dzwgz20oT3mPHSsZV3/27bN2oqGwUI5kHnrI93Z790pK6d57ze2XguLz8/GRR+S9GcpaQhvNmSPnS5o0kWStJ0VFEhDXrSvZqhdflMycV5mZskP3CMnt6+eGF+mpU7U+dMjioJ0nkHK79NLFULp2fJ5WSkpQf/+97MdIRobWnTtLcPbzz+Z2v2SJBGBKyVeTJlLmapSSBqOwUAK8Xr3kaWjdWs7J+Xw+3axZI2WrSmn96KNa//rrfEtj+Oef0hLYs8+Wc4OLFsk4brpJyqXr1i19qZSSDGxSkhQUPf20nMNYuFBOKpT52M7Olqh64MDSv4G9e2VHEyf+t1l+vtYvvCDnDGvXLg2ETzxRMsGhKAmPlP37pVz6lVek1DlUioqi47xnLBw/MvBl4EtUBt+PNjl4UAKX++/3vZ0xN/DPPz3fnpcntz/9tO/9GIGkGUVFcjR4++3mtg+Fxx6T7MCJJ8ppai88vh/HjJHT6b6sWydpHivl34HIy5P6P39KSuRI7667zO1361Z53d97z/+2OTmRO4FRyfj8fCws9Hn0uXatBCvr19s/LitKSrR++WX58+vVS95q/uzYofV55+n/Smo/+8zHVN5DhySicn4dXrlRP3PVRt1FbdQnN9qoZ7y6UZccywts8MYJJKW0bttW79olc2+NcuJevaSc+Z9/JMNWv76U+lpx+LDWd9wh55KysgIbpi8lJZK9NubkdukiGVN/Ad+HH8pHSLNmWs+dK9cF8v+6sFACMaN83PiqV0/Oh95yi5QcL1kSQDD18ceys/ffl59XrpSfPaTbt26V+dB9+kiWNEbOF5EPsXD86C3wtalhNxFRJfXzz7IGgKdljFy5LmnkidGp2d/aBS1aAHv3lm196c369cDRo9JJOVJatZKxrlplfRwJCfI7FBR4vl1raWVau7asXxFK1aubWwNSKWkba6xB6o+Pzrfl1K1bfo1oCr/4eFnayIPPP5cVj559FujWTdbvXLLE/K43bQIefFCaRQ8bJl2Fvb39fcnNBa68UpbwOf98WcvTTAPq1q2lgfgPP8hHx+WXS8fZm26S7sFau2xcrx7QuTN0p874+q/O6DK6Mx7/tDPOuKkzftrSGWPv6QxVo7r1wQPSArdmTXnAjh3RogXw1FPSvfb996Vr7TXXyKpi+fnA/PmyjqsVdeoAEyYAr7wiq6zZTSlZOiY1FZg5U942F1wgK1n98Uf57Y8ckSW7r7lGtvn7b+kIHKj4eOkGvHq1dDOeMUOWDD94UDr/vvmmLPk0YIDXt7N3V14p3e4feEDWs/axhm+HDtJhe+VK4JJL7FsqiCgQfPsREQUjJUUO0k45xfd27dvLMkLeljTyt4avoXlzOeo7cMD/2FaskMuTTvK/bai0bFn6fSCBb1GRRAOefPONrKXx3HOydkO0sBL4elvDl2JKfr4snXL55bIG519/yVJB8+dLEDNkiCynUyZwdDp6VJaFOe00WTLl1VdlHc30dOCii2Rln0cekdXIfNm/H/joIwm2mzUDPvtMAvCvvrK+7uzZZ8s5ut9/B8aOlaVmBgyQdVJfe03OvQHA5s2yxMzFF8uf+pIlwFtvAQ0aWHu8cowTSECZv40aNWRFnVWrgF9/lTVSU1PlOY9WSsl50VWrZCW0zEx5P4wZI0EpIMuQ9+8v74MnnpAV0Vw/OoPRqZMsrTN2rAShZldZ80kpeaGzs4FHH/UZ+BJFEwa+RESBKioCZs8GzjpL1nv1JT5ejkC8ZXyNtW7NZHwBc2v5rlghR7xGtjkSXH8fqwG4sTBmWlr52w4fBu66C+jTJ/rWtLWa8a1alQeMNsnMlPVpv/kG+OcfCSpDbds2SX5NniwZtnnzJBB75hnJUL72mpzfOOssuf6zz+Sj46+/ZK3RVq2Aq66Ssb/4oqzROnu23GfOHODkk+X6jh1lPdOUFDn3Bcg5oVdfBZKS5JzYNddIZvb//k+yvI88EnigExcHDB0KfPKJfDxNmSJZ0nvukczw8OHyJ7p4MfDGG8DSpRK82cZD4GtQSjLikyfLyYJYUKWKvD4bN8qSuAsWSMb67LPlpEJ2tgTzTz7p/99JVDjxRKm4eecdSSfHx8ubkCiKsWaKiChQixZJmZe/MmdDly7+M77+TvO7Br49e/redvlyOdKO5FGUEfg2alR6IGtW164y9rQ0SSm5euYZec6mT4++o8T27SX9lpvrP9WWni5pmGj7HWJIcTHwyy/A1Kllg0JD27byp9e1q1x26SJZWDvKW1NSJGjVGvj+e+Ccc8reXqeOnJ+55Rbgiy8k4LniCskO5+RIBvOCC4Drr5fg1TVIjYuTbOrIkRIMv/ce8O67krlr21b2vW6dbNurlyTezj5bzgXZktVzUb8+MG6cfK1dC3z4ofzpnXeelNHalZ0sw/i8OP74EOw8cmrWBO67D7juOjmh8cYbcuJk2rTSj/eY8dRTUlLw889SmsDPMYpyDHyJiAI1c6Zk60aMMLd9166SwikuLn+AkJkpZ8ybNPG9D+PIaM8e39sVFckksRtuMDe2
UGneXI7CTzrJ+tF4jRpA587lM75r1wKvvy5HjgMH2jdWuxgH7Nu3y0RPX9LTgY4dsXmzBBU9esiczMTE8B9Dbt8ucw/POUemE4dbcbFkaOPizJXm7tghpaPvvy9jb9ZMMq5XXSXzYjdulPNMGzfK12efAYcOyX2rVAEGDZJzVmPGyJ+mlbdnUZEEmi+9JIHmt9/6rlavVk3mb155JTBrFvDllzIn9bLLzAXgbdtKjPHoo8CPP0oAXFQkc2/HjrV+TikY3bvLvNhXXgnxA/nI+FYEjRrJiZAnnpA5tnafrAiL+vWl5OCKK1i1QjGBgS8RUSD27JGjz1GjpMmLGV26yBF5Rkb5g7nMTEmb+Ov8YZSS+St1jobGVoAE88nJ5rPi7hISJIA3aC3ps7p1gRdesGWItmvXTi7NBL5btiCna3+cfro0nVm0SJrONG0qAej550u5abVq3ndRXCwZwc2b5S3Uo4e14R47JkHMiy/K961ayXmFCy80fzBeUgJ8/bUk4vfskfFWqyY9wdy/LymRZHhurrxFje/z80v3V6+eHEe3bi2Xrt8XFEjGcdYs2dfw4ZJ1HDu27PPkPu9Ta2DfPsmSzp0r2dr77pOvTp0kAB4zRk46uCoslDJU4ysrSwJeh0Oq7CdMMNf3DJA/72D+HKpWBc49V74qvKFD5UXs3j3SIwkpq/Ovo85ll0mps9UPHqIIYOBLRBSIBx6Qo/aXXzZ/H2Ou7YYN5QPfXbvM1QvWrSu1cv4C32hobGWYMSPw+yYkSE3l0aOSFvnyS+kYNHmyRIfRyMhU+Zvn64ykJs3qiCNKAqlOnYCffpJf+Ysv5NxKgwYSKJ13nrz0mzZJkGt8padLcGY44wyZhzlihO/AVWt5ae66S+apXnihZEsff1wqy999V4Jwf1PEf/1V/hxWrpRpf5dcIsFpfr5cun6fny/Z1pYt5YC/Vi25dP2+uBjYuVO+duyQBP+uXWUbmTdvLo953XXmK2GVkqxws2bSSOrZZ+XcxKxZEgRPnixBbL16QJMmJ6GgQF6i3Nzy+6pVSxoRXXGFucemAJxyikyEpuimlJz1IooBDHyJiKz6809pc/rww9YaRxldWDZulEyxq8xMiXr8UUrKnc0EvrVrx07nF28SEiRCW7dOyp7vuUeC+UiXcPvSqpVEd34C3z2Lt6I5gHUFx2OuozRDecEF8pWXJ5nJ6dOlqv7TT0vvW6uWvF0SEiQz3KmTBIBLlgCTJsnbq3t36eZ6+eXlM5Lr1wN33CFzY3v0kObYp58ut40aJf1qHnlEppHfd598777kycqVsvTO3LkS63/6qSR/QrFcSVGRvOV37pRzIIMHS/YzWO3aSbnwTTdJgPvrrxIIr12bh86d66BhQ5T7atRInutoPe9CRESeMfAlIrKiqEhKbdu1k8DXiqZNJX3nqcFVZqZ0tzGjRQv/c3yXL5fJh7HebMS1s/Pnn0v0M2NGdP9e8fFSk+sj8M3MBF68dgveAPDAlI7o6aEivUaN0rLYwkLpAluligS5LVp4zuYOHSrB7pdfSvnv9dfL2/TWWyW4q1YNePppYOJEOS8yYYI0WnINIqtUkbf4BRdI0Pv88zI/duJEKSfeulXmmn7xhQSBr70m+6ge4JKtZhhPaSinEdauLc2hzj4bmD8/DUOGDAndgxERUdhxOSMiIiveeksWZJwwwfrkLKUkQ+y+pFF+vkwcNNsatXlz3xlfo7FVNJQ5B+v44yWi+uoribxuuMHmNVOCt2aNBJhDh0qzpaNH4XNJoz17ZCmWBlnpAICeZ/tv3lO1qqz9mZgobxNfJczVqknJ8t9/SwazXz8pX27bVoLm//1Pbt+4UbK+3jKnzZvLUjZ//CEV9uecI73ETjgB+OEHCajT06VUOpRBLxERkR2Y8SUiMmvXLokgRo4sv26JWV26yEKf7vsF/K/ha2jRQsqtvVm3TroURbqxlR3i43G4XXfUnTMHuTUa4fVaz6PKCxKI1a0rS7rUrStPnZEcDgetJaj83/9kJY+aNSWwvO466Sz8S4v26HVwPtxjyn37JOjdvh24aUQ6sLhpyFooG2udDhtW2gjbeAsPGGB+P0lJUtY8aZJ8XXONdKI1+3YlIiKKBgx8iahyOnZM0l0XXihtYc24/36ZeDlpUuBrT3TtKpMhXdd4NdbwtRL47t8v9a+e0nXR1NgqSNOmAdicgCvwFx5WL+KtSY3LrdNquPtu6TUWyiro/Hwp8X3tNWD1ankpnn0WuPFGKftNTQXefhv45et26KN3YtTwItx4azzGjJGuzWecIVnSWbOAls+nh22plu7dpVlVoKpWlef37rvtGxMREVE4MfAlosrpq68kEnj/fUmF3Xab72DW4ZAo7LHHzDWh8sZoNrVpU2k3I6sZX2NJo337PN9nxQpJhcZwYyutZXnI++8H7up9DS7sXx8T37kOE5ScezhyBDh8uPTLCEbXrZPv69e39nhFRXIuoqhIvgoLy35fUCDB6qRJUmWekCBL6lx6adky36Qk+Tp0UntUua8E2Wk7cc457dGunTSH2rpV1mEdOhTAdVuicx1iIiKiCoiBLxFVTlOmSGB4wgmS+V2zRtZu8ZRBLSyU7j0dOkgb22C4LmlkBL5GxtfsHN8WLeRy927Pga/R2CoU7XXDoKREMosTJwIXXQS88MlQVK8+FACgIGXFNWuW7ao7eLB0IL71VlkFJSXF3DI3hYXyVnjySeDAAf/bn3mmNPQePtz3eZL6J8qSRn9+noGUg+3x9tuyRu8PP0jWF4WFUu982WX+H5SIiIiCxsCXiCqfVauAxYslRXjHHbJWy4svSrefb78FGjcuu/2kSRIYz5hRfk0Xq4xssWuDq8xMCbjdH9cb18DXXVER8M8/Unsbg/LzpfHS118Dd94pc2jNxu833ijnMi64QOawTp8uDaE80RqYPVvm465fLxnYMWPkZYiPL/1y/bl7d/kyxbmWb/yODJx7JXDuuRLQ//e7/PuvLFhrdhFaIiIiCgoDXyKqfKZOlfrUq66SSOSFF2Qx0+uvl4gpJaU0wsnMlE4+o0fLujLBqlVLlkJyXdIoM1OyvWYjPCPw9bSkUQw3tjp0SALEefOAV16RJXutTqU+/XRg6VJ5qYYPlybc48aV3Wb1atn33LmyNPCMGbJ9oNO2PWrXTi5dOjuXeXnTpaNzuOb4EhERVXaxWQdHRBSo3FxpLnXBBWUzrFdcAcyfL7cPHCjpQEBSgoWFwBtv2BcZdelSNuO7a5f5MmegdI6vp4zv8uVyGWONrfbvr4akJGkO9emn8rQH+nR36iQJ/TPOAMaPB26/XRLhe/bIz717y9M0YYIsDzx2rM1BLyC12M2aeV/Ld8sWuWTgS0REFBbM+BJR5fLVV0BOjkRA7gYOBJYtk0goORm49lrplPTkk/YGKEZnZ60l4srMtNaIqmZNoF49z4HvihWyPE7nzvaNN4S0liD01lv7IjdXGkideWbw+61fXxL3998vvcsWL5aS5mPHJBB+7DHpwhxS7drJPF5P0tNlwV2uCURERBQWzPgShduMGVK
PqXWkR1I5TZkCdOsm3ZA8adtW1sg95xzgvfck4L3/fnvH0KWLBN9798rPmZnWA6AWLbxnfKO8sdWhQ8B33wE33CCx4YABQEFBHObPtyfoNcTHyzTu996Tac9Dh8pU7ddfD0PQC8g8X28Z3/R04LjjQrv2EhEREf2HGV+icJs9WyYx5uRYX3OFgvP33zIBdMIE37WttWsD33wDfPAB0L+/ZFjt5NrZuX59IDvbWqkzIIGv+xxfo7HVzTfbM06blJTIsObMAX76CVi4UPo61asn5ciPPw40abIM/foNCsnjX3cdcOWVkmANq/bt5e/dyOy7Sg/fGr5ERETEwJco/IymNvv3M/ANt6lTgRo1JAryJy5Oml2FglHWvHGjZJgB6xnf5s2lO7WrtWtlkdsoaWxVXAx8/rn0Btu6Va7r2xd44AFg5EipLDdWj5o/vzCkYwl70AtI4HvsmKy33KxZ6fVayxzfU06JwKCIiIgqp+ithSOqqIzA18yioWSfI0eAadOACy8MU52rD+3aSVfpDRtK1/C1o9Q5ShpbaS2Z3b59pXF2w4bARx9JD68VK4DnngMSEz0vmVyhOJc0KlfunJ0t9d5cyoiIiChsGPgShVNRUelBMAPf8PryS+DwYc9NrcKtShVpPhVs4HvokGR4DUZjK2Ot4AhYtgwYNgwYNUrONXz5pVx39dWlqzBVGt4CXy5lREREFHYMfInCaft2qf8EGPiG25QpslbvqadGeiTCWNJo1y75OZA5vkDZeb7Ll0uaNQKNrTZtAi66SBpVpaUBkybJksIXXxzVfbZCi4EvERFR1OAcX6JwMg54AZnjW5Hl5gK1aoVggVQnrSWlWLeu/21XrpSg0M61eIPVtSswc6YERVWrll1T2AzXtXzbt5e1hv/5B7j1VtuHWlQEHDwoFbqevjZuBD77TKq3n3gCuOcecy9LhdegAVCnTvkljYw1fI87LuxDIiIiqqwY+BKFk2vgW5Ezvnv3ykH9tGnAuefav/+sLEkvLloEfPIJcP75vrefMkWaWl1xhf1jCVSXLhJRLlwoZc5WA3Ij42vM8127FsjPt9TY6uBBefocDjmHcPSonK/IzS39/ujRstXUntSuLRXkjz1WGo8T5DX1tKRRero8UXXqRGZcRERElRADX6JwSk+X7F6dOhU78F2yRCKmxYvtD3zXrweSkyWL1qULcMEFwNNPA48+6jl4PHxYWgtffLF0WYoWxpJGy5cH1ozKvdTZZGMrrWXO7TvvyPzbY8dkSnCjRpKgb9lSAtlatcpeNmzo/atGDevDrzS8Bb4scyYiIgorBr5E4bRli2RClarYpc4rVsjlhg327vfnnyWArV4d+P13yW6OGycLwa5dK+vuuq+5+8UXks6MhqZWrowljYqKrDe2AkqXxzEyvitWyMK4XjoFHzkiT8U770jld+3asqrT+PEyLZhCpH17qUxwtWULMHhwZMZDRERUSVXWliNEkWFkeho3rtgZXyP7uHGjPfvTGpg4ETjrLAkkli4FBg2SVOPHHwMvvQR89RWQlFTaJdkwZQrQs6csGhtNGjcundcbSOBrzAs2Al8Pja20Bv76C7jlFnmIceNkKvDbb8vTNGUKg96Qa99eJkIfPiw/FxQA//7LjC8REVGYMfAlCqf0dMnIVeTAV+vSjO/mzZLRDEZBgURsd94JjB0LLFhQ2i0XkOz5/fcDP/wgZdD9+0stLyDB4MqVktaMlqZWroysr9WOzgZjLd/CQmDVqv/KnDMygBdeABISJLB9/33gnHNkOvE//wA33STJYQoD987O27cDJSVcw5eIiCjMGPgShUt2tnQTqugZ38xMCcZ695aAbNu2wPe1fz9w5pnAe+8BDz8MTJ/uvSHQ2LES2VWrJpnfr76SlGbNmsDllwc+hlAy5vkGkvEFJPDdswdYswbIz8ev2f2QmAh06CBPV8OGwOTJwM6d0sTqlFOiM/6v0Nq1k0ujszOXMiIiIooIzvElChdjCZOOHWXt1oo6x9fI9l5+OfD331Lu3KmT9f2sWweMGSNR22efAZdd5v8+PXtKGfR55wGXXALEx8tE1gYNrD9+OBgZ3wADX928OY7+vhgfXrMctwK48f2TUK0b8Nxz8nR16GDbSClQ7hlf188BIiIiChtmfInCxTXT07ixrBFz9GhkxxQKK1bIPNOLLpKfA21wddddwKFDwB9/mAt6DU2bAr/+Clxzjfx8882BPX44nHyyBOdGAGzB5s3Adwuk1LnexuU4Vr0+vllxPNaskWwvg94o0bKlzMc2At/0dGnOFmh5OxEREQWEgS9RuLgHvkDFLHdevhzo3h1o21ZqbQNtcPX338DZZ0twaFX16jKxdf/+wJYKCpfTTwf27bMUpebny+pNCQnAX7tboDaO4sp2f6DmqX3Rp69iKXO0iYuTvwXXwLdjxzJNyIiIiCj0+J+XKFzS02UJmjp1gCZN5LqKVu5sNLbq108mk3btGljGd98+mbuakBD4WJQC6tcP/P7hYqEM+9dfpZr7iSekWdU9L8tavmr9+ugO8Cs717V8uYYvERFRRHCOL1G4bNlSesBbUTO+O3dKwNqvn/zcpQvw22/W97NmjVwGE/hGsWPHZGnX338H0tIk4dujhyTKu3eXRLmr3buBu++WdXg7dZLljM88E8AvzUs3Mp5zij7t2wO//CInhrZsARITIz0iIiKiSoeBL1G4pKcDp54q31fUwNdobGVkH7t2lXbCR45478bsSVqaXFaQwLewUFZY+v13+Vq4UEqWq1QBOncG5s4tO927ZcvSQLh+fVnCOC9PMr0PPijLFwOQrs4GZnyjV7t20tBu1y5Zz5dLGREREYUdA1+icCgslOVMrrxSfq7IgW9cHNCrl/xsNG3auFEWlDUrLQ1o1KhsYBdjDh4Evv5alhd2OIDcXLm+d2/glltkem9ioqynW1Iib481a4C1a0sv339f7jd8OPDWWxIkl2E8Pw0asHw2mrVvL9leh0N+5mtFREQUdgx8icIhI0OiG/dS54o2x3f5cklV1qolPxvr1AYS+CYkxNyis8XFMg/3o48k4M3Lk9Lkq6+WQPe000qnd7uKi5Ny5w4dgNGjS68vKZFzI02aeHkqGjeWtHHfvjH3XFUqxpJG8+bJJQNfIiKisGPgSxQOrh2dAVnepF69ipXxNRpbnXVW6XWdOklAZqXBldYS+F5+uf1jDJF164CPPwY+/RTIzJQ5utdeC/zf/0kFcqAxaVycrM7kVZUqwIgRst4xRS/3wPe44yI3FiIiokqKgS9ROBiBr+vcvsaNK1bgu2MHsHdv2SZLNWvK/EYrSxrt3Cnr90b5/F6tpZT5tdeApUslBh01SubjJifLikphMWtWmB6IAta2rVxu2iQTuI2KCCIiIgobBr5E4ZCeLpFQy5al1zVpUrFKnd0bWxmsLmkUA42tdu0CbrwRmDlTGlD973/AZZfF9JRkCiXjb3/XLpY5ExERRQjX8SUKhy1bpLwxzuVPrqJlfFeskLSn0djK0KWLZHy1NrcfI/Dt0cPe8dlAa2DaNBnaL78Ar7
4KrFolSw0x6CWfjHJnBr5EREQRwcCXKBzS08sf8Fa0wNdobFWzZtnru3aVJVx27za3n9WrgVatpKtzFMnMBM4+Wxpzd+sG/P03cM89EusT+dWunVwy8CUiIooIBr5Eoaa1BL7ua3dWpMDXaGzlOr/X4LqkkRlGR+coobUsRdyjh6y3+9prsiqN0bCayBQj48s1fImIiCKCgS9RqGVlATk55TM9TZrI9QUFkRmXnf79F9i3z3Pga0SIZub5FhfLArZREvju3CmNqq6+Woa0ahVw113M8lIAWOpMREQUUQx8iUJtyxa59FTqDEhgHOu8NbYCpKNtjRrmAt/0dFn8NsKBb04O8OSTwAknAL//DkyYAPzxB9C5c0SHRbFs9Gjg0kuBPn0iPRIiIqJKiV2diULNfQ1fgxH4HjgQ+52RjMZWJ55Y/ra4OIkYzZQ6G42teva0d3wm5eUBkycDzz0nL8uFFwIvvMDqVLJBhw7A559HehRERESVFjO+RKFmJvCNdcuXS5bWvbGVoUsXcxnftDRAKekeFUZFRcCHH8ow774b6NsXWLZM1ull0EtEREQU+xj4EoVaerpkdGvVKnt9kyZyGetr+fpqbGXo2lWeh8JC3/tKS5MTBLVr2ztGL7QGvv9eEtXXXisv06+/ylJFnqq2iYiIiCg2MfAlCrUtWzw3tKkoGd/t2yV49xf4FheXZr+9CWNH5z17gEGDgPPOkwB4+nRgyRJg2LCwPDwRERERhREDX6JQ87SUEVBxAl9fja0MZpY0ys+X28MQ+O7bJwHuP/8A770nSwefd55UWRMRERFRxcPAlyiUCgpkqR9PGd9atWRObKyXOq9YAcTHe25sZTACX1/zfDdulMm2IQ58s7KAM8+URHxKCnDddTJ8IiIiIqq4eLhHFEoZGVJH623tzsaNYz/ju3w50KOHLFnkTaNGMqfZV+BrdHQOYeB76BAwYoQsFTxzJnD66SF7KCIiIiKKIsz4EoWStzV8DbEe+BqNrcx0gura1Xepc1qapF6N7LDNDh8GRo0C/v4b+PZbCYCJiIiIqHJgxpcolIxmTt7WxIn1wHf7dhm/r8ZWhi5dgNmzvd+elibBcbVqACSm3rVLHuLff+XS+DJ+rlIFGDcOuOUWoFkz77vOzQXGjAGWLpUlipKTLf6eRERERBTTGPgShVJ6upQAt2jh+fYmTSQFGauWL5dLsxnfDz+UeuP69cvfnpYG9O8PQILeSy8Fvvqq7CZ16gDt2slXv37Azp3A008DL70EXHmlrMHrvgTwsWPA2WcDf/4JfPaZNLEiIiIiosqFgS9RKKWnS5mzt3bBsZ7xNRpb9ezpf9uuXeVy48b/Atz/5ObKc3XNNQCAH3+UoPfGG4HRo0uD3fr1yz+VGzYAr78OfPyxdGgeNQq45x6Zv1tQAJx/PvD778BHHwGXXBL8r0xEREREsYeBL1EoeVvD19C4MZCdLWvcVqkSvnHZZflyaUblq7GVwXVJI/fAd+1auUxIQF4ecMcdQPfuwBtvAFWr+t5t167AO+8Azzwjl2++CZxxBtCrlzy9v/8OTJ0KXHWV9V+PiIiIiCoGNrciChWtva/ha2jSBCgpAQ4eDNuwbGOlsRUgz0NcnOfOzqtXy2VCAl55Bdi61VzQ66ppU+Cxx6SR9nvvAYWFEvS++SZwww3m90NEREREFQ8zvkShsn8/cOSI/4wvIOXOxvexIiNDFsU109gKAKpXBzp08Bz4pqUBNWtimzoOzz8PXHghMGxYYMOqUUPW5r32WmmO1apVYPshIiIiooqDGV+iUDE6OpsNfGONlcZWBm9LGqWlAd274577qyAuDnj11eCHpxSDXiIiIiISDHyJQsVYw9dXqXMsB74rVkgtspnGVoYuXSTwLSkpe31aGnY27onvvgMeeUQaWRERERER2YWBL1GoGBnfDh28b9OkiVzu3x/y4djOaGxVvbr5+3TtChw9CmRmll534ACwaxem/ZWATp2kIzMRERERkZ0Y+BKFSnq61NrWrOl9m1jN+FptbGUwOju7zvNdswYA8Pu+BEycaC2OJiIiIiIyg4EvUagYa/j6Uq+erIMba4Hvtm2yDJPZxlYG17V8nQ7+mQYAaHlGAs46y6bxERERERG5YOBLFCpbtvie3wtIB6bGjWOv1Pm33+RywABr92vdGqhVq0zGd/lHachGAzw2mZ2oiIiIiCg0GPgShUJeHrBzp/+MLyCBb6xlfKdOBbp3B3r3tnY/pUobXAFwOIBqm9KQ0zYBx3dS9o+TiIiIiAgMfIlCIyND5sFWxMD3r7+AZcswIW88tmUEEKx27Qps2ICiIuC2WzVOjEtDqxEJ9o+TiIiIiMiJgS9RKBgdnf2VOgOxF/hOnYqCKjXwVPqVuOkmie8t6dIF2LYNUyflY9/qXWhQko2qvRn4EhEREVHoMPAlCgVjDV8zGd8mTWJnju+RI9CffYZvq1yMqk0b4qefgG++sbiPrl2BkhJ89NgWXNVHGlshgYEvEREREYUOA1+iUEhPlyZOzZr539bI+FpOnUbAF19AHT6MSQXj8eGH0tT5jjuAQ4fM76Kooyxp1LlkA+4/yxn49ugRgsESEREREQkGvkShYCxlpEzMgW3cGCgsBI4cse/x58wpu1auXaZMwba6PZHRYiBGjgSmTAH27gUeftj8Ll77UQLfe8duRKPMNKBFC8l6ExERERGFCANfolBITzc3vxeQwBewr9w5Nxc491zgwQft2Z9hxQpgxQq8njsOV1ypUKWKZHxvuw2YPBlYvNj/LhYtAh5+qT4O1miOPrU2AGlpLHMmIiIiopBj4EtkN61LM75mGNlOuxpczZ0L5OcDqalASYk9+wSAKVNQWLUmPi65AldfXXr1M88ArVoB48dL4tqbnBzg8suBtm2BOn27AuvXA2vWMPAlIiIiopBj4Etkt717JetqNvA1Mr52Bb4zZ5bub906e/Z5+DDw+eeYVfcSdOrXoMyU3Lp1gTffBFatAiZM8L6L226TVZ6mTQPie3QFli4Fjh5l4EtEREREIcfAl8huxlJGkQh8S0qAWbOA/v3l59TU4PcJAJ9/DuTm4oWs8WWyvYZzzgHOPht44glg27byt3/5JfDJJ8CjjwKDBkGWNCoulhsZ+BIRERFRiDHwJbKblTV8gdJSZzvm+C5dKhnnO+6Q+mOHI/h9ag1MmYKdTXvhr/gBuPRSz5tNmgTExQG33FK2QXVGBnDjjcDAgcBjjzmv7Nq1dAN2dCYiIiKiEGPgS2S3LVukm3P79ua2b9hQtrcj45uSAlSpAowaBSQlSeAb7DJJy5cDf/2FSXnjMHqM8tqAuW1bme87ezYwfbpcV1wMXHmlJKI/+wyIj3du3EU6O+O444A6dYIbHxERERGRHwx8ieyWng60bg3UqGFu+ypVgAYN7At8Bw0CGjUCEhOBnTs91x5bMWUKiqrXwuTDl+Oqq3xvetttQJ8+wO23y9q+L74o1dZvveVW+d2xo/zeLHMmIiIiojBg4Etkt61bJZNpRePGwZc6Z2QAq1cDycnyc1KSXAZT7pyTA
[... base64 PNG image data truncated: matplotlib figure output ...]",
+       "text/plain": [
+        "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA9IAAALYCAYAAACQf8oMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABsx0lEQVR4nO3dd5hcZdnH8e+dntASehUSQJoBlIA0IYCR3gWkSC8qiICKoiC8gIoiTZCOgBRBkN4hEFQM0kEiXTpIDYFdQurz/vHMsJvNJtlNdudM+X6u61xn98xzZu4JR+GXp0VKCUmSJEmS1DE9ii5AkiRJkqRaYpCWJEmSJKkTDNKSJEmSJHWCQVqSJEmSpE4wSEuSJEmS1AkGaUmSJEmSOsEgLUlSA4uIvSMiRcTeRdciSVKtMEhLktQBEbFMKXDeUnQt9Swijiv9Obc+miLi8Yj4WUT064LPSBExqgvKlSQ1qF5FFyBJkgp1PfAg8HbRhbRxDfAfIIDFgO2AXwIbASOKK0uSJIO0JEkNLaU0DhhXdB3t+EtK6dryLxHxU+Ap4OsRsXFK6d7iSpMkNTqHdkuS1A0iYtmIuDgi3oiIiaXz2RGxUDtt942ImyLi1YiYEBHvR8SNETGsnbafz2mOiG0j4p+loc9PlF4fVXq9d2mY9Mul93w+Ir43s/drda08jP2SiFguIq6PiLER0RwR90TEajP4zruUhmB/FhFvRsRpEdG/K4ZSp5Q+BG4s/bpGm89dIyL+EBFjIuLjUp2PRcT3IiJatRseEan064Ztho+3/v49IuKAiPhX6c+2qfTnvMOcfAdJUv2wR1qSpC4WEesAdwD9gJuAl4EVge8Am0bEmqVgWPYH4HHgLuADYBlg21Lb4SmlB9v5mF2Ar5fe/+9M/+/0PwNrAbcDU4CdgT9ExKSU0gUd/CrLkId9jwH+CCxbquu+iFgppfROq+98IHBeqf6LgM+A7YHlO/hZnTGpze8HAFsBfwNuBeYBvkH+c/0icFip3SvA/wHHAq8Cl7R6jycASsH7z+Q/r/8Al5Ze3xL4a0QcllI6owu/iySpBhmkJUnqQhHRB7iKHF5XTyk90+q1nYC/AMcDh7S6beWU0stt3mcl4CHgRHJgbusbwCYppVEzKGVJ4EsppY9L73cG8DTwQ6CjQXpD4Kcppd+0qusE4GhgH+Ck0rVBwKnAR8BXUkqvla7/Ahjdwc+aqYiYnxziAR5o8/KvgO+llKa2at8LuAX4fkScllJ6NaX0CnBcRBwLvJJSOq6djzqQHKLPBg5NKU0pvd+RwL3AbyPimpTSW13xvSRJtcmh3ZIkda2tgC8AJ7UO0QAppWuAR8m9ya2vTxOiS9eeAe4DvlYK521dP5MQDXBUOUSX3u85cgBdISLm6eB3eRk4uc21i0rnNVtd2xaYCzi/HKJLn9lMXiBsduxcGpr+fxFxPvAssARwVkrp4dYNU0qvtQ7RpWuTgfPJ/62zUSc+92DynPHDyyG69H7NwAlAH8Ah3pLU4OyRliSpa321dF4lIo5r5/X+wIIRsWBK6X2AiFgO+Bk58C1ODmutLcD0q2o/Mos6Hm3n2hul80Dgk1ncD/BE24Da5j3KynOm2+t9/mcHPqc9O7Vz7cyU0qFtL0ZEX+BQ8l9QrADM3abJYh35wIgYAHwJeB34Wavp1WXl+e0rduT9JEn1yyAtSVLXmr903nMW7eYC3o+I5clDuOcB7iFvR9UETCVv+bQa0Led+99p59rnWvdGtzK5dO45i9rKpnuPlNLkUsBs/R7lHu732nmPdzv4WW3tlFK6NiJ6AysDZ5KHaT+dUjq/TdvrgC3IvdZXluqYTJ7jvRft//m1ZxB5u60vkOdRz8hcHf0SkqT6ZJCWJKlrlcPn5imlOzrQ/jBy7+7uKaUrW78QEV+lpbe3rTSD60Uo925PtyI5sPCcvHFKaRLwZERsDTwHnB4Rt6aU3gSIiDXJIfoOYMs286R3IQfpjir/s/tXSmntOalbklTfnCMtSVLXeqh07mgQW7Z0vqn1xYjoB3ylq4rqZk+Wzu1953W64gNK+10fSx4a37q3uPznd2s7w9DXm8HbTaWdXvmU0ifkXu2VOzGPXJLUgAzSkiR1rRvIc2x/HBHTBcvSvspfbXWpvDjXeq3aBPBr5rA3t4JuAj4FDoyIJcsXS3OOf96Fn3Mx+c9274hYpnRtuj+/0mevTV6Buz0fklc1b8+Z5KHq55b+MmMaEbFKRNTKPxdJUjdxaLckSZ2zWkRcMoPX/pFSurC0zdXtwD8j4i7yfsS9yHN2NyQvyrVZ6Z7zyFtJXRcRV5NXjP4aMBgYBQzvlm/RhVJKH0bEj8hbRj0REVfRso/0M8Cq5F7gOf2ciRFxEnl/6GOA/YB/kRde+1ZELAo8DAwBtiEH/B3beat7yauC30Dev3sKcFNK6SngHGBdYHfyiun3Av8jL1g2FPgyuZd9dud+S5LqgEFakqTOWZKZz7u9MKX0r4hYHTgS2Jy8Gven5BWv/1Q6AEgpPRoRm5G3VtoJmEgO0LvStb253SqldE5EfAT8FNif3Ot7NXl/6dfo2CrhHXEReYXzPSPiVymllyJiK+A35L21v0oO73sDb9F+kP5B6bwxsDV5hN4bwFMppQTsERG3l77HtsAA8uJuzwDfA/7dRd9FklSjIv/7QpIkqetFxCbk1chPTikdWXQ9kiR1BedIS5KkORYR85f2c259bT7gl6Vfb6x8VZIkdQ+HdkuSpK6wMXmBrjvJw6QXATYFFgWuSCk9UGRxkiR1JYO0JEnqCv8mL+K1IbAgeXGx58hzl88ssC5Jkrqcc6QlSZIkSeoEe6TnwIILLpiWWWaZwj6/ubmZueaaq7DPl1rzeVQ18XlUNfF5VDXxeVQ1qYXn8dFHH30/pbRQ2+sG6TmwzDLL8MgjjxT2+aNGjWL48OGFfb7Ums+jqonPo6qJz6Oqic+jqkktPI8R8Wp71121W5IkSZKkTjBIS5IkSZLUCQZpSZIkSZI6wSAtSZIkSVInGKQlSZIkSeoEg7QkSZIkSZ1gkJYkSZIkqRMM0pIkSZIkdYJBWpIkSZKkTjBIS5IkSZLUCQZpSZIkSZI6wSAtSZIkSVInGKQlSZIkSeoEg7QkSZIkSZ1gkJYkSZIkqRMM0pIkSZIkdYJBWpIkSZKkTjBIS5IkSZLUCQZpSZIkSZI6wSAtSZIkSVInGKQlSZIkSeoEg7QkSZIkSZ1gkJYkSZIkqRMqHqQjYqmIuDYixkXExxFxXUR8oYP39ouIkyPi7YgYHxGjI2KDdtodERE3l9qliDhuJu+5XUQ8HhGfRcSrEXF0RPScg68oSZIkSapjFQ3SETEAuBdYEdgL+DawPHBfRMzVgbe4CDgA+AWwFfA2cGdErN6m3QHAwsANs6hnU+CvwMPA5sAZwNHA
rzr0hSRJkiRJDadXhT/vAGAIsEJK6UWAiHgKeAE4CDh1RjdGxGrAbsC+KaWLS9fuB8YAxwPbtGq+SkppakT0Ar4zk3pOAv6RUjqw9Pt9ETE3cHREnJZS+t/sfElJkiRJUv2q9NDubYAHyyEaIKX0MvAAsG0H7p0EXN3q3snAVcCmEdG31fWpsyokIpYCVgcub/PSZUBvcg+1JEmSJEnTqHSP9CrAje1cHwPs1IF7X04pfdrOvX2A5Uo/d6YWgKdbX0wpvRwRnwIrd+K9GtLf/gaXXDJn79GvH5x4Isw/f5eUJEmSJEndrtJBen5gbDvXPwQGzcG95dc7WwszeM+xM3q/iDgQOBBgkUUWYdSoUZ382K7T1NRU6Of/+Mer8uSTAxk0aOJsv8e77/Zj/PiX2WuvV7uwMhWh6OdRas3nUdXE51HVxOdR1aSWn8dKB+mal1I6HzgfYNiwYWn48OGF1TJq1CiK+vypU+H552HffeHcc/vN9vtsvjncccdgzjtvMH36dGGBqrgin0epLZ9HVROfR1UTn0dVk1p+His9R3os7fc8z6i3uaP3QkvPdGdqYQbvOWg23q+hPPMMfPwxrLPOnL3P978P//sfXHdd19QlSZIkSd2t0kF6DC1zk1tbGfhPB+4dXNpCq+29E4EXp79llu9H23oiYhlgQAfqaWijR+fznAbpzTaDZZeFs86a85okSZIkqRIqHaRvAtaOiCHlC6Xgul7ptZm5mbya9ueLkpW2t9oFuCulNKEzhaSUXgOeBHZv89Ie5NXBb+/M+zWa0aPzAmHLLz9n79OjBxx8MDzwADz+eNfUJkmSJEndqdJB+gLgFeDGiNg2IrYhr+L9OnBeuVFELB0RkyPiF+VrKaXHyVtfnR4R+0fEJuStrwYDx7b+kIgYFhHfBHYoXVo5Ir5ZOlr3aP8M2DAizouI4RFxOHA0cIZ7SM/c6NGw9toQMefvtc8+MGAAnHnmnL+XJEmSJHW3igbplFIzsDHwPHm/5iuAl4GNU0pNrZoG0LOd+vYBLgZOBG4FlgI2Syk91qbdIcA1tOw5vVPp92uAhVvVcxvwTWBt4E7gcOBXwE/n5HvWu48+ynOk53RYd9nAgfDtb8OVV8IHH3TNe0qSJElSd6n4qt2lIdU7zqLNK+Qw3fb6eOCI0jGz+/cG9u5gPdcBLnXVCf/6Vz53VZAGOOQQOO88uPBC+MlPuu59JUmSJKmrVXpot+rA6NF5SPeaa3bde37pSzB8OJx9NkyZ0nXvK0mSJEldzSCtThs9Ogffeeft2vf9/vfhtdfg5pu79n0lSZIkqSsZpNUpU6fmod1dOay7bJttYKmlXHRMkiRJUnUzSKtTnn0Wxo3rniDdqxd897tw773wH3fxliRJklSlDNLqlNGj83nttbvn/Q84APr2hbPO6p73lyRJkqQ5ZZBWp4weDYMGwRe/2D3vv+CCsOuu8Kc/5Z5vSZIkSao2Bml1yoMP5t7oHt345BxyCDQ3wyWXdN9nSJIkSdLsMkirw8aNy3OXu2tYd9kaa+Q52GedlRc3kyRJkqRqYpBWh/3rX5BS9yw01tYhh8CLL8Jdd3X/Z0mSJElSZxik1WGjR0MEfPWr3f9Z3/wmLLqoW2FJkiRJqj4GaXXYgw/CKqvAvPN2/2f16QMHHQS33557piVJkiSpWhik1SFTp7YsNFYpBx2Ue8D/9KfKfaYkSZIkzYpBWh3y3HPw0UeVmR9dtthisNZacPfdlftMSZIkSZoVg7Q65MEH87mSQRpgxAh46KEc4iVJkiSpGhik1SGjR8PAgbDCCpX93BEj8rDy++6r7OdKkiRJ0owYpNUho0fn1bp7VPiJWXttmHtuh3dLkiRJqh4Gac3SuHEwZkzlh3UD9O4NG25okJYkSZJUPQzSmqWHH4aUignSkId3v/givPJKMZ8vSZIkSa0ZpDVLo0fnbai++tViPn/EiHy+555iPl+SJEmSWjNIa5ZGj4aVVoL55ivm81daCRZf3OHdkiRJkqqDQVozNXVq3vqqqGHdkHvDR4yAkSNzPZIkSZJUJIO0ZuqFF2Ds2GKDNOQg/cEH8PjjxdYhSZIkSQZpzdTo0fm89trF1vH1r+ezw7slSZIkFc0grZkaPTrPjV5ppWLrWGQRGDrUIC1JkiSpeAZpzdTo0Xm17h5V8KSMGAH/+Ad8+mnRlUiSJElqZFUQj1StPvkEnn66+PnRZSNGwMSJOUxLkiRJUlEM0pqhhx6ClIqfH122wQbQp4/DuyVJkiQVyyCtGSovNPbVrxZbR9mAAbDeegZpSZIkScUySGuGHnwwLzI2aFDRlbQYMQKefBLeeafoSiRJkiQ1KoO0Zuill2CVVYquYlojRuTzyJHF1iFJkiSpcRmkNUNNTTD33EVXMa0vfzn3kDu8W5IkSVJRDNKaoebm6gvSPXvCJpvkIJ1S0dVIkiRJakQGac1QczPMNVfRVUxvxAh480147rmiK5EkSZLUiAzSatekSXnP5moN0uDwbkmSJEnFMEirXc3N+VyNQXrwYFh2WYO0JEmSpGIYpNWucpCutjnSZSNGwKhRuedckiRJkirJIK12VXOPNOQg/ckn8K9/FV2JJEmSpEZjkFa7qj1Ib7QR9Ojh8G5JkiRJlWeQVruamvK5WoP0oEEwbJhBWpIkSVLlGaTVrmqfIw15ePdDD8G4cUVXIkmSJKmRGKTVrmof2g05SE+ZkhcdkyRJkqRKMUirXbUQpNdZJ9d3111FVyJJkiSpkRik1a5aCNJ9+sDw4XDnnUVXIkmSJKmRGKTVrvJiY9U8Rxpg883hpZfghReKrkSSJElSozBIq13lHukBA4qtY1Y23zyf77ij2DokSZIkNQ6DtNrV3Az9++e9mqvZkCGw/PJw++1FVyJJkiSpUVR5TFJRmpure350a5tvDvfdB+PHF12JJEmSpEZgkFa7mppqK0h/9hn87W9FVyJJkiSpERik1a7m5upfaKxsww2hXz+Hd0uSJEmqDIO02lVLQ7v798/bYBmkJUmSJFWCQVrtqqUgDXl49/PPw3//W3QlkiRJkuqdQVrtqqU50gCbbZbPboMlSZIkqbsZpNWuWpojDXkLrCFDHN4tSZIkqfsZpNWuWhvaHZGHd997L0yYUHQ1kiRJkuqZQVrtqrUgDXl496efwt//XnQlkiRJkuqZQVrTSan25kgDbLQR9Onj8G5JkiRJ3csgrelMnAhTptTWHGnIwX/DDQ3SkiRJkrqXQVrTaW7O51rrkYY8vPuZZ+DVV4uuRJIkSVK9MkhrOrUcpDffPJ/dBkuSJElSdzFIazpNTflci0F6xRVh6aUd3i1JkiSp+xikNZ1yj3StzZGGvA3WZpvByJF5rrckSZIkdTWDtKZTy0O7IQ/vbmqCBx4ouhJJkiRJ9cggrenUepDeeGPo3dvh3ZIkSZK6h0Fa06n1ID3PPLD++i44JkmSJKl7GKQ1nVpebKxs883h3/+GN94ouhJJkiRJ9cYgrenU8mJjZW6DJUmSJKm7GKQ1nVof2g2wyiqwxBIGaUmSJEldzyCt6TQ3522
k+vcvupLZF5F7pe++GyZNKroaSZIkSfXEIK3pNDXBgAE5jNayzTeHjz+G0aOLrkSSJElSPTFIazrNzbU9P7psk02gVy+Hd0uSJEnqWgZpTae5ubbnR5fNNx+suy7cdlvRlUiSJEmqJwZpTadegjTAFlvAk0/C668XXYkkSZKkemGQ1nSamuonSG+9dT7femuxdUiSJEmqHwZpTade5kgDrLQSDBkCN99cdCWSJEmS6oVBWtOpp6HdEblXeuTIlv2xJUmSJGlOGKQ1nXoK0gBbbQUTJuQwLUmSJElzyiCt6dTTHGmADTaAeed1eLckSZKkrmGQ1nTqaY40QJ8+sOmmcMstMHVq0dVIkiRJqnUGaU0jpfob2g15nvT//gePPlp0JZIkSZJqnUFa0/jssxym6y1Ib7459OiRe6UlSZIkaU4YpDWNpqZ8rrcgveCCsM46zpOWJEmSNOcM0ppGeYuoegvSkId3P/44vPFG0ZVIkiRJqmUGaU2jHKTrabGxsq23zmeHd0uSJEmaEwZpTaOee6RXWgkGDzZIS5IkSZozBmlNo56DdETulR45Ej79tOhqJEmSJNUqg7SmUa+LjZVtvXVemfyee4quRJIkSVKtMkhrGvU8Rxpggw1gnnlcvVuSJEnS7DNIaxr1PLQboE8f2HRTuPVWmDq16GokSZIk1SKDtKZR70Ea8vDut9+Gxx4ruhJJkiRJtcggrWnU+xxpgC22gB49HN4tSZIkafYYpDWN5mbo2RP69i26ku6z4IKwzjoGaUmSJEmzxyCtaTQ3597oiKIr6V5bbw2PPw5vvll0JZIkSZJqjUFa0ygH6Xq31Vb5fMstxdYhSZIkqfYYpDWNpqbGCNIrrwyDBzu8W5IkSVLnGaQ1jebm+t1DurWIPLx75Ej49NOiq5EkSZJUSwzSmkajDO2GHKQ/+yyHaUmSJEnqKIO0ptFIQXqDDWCeeRzeLUmSJKlzDNKaRqPMkQbo0wc23TQvODZ1atHVSJIkSaoVBmlNo1HmSJdtsw28/TY88kjRlUiSJEmqFQZpTaORhnYDbLkl9OwJ119fdCWSJEmSaoVBWtNotCA9//wwfLhBWpIkSVLHGaT1ualT81ZQjRSkAbbfHp57Dp55puhKJEmSJNUCg7Q+V95PudGC9Hbb5bO90pIkSZI6wiCtzzU353MjLTYGsMQSsNZaBmlJkiRJHWOQ1ufKQbrReqQhD+9+5BF4/fWiK5EkSZJU7QzS+lyjB2mAG24otAxJkiRJNaDiQToiloqIayNiXER8HBHXRcQXOnhvv4g4OSLejojxETE6IjZop12PiDgqIl6JiM8i4smI2LGddgMi4v8i4vnS+70eEX+KiGW64KvWnKamfG7EIL3CCrDSSg7vliRJkjRrFQ3SETEAuBdYEdgL+DawPHBfRHQkvl0EHAD8AtgKeBu4MyJWb9PuBOA44Cxgc+BB4JqI2KJNuwuBHwMXAFsARwMbACMjosFmCjfuHOmy7beHv/0NPvig6EokSZIkVbNK90gfAAwBtksp3ZBSuhHYBlgaOGhmN0bEasBuwOEppQtSSiOBnYHXgONbtVsY+BFwUkrpdyml+1JKBwH3ASe1ajegdP/pKaWTS+0uBb5TqnG9LvvWNaKRh3ZDDtJTpsDNNxddiSRJkqRqVukgvQ3wYErpxfKFlNLLwAPAth24dxJwdat7JwNXAZtGRN/S5U2BPsDlbe6/HBgaEYNLv/csHR+3afdR6dxw88cbPUivsQYstZTDuyVJkiTNXKXD4irA0+1cHwOs3IF7X04pfdrOvX2A5Vq1mwC82E47yp+TUvoEuAw4NCI2ioi5I2IV4GTgSWDkrL9OfWnkOdIAEXlP6bvuavlLBUmSJElqq1eFP29+YGw71z8EBs3BveXXy+ePUkppFu0A9gF+T563XfYvYERKaWJ7RUTEgcCBAIsssgijRo2aRdndp6mpqUs//8knlwSW44kn/sGLL07usvetJYMHD+Szz1bnd797mg03fL/ocmpKVz+P0pzweVQ18XlUNfF5VDWp5eex0kG62pwI7EGeU/0w8AXgWOD2iNgwpTRdv2RK6XzgfIBhw4al4cOHV67aNkaNGkVXfv4//pHPm266Pr17d9nb1pT114df/hJeeOFLHHts0dXUlq5+HqU54fOoauLzqGri86hqUsvPY6WD9Fja73meUW9z23uXnsG90NLjPBYYGBHRpld6mnalYdw/BfZPKV1UbhQR/wKeB/YHzphFTXWluRl696ZhQzRAr16w9dZ5nvTEidCnT9EVSZIkSao2lZ4jPYY8h7mtlYH/dODewaXVttveO5GWOdFjgL7Asu20o9XnDC2dH27dKKX0AnnBsZVmUU/daWpq3PnRrW2/PYwbBzU6ykSSJElSN6t0kL4JWDsihpQvRMQy5K2mbprFvTcDvYGdWt3bC9gFuCulNKF0+Q7y6t67t7l/D+Dp0irhAP8rnddq3SgivggMBN7s0DeqI83NjbuHdGsjRuS/UHD1bkmSJEntqfTQ7guAQ4AbI+JoIAEnAK8D55UbRcTSwEvA8Sml4wFSSo9HxNXA6RHRG3gZ+C4wmFahOaX0bkScChwVEZ8Aj5HD9sbkLbTK/k5enfuUiBgEPEKeI300MA64tOu/fnVrbrZHGqB/f9hsM7jxRvjDH6BHw22EJkmSJGlmKhoRSot3bUyeg3wZcAU5EG+cUmpq1TTIezy3rW8f4GLyImG3AksBm6WUHmvT7uelNj8A7iT3eO+cUrqlVS1TgE2AC8mrcN9Wuucx4Ksppdfm9PvWGoN0i+23h7ffhn/9q+hKJEmSJFWbiq/aXQqoO86izSvkMN32+njgiNIxs/unkEPxibNo9wHww9LR8Jwj3WLLLfPCY9dfD+usU3Q1kiRJkqqJg1b1OXukWwwcCBtvnIP0dDuSS5IkSWpoBml9zsXGprX99vDiizBmTNGVSJIkSaomBml9zh7paW27LUS4erckSZKkaRmk9TnnSE9rscVg7bUN0pIkSZKmZZDW5+yRnt7228Pjj8MrrxRdiSRJkqRqYZAWAFOmwIQJzpFua4cd8vmvfy22DkmSJEnVwyAtIPdGgz3SbS27LHzlK/CXvxRdiSRJkqRqYZAWYJCemZ12gocecni3JEmSpMwgLSAvNAYG6fbstFM+X3ttsXVIkiRJqg4GaQEtPdLOkZ5eeXj3NdcUXYkkSZKkamCQFuDQ7lnZeWeHd0uSJEnKDNICDNKz4vBuSZIkSWUGaQHOkZ6VIUNgjTVcvVuSJEmSQVolzpGetZ12gocfdni3JEmS1OgM0gIc2t0R5eHdLjomSZIkNTaDtACDdEeUh3cbpCVJkqTGZpAW4Bzpjtp55zy8++WXi65EkiRJUlEM0gJyj3TfvtCzZ9GVVDdX75YkSZJkkBaQg7QLjc3a4MEwbJird0uSJEmNzCAtIAdph3V3zE47wSOPOLxbkiRJalQGaQF5jrRBumNcvVuSJElqbAZpAfZId8bgwbDmmgZpSZIkqVEZpAU4R7qzysO7//vfoiuRJEmSVGkGaQH2SH
eWq3dLkiRJjcsgLcA50p21zDJ5eLerd0uSJEmNxyAtwB7p2bHzzvDoow7vliRJkhqNQVqAc6Rnxze/mc8uOiZJkiQ1FoO0AHukZ8cyy8BaaxmkJUmSpEZjkBYTJ8KkSQbp2bHTTg7vliRJkhqNQVo0N+ezQbrzyqt3X311sXVIkiRJqhyDtD4P0s6R7ryll4Z114U//7noSiRJkiRVikFa9kjPod12g3//Ox+SJEmS6p9BWgbpObTTTtCzJ1x5ZdGVSJIkSaoEg7Roaspng/TsWXhhGDEiD++eOrXoaiRJkiR1N4O0nCPdBXbfHV59FUaPLroSSZIkSd3NIC2HdneBbbeF/v3hiiuKrkSSJElSdzNIyyDdBeaZB7bZBv7yl7wntyRJkqT6ZZCWc6S7yO67wwcfwN13F12JJEmSpO5kkJY90l1k001h0CBX75YkSZLqnUFanwfpAQOKraPW9emTt8K64YaWP1NJkiRJ9ccgLZqbc4ju4dMwx3bfPf953nRT0ZVIkiRJ6i5GJ9HU5LDurrL++rDkkg7vliRJkuqZQVo0Nxuku0qPHrDrrnDHHfD++0VXI0mSJKk7GKRFczPMPXfRVdSP3XeHyZPh2muLrkSSJElSdzBIyx7pLrbqqrDyyg7vliRJkuqVQVrOke5iEbDbbvD3v8OrrxZdjSRJkqSuZpCWPdLdYNdd8/mqq4qtQ5IkSVLXM0jLOdLdYMgQWGcdh3dLkiRJ9cggLXuku8luu8FTT8HTTxddiSRJkqSuZJCWQbqb7Lwz9Oxpr7QkSZJUbwzSDS4lFxvrLgsvDCNG5CCdUtHVSJIkSeoqBukGN2ECTJ3qHOnusttueeXuf/6z6EokSZIkdRWDdINrbs5ne6S7x3bbQf/+cMUVRVciSZIkqasYpBucQbp7zTMPbLtt3gZrwoSiq5EkSZLUFQzSDa6pKZ8N0t1n771h7Fi45ZaiK5EkSZLUFQzSDc4e6e739a/D4ovDJZcUXYkkSZKkrmCQbnDlIO1iY92nZ0/YYw+4/XZ4552iq5EkSZI0pwzSDc4e6crYay+YMsU9pSVJkqR6YJBucM6RroyVV4Y113R4tyRJklQPDNINzh7pytlrL3jqKXjiiaIrkSRJkjQnDNINzjnSlfOtb0GfPnDppUVXIkmSJGlOGKQbnD3SlbPAArD11nDFFTBpUtHVSJIkSZpdBukG19QEEdCvX9GVNIa99oL33ssreEuSJEmqTQbpBtfcnHujI4qupDFsthksvLDDuyVJkqRaZpBucM3Nzo+upN69Yffd4eab4YMPiq5GkiRJ0uwwSDe4co+0KmevvfIc6T//uehKJEmSJM0Og3SDa2oySFfaaqvB6qs7vFuSJEmqVQbpBmePdDH22gseeQTGjCm6EkmSJEmdZZBucM6RLsZuu0GvXvZKS5IkSbXIIN3g7JEuxsILwxZbwOWXw+TJRVcjSZIkqTMM0g3OIF2cvfaCt9+Gu+8uuhJJkiRJnWGQbnAuNlacLbeE+ed3eLckSZJUawzSDc4e6eL07Qu77go33AAffVR0NZIkSZI6yiDdwFKCTz91sbEi7b03TJgAV19ddCWSJEmSOsog3cDGj89h2h7p4qyxBqy8MlxySdGVSJIkSeoog3QDa2rKZ4N0cSJgn33gwQfdU1qSJEmqFQbpBtbcnM8G6WLttRf07g0XXlh0JZIkSZI6wiDdwMpB2jnSxVpoIdhuO/jTn/J8aUmSJEnVzSDdwOyRrh777w8ffgjXX190JZIkSZJmxSDdwJwjXT2+/nVYZhm44IKiK5EkSZI0KwbpBmaPdPXo0QP22w/uvRdeeqnoaiRJkiTNjEG6gTlHurrsvXcO1BddVHQlkiRJkmbGIN3A7JGuLksuCVtsARdfDJMnF12NJEmSpBkxSDcw50hXnwMOgP/9D269tehKJEmSJM2IQbqB2SNdfbbYAhZbzEXHJEmSpGpmkG5gzc3Qqxf06VN0JSrr1Qv22Qduvx3eeKPoaiRJkiS1xyDdwJqbc290RNGVqLX99oOpU/NcaUmSJEnVxyDdwJqaHNZdjYYMgU02yat3T51adDWSJEmS2jJIN7Byj7SqzwEHwKuvwj33FF2JJEmSpLYM0g2sudk9pKvVdtvBAgu46JgkSZJUjQzSDcwe6erVty/suSfceCO8+27R1UiSJElqzSDdwAzS1W3//WHSJPjTn4quRJIkSVJrBukG5mJj1W3llWHddeHCCyGloquRJEmSVGaQbmD2SFe/Aw6A556Df/yj6EokSZIklRmkG5iLjVW/nXaCeed10TFJkiSpmhikG5g90tVvrrlgt93gmmtg7Niiq5EkSZIEBumGNXUqjB9vkK4F3/kOfPYZXHJJ0ZVIkiRJAoN0w2puzmeDdPVbbbW86Ng55+S/AJEkSZJULIN0gyoHaedI14aDD4YXXoCRI4uuRJIkSZJBukHZI11bdtwRFloIzj676EokSZIkGaQbVFNTPhuka0PfvrD//nDTTfDaa0VXI0mSJDU2g3SDske69hx0EKQE559fdCWSJElSYzNINyjnSNeepZeGrbbKe0pPnFh0NZIkSVLjMkg3KHuka9PBB8O778J11xVdiSRJktS4DNINyjnStWnECFh2WfjDH4quRJIkSWpcBukGZY90berRA777XfjHP+Cpp4quRpIkSWpMBukG5Rzp2rXPPtCvH5xzTtGVSJIkSY2p4kE6IpaKiGsjYlxEfBwR10XEFzp4b7+IODki3o6I8RExOiI2aKddj4g4KiJeiYjPIuLJiNhxBu85KCJOj4jXImJCRLwREZfM4desevZI167554ddd4XLLoOPPy66GkmSJKnxVDRIR8QA4F5gRWAv4NvA8sB9EdGRSHcRcADwC2Ar4G3gzohYvU27E4DjgLOAzYEHgWsiYos29QwC/gF8HTgaGAH8CPik89+utjQ3Q58+0KtX0ZVodnzve/mf4Z/+VHQlkiRJUuOpdIw6ABgCrJBSehEgIp4CXgAOAk6d0Y0RsRqwG7BvSuni0rX7gTHA8cA2pWsLk8PwSSml35Vuvy8ilgNOAm5r9ba/BuYGhqaUWvftXTWH37PqNTXZG13Lhg2DNdeEs8/OK3lHFF2RJEmS1DgqPbR7G+DBcogGSCm9DDwAbNuBeycBV7e6dzI59G4aEX1LlzcF+gCXt7n/cmBoRAwGKPWA7wlc2CZEN4TmZoN0rfve9+CZZ+D++4uuRJIkSWoslQ7SqwBPt3N9DLByB+59OaX0aTv39gGWa9VuAvBiO+1o9TlrAP2Bd0pztsdHRFNE3FAO2/WsudmFxmrdLrvk+dJnn110JZIkSVJjqfTQ7vmBse1c/xAYNAf3ll8vnz9KKaVZtFu8dP4dcDu5x3sh8nDvURHxpZTSdHOlI+JA4ECARRZZhFGjRs2i7O7T1NQ025//2mtDmTq1D6NGPdq1RamiRowYwrXXLsm11z7IggtOLLSWOXkepa7m86hq4vOoauLzqGpSy89jIy81Ve6N/y/wrXLwjoiXyIuT7QFMt8FQSul84HyAYcOGpeHDh
1ek2PaMGjWK2f38vn1h0UWZ7ftVHZZaCq6+GsaMWZdjjy22ljl5HqWu5vOoauLzqGri86hqUsvPY6WHdo+l/Z7nGfU2d/ReaOlxHgsMjJhu+aW27T4onUe27r1OKf0L+Bj48izqqWnOka4Pyy4Lm20G558PkyYVXY0kSZLUGCodpMeQ5zC3tTLwnw7cO7i0hVbbeyfSMid6DNAXWLaddrT6nDHM3NRZvF7TnCNdP773PXjrLbj++qIrkSRJkhpDpYP0TcDaETGkfCEilgHWK702MzcDvYGdWt3bC9gFuCulNKF0+Q7y6t67t7l/D+Dp0irhpJTeAB4BRrTuvY6IdYB5gYc7++VqiT3S9WOLLXLP9BlnFF2JJEmS1BgqHaQvAF4BboyIbSNiG+BG4HXgvHKjiFg6IiZHxC/K11JKj5O3vjo9IvaPiE3IW18NBo5t1e5d8n7UR0XEERExPCLOATYGjmpTz0/JPdXXRsTmEbEn8BfgWeDKLv7uVcV9pOtHz55w6KHwz3/CQw8VXY0kSZJU/yoapFNKzeRA+zxwGXAF8DKwcUqpqVXTAHq2U98+wMXAicCtwFLAZimlx9q0+3mpzQ+AO8k93junlG5pU89IYGvgC8D1wGnAfcDwlNL4OfqyVc4e6fqyzz4w77xw+ulFVyJJkiTVv4qv2p1Seg3YcRZtXiGH6bbXxwNHlI6Z3T+FHKRP7EA9t5O3v2oYkyfDxInOka4n88wD++0HZ54Jv/0tLLlk0RVJkiRJ9avSQ7tVBZqb89ke6fry/e/D1Klw9tlFVyJJkiTVN4N0A2oqDaI3SNeXwYNhu+3gvPPg00+LrkaSJEmqXwbpBmSPdP067DD48EO47LKiK5EkSZLql0G6AZWDtHOk68/668NXvpIXHZta1zuhS5IkScUxSDcge6TrV0TulX72Wbj77qKrkSRJkuqTQboBGaTr2y67wKKLuhWWJEmS1F0M0g3IxcbqW58+cPDBcMcd8MwzRVcjSZIk1R+DdAOyR7r+HXQQ9O0LZ5xRdCWSJElS/TFINyAXG6t/Cy0Ee+wBf/oTfPBB0dVIkiRJ9cUg3YDskW4MP/gBjB8PF1xQdCWSJElSfTFIN6DyHOkBA4qtQ91r6FDYZBM46yyYNKnoaiRJkqT6YZBuQM3N0K8f9OxZdCXqbocdBm++CddeW3QlkiRJUv0wSDeg5mbnRzeKLbaA5ZeH006DlIquRpIkSaoPBukG1Nzs/OhG0aNHniv98MMwenTR1UiSJEn1wSDdgJqaDNKNZK+9YOBAOOWUoiuRJEmS6oNBugHZI91Y5p4bvvc9uP56eO65oquRJEmSap9BugE5R7rxHHoo9Oljr7QkSZLUFQzSDcge6cazyCKw995w6aXw9ttFVyNJkiTVNoN0A3KOdGP60Y/yftK//33RlUiSJEm1zSDdgOyRbkzLLQc77gjnnAMff1x0NZIkSVLtMkg3IOdIN64jj4Rx4+D884uuRJIkSapdBukGk5I90o1szTVho43gtNNg4sSiq5EkSZJqk0G6wUycCJMnG6Qb2ZFHwltvwRVXFF2JJEmSVJsM0g2muTmfDdKNa9NNYdVV4eSTYerUoquRJEmSao9BusEYpBWRe6WfeQZuuaXoaiRJkqTaY5BuMOUg7WJjjW3nnWHppeG3vy26EkmSJKn2GKQbjD3SAujdG444Ah54IB+SJEmSOs4g3WCamvLZIK399oP557dXWpIkSeosg3SDsUdaZXPNBYccAjfdlOdLS5IkSeoYg3SDcY60WjvkEOjfP6/gLUmSJKljDNINxh5ptbbQQrDvvnD55fDmm0VXI0mSJNUGg3SDcY602vrhD2HKFDjttKIrkSRJkmqDQbrB2COttgYPhm99C849F95/v+hqJEmSpOpnkG4wzc0QkefFSmU//zl8+imcfnrRlUiSJEnVzyDdYJqbYcAA6OE/ebWy8sqw445w5pnw0UdFVyNJkiRVN+NUg2lqcli32nf00fDxx/D73xddiSRJklTdDNINprnZIK32rbYabLNNHt798cdFVyNJkiRVL4N0g2ludg9pzdgxx8DYsXD22UVXIkmSJFUvg3SDsUdaMzNsGGy2GZxySssK75IkSZKmZZBuMM6R1qwcc0zeBuu884quRJIkSapOBukGY4+0ZmXddWHjjeHkk2H8+KKrkSRJkqqPQbrBGKTVEcccA//7H1x0UdGVSJIkSdXHIN1gXGxMHbHhhrD++vCb38CECUVXI0mSJFUXg3SDsUdaHRGRe6XfeAMuvbToaiRJkqTqYpBuICm52Jg6bsQIWGst+PWvYdKkoquRJEmSqodBuoF89lkO0wZpdUS5V/qVV+Dyy4uuRpIkSaoeBukGUt4X2DnS6qgtt4Qvfxl+9SuYPLnoaiRJkqTq0OEgHRE9I2K1iFioOwtS9ykHaXuk1VHlXukXX4Srry66GkmSJKk6dKZHOgGPAF/uplrUzZqa8tkgrc7Ydlv40pfgxBNhypSiq5EkSZKK1+EgnVKaCrwOGMNqlD3Smh09esCxx8Kzz8Kf/1x0NZIkSVLxOjtH+jzgsIjo0x3FqHs5R1qza4cdYPXV4bjjXMFbkiRJ6tXJ9vMAywL/jYg7gLfJQ77LUkrp2K4qTl3LHmnNrh494IQTYOut877S++9fdEWSJElScTobpH/W6ud923k9AQbpKuUcac2JLbeEr34Vjj8evv1t6Nu36IokSZKkYnRqaHdKqccsjp7dVajmnD3SmhMRecGx11+HCy4ouhpJkiSpOO4j3UCcI605tckmsOGG8MtfwqefFl2NJEmSVIxZBumImBoRUzp4TK5E0Zo99khrTkXkudL/+x+cfXbR1UiSJEnF6Mgc6eOZdkEx1aimprxolHNbNSe+9jXYdFM46SQ46CCYZ56iK5IkSZIqa5ZBOqV0XAXqUAU0N+fe6IiiK1GtO+EEWGstOOMMOProoquRJEmSKss50g2kHKSlObXmmrDttvC738HYsUVXI0mSJFWWQbqBNDe70Ji6zvHHw7hxcMopRVciSZIkVZZBuoHYI62utOqqsPPOcPrp8N57RVcjSZIkVY5BuoE0NRmk1bWOOw7Gj4ff/KboSiRJkqTKMUg3EHuk1dVWWgn22AP+8Ad4//0+RZcjSZIkVYRBuoE4R1rd4Re/gMmT4Yorli66FEmSJKkiDNINxB5pdYdll4V994VbblmM//636GokSZKk7meQbiDOkVZ3+cUvoGfP5J7SkiRJaggG6QZij7S6yxJLwI47vsGf/wyPPlp0NZIkSVL3Mkg3iKlT4dNPnSOt7rPrrq+xwAJw5JGQUtHVSJIkSd3HIN0gxo/PZ3uk1V3mnnsKxxwD994Ld95ZdDWSJElS9zFIN4impnw2SKs7fec7MHgw/OQnMGVK0dVIkiRJ3cMg3SCam/PZIK3u1Lcv/OpX8NRTcMUVRVcjSZIkdQ+DdIMoB2nnSKu77bwzrLEGHH00fPZZ0dVIkiRJXc8g3SDskVal9OgBJ58Mr78OZ55ZdDWSJElS1zNINwjnSKuSNtoINt88D/P+
8MOiq5EkSZK6lkG6QdgjrUo76SQYNy6HaUmSJKmeGKQbhEFalbbqqrDXXnl496uvFl2NJEmS1HUM0g3CxcZUhOOPz3Omjzmm6EokSZKkrmOQbhD2SKsISy0Fhx4Kl18OTzxRdDWSJElS1zBINwgXG1NRjjoKBg2Cn/yk6EokSZKkrmGQbhDNzdCrF/TpU3QlajQDB8LPfw533QV33ll0NZIkSdKcM0g3iOZm50erOAcfDMsuC0ccAZMnF12NJEmSNGcM0g2iudlh3SpO377wu9/Bf/4D555bdDWSJEnSnDFIN4imJoO0irXttrDxxnDssfDhh0VXI0mSJM0+g3SDsEdaRYuA00+Hjz6C444ruBhJkiRpDhikG4RzpFUNhg6Fgw6Cs8+GMWOKrkaSJEmaPQbpBmGPtKrF8cfDPPPkhcdSKroaSZIkqfMM0g3COdKqFgsumOdJ33UX3Hpr0dVIkiRJnWeQbhD2SKuaHHwwrLBC7pWeOLHoaiRJkqTOMUg3COdIq5r07g2nngovvABnnVV0NZIkSVLnGKQbhD3SqjZbbAGbbZbnTL/3XtHVSJIkSR1nkG4AU6bAZ58ZpFV9Tj01z98/5piiK5EkSZI6ziDdAJqb89kgrWqz0kp5vvQFF8CTTxZdjSRJktQxBukGYJBWNTvuOBg0CA47zO2wJEmSVBsM0g2gHKRdbEzVaNCgPE961Ci47rqiq5EkSZJmzSDdAOyRVrU78EAYOhQOP7zleZUkSZKqlUG6ATQ15bNBWtWqVy84+2x4/XU44YSiq5EkSZJmziDdAOyRVi1Yf33Ye2845RR45pmiq5EkSZJmzCDdAJwjrVrxm9/k5/Tgg114TJIkSdXLIN0A7JFWrVh4Yfj1r+G+++DPfy66GkmSJKl9BukG4Bxp1ZIDDoBhw+CHP4Rx44quRpIkSZqeQboB2COtWtKzJ5xzDrzzDhx7bNHVSJIkSdMzSDcAg7RqzbBh8J3vwJlnwhNPFF2NJEmSNC2DdANoboa+ffMWQ1Kt+OUvYYEF4Hvfg6lTi65GkiRJamGQbgBNTfZGq/YMGgS//S2MHg2XXFJ0NZIkSVILg3QDaG42SKs27bln3l/6yCPhgw+KrkaSJEnKDNINoLnZPaRVm3r0gLPPho8+gp/9rOhqJEmSpKziQToiloqIayNiXER8HBHXRcQXOnhvv4g4OSLejojxETE6IjZop12PiDgqIl6JiM8i4smI2HEW771uREyNiBQRdTWb2B5p1bKhQ+HQQ+GCC+Chh4quRpIkSapwkI6IAcC9wIrAXsC3geWB+yKiI1HvIuAA4BfAVsDbwJ0RsXqbdicAxwFnAZsDDwLXRMQWM6irN3Ae8E7nvlFtcI60at3//R8sthgceCBMmlR0NZIkSWp0le6RPgAYAmyXUrohpXQjsA2wNHDQzG6MiNWA3YDDU0oXpJRGAjsDrwHHt2q3MPAj4KSU0u9SSvellA4C7gNOmsHb/xgI4I9z9O2qlD3SqnXzzANnnQVPPgmnnlp0NZIkSWp0lQ7S2wAPppReLF9IKb0MPABs24F7JwFXt7p3MnAVsGlE9C1d3hToA1ze5v7LgaERMbj1xYhYFjga+F7p/euOc6RVD7bfHnbYAY47Dl54oehqJEmS1MgqHaRXAZ5u5/oYYOUO3PtySunTdu7tAyzXqt0E4MV22tHO55wLXJNS+tssPr9m2SOtenHmmXlP9AMPhJSKrkaSJEmNqtKLas0PjG3n+ofAoDm4t/x6+fxRStP9Z3bbdkTEHsAawO6z+OzPRcSBwIEAiyyyCKNGjerorV2uqampQ58/btx6fPTRO4wa1fbvFqSu09HncU7tv/9inHLKChx55LNsueX/uv3zVJsq9TxKHeHzqGri86hqUsvPY12tTt0ZETE/cCrws5TSux29L6V0PnA+wLBhw9Lw4cO7p8AOGDVqFB35/M8+gy9+cUmGD1+y+4tSw+ro8zinNtgAHnkELrxwRY44YkUWW6zbP1I1qFLPo9QRPo+qJj6Pqia1/DxWemj3WNrveZ5Rb3NH74WWHuexwMCIiFm0O5G86vdfImJgRAwE+pVem6+Dq4hXvUmT8uHQbtWLHj3g/PNh/Pi8LZYkSZJUaZUO0mPIc5jbWhn4TwfuHVzaQqvtvRNpmRM9BugLLNtOO1p9zsrAqsAH5PA9FvhJ6bX3gStmUU9NaG7OZxcbUz354hfh2GPh2mvhhhuKrkaSJEmNptJB+iZg7YgYUr4QEcsA65Vem5mbgd7ATq3u7QXsAtyVUppQunwHefXttvOe9wCeLq0SDnAYsFGb49LSa18nr+Rd88pB2h5p1Zsf/QhWXRUOPhjGjSu6GkmSJDWSSs+RvgA4BLgxIo4GEnAC8DpwXrlRRCwNvAQcn1I6HiCl9HhEXA2cHhG9gZeB7wKDaRWaU0rvRsSpwFER8QnwGDlsb0zeQqvc7om2xUXE8NKP95e21qp5TU35bJBWvendGy68ENZeG376UzjnnKIrkiRJUqOoaI90SqmZHGifBy4jD59+Gdg4pdTUqmkAPdupbx/gYvL85luBpYDNUkqPtWn381KbHwB3knu8d04p3dKlX6gG2COterbmmnDYYXDuufD3vxddjSRJkhpFxVftTim9Buw4izavkMN02+vjgSNKx8zun0IO0id2srbjgOM6c0+1c4606t3xx8N118EBB8ATT0C/frO8RZIkSZojlZ4jrQqzR1r1bq654Lzz4Lnn4MRO/dWZJEmSNHsM0nXOOdJqBN/4Buy5J5x0Ejz6aNHVSJIkqd4ZpOucPdJqFKefDossAnvtBRMmzLK5JEmSNNsM0nXOOdJqFIMG5VW8x4zJe0xLkiRJ3cUgXefskVYj2Xxz2H9/OPlkePDBoquRJElSvTJI17nyHOkBA4qtQ6qUU06BJZfMQ7w//bToaiRJklSPDNJ1rrkZ+veHHv6TVoOYd164+GJ4/nn4+c+LrkaSJEn1yHhV55qbnR+txrPxxnDwwXDGGfC3vxVdjSRJkuqNQbrONTc7P1qN6Te/gSFDYO+9W6Y4SJIkSV3BIF3nDNJqVHPNBZdcAq+8AkceWXQ1kiRJqicG6TrX1GSQVuNaf304/HA45xy4556iq5EkSVK9MEjXOXuk1ehOPBFWXBH23RfGjSu6GkmSJNUDg3Sdc7ExNbr+/eHSS+HNN3PvtCRJkjSnDNJ1zh5pCdZaC37607wt1vXXF12NJEmSap1Bus45R1rKjj0W1lgD9t8/905LkiRJs8sgXefskZayPn3gyivhs89gr71g6tSiK5IkSVKtMkjXsZScIy219sUvwhlnwMiRcOqpRVcjSZKkWmWQrmMTJ8KUKfZIS63ttx/ssAP87Gfw2GNFVyNJkqRaZJCuY01N+WyQllpEwAUXwMILw2675VEbkiRJUmcYpOtYOSAYpKVpzT8/XHYZPP88HHFE0dVIkiSp1hik61g5SDtHWpreRhvBj38M558PN9xQdDWSJEmqJQbpOmaPtDRzJ5zQsiXWW28VXY0kSZJqhUG6jjlHWpq5Pn3giitg/Hj
Yc0+3xJIkSVLHGKTrmD3S0qytsIJbYkmSJKlzDNJ1zDnSUse03hLr0UeLrkaSJEnVziBdx+yRljqmvCXWoovCTjvBRx8VXZEkSZKqmUG6jhmkpY6bf364+mp4/XXYd19IqeiKJEmSVK0M0nXMxcakzllnHTjpJLj+evj974uuRpIkSdXKIF3HmpvzkNX+/YuuRKodRxwB22yT95h+6KGiq5EkSVI1MkjXsebm3BsdUXQlUu2IgEsugcUXh513hg8/LLoiSZIkVRuDdB0rB2lJnTNoEPzlL/DWW7DPPs6XliRJ0rQM0nWsqckgLc2utdaCk0+Gm25yf2lJkiRNyyBdx+yRlubMoYfm/aV/+lMYPbroaiRJklQtDNJ1rLkZ5p676Cqk2hUBF10ESy0Fu+wCH3xQdEWSJEmqBgbpOmaPtDTnBg6Ea66Bd96BPfeEqVOLrkiSJElFM0jXMedIS11jjTXyPOnbbsv7TEuSJKmxGaTrmD3SUtf53vdg113h6KPhjjuKrkaSJElFMkjXMedIS10nAi68EIYOzYH6pZeKrkiSJElFMUjXMXukpa41YABcf30O1dtvn/83JkmSpMZjkK5TKRmkpe4wZAhcdRWMGQP77Zf/tyZJkqTGYpCuU+PH5//AN0hLXe8b34Bf/hKuvhpOOaXoaiRJklRpBuk6VR5y6hxpqXv85CfwzW/m8z33FF2NJEmSKskgXafKQdoeaal7RMAf/wgrrgjf+ha88krRFUmSJKlSDNJ1yiAtdb955oEbboDJk2GHHfKUCkmSJNU/g3SdamrKZ4O01L2WXx6uuAKeeAIOPNDFxyRJkhqBQbpOLbUUnHYafOlLRVci1b8tt4TjjoPLL4czzii6GkmSJHW3XkUXoO6x+OJw2GFFVyE1jqOPhscfhx/+EFZYATbfvOiKJEmS1F3skZakLtCjB1x2GQwdCrvskveZliRJUn0ySEtSF5l7brj55rw2wdZbw3vvFV2RJEmSuoNBWpK60FJLwY03wttvw/bbw4QJRVckSZKkrmaQlqQuttZacOml8MADruQtSZJUj1xsTJK6wc47w7PPwrHHwkorwU9/WnRFkiRJ6ioGaUnqJscck8P0UUfllby3377oiiRJktQVHNotSd0kAi66CL76Vdhjj7w9liRJkmqfQVqSulH//nDDDbDAAnkl77feKroiSZIkzSmDtCR1s0UXhVtugXHjYJttoKmp6IokSZI0JwzSklQBq64KV12Vh3fvvDNMmlR0RZIkSZpdBmlJqpAtt4Rzz4Xbb4fvfMdtsSRJkmqVq3ZLUgUdcAC8/jqccAIstRQcd1zRFUmSJKmzDNKSVGH/93/wxhv5vOSSsP/+RVckSZKkzjBIS1KFRcB558Hbb+ch3osvDltsUXRVkiRJ6ijnSEtSAXr3hmuugdVWg512gocfLroiSZIkdZRBWpIKMvfccOutsPDCeSGyl14quiJJkiR1hEFakgq06KJwxx0wdSpsthm8917RFUmSJGlWDNKSVLAVVoCbb84LkG21FTQ1FV2RJEmSZsYgLUlVYJ114Kqr4JFHYLvtYMKEoiuSJEnSjBikJalKbLst/PGPMHIk7LorTJ5cdEWSJElqj0FakqrIXnvBGWfA9dfn/aWnTi26IkmSJLXlPtKSVGUOPRQ++giOPRbmmw9OPz3vPS1JkqTqYJCWpCp0zDE5TJ92GgwaBMcdV3RFkiRJKjNIS1IVioBTTslh+v/+DwYOhMMOK7goSZIkAQZpSapaEXD++fDxx3D44XmY9z77FF2VJEmSDNKSVMV69YIrroBPPsmLj807L+y4Y9FVSZIkNTZX7ZakKte3L1x3Hay9dt4W67bbiq5IkiSpsRmkJakGzDUX3HorDB0KO+wAd95ZdEWSJEmNyyAtSTVi4EC4+25YaSXYdlu4666iK5IkSWpMBmlJqiHzzw/33AMrrpjD9D33FF2RJElS4zFIS1KNWWCBHKCXXx622QbuvbfoiiRJkhqLQVqSatCCC8LIkTBkCGy1Fdx/f9EVSZIkNQ6DtCTVqIUWymF6mWVgiy3g738vuiJJkqTGYJCWpBq2yCJ5aPcXvgCbbw4PPFB0RZIkSfXPIC1JNW7RRXOYXmIJ2Gwzw7QkSVJ3M0hLUh1YbDG47z5YfHH4xjdcgEySJKk7GaQlqU4svnhedGzIkDxn+rbbiq5IkiSpPhmkJamOLLoojBoFq6wC220Hf/1r0RVJkiTVH4O0JNWZBRbIq3kPGwa77AKXX150RZIkSfXFIC1JdWjgQLjrLthgA9hzTzj//KIrkiRJqh8GaUmqU3PPDbfemrfFOuggOOOMoiuSJEmqDwZpSapj/fvD9dfDDjvAYYfBr35VdEWSJEm1zyAtSXWuTx+4+mrYfXf4+c/hqKMgpaKrkiRJql29ii5AktT9evWCSy/Nw71POgnefRfOOy9flyRJUuf4n1CS1CB69oRzzoGFF4YTToD33oOrroIBA4quTJIkqbY4tFuSGkgEHH88nHUW3HILfOMbMHZs0VVJkiTVFoO0JDWggw/O86Yffhi+9jV4882iK5IkSaodBmlJalA77QS33w6vvQbrrgvPPlt0RZIkSbXBIC1JDWzjjWHUKPjsM1h/ffjXv4quSJIkqfoZpCWpwX3lK/DAAzDffDlY33FH0RVJkiRVN4O0JInllsth+otfhK22ggsuKLoiSZKk6mWQliQBsOii8Le/5ZW8DzwQfvITmDq16KokSZKqj0FakvS5eeaBm26C734Xfvtb2HlnGD++6KokSZKqS6+iC5AkVZdeveAPf4Dll4cf/hDeeANuvBEWWaToyiRJkqqDPdKSpOlEwOGHw3XXwVNPwdprw3/+U3RVkiRJ1cEgLUmaoe22g/vvz8O7110XRo4suiJJkqTiGaQlSTO15pp5f+kll4TNNoM//rHoiiRJkoplkJYkzdLSS+ftsTbaCPbbD444AiZPLroqSZKkYhikJUkdMt98cOut8P3vw2mnwRZbwNixRVclSZJUeRUP0hGxVERcGxHjIuLjiLguIr7QwXv7RcTJEfF2RIyPiNERsUE77XpExFER8UpEfBYRT0bEjm3aLBYRv46IRyLio4h4LyJGtvd+kqSsd2/4/e/hwgth1ChYay0XIZMkSY2nokE6IgYA9wIrAnsB3waWB+6LiLk68BYXAQcAvwC2At4G7oyI1du0OwE4DjgL2Bx4ELgmIrZo1WYNYBfgRmAnYG/gM2BURGzV+W8nSY1jv/1ykP7kE/jqV/Pe05IkSY2i0vtIHwAMAVZIKb0IEBFPAS8ABwGnzujGiFgN2A3YN6V0cena/cAY4Hhgm9K1hYEfASellH5Xuv2+iFgOOAm4rXTtH8AXU0qTW33GnaX3OxK4pSu+sCTVq3XXhUceySt7b7cdnHAC/OxneessSZKkelbpod3bAA+WQzRASull4AFg2w7cOwm4utW9k4GrgE0jom/p8qZAH+DyNvdfDgyNiMGlez9qHaJbvd8TwBKd+1qS1JiWXBL+/nfYdVc4+mjYZRdobi66KkmSpO5V6S
C9CvB0O9fHACt34N6XU0qftnNvH2C5Vu0mAC+2046ZfU5E9AHWAZ6ZRS2SpJL+/eHyy+G3v4Vrr4X11oP//rfoqiRJkrpPpYP0/EB7a7x+CAyag3vLr5fPH6WU0izatec4YEngN7OoRZLUSgT8+Mdw223w6quwxhpwixNkJElSnar0HOmqFRG7AT8FTkgp/X0m7Q4EDgRYZJFFGDVqVGUKbEdTU1Ohny+15vMogH794A9/6Mdxx63C1lvPw+67v8o++7xMz56VrcPnUdXE51HVxOdR1aSWn8dKB+mxtN/zPKPe5rb3Lj2De6Glx3ksMDAiok2vdNt2n4uIrYFLgItSSsfOrIiU0vnA+QDDhg1Lw4cPn0XZ3WfUqFEU+flSaz6Pam2HHfJ+0xdeuDRvv700f/4zLLxw5T7f51HVxOdR1cTnUdWklp/HSg/tHkOew9zWysCsdiIdAwwubaHV9t6JtMyJHgP0BZZtpx1tPyciNgGuAa4nrxwuSZpD/frBBRfAH/8I//wnfPnL8MADRVclSZLUNSodpG8C1o6IIeULEbEMsF7ptZm5GehN3vO5fG8v8l7Qd6WUJpQu30Fe3Xv3NvfvATxdWiW8fP865H2kRwJ7pJSmzsZ3kiTNwD77wOjReUGy4cPh9NNhuhUsJEmSakylh3ZfABwC3BgRRwMJOAF4HTiv3CgilgZeAo5PKR0PkFJ6PCKuBk6PiN7Ay8B3gcG0Cs0ppXcj4lTgqIj4BHiMHLY3prTXdOkzVgRuBd4HTgbWiFabn6aUHuzyby9JDWj11fN+03vvDYcfnnuoL7wQ5p236MokSZJmT0WDdEqpOSI2Bk4DLgOC3Bt8WEqpqVXTAHoyfY/5PsAvgROBgcCTwGYppcfatPs50AT8AFgUeA7YOaXUeg3ZtcnztQcB97VTbrRzTZI0GwYOhOuvh5NPhqOOgsceg6uugmHDiq5MkiSp8yo9tJuU0msppR1TSvOmlOZJKW2XUnqlTZtXUkqRUjquzfXxKaUjUkqLppT6pZS+mlIa1c5nTEkpnZhSWjql1DeltGpK6do2bS4pfUa7Rzd8dUlqaBFw5JFw//0wcSKsuy6ceipMdVKNJEmqMRUP0pKkxrb++vDEE7DVVvDDH+bzu+8WXZUkSVLHGaQlSRU3//zw17/C2WfDvffCaqvByJFFVyVJktQxBmlJUiEi4LvfhYcegkGDYMQI+NnPYNKkoiuTJEmaOYO0JKlQq64KDz8M++0Hv/41bLABvPzyrO+TJEkqikFaklS4ueaCCy7IK3n/5z95qPfFF7vntCRJqk4GaUlS1dhlF3jqKfjKV2DffWH77V2ITJIkVR+DtCSpqiy9dF6A7He/g9tvh6FD4aabiq5KkiSphUFaklR1evTIW2M9+igsthhsu22eQ/3JJ0VXJkmSZJCWJFWxL30pr+p91FFwySV57vTf/150VZIkqdEZpCVJVa1PH/jVr+Bvf8tbZm24Ifz4xzB+fNGVSZKkRmWQliTVhPXWgyeegP33z/OnV18d/vGPoquSJEmNyCAtSaoZ88wD558Pd90FEybkPacPPRSamoquTJIkNRKDtCSp5owYAU8/DYccAmeemVf2Hjmy6KokSVKjMEhLkmrS3HPD73+f50737g1f/zoceCCMG1d0ZZIkqd4ZpCVJNe1rX4Mnn4Qjj4SLLoJVVoHRo+cvuixJklTHDNKSpJrXvz/85jfw4IMwcCD87GersvPO8NZbRVcmSZLqkUFaklQ31lwTHnsM9tvvv9x0E6y4Ipx1FkyZUnRlkiSpnhikJUl1pU8f2GOP13j6aVhnHfj+92HttXPAliRJ6goGaUlSXVpuObjjDvjzn+H113Nv9WGHwSefFF2ZJEmqdQZpSVLdioBvfQuefRYOOiiv8r3SSnDddZBS0dVJkqRaZZCWJNW9gQPh7LPhn/+EBRaAHXeEzTfPAVuSJKmzDNKSpIax9trw6KNw2mkwejQMHQo//jF8/HHRlUmSpFpikJYkNZRevfJc6RdegL32glNOgRVWgD/9CaZOLbo6SZJUCwzSkqSGtPDCcOGF8K9/wdJL51C9/vq5x1qSJGlmDNKSpIa25pp57vTFF8NLL+XfDzwQ3n236MokSVK1MkhLkhpejx6w997w/PNw+OE5VC+3HPz61zB+fNHVSZKkamOQliSpZL758pzpp5+GjTaCn/0MVlwRrrjC+dOSJKmFQVqSpDZWWAFuvBHuuw8WXBD22AO++lX429+KrkySJFUDg7QkSTMwfDg8/DBcdhn873+w4Yaw/fZ5CLgkSWpcBmlJkmaiR4/cI/388/DLX8I998Aqq8DBB8PbbxddnSRJKoJBWpKkDujfP8+ZfvFF2H9/OP98WHbZfG3s2KKrkyRJlWSQliSpExZZBM45B555BrbbLq/sPWQInHQSfPpp0dVJkqRKMEhLkjQbllsOrrwSnngC1lsPjjoq91CffTZMnFh0dZIkqTsZpCVJmgOrrQa33AL/+Acsv3yeO73SSnmBssmTi65OkiR1B4O0JEldYL314P774bbbYN55Yc8986Jkl19uoJYkqd4YpCVJ6iIRsPnm8OijcN11eYGyb3/bQC1JUr0xSEuS1MV69Mj7TT/2mIFakqR6ZJCWJKmbzCxQO4dakqTaZZCWJKmbtReo99wzL0529tkwfnzRFUqSpM4wSEuSVCGtA/VNN8Fii+VVvgcPht/8Bj7+uOgKJUlSRxikJUmqsB49YOut4YEHYNQoWH11+OlP4QtfgJ//HN59t+gKJUnSzBikJUkqSARsuCHccQc88giMGAG//jUsswwceii8/HLRFUqSpPYYpCVJqgJrrAHXXAPPPAO77grnngvLLQc77QQPPlh0dZIkqTWDtCRJVWSFFeCii3Jv9JFHwj33wDrrwLrrwl//ClOmFF2hJEkySEuSVIWWWCIP8379dTjzTHjnHfjmN/NK37//PTQ1FV2hJEmNyyAtSVIVm3tuOOQQeP753CO92GLwgx/AUkvBj34E//1v0RVKktR4DNKSJNWAnj1hhx3ySt+jR+eFyU4/Pc+j3mYbuPtuSKnoKiVJagwGaUmSaszaa8Nf/gKvvpq3y3rwQfjGN2ClleCss+CTT4quUJKk+maQliSpRi2xBJxwQp5H/ac/wbzzwve/n68feig8+2zRFUqSVJ8M0pIk1bi+feHb34aHHoJ//Qu23TZvn7XSSnmf6iuvhAkTiq5SkqT6YZCWJKmOrLUWXHYZvPEGnHRSPu++e+6l/tGP8qJlkiRpzhikJUmqQwsvDD/5CbzwAtx5Z+6ZPv30vE/1xhvD1VfDxIlFVylJUm0ySEuSVMd69MgLkf31r3ku9Ykn5i2zvvWt3Et9+OHw738XXaUkSbXFIC1JUoNYbLG8yvdLL8Htt8Pw4fCHP8Cqq8KwYXD22TB2bNFVSpJU/QzSkiQ1mJ49YbPN4Jpr4K238pDvSZPg4INz2N5117wv9ZQpRVcqSVJ1MkhLktTAFlwQfvADeOIJePRROOCAPKf6G9+AwYPhqKNgzJiiq5QkqboYpCVJE
hHwla/AmWfmXuqrroKhQ+Hkk+FLX4IvfxlOPRXefrvoSiVJKp5BWpIkTaNfP9hlF7j11hyqzzgDeveGH/4Qllwy91Zfdhk0NRVdqSRJxTBIS5KkGVp4YTj0UHjoIXj2WfjZz/KWWnvuCYssklf/vv56+OyzoiuVJKlyDNKSJKlDVlgBTjghb5/1j3/kMD1yJOywQw7ce+4Jt93m/tSSpPpnkJYkSZ0SAeutB+eck+dM33UX7LQT3HwzbLllXvn7gANyyJ48uehqJUnqegZpSZI023r1ghEj4KKL4J13cpjefPO8WNnXv55D9f77532r7amWJNULg7QkSeoSffrAVlvB5ZfDu+/CX/+aFyb7y19giy3y8O9vfzvPqR4/vuhqJUmafQZpSZLU5fr3z3Onr7gC3nsPbrkl/37bbfm84IJ5OPiVV8JHHxVdrSRJndOr6AIkSVJ969s3z53eckuYNAnuvz/3Vl9/PVx7bR4evsEGsO22sM02sMwyRVcsSdLM2SMtSZIqpnfvPHf6nHPyHtX//Cf86Ed50bIf/AAGD4bVVoNjjoFHHoGUiq5YkqTpGaQlSVIhevSAddaBX/8a/vMfeP55+N3vYOBA+NWvYM01YYkl8mJl110Hn3xSdMWSJGUGaUmSVBWWXx5++MM89Pudd+CSS2D99eGaa2DHHWGBBWCTTeCUU+DZZ+2tliQVxyAtSZKqzoILwl575RW/338fRo2Cww/PAftHP4KVVoJll4VDDslbbtlbLUmqJIO0JEmqar17w4Ybwm9+A08/Da+8kudYr7IKXHxxXqBsgQVg+PA8JPyRR2Dq1KKrliTVM4O0JEmqKUsvDd/5Tu6J/vBDGDkSjjgCPv4Yfv7zPLd64YVh113z8PDXXy+6YklSvXH7K0mSVLP69oWNN87HSSflod/33AN33gl33QVXXZXbLb98nl+98caw0UZ56LgkSbPLIC1JkurGIovA7rvnIyX4979zj/W998IVV8C55+Z2q6/eEqy/9jWYZ55Cy5Yk1RiDtCRJqksRsOqq+Tj8cJg0Kc+fHjkyH2eemVcA79kTvvKVPA97ww3zSuEDBxZdvSSpmhmkJUlSQ+jdO+9bvc46cPTRMH48PPBA3m7r/vvh97/P+1j36JF7rFsH6wUWKLp6SVI1MUhLkqSG1L8/fP3r+YAcrB98sCVYn302nHZafm3llXOgLh/LLJN7vCVJjckgLUmSRA7WG22UD4DPPoOHHoJ//CP3XF99NZx/fn5t8cVhvfVyqF5vvTx8vHfv4mqXJFWWQVqSJKkd/frBBhvkA/Le1GPG5GBdPq65Jr/Wvz8MG9YydHzttWHRRYurXZLUvQzSkiRJHdCjBwwdmo/vfjdfe/11+Oc/YfToPCz8tNPgt7/Nry2zTA7VCyywBP365XnX/foVVb0kqSsZpCVJkmbTUkvBLrvkA/Jw8Mcey8F69Og81/qtt5bnrLOgV688BHyttVqOFVfMq4ZLkmqLQVqSJKmL9OsH666bj7JrrhlNr17r8NBD8PDDcOWVLftZzz03rLHGtMfyy+feb0lS9TJIS5IkdaOFFprA8OGw/fb596lT4fnnc6guh+uzz8692ZDD9Ze/PG24/uIX7bmWpGpikJYkSaqgHj3ykO4VV4RvfztfmzQJnnkGHn205Tj33JZwPWBAHha++uotx9Ch+bokqfIM0pIkSQXr3TsH5VVXhX32ydcmT24J108+CU88AVdd1TIsvEcPWGGFHKpXWy3fO3QoLLGEe1xLUnczSEuSJFWhXr1aVgkvSwlefTWH6vLxwAPw5z+3tBk0KN9TDuZDh8KXvpSHjEuSuoZBWpIkqUZE5G21llkGttuu5fpHH8HTT8NTT+Xj3/+GSy6BpqaWNksvDauskkP1KqvkY6WVHB4uSbPDIC1JklTjBg6E9dfPR9nUqbn3+qmncsgeMyYf99wDEyfmNhEweHBLqG59zDtvIV9FkmqCQVqSJKkO9eiRQ/LgwbDtti3XJ0+GF19sCdZPPw3/+Q/ccUde9Kxs8cWnDdYrrJAP52BLkkFakiSpofTq1bJq+I47tlyfPBn++9+8wFnro+0Q8bnmyntdl4P1Civk7bmWXx7mm6/iX0eSCmGQliRJEr165UD8xS9O24OdErz1Fjz3XMtR3gf7mmvyEPKyhRbKgXr55WG55ab92aHikuqJQVqSJEkzFJGHcy+xBGy88bSvTZiQh4k/91w+v/BCPu6+Gy69dNq2Cy0Eyy7b/rHIIg4Xl1RbDNKSJEmaLX37tqwA3lZzM7z0Uku4fumlfPz973Dllbmnu2zAABgypGVO9+DBeWXy8s/2ZkuqNgZpSZIkdbm55mrZy7qtiRPhlVdysP7vf1vOL78Mo0bBJ59M237++XOgXnrpfCyzzLQ/DxzY7V9HkqZhkJYkSVJF9enTMh+7rZTgww9zqG57/Oc/cPvtMH78tPfMO28O1V/4Aiy11PTnJZbInylJXcUgLUmSpKoRAQsskI9hw6Z/PSV4//28R/Yrr+Rz+Xj9dXjwQfjgg+nfc9FFc6hecsn2D8O2pM4wSEuSJKlmROSFyxZaqP2gDXl+9htvwGuv5eP11/P5jTfyll733AMffzz9fQstlAP14ovP+LzggnmPbkmNzSAtSZKkujLXXC17XM/Ixx/Dm2/mcP3GGzlsv/VWvvbWW/Doo/Duu9MuigZ5m7BFFoHFFmv/WHTRfCyySF6MTVJ9MkhLkiSp4cw7bz5WWmnGbSZNgv/9L4frN9+Et9+e9nj11TyU/L332r9/0KCWUF0O2AsvnH8vn8s/9+vXPd9TUvcwSEuSJEnt6N07z6teaqmZt5s0Cd55J4frd97J4bvt8cgj+fXm5vbfY955c6BeeOE8xLx8bvvzQgvl4eX2dkvFMkhLkiRJc6B375ZFy2aluTkPGX/33Ry6257fey9vB/bgg3lRtSlT2n+feebJgXrBBVvCdfkoL9bW9jB8S13HIC1JkiRVyFxz5T2xBw+eddupU2Hs2Byu33svh+3332853nsvn995B8aMyb9/+umM32/uuWGuudZm8cXz3tzzz58Ddvnn8jFoUD7KP/fvnxd5k9TCIC1JkiRVoR49WnqTV1yxY/eMH5+3/2rveP99GDPmI/r2XZQPP4Snnsp7dn/44Yx7viFvC1YO1+Vj4MCWc/ko/z7ffC3n+eZzWzHVJ4O0JEmSVCf695/5MPNRo55l+PBFp7mWEnzySQ7UH3yQe8HbHh9+2PLzO+/As8/CRx/lY+rUWdfUXsCed95pz22vzTtvHsI+77y5J99ecVUTg7QkSZLUwCJagusyy3Tu3nIIL4fqsWNh3Lj887hx0/5cPo8dC6+8krcgGzdu5sPRy3r0aAnV88zTcrT9vfUx99wt57Y/9+tnMNecMUhLkiRJmi2tQ/gXvjB77zF5ckuoLp8/+ST/PKPjk0/y8fbbLT9/8kl+r47o0aMlVLc+5pqr5dz25/aOAQOm/9mh7I3BIC1JkiSpML16tSx0NidSggkTWkJ1U1M+yj+3Pjc3t7ze1NTy+3vvwcsv59/L
[base64-encoded PNG data elided: rendered training-curve figures (CXE loss, classification accuracy, learning rate) produced by the plotting cell below]", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "logger.setLevel(3)\n", + "matplotlib.rcParams.update({'font.size': 16})\n", + "plt.figure(figsize=(16,12))\n", + "plt.plot(train_result['epoch'], train_result['train_loss'], color='b', label='train')\n", + "plt.plot(train_result['epoch'], train_result['val_loss'], color='r', label='va')\n", + "plt.title('CXE Loss')\n", + "plt.ylabel('CXE')\n", + "plt.xlabel('epoch')\n", + "plt.grid(True)\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(16,12))\n", + "plt.plot(train_result['epoch'], train_result['train_acc'], color='b', label='train')\n", + "plt.plot(train_result['epoch'], train_result['val_acc'], color='r', label='va')\n", + "plt.title('Classification Accuracy')\n", + "plt.ylabel('Accuracy (%)')\n", + "plt.xlabel('epoch')\n", + "plt.grid(True)\n", + "plt.show()\n", + "\n", + "plt.figure(figsize=(16,12))\n", + "plt.plot(train_result['epoch'], train_result['lr'], color='b')\n", + "plt.title('Learning Rate')\n", + "plt.ylabel('lr')\n", + "plt.xlabel('epoch')\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = TDNNXVec(16, 2, 32, 16, 100)\n", + "state_dict=torch.load(\"./tdnn_xvec/model_ep0099.pth\")\n", + "model.load_state_dict(state_dict['model_state_dict'])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the Trials dataset with different speakers than the train data.\n", + "#trial_data = IVDataset(num_spks=20, seed=4321)\n", + "trial_loader = DataLoader(train_data, batch_size=100, shuffle=True)\n", + "# sample enrollment data and compute x-vectors\n", + "x_e, y_e = next(iter(trial_loader))\n", + "z_e = model(x_e, infer=True).detach().cpu().numpy()\n", + "# sample test data and compute x-vectors\n", + "x_t, y_t = next(iter(trial_loader))\n", + "z_t = model(x_t, infer=True).detach().cpu().numpy()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.12987226757966816\n" + ] + } + ], + "source": [ + "from hyperion.utils.math import cosine_scoring\n", + "from hyperion.np.metrics import compute_eer\n", + "scores = cosine_scoring(z_e, z_t)\n", + "key = (y_e[:, None] - y_t[None,:])==0\n", + "tar_scores = scores[key==1]\n", + "non_scores = scores[key==0]\n", + "eer = compute_eer(tar_scores, non_scores)\n", + "print(eer)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "488a239b304e646027d6710c3377746db4487e56624448f35f81edd765904a6d" + }, + "kernelspec": { + "display_name": "Python 3.8.12 ('py38_pt101_cu112')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 1d41410f40ee45eb8e75cf84269c1993f815b46b Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 14 Jun 2022 08:48:01 -0400 Subject: [PATCH 018/154] added default config to voxceleb/v2 --- egs/voxceleb/v2/run_030_extract_xvectors.sh | 77 ++++++++-------- 
egs/voxceleb/v2/run_040_eval_be.sh | 97 +++++++-------------- 2 files changed, 72 insertions(+), 102 deletions(-) diff --git a/egs/voxceleb/v2/run_030_extract_xvectors.sh b/egs/voxceleb/v2/run_030_extract_xvectors.sh index 77c46672..da3ffde8 100755 --- a/egs/voxceleb/v2/run_030_extract_xvectors.sh +++ b/egs/voxceleb/v2/run_030_extract_xvectors.sh @@ -7,64 +7,67 @@ . ./path.sh set -e -stage=1 +stage=2 config_file=default_config.sh use_gpu=false -nnet_stage=1 +nnet_stage=3 xvec_chunk_length=120 #seconds . parse_options.sh || exit 1; . $config_file if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" - xvec_cmd="$cuda_eval_cmd --mem 6G" + xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" + xvec_cmd="$cuda_eval_cmd --mem 6G" else - xvec_cmd="$train_cmd --mem 12G" + xvec_cmd="$train_cmd --mem 12G" fi - -if [ $nnet_stage -eq 2 ];then +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then nnet=$nnet_s2 - nnet_name=$nnet_name_s2 + nnet_name=$nnet_s2_name elif [ $nnet_stage -eq 3 ];then nnet=$nnet_s3 - nnet_name=$nnet_name_s3 + nnet_name=$nnet_s3_name fi xvector_dir=exp/xvectors/$nnet_name if [ $stage -le 1 ]; then - # Extract xvectors for training LDA/PLDA - for name in voxceleb2cat_train - do - if [ $plda_num_augs -eq 0 ]; then - steps_xvec/extract_wav2vec2xvectors.sh --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ - --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ - $nnet data/${name} \ - $xvector_dir/${name} - else - steps_xvec/extract_wav2vec2xvectors.sh --cmd "$xvec_cmd" --nj 300 ${xvec_args} \ - --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ - --aug-config $plda_aug_config --num-augs $plda_num_augs \ - $nnet data/${name} \ - $xvector_dir/${name}_augx${plda_num_augs} \ - data/${name}_augx${plda_num_augs} - fi - done + # Extract xvectors for training LDA/PLDA + for name in voxceleb2cat_train + do + if [ $plda_num_augs -eq 0 ]; then + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ + --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ + $nnet data/${name} \ + $xvector_dir/${name} + else + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd" --nj 300 ${xvec_args} \ + --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ + --aug-config $plda_aug_config --num-augs $plda_num_augs \ + $nnet data/${name} \ + $xvector_dir/${name}_augx${plda_num_augs} \ + data/${name}_augx${plda_num_augs} + fi + done fi - if [ $stage -le 2 ]; then - # Extracts x-vectors for evaluation - for name in voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 100 ? $num_spk:100)) - steps_xvec/extract_wav2vec2xvectors.sh \ - --cmd "$xvec_cmd" --nj $nj ${xvec_args} \ - $nnet data/$name \ - $xvector_dir/$name - done + # Extracts x-vectors for evaluation + for name in voxceleb1_test + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 100 ? $num_spk:100)) + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd" --nj $nj ${xvec_args} \ + $nnet data/$name \ + $xvector_dir/$name + done fi exit diff --git a/egs/voxceleb/v2/run_040_eval_be.sh b/egs/voxceleb/v2/run_040_eval_be.sh index d9c03bba..ac561344 100755 --- a/egs/voxceleb/v2/run_040_eval_be.sh +++ b/egs/voxceleb/v2/run_040_eval_be.sh @@ -7,27 +7,29 @@ . 
./path.sh set -e -stage=1 +# By default we evaluate the nnet after finetuning stage 3 and only with cosine scoring +stage=3 config_file=default_config.sh -nnet_stage=1 +nnet_stage=3 . parse_options.sh || exit 1; . $config_file . datapath.sh -if [ $nnet_stage -eq 2 ];then +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then nnet=$nnet_s2 - nnet_name=$nnet_name_s2 + nnet_name=$nnet_s2_name elif [ $nnet_stage -eq 3 ];then nnet=$nnet_s3 - nnet_name=$nnet_name_s3 + nnet_name=$nnet_s3_name fi - plda_label=${plda_type}y${plda_y_dim}_v1 be_name=lda${lda_dim}_${plda_label}_${plda_data} - xvector_dir=exp/xvectors/$nnet_name be_dir=exp/be/$nnet_name/$be_name score_dir=exp/scores/$nnet_name/${be_name} @@ -35,46 +37,44 @@ score_plda_dir=$score_dir/plda score_cosine_dir=exp/scores/$nnet_name/cosine if [ $stage -le 1 ]; then - echo "Train PLDA on Voxceleb2" - steps_be/train_be_v1.sh --cmd "$train_cmd" \ - --lda_dim $lda_dim \ - --plda_type $plda_type \ - --y_dim $plda_y_dim --z_dim $plda_z_dim \ - $xvector_dir/$plda_data/xvector.scp \ - data/$plda_data \ - $be_dir & - + steps_be/train_be_v1.sh \ + --cmd "$train_cmd" \ + --lda_dim $lda_dim \ + --plda_type $plda_type \ + --y_dim $plda_y_dim --z_dim $plda_z_dim \ + $xvector_dir/$plda_data/xvector.scp \ + data/$plda_data \ + $be_dir & wait - fi if [ $stage -le 2 ];then echo "Eval Voxceleb 1 with LDA+CentWhiten+LNorm+PLDA" - steps_be/eval_be_v1.sh --cmd "$train_cmd" --plda_type $plda_type \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $be_dir/lda_lnorm.h5 \ - $be_dir/plda.h5 \ - $score_plda_dir/voxceleb1_scores - + steps_be/eval_be_v1.sh \ + --cmd "$train_cmd" --plda_type $plda_type \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $be_dir/lda_lnorm.h5 \ + $be_dir/plda.h5 \ + $score_plda_dir/voxceleb1_scores + $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir - + local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + for f in $(ls $score_plda_dir/*_results); do - echo $f - cat $f - echo "" + echo $f + cat $f + echo "" done - + fi - score_plda_dir=$score_cosine_dir if [ $stage -le 3 ];then @@ -98,39 +98,6 @@ if [ $stage -le 3 ];then fi -be_dir=exp/be/$nnet_name/cw -score_plda_dir=$score_dir/cw_cosine - -if [ $stage -le 4 ]; then - echo "Train centering+whitening on Voxceleb2" - steps_be/train_be_v2.sh --cmd "$train_cmd" \ - $xvector_dir/$plda_data/xvector.scp \ - data/$plda_data \ - $be_dir -fi - - -if [ $stage -le 5 ];then - - echo "Eval Voxceleb 1 with CentWhiten + Cosine scoring" - steps_be/eval_be_v2.sh --cmd "$train_cmd" \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $be_dir/cw.h5 \ - $score_plda_dir/voxceleb1_scores - - $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - -fi exit From cf433a73403a7319c0999752b9d3de22344b897f Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 4 Jul 2022 12:12:30 -0400 Subject: [PATCH 019/154] added recipes with wavlmlarge --- egs/voxceleb/v2/cmd.sh | 2 +- ...ain_hubertbase_ecapatdnn512x2_default.yaml | 6 - ...v2vec2base960h_ecapatdnn512x2_default.yaml | 6 - 
...n_wav2vec2base_ecapatdnn512x2_default.yaml | 6 - ...baseplus6l_ecapatdnn512x3_stage1_v1.0.yaml | 43 +++ ...s6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml | 43 +++ ...baseplus9l_ecapatdnn512x3_stage1_v1.0.yaml | 45 +++ ...s9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml | 45 +++ ...lus_linfus_ecapatdnn512x3_stage1_v1.0.yaml | 45 +++ ...lmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml | 45 +++ ...lmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml | 50 +++ ...vlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml | 45 +++ ...wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml | 45 +++ ...wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml | 49 +++ ...wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml | 50 +++ .../v2/conf/trainer_phase1_adam_default.yaml | 20 -- .../v2/conf/trainer_phase1_sgd_default.yaml | 18 - .../v2/conf/trainer_phase2_sgd_default.yaml | 18 - .../v2/conf/trainer_phase3_sgd_default.yaml | 18 - .../conf/wavlmbaseplus6l_ecapatdnn512x3.yaml | 41 +++ ...wavlmbaseplus6l_linfus_ecapatdnn512x3.yaml | 41 +++ .../conf/wavlmbaseplus9l_ecapatdnn512x3.yaml | 41 +++ ...wavlmbaseplus9l_linfus_ecapatdnn512x3.yaml | 41 +++ .../wavlmbaseplus_linfus_ecapatdnn512x3.yaml | 40 +++ .../v2/conf/wavlmlarge12l_ecapatdnn512x3.yaml | 41 +++ .../v2/conf/wavlmlarge6l_ecapatdnn512x3.yaml | 41 +++ ...x2.yaml => wavlmlarge_ecapatdnn512x3.yaml} | 17 +- ...dnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh | 48 --- ...tdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh | 48 --- ...dnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh | 51 --- ...dnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh | 53 --- ...dnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh | 49 --- ...fig_wavlmbaseplus6l_ecapatdnn512x3_v1.0.sh | 49 +++ ...lmbaseplus6l_linfus_ecapatdnn512x3_v1.0.sh | 49 +++ ...fig_wavlmbaseplus9l_ecapatdnn512x3_v1.0.sh | 49 +++ ...lmbaseplus9l_linfus_ecapatdnn512x3_v1.0.sh | 49 +++ ...ig_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh | 52 --- ...nfig_wavlmbaseplus_ecapatdnn512x3_v1.10.sh | 52 --- ...onfig_wavlmbaseplus_ecapatdnn512x3_v1.9.sh | 36 -- ...avlmbaseplus_linfus_ecapatdnn512x3_v1.0.sh | 49 +++ ...onfig_wavlmlarge12l_ecapatdnn512x3_v1.0.sh | 49 +++ ...config_wavlmlarge6l_ecapatdnn512x3_v1.0.sh | 49 +++ .../config_wavlmlarge_ecapatdnn512x3_v1.0.sh | 49 +++ egs/voxceleb/v2/run_030_extract_xvectors.sh | 3 +- .../xvectors/extract_wav2vec2xvectors.sh | 7 +- hyperion/bin/extract_wav2vec2xvectors.py | 20 +- hyperion/bin/finetune_wav2vec2xvector.py | 20 +- hyperion/bin/finetune_xvector_from_wav.py | 2 - hyperion/torch/data/weighted_seq_sampler.py | 164 ++++++--- hyperion/torch/layers/attention.py | 1 + hyperion/torch/layers/margin_losses.py | 83 ++++- .../hf_hubert2resnet1d_xvector.py | 11 +- .../hf_wav2vec2resnet1d_xvector.py | 10 + .../models/wav2xvectors/hf_wav2xvector.py | 34 +- .../wav2xvectors/hf_wavlm2resnet1d_xvector.py | 10 + .../torch/models/wav2xvectors/wav2xvector.py | 3 + .../models/xvectors/efficient_net_xvector.py | 33 +- .../torch/models/xvectors/resnet1d_xvector.py | 54 ++- .../torch/models/xvectors/resnet_xvector.py | 23 +- .../torch/models/xvectors/spinenet_xvector.py | 22 +- .../torch/models/xvectors/tdnn_xvector.py | 23 +- .../models/xvectors/transformer_xvector_v1.py | 48 +++ hyperion/torch/models/xvectors/xvector.py | 55 ++- hyperion/torch/narchs/classif_head.py | 9 + hyperion/torch/narchs/efficient_net.py | 63 +++- hyperion/torch/narchs/resnet1d_encoder.py | 70 +++- hyperion/torch/narchs/resnet2d_encoder.py | 17 + hyperion/torch/narchs/resnet_factory.py | 39 ++- hyperion/torch/narchs/spinenet_factory.py | 39 ++- hyperion/torch/narchs/tdnn_factory.py | 39 ++- 
.../torch/narchs/transformer_encoder_v1.py | 28 +- hyperion/torch/torch_model.py | 10 + hyperion/torch/tpm/hf/hf_hubert.py | 121 ++++++- hyperion/torch/tpm/hf/hf_wav2vec2.py | 123 ++++++- hyperion/torch/tpm/hf/hf_wav2vec_base.py | 319 +++++++++++++++++- hyperion/torch/tpm/hf/hf_wavlm.py | 121 ++++++- hyperion/torch/utils/eval_utils.py | 171 ---------- 77 files changed, 2716 insertions(+), 762 deletions(-) delete mode 100644 egs/voxceleb/v2/conf/train_hubertbase_ecapatdnn512x2_default.yaml delete mode 100644 egs/voxceleb/v2/conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml delete mode 100644 egs/voxceleb/v2/conf/train_wav2vec2base_ecapatdnn512x2_default.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml delete mode 100644 egs/voxceleb/v2/conf/trainer_phase1_adam_default.yaml delete mode 100644 egs/voxceleb/v2/conf/trainer_phase1_sgd_default.yaml delete mode 100644 egs/voxceleb/v2/conf/trainer_phase2_sgd_default.yaml delete mode 100644 egs/voxceleb/v2/conf/trainer_phase3_sgd_default.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbaseplus6l_ecapatdnn512x3.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbaseplus6l_linfus_ecapatdnn512x3.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbaseplus9l_ecapatdnn512x3.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbaseplus9l_linfus_ecapatdnn512x3.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbaseplus_linfus_ecapatdnn512x3.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmlarge12l_ecapatdnn512x3.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmlarge6l_ecapatdnn512x3.yaml rename egs/voxceleb/v2/conf/{wavlmbase_ecapatdnn512x2.yaml => wavlmlarge_ecapatdnn512x3.yaml} (75%) delete mode 100644 egs/voxceleb/v2/global_conf/config_hubertbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh delete mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2base960h_ecapatdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh delete mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh delete mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh delete mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus6l_ecapatdnn512x3_v1.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus6l_linfus_ecapatdnn512x3_v1.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v1.0.sh create mode 100644 
egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_linfus_ecapatdnn512x3_v1.0.sh delete mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh delete mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh delete mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.9.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_linfus_ecapatdnn512x3_v1.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v1.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmlarge6l_ecapatdnn512x3_v1.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmlarge_ecapatdnn512x3_v1.0.sh diff --git a/egs/voxceleb/v2/cmd.sh b/egs/voxceleb/v2/cmd.sh index 00f8d40a..71f3bae0 100755 --- a/egs/voxceleb/v2/cmd.sh +++ b/egs/voxceleb/v2/cmd.sh @@ -11,12 +11,12 @@ # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. if [ "$(hostname -d)" == "cm.gemini" ];then - #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 10G" #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" else export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" diff --git a/egs/voxceleb/v2/conf/train_hubertbase_ecapatdnn512x2_default.yaml b/egs/voxceleb/v2/conf/train_hubertbase_ecapatdnn512x2_default.yaml deleted file mode 100644 index 6cec83c8..00000000 --- a/egs/voxceleb/v2/conf/train_hubertbase_ecapatdnn512x2_default.yaml +++ /dev/null @@ -1,6 +0,0 @@ -data: - train: train_data_default.yaml - val: val_data_default.yaml -model: hubertbase_ecapatdnn512x2.yaml -trainer: trainer_swa_default.yaml - \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml b/egs/voxceleb/v2/conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml deleted file mode 100644 index a7fc925e..00000000 --- a/egs/voxceleb/v2/conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml +++ /dev/null @@ -1,6 +0,0 @@ -data: - train: train_data_default.yaml - val: val_data_default.yaml -model: wav2vec2base960h_ecapatdnn512x2.yaml -trainer: trainer_swa_default.yaml - \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wav2vec2base_ecapatdnn512x2_default.yaml b/egs/voxceleb/v2/conf/train_wav2vec2base_ecapatdnn512x2_default.yaml deleted file mode 100644 index 90f35805..00000000 --- a/egs/voxceleb/v2/conf/train_wav2vec2base_ecapatdnn512x2_default.yaml +++ /dev/null @@ -1,6 +0,0 @@ -data: - train: train_data_default.yaml - val: val_data_default.yaml -model: wav2vec2base_ecapatdnn512x2.yaml -trainer: trainer_swa_default.yaml - \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..570aad6a --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,43 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + 
wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmbaseplus6l_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..9838b855 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,43 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmbaseplus6l_linfus_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..1028f79a --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmbaseplus9l_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..2c2e5b64 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: 
+ dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmbaseplus9l_linfus_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..eb32ce0c --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmbaseplus_linfus_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..895bcb2b --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmlarge12l_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml new file mode 100644 index 00000000..1721e337 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml @@ -0,0 +1,50 @@ +data: + train: + dataset: + max_chunk_length: 6.0 + min_chunk_length: 6.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: 
conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2.3e-4 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 2e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 2 + eff_batch_size: 192 + train_mode: full + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..181d8fd7 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmlarge6l_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..1af241ea --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wavlmlarge_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml new file mode 100644 index 00000000..1298a056 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml @@ -0,0 +1,49 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + 
data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5.5e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 4.4e-3 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 20 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml new file mode 100644 index 00000000..2867cfef --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml @@ -0,0 +1,50 @@ +data: + train: + dataset: + max_chunk_length: 6.0 + min_chunk_length: 6.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 16 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2.3e-4 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 2e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 2 + eff_batch_size: 192 + train_mode: full + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/trainer_phase1_adam_default.yaml b/egs/voxceleb/v2/conf/trainer_phase1_adam_default.yaml deleted file mode 100644 index 03c5cc84..00000000 --- a/egs/voxceleb/v2/conf/trainer_phase1_adam_default.yaml +++ /dev/null @@ -1,20 +0,0 @@ -optim: - opt_type: adam - lr: 0.05 - amsgrad: true - beta1: 0.9 - beta2: 0.95 - weight_decay: 4e-4 -lrsched: - lrsch_type: exp_lr - decay_steps: 8000 - hold_steps: 40000 - min_lr: 1.0e-05 - decay_rate: 0.5 - warmup_steps: 1000 - update_lr_on_opt_step: true -use_amp: true -log_interval: 1000 -epochs: 30 -eff_batch_size: 1024 -train_mode: hf-feats-frozen-nograd diff --git a/egs/voxceleb/v2/conf/trainer_phase1_sgd_default.yaml b/egs/voxceleb/v2/conf/trainer_phase1_sgd_default.yaml deleted file mode 100644 index 7fc848a0..00000000 --- a/egs/voxceleb/v2/conf/trainer_phase1_sgd_default.yaml +++ /dev/null @@ -1,18 +0,0 @@ -optim: - opt_type: sgd - lr: 0.45 - momentum: 0.9 - weight_decay: 4e-4 -lrsched: - lrsch_type: exp_lr - decay_rate: 0.5 - decay_steps: 2100 - hold_steps: 1000 - min_lr: 4e-4 - warmup_steps: 1000 - update_lr_on_opt_step: true -use_amp: true -log_interval: 1000 -epochs: 30 -eff_batch_size: 1024 -train_mode: hf-feats-frozen-nograd diff --git a/egs/voxceleb/v2/conf/trainer_phase2_sgd_default.yaml b/egs/voxceleb/v2/conf/trainer_phase2_sgd_default.yaml deleted file mode 100644 index ae708b62..00000000 --- a/egs/voxceleb/v2/conf/trainer_phase2_sgd_default.yaml +++ /dev/null @@ -1,18 +0,0 @@ -optim: - opt_type: sgd - lr: 5.5e-3 - momentum: 0.9 - weight_decay: 1e-4 -lrsched: - lrsch_type: exp_lr - decay_rate: 0.5 - decay_steps: 5000 - hold_steps: 6000 - min_lr: 4.4e-3 - warmup_steps: 6000 - update_lr_on_opt_step: true -use_amp: true -log_interval: 1000 -epochs: 7 -eff_batch_size: 512 -train_mode: full diff --git a/egs/voxceleb/v2/conf/trainer_phase3_sgd_default.yaml 
b/egs/voxceleb/v2/conf/trainer_phase3_sgd_default.yaml deleted file mode 100644 index 2529e25a..00000000 --- a/egs/voxceleb/v2/conf/trainer_phase3_sgd_default.yaml +++ /dev/null @@ -1,18 +0,0 @@ -optim: - opt_type: sgd - lr: 2.3e-4 - momentum: 0.9 - weight_decay: 1e-4 -lrsched: - lrsch_type: exp_lr - decay_rate: 0.5 - decay_steps: 5000 - hold_steps: 6000 - min_lr: 2e-4 - warmup_steps: 6000 - update_lr_on_opt_step: true -use_amp: true -log_interval: 1000 -epochs: 7 -eff_batch_size: 192 -train_mode: full diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus6l_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wavlmbaseplus6l_ecapatdnn512x3.yaml new file mode 100644 index 00000000..dbe4ff65 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbaseplus6l_ecapatdnn512x3.yaml @@ -0,0 +1,41 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus + drop_layers_gt: 6 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus6l_linfus_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wavlmbaseplus6l_linfus_ecapatdnn512x3.yaml new file mode 100644 index 00000000..99a3778b --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbaseplus6l_linfus_ecapatdnn512x3.yaml @@ -0,0 +1,41 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus + drop_layers_gt: 6 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: linear +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus9l_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wavlmbaseplus9l_ecapatdnn512x3.yaml new file mode 100644 index 00000000..ddbf3ca4 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbaseplus9l_ecapatdnn512x3.yaml @@ -0,0 +1,41 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus + drop_layers_gt: 9 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus9l_linfus_ecapatdnn512x3.yaml 
b/egs/voxceleb/v2/conf/wavlmbaseplus9l_linfus_ecapatdnn512x3.yaml new file mode 100644 index 00000000..90b0fbef --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbaseplus9l_linfus_ecapatdnn512x3.yaml @@ -0,0 +1,41 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus + drop_layers_gt: 9 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: linear +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus_linfus_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wavlmbaseplus_linfus_ecapatdnn512x3.yaml new file mode 100644 index 00000000..6f1e9f56 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbaseplus_linfus_ecapatdnn512x3.yaml @@ -0,0 +1,40 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: linear +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmlarge12l_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wavlmlarge12l_ecapatdnn512x3.yaml new file mode 100644 index 00000000..0de43fd4 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmlarge12l_ecapatdnn512x3.yaml @@ -0,0 +1,41 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-large + drop_layers_gt: 12 +xvector: + resnet_enc: + in_feats: 1024 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmlarge6l_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wavlmlarge6l_ecapatdnn512x3.yaml new file mode 100644 index 00000000..062137f3 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmlarge6l_ecapatdnn512x3.yaml @@ -0,0 +1,41 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-large + drop_layers_gt: 6 +xvector: + resnet_enc: + in_feats: 1024 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + 
multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmbase_ecapatdnn512x2.yaml b/egs/voxceleb/v2/conf/wavlmlarge_ecapatdnn512x3.yaml similarity index 75% rename from egs/voxceleb/v2/conf/wavlmbase_ecapatdnn512x2.yaml rename to egs/voxceleb/v2/conf/wavlmlarge_ecapatdnn512x3.yaml index b5d14412..f36ac70c 100644 --- a/egs/voxceleb/v2/conf/wavlmbase_ecapatdnn512x2.yaml +++ b/egs/voxceleb/v2/conf/wavlmlarge_ecapatdnn512x3.yaml @@ -1,8 +1,8 @@ hf_feats: - pretrained_model_path: microsoft/wavlm-base + pretrained_model_path: microsoft/wavlm-large xvector: resnet_enc: - in_feats: 80 + in_feats: 1024 in_conv_channels: 512 in_kernel_size: 5 in_stride: 1 @@ -10,6 +10,7 @@ xvector: resb_repeats: - 1 - 1 + - 1 resb_channels: - 512 resb_kernel_sizes: @@ -17,6 +18,7 @@ xvector: resb_dilations: - 2 - 3 + - 4 resb_strides: - 1 res2net_width_factor: 1 @@ -28,10 +30,11 @@ xvector: pool_net: pool_type: ch-wise-att-mean+stddev inner_feats: 128 - embed_dim: 256 - cos_scale: 30.0 - margin: 0.3 - margin_warmup_epochs: 20.0 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 dropout_rate: 0.0 -feat_fusion_start: 2 feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/global_conf/config_hubertbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_hubertbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh deleted file mode 100644 index 9ea07c9c..00000000 --- a/egs/voxceleb/v2/global_conf/config_hubertbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh +++ /dev/null @@ -1,48 +0,0 @@ -# Hubert base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 - -# hugging face model -hf_model_name=hubertbase - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_hubert2resnet1d - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -dropout=0 -embed_dim=256 -lr=0.05 -s=30 -margin_warmup=20 -margin=0.3 -nnet_num_epochs=70 - -lr=0.002 -xvec_train_base_cfg=conf/train_hubertbase_ecapatdnn512x2_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" - -nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base960h_ecapatdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base960h_ecapatdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh deleted file mode 100644 index b6cbdf30..00000000 --- a/egs/voxceleb/v2/global_conf/config_wav2vec2base960h_ecapatdnn512x2_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ /dev/null @@ -1,48 +0,0 @@ -# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 - -# hugging face model -hf_model_name=wav2vec2base960h - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector 
cfg - -nnet_type=hf_wav2vec2resnet1d - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -dropout=0 -embed_dim=256 -lr=0.05 -s=30 -margin_warmup=20 -margin=0.3 -nnet_num_epochs=70 - -lr=0.002 -xvec_train_base_cfg=conf/train_wav2vec2base960h_ecapatdnn512x2_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" - -nnet_name=${hf_model_name}_ecapatdnn512x3_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh deleted file mode 100644 index b40ff3d1..00000000 --- a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v3.sh +++ /dev/null @@ -1,51 +0,0 @@ -# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 - -# hugging face model -hf_model_name=wav2vec2base - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_wav2vec2resnet1d - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -dropout=0 -embed_dim=256 -lr=0.05 -s=30 -margin_warmup=20 -margin=0.3 -nnet_num_epochs=70 - - -lr=0.001 -#lr=0.005 -xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 20000 --trainer.lrsched.hold-steps 20000 --trainer.lrsched.min-lr 1e-6 --trainer.epochs 75" - -nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v3 #v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0060.pth -nnet=$nnet_dir/swa_model_ep0076.pth -nnet=$nnet_dir/model_ep0060.pth - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh deleted file mode 100644 index 1509e46e..00000000 --- a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v4.sh +++ /dev/null @@ -1,53 +0,0 @@ -# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 - -# hugging face model -hf_model_name=wav2vec2base - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_wav2vec2resnet1d - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -dropout=0 -embed_dim=256 -lr=0.05 -s=30 -margin_warmup=20 -margin=0.3 -nnet_num_epochs=70 - - -lr=0.001 -#lr=0.005 -xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 20000 
--trainer.lrsched.hold-steps 20000 --trainer.lrsched.min-lr 1e-6 --trainer.epochs 75 --data.train.dataset.max-chunk-length 2 --data.train.dataset.min-chunk-length 2" - -nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v4 #v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0060.pth -nnet=$nnet_dir/swa_model_ep0076.pth -nnet=$nnet_dir/model_ep0060.pth -nnet=$nnet_dir/model_ep0030.pth -nnet=$nnet_dir/model_ep0020.pth - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh b/egs/voxceleb/v2/global_conf/config_wavlmbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh deleted file mode 100644 index ba4272a2..00000000 --- a/egs/voxceleb/v2/global_conf/config_wavlmbase_ecapatdnn512x2_arcs30m0.3_adam_lr0.002_amp.v1.sh +++ /dev/null @@ -1,49 +0,0 @@ -# WavLM base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 - -# hugging face model -hf_model_name=wavlmbase - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_wavlm2resnet1d - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -dropout=0 -embed_dim=256 -lr=0.05 -s=30 -margin_warmup=20 -margin=0.3 -nnet_num_epochs=70 - -lr=0.002 -lr=0.001 -xvec_train_base_cfg=conf/train_wavlmbase_ecapatdnn512x2_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" - -nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus6l_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus6l_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..d02c11f7 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus6l_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus6l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end 
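+# Note: the back-end variables defined below are consumed by run_040_eval_be.sh;
+# as a rough sketch of how they are used there, PLDA training is launched as:
+#   steps_be/train_be_v1.sh --cmd "$train_cmd" \
+#     --lda_dim $lda_dim --plda_type $plda_type \
+#     --y_dim $plda_y_dim --z_dim $plda_z_dim \
+#     $xvector_dir/$plda_data/xvector.scp data/$plda_data $be_dir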
+plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus6l_linfus_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus6l_linfus_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..639225c3 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus6l_linfus_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus6l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_linfus_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..58bded52 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus9l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_linfus_ecapatdnn512x3_v1.0.sh 
b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_linfus_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..4553f40b --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_linfus_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus9l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_linfus_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh deleted file mode 100644 index b580508a..00000000 --- a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.4.sh +++ /dev/null @@ -1,52 +0,0 @@ -# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 - -# hugging face model -hf_model_name=wavlmbaseplus - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_wavlm2resnet1d - -xvec_train_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml -xvec_train_args="--model.xvector.margin-warmup-epochs 5 --trainer.lrsched.decay-steps 4200 --trainer.lrsched.warmup-steps 1500 --trainer.lrsched.hold-steps 1500 --trainer.epochs 60 --model.feat-fusion-method weighted-avg --model.feat-fusion-start 2 --model.xvector.intertop-margin 0.1" - -nnet_name=${hf_model_name}_ecapatdnn512x3_v1.10 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0060.pth - -xvec_train_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml -xvec_train_s2_args="--trainer.epochs 20" -nnet_name_s2=${nnet_name}.s2 -nnet_s2_dir=exp/xvector_nnets/$nnet_name_s2 -nnet_s2=$nnet_s2_dir/model_ep0007.pth -nnet_s2=$nnet_s2_dir/model_ep0020.pth - -xvec_train_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml -xvec_train_s3_args="--trainer.epochs 10 --data.train.dataset.min-chunk-length 6 --data.train.dataset.max-chunk-length 6 --model.xvector.intertop-margin 0.1" -nnet_name_s3=${nnet_name}.s3.4 -nnet_s3_dir=exp/xvector_nnets/$nnet_name_s3 -nnet_s3=$nnet_s3_dir/model_ep0002.pth -nnet_s3=$nnet_s3_dir/model_ep0006.pth -#nnet_s3=$nnet_s3_dir/model_ep0010.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=0 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 
-plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh deleted file mode 100644 index b84c1f15..00000000 --- a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.10.sh +++ /dev/null @@ -1,52 +0,0 @@ -# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 - -# hugging face model -hf_model_name=wavlmbaseplus - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_wavlm2resnet1d - -xvec_train_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml -xvec_train_args="--model.xvector.margin-warmup-epochs 5 --trainer.lrsched.decay-steps 4200 --trainer.lrsched.warmup-steps 1500 --trainer.lrsched.hold-steps 1500 --trainer.epochs 60 --model.feat-fusion-method weighted-avg --model.feat-fusion-start 2 --model.xvector.intertop-margin 0.1" - -nnet_name=${hf_model_name}_ecapatdnn512x3_v1.10 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0060.pth - -xvec_train_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml -xvec_train_s2_args="--trainer.epochs 20" -nnet_name_s2=${nnet_name}.s2 -nnet_s2_dir=exp/xvector_nnets/$nnet_name_s2 -nnet_s2=$nnet_s2_dir/model_ep0007.pth -nnet_s2=$nnet_s2_dir/model_ep0020.pth - -xvec_train_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml -xvec_train_s3_args="--trainer.epochs 10 --data.train.dataset.min-chunk-length 6 --data.train.dataset.max-chunk-length 6" -nnet_name_s3=${nnet_name}.s3 -nnet_s3_dir=exp/xvector_nnets/$nnet_name_s3 -nnet_s3=$nnet_s3_dir/model_ep0002.pth -nnet_s3=$nnet_s3_dir/model_ep0006.pth -nnet_s3=$nnet_s3_dir/model_ep0010.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=0 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.9.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.9.sh deleted file mode 100644 index dccd01e1..00000000 --- a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.9.sh +++ /dev/null @@ -1,36 +0,0 @@ -# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 - -# hugging face model -hf_model_name=wavlmbaseplus - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_wavlm2resnet1d - -xvec_train_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml -xvec_train_args="--model.xvector.margin-warmup-epochs 5 --trainer.lrsched.decay-steps 3150 --trainer.lrsched.warmup-steps 1500 --trainer.lrsched.hold-steps 1500 --trainer.epochs 45 --model.feat-fusion-method weighted-avg --model.feat-fusion-start 2 --model.xvector.intertop-margin 0.1" - -nnet_name=${hf_model_name}_ecapatdnn512x3_v1.9 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0045.pth - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=0 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_linfus_ecapatdnn512x3_v1.0.sh 
b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_linfus_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..c75280f0 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_linfus_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_linfus_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..11425baa --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM large trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmlarge12l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmlarge6l_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmlarge6l_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..e3c9466b --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmlarge6l_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM large trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmlarge6l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training
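+# (editor's note) stage naming convention inferred from the variables below:
+# each training stage N has a YAML config (nnet_sN_base_cfg), extra args
+# (nnet_sN_args), an experiment name (nnet_sN_name=$nnet_name.sN) and a
+# checkpoint path (nnet_sN).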
+nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmlarge_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmlarge_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..8e870abe --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmlarge_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM large trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmlarge + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/run_030_extract_xvectors.sh b/egs/voxceleb/v2/run_030_extract_xvectors.sh index da3ffde8..67122f85 100755 --- a/egs/voxceleb/v2/run_030_extract_xvectors.sh +++ b/egs/voxceleb/v2/run_030_extract_xvectors.sh @@ -11,12 +11,13 @@ stage=2 config_file=default_config.sh use_gpu=false nnet_stage=3 +hf_chunk_length=120 #seconds xvec_chunk_length=120 #seconds . parse_options.sh || exit 1; .
$config_file if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" + xvec_args="--use-gpu true --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length" xvec_cmd="$cuda_eval_cmd --mem 6G" else xvec_cmd="$train_cmd --mem 12G" diff --git a/hyp_utils/xvectors/extract_wav2vec2xvectors.sh b/hyp_utils/xvectors/extract_wav2vec2xvectors.sh index 56ed6b56..6c6f0fdf 100755 --- a/hyp_utils/xvectors/extract_wav2vec2xvectors.sh +++ b/hyp_utils/xvectors/extract_wav2vec2xvectors.sh @@ -4,7 +4,8 @@ nj=30 cmd="run.pl" -chunk_length=0 # The chunk size over which the embedding is extracted. +hf_chunk_length=0 # The chunk size over which the hf-encoder forward pass is computed. +xvec_chunk_length=0 # The chunk size over which the x-vector embedding is extracted. use_gpu=false write_utt2speech_dur=true # If true writes utt2speech_dur. stage=0 @@ -87,7 +88,7 @@ if [ $stage -le 0 ];then ${args} $write_speech_dur_opt \ --part-idx JOB --num-parts $nj \ --input $data_dir/wav.scp \ - --model-path $nnet_file --chunk-length $chunk_length \ + --model-path $nnet_file --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length \ --output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp set -e fi @@ -109,7 +110,7 @@ if [ $stage -le 1 ];then ${args} $write_speech_dur_opt \ --part-idx $i --num-parts $nj \ --input $data_dir/wav.scp \ - --model-path $nnet_file --chunk-length $chunk_length \ + --model-path $nnet_file --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length \ --output ark,scp:$output_dir/xvector.$i.ark,$output_dir/xvector.$i.scp & fi done diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py index 5675ace1..cfa28f0a 100755 --- a/hyperion/bin/extract_wav2vec2xvectors.py +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -92,7 +92,8 @@ def extract_xvectors( scp_sep, vad_path_prefix, model_path, - chunk_length, + hf_chunk_length, + xvec_chunk_length, embed_layer, random_utt_length, min_utt_length, @@ -188,7 +189,8 @@ def extract_xvectors( y = ( model.extract_embed( x, - chunk_length=fs * chunk_length, + hf_chunk_length=hf_chunk_length, + xvec_chunk_length=xvec_chunk_length, embed_layer=embed_layer, ) .cpu() @@ -259,11 +261,21 @@ def extract_xvectors( parser.add_argument("--model-path", required=True) parser.add_argument( - "--chunk-length", + "--hf-chunk-length", type=int, default=0, help=( - "number of frames used in each forward pass " + "max. chunk length used in each forward pass " + "of the hf encoder, " + "if 0 the full utterance is used" + ), + ) + parser.add_argument( + "--xvec-chunk-length", + type=int, + default=0, + help=( + "max.
chunk length used in each forward pass " "of the x-vector encoder, " "if 0 the full utterance is used" ), diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py index fda819ad..f2c740da 100755 --- a/hyperion/bin/finetune_wav2vec2xvector.py +++ b/hyperion/bin/finetune_wav2vec2xvector.py @@ -71,13 +71,25 @@ def init_data(partition, rank, num_gpus, **kwargs): return data_loader +# def init_model(num_classes, in_model_file, rank, **kwargs): +# xvec_args = kwargs["model"]["xvector"] +# if rank == 0: +# logging.info("xvector network ft args={}".format(xvec_args)) +# xvec_args["num_classes"] = num_classes +# model = TML.load(in_model_file) +# model.rebuild_output_layer(**xvec_args) +# if rank == 0: +# logging.info("model={}".format(model)) +# return model + + def init_model(num_classes, in_model_file, rank, **kwargs): - xvec_args = kwargs["model"]["xvector"] + model_args = kwargs["model"] if rank == 0: - logging.info("xvector network ft args={}".format(xvec_args)) - xvec_args["num_classes"] = num_classes + logging.info("xvector network ft args={}".format(model_args)) + model_args["xvector"]["num_classes"] = num_classes model = TML.load(in_model_file) - model.rebuild_output_layer(**xvec_args) + model.change_config(**model_args) if rank == 0: logging.info("model={}".format(model)) return model diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py index 5ddc4d82..b5a7f63b 100755 --- a/hyperion/bin/finetune_xvector_from_wav.py +++ b/hyperion/bin/finetune_xvector_from_wav.py @@ -77,8 +77,6 @@ def init_xvector(num_classes, in_model_path, rank, **kwargs): xvec_args["num_classes"] = num_classes model = TML.load(in_model_path) model.rebuild_output_layer(**xvec_args) - # if train_mode == "ft-embed-affine": - # model.freeze_preembed_layers() if rank == 0: logging.info("x-vector-model={}".format(model)) return model diff --git a/hyperion/torch/data/weighted_seq_sampler.py b/hyperion/torch/data/weighted_seq_sampler.py index 34c3fcbc..e679251b 100644 --- a/hyperion/torch/data/weighted_seq_sampler.py +++ b/hyperion/torch/data/weighted_seq_sampler.py @@ -15,14 +15,37 @@ class ClassWeightedSeqSampler(Sampler): + """Samples utterances as follows: + 1. It samples a class with a given probability. + 2. It samples a random utterance from the class. + + Attributes: + dataset: dataset containing audio or feature sequences. + batch_size: batch size per gpu for the largest chunk-size. + num_egs_per_utt_epoch: number of samples per utterance per epoch. + num_egs_per_class: number of samples per class in each batch. + num_egs_per_utt: number of samples per utterance in each batch. + var_batch_size: whether to use variable batch size when using + variable utterance length. + num_hard_prototypes: number of hard prototype classes per random class + in a batch. + num_egs_per_hard_prototype: number of utterances for each hard + prototype in a batch. + iters_per_epoch: deprecated; if not None, it overrides "num_egs_per_utt_epoch".
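+
+        Example (editor's sketch based on the attributes above): with
+        var_batch_size=False, batch_size=128, num_egs_per_class=2 and
+        num_egs_per_utt=1, each batch draws ceil(128 / 2 / 1) = 64 distinct
+        classes, with 2 example chunks per class and 1 chunk per utterance.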
+ """ + def __init__( self, dataset, batch_size=1, - iters_per_epoch="auto", + num_egs_per_utt_epoch="auto", num_egs_per_class=1, num_egs_per_utt=1, var_batch_size=False, + num_hard_prototypes=0, + num_egs_per_hard_prototype=1, + affinity_matrix=None, + iters_per_epoch=None, ): super().__init__(None) @@ -34,72 +57,101 @@ def __init__( rank = 0 world_size = 1 + if iters_per_epoch is not None: + num_egs_per_utt_epoch = iters_per_epoch + self.dataset = dataset self.batch_size = batch_size self.num_egs_per_class = num_egs_per_class self.num_egs_per_utt = num_egs_per_utt self.var_batch_size = var_batch_size + self.num_hard_prototypes = num_hard_prototypes + self.num_egs_per_hard_prototype = num_egs_per_hard_prototype self.batch = 0 self.rank = rank self.world_size = world_size - if rank > 0: # this will make sure that each process produces different data # when using ddp dummy = torch.rand(1000 * rank) del dummy - if iters_per_epoch == "auto": - self._compute_iters_auto() - else: - self.iters_per_epoch = iters_per_epoch - - if var_batch_size: - avg_batch_size = self._compute_avg_batch_size() - else: - avg_batch_size = self.batch_size - - self._len = int( - math.ceil( - self.iters_per_epoch * dataset.num_seqs / avg_batch_size / world_size - ) - ) - print( - "num_batches", - self.iters_per_epoch, - dataset.num_seqs, - avg_batch_size, - world_size, + self.has_short_seqs = self.dataset.short_seq_exist + self.set_num_egs_per_utt_epoch(num_egs_per_utt_epoch) + self._compute_avg_batch_size() + self._compute_len(world_size) + self._compute_num_classes_per_batch() + self.set_hard_prototypes(affinity_matrix) + logging.info( + "batches/epoch=%d classes/batch=%d avg-batch-size/gpu=%d samples/(utt*epoch)=%d", self._len, - flush=True, + self._num_classes_per_batch, + self.avg_batch_size, + self.num_egs_per_utt_epoch, ) - self.avg_batch_size = avg_batch_size - logging.info("num batches per epoch: %d", self._len) - - self._num_classes_per_batch = int( - math.ceil(avg_batch_size / num_egs_per_class / num_egs_per_utt) - ) - logging.info("num classes per batch: %d", self._num_classes_per_batch) - - # self.weights = torch.as_tensor(dataset.class_weights, dtype=torch.double) def _compute_avg_batch_size(self): + if not self.var_batch_size: + self.avg_batch_size = self.batch_size + return + dataset = self.dataset avg_chunk_length = int( (dataset.max_chunk_length + dataset.min_chunk_length) / 2 ) batch_mult = dataset.max_chunk_length / avg_chunk_length - return int(self.batch_size * batch_mult) + self.avg_batch_size = int(self.batch_size * batch_mult) - def _compute_iters_auto(self): + def set_num_egs_per_utt_epoch(self, num_egs_per_utt_epoch): + if num_egs_per_utt_epoch == "auto": + self._compute_num_egs_per_utt_epoch_auto() + else: + self.num_egs_per_utt_epoch = num_egs_per_utt_epoch + + def _compute_num_egs_per_utt_epoch_auto(self): dataset = self.dataset avg_seq_length = np.mean(dataset.seq_lengths) avg_chunk_length = int( (dataset.max_chunk_length + dataset.min_chunk_length) / 2 ) - self.iters_per_epoch = math.ceil(avg_seq_length / avg_chunk_length) - logging.debug("num iters per epoch: %d" % self.iters_per_epoch) + self.num_egs_per_utt_epoch = math.ceil(avg_seq_length / avg_chunk_length) + logging.debug("num iters per epoch: %d", self.num_egs_per_utt_epoch) + + def _compute_len(self, world_size): + self._len = int( + math.ceil( + self.num_egs_per_utt_epoch + * self.dataset.num_seqs + / self.avg_batch_size + / world_size + ) + ) + + def _compute_num_classes_per_batch(self): + self._num_classes_per_batch = int( + 
math.ceil( + self.avg_batch_size / self.num_egs_per_class / self.num_egs_per_utt + ) + ) + + def _get_class_weights(self, chunk_length): + if not self.has_short_seqs: + return self.dataset.class_weights + + # set weight to 0 for classes whose utterances are all shorter than the chunk length + zero_idx = self.dataset.class2max_length < chunk_length + if not np.any(zero_idx): + return self.dataset.class_weights + + class_weights = self.dataset.class_weights.clone() + class_weights[zero_idx] = 0 + # renormalize weights + class_weights /= class_weights.sum() + return class_weights + + def _get_seq_weights(self, chunk_length): + pass def __len__(self): return self._len @@ -108,9 +160,21 @@ def __iter__(self): self.batch = 0 return self + def hard_prototype_mining(self): + return self.num_hard_prototypes > 0 + + def set_hard_prototypes(self, affinity_matrix): + if affinity_matrix is None: + self.hard_prototypes = None + return + + # a class should not be proposed as its own hard prototype + affinity_matrix.fill_diagonal_(-1.0) + self.hard_prototypes = torch.topk( + affinity_matrix, self.num_hard_prototypes, dim=-1 + ).indices + def _get_utt_idx_basic(self, batch_mult=1): dataset = self.dataset - num_classes_per_batch = batch_mult * self._num_classes_per_batch if dataset.class_weights is None: @@ -212,7 +276,6 @@ def __next__(self): logging.info("batch 0 uttidx=%s", str(utt_idx[:10])) self.batch += 1 - index = [(i, chunk_length) for i in utt_idx] return index @@ -226,8 +289,11 @@ def filter_args(**kwargs): "batch_size", "var_batch_size", "iters_per_epoch", + "num_egs_per_utt_epoch", "num_egs_per_class", "num_egs_per_utt", + "num_hard_prototypes", + "num_egs_per_hard_prototype", ) return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @@ -255,6 +321,13 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--iters-per-epoch", + default=None, + type=lambda x: x if (x == "auto" or x is None) else float(x), + help=("deprecated, number of times we sample an utterance in each epoch, use --num-egs-per-utt-epoch"), + ) + + parser.add_argument( + "--num-egs-per-utt-epoch", default="auto", type=lambda x: x if x == "auto" else float(x), help=("number of times we sample an utterance in each epoch"), @@ -272,9 +345,20 @@ default=1, help=("number of samples per utterance in batch"), ) + parser.add_argument( + "--num-hard-prototypes", + type=int, + default=0, + help=("number of hard prototype classes per batch"), + ) + parser.add_argument( + "--num-egs-per-hard-prototype", + type=int, + default=1, + help=("number of samples per hard prototype class in the batch"), + ) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='weighted seq sampler options') add_argparse_args = add_class_args diff --git a/hyperion/torch/layers/attention.py b/hyperion/torch/layers/attention.py index 7b4f5c06..8ab75111 100644 --- a/hyperion/torch/layers/attention.py +++ b/hyperion/torch/layers/attention.py @@ -468,6 +468,7 @@ def _compute_softmax( else: # case when mask is 1d vector per batch element, # meaning that time1 and time2 are the same, so mask is symmetric + pad2 = 0 # fix this mask = nn.functional.pad(mask, (0, pad2)) mask = mask.squeeze(1).eq(0) # (batch, 1, time) diff --git a/hyperion/torch/layers/margin_losses.py b/hyperion/torch/layers/margin_losses.py index d7a086d1..6443ea02 100644 --- a/hyperion/torch/layers/margin_losses.py +++ b/hyperion/torch/layers/margin_losses.py @@ -19,6 +19,11 @@ def _l2_norm(x, axis=-1): return y +def _cosine_affinity(kernel): + kernel_norm = _l2_norm(kernel, axis=0) +
return torch.mm(kernel_norm.transpose(0, 1), kernel_norm) + + class ArcLossOutput(nn.Module): """Additive angular margin softmax (ArcFace) output layer. @@ -63,7 +68,9 @@ def __init__( self._compute_aux() + # each column is the prototype vector of a class self.kernel = nn.Parameter(torch.Tensor(in_feats, num_classes)) + # we normalize prototypes to have l2 norm = 1 self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5) def __repr__(self): @@ -99,9 +106,6 @@ def update_margin(self, epoch): Args: epoch: value of current epoch. """ - # if self.margin_warmup_epochs == 0: - # return - if epoch < self.margin_warmup_epochs: self.cur_margin = self.margin * epoch / self.margin_warmup_epochs self.cur_intertop_margin = ( @@ -145,11 +149,7 @@ def forward(self, x, y=None): cos_theta_m = cos_theta * self.cos_m - sin_theta * self.sin_m idx_ = torch.arange(0, batch_size, dtype=torch.long) - # if torch.distributed.get_rank() == 0: - # print("o1", output[idx_, y]) output[idx_, y] = cos_theta_m[idx_, y] - # if torch.distributed.get_rank() == 0: - # print("o2", output[idx_, y]) if self.cur_intertop_margin > 0: # implementation of intertop-K # set positive scores to -inf so they don't appear in the top k @@ -167,17 +167,16 @@ def forward(self, x, y=None): + sin_theta[idx_, topk_idx] * self.intertop_sin_m ) # take the maximum for the cases where m' is larger than theta to get cos(max(0, theta-m')) - # if torch.distributed.get_rank() == 0: - # print("o3", output[idx_, topk_idx]) output[idx_, topk_idx] = torch.maximum( output[idx_, topk_idx], cos_theta_m ) - # if torch.distributed.get_rank() == 0: - # print("o4", output[idx_, topk_idx], flush=True) output *= s # scale up in order to make softmax work return output + def compute_prototype_affinity(self): + return _cosine_affinity(self.kernel) + class CosLossOutput(nn.Module): """Additive margin softmax (CosFace) output layer. @@ -314,6 +313,9 @@ def forward(self, x, y=None): output *= s # scale up in order to make softmax work return output + def compute_prototype_affinity(self): + return _cosine_affinity(self.kernel) + class SubCenterArcLossOutput(ArcLossOutput): """Sub-Center Additive angular margin softmax (ArcFace) output layer. @@ -352,6 +354,11 @@ def __init__( ) self.num_classes = num_classes self.num_subcenters = num_subcenters + # this buffer counts how many times each subcenter is used during training, + # i.e., which subcenter corresponds to the clean label. + self.register_buffer( + "subcenter_counts", torch.zeros(num_classes, num_subcenters) + ) def __str__(self): s = "%s(in_feats=%d, num_classes=%d, num_subcenters=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" % ( self.__class__.__name__, @@ -367,6 +374,12 @@ def __str__(self): ) return s + def _update_counts(self, y, proto_idx): + self.subcenter_counts[y, proto_idx] += 1 + # we keep counts relative to avoid any risk of overflow + min_counts, _ = torch.min(self.subcenter_counts, dim=1, keepdim=True) + self.subcenter_counts -= min_counts + def forward(self, x, y=None): """Computes penalized logits.
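(Editor's note) a minimal, self-contained sketch of the subcenter_counts bookkeeping added in this hunk; the tensor shapes follow the registered buffer, while the batch values are made up for illustration:

    import torch

    num_classes, num_subcenters = 4, 2
    subcenter_counts = torch.zeros(num_classes, num_subcenters)

    # one training step: each sample's class label and its winning subcenter
    y = torch.tensor([0, 1, 2, 3])
    proto_idx = torch.tensor([1, 0, 1, 1])
    subcenter_counts[y, proto_idx] += 1

    # keep counts relative so the buffer never grows without bound
    subcenter_counts -= subcenter_counts.min(dim=1, keepdim=True).values

    # the most-used subcenter per class is treated as the clean-label prototype
    main_idx = subcenter_counts.argmax(dim=-1)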
@@ -385,17 +398,17 @@ kernel_norm = _l2_norm(self.kernel, axis=0) # cos(theta+m) cos_theta = torch.mm(x, kernel_norm).float() - cos_theta = torch.max( + cos_theta, proto_idx = torch.max( cos_theta.view(-1, self.num_classes, self.num_subcenters), dim=-1 - )[0] - + ) cos_theta = cos_theta.clamp(-1, 1) # for numerical stability - # print(cos_theta) + output = ( cos_theta * 1.0 ) # a little bit hacky way to prevent in_place operation on cos_theta if y is not None and self.training: + self._update_counts(y, proto_idx) cos_theta_2 = torch.pow(cos_theta, 2) sin_theta_2 = (1 + 1e-10) - cos_theta_2 sin_theta = torch.sqrt(sin_theta_2) @@ -426,3 +439,43 @@ def forward(self, x, y=None): output *= s # scale up in order to make softmax work return output + + def get_main_prototype_kernel(self): + _, idx2 = torch.max( + self.subcenter_counts, dim=-1 + ) # get indices for the main prototype + idx1 = torch.arange(self.num_classes) + kernel = self.kernel.view(-1, self.num_classes, self.num_subcenters)[:, idx1, idx2] + return kernel + + def compute_prototype_affinity(self): + kernel = self.get_main_prototype_kernel() + return _cosine_affinity(kernel) + + def to_arc_loss(self): + loss = ArcLossOutput( + in_feats=self.in_feats, + num_classes=self.num_classes, + cos_scale=self.cos_scale, + margin=self.margin, + margin_warmup_epochs=self.margin_warmup_epochs, + intertop_k=self.intertop_k, + intertop_margin=self.intertop_margin, + ) + kernel = self.get_main_prototype_kernel() + loss.kernel.data = kernel + return loss + + def to_cos_loss(self): + loss = CosLossOutput( + in_feats=self.in_feats, + num_classes=self.num_classes, + cos_scale=self.cos_scale, + margin=self.margin, + margin_warmup_epochs=self.margin_warmup_epochs, + intertop_k=self.intertop_k, + intertop_margin=self.intertop_margin, + ) + kernel = self.get_main_prototype_kernel() + loss.kernel.data = kernel + return loss diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py index bf0552dc..bd5c3f1b 100644 --- a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py @@ -52,7 +52,6 @@ def __init__( @staticmethod def filter_args(**kwargs): - base_args = HFWav2XVector.filter_args(**kwargs) child_args = HFHubert.filter_args(**kwargs["hf_feats"]) base_args["hf_feats"] = child_args @@ -73,12 +72,22 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFHubert.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ResNet1dXVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + @staticmethod def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") + HFHubert.add_finetune_args(parser, prefix="hf_feats") ResNet1dXVector.add_finetune_args(parser, prefix="xvector") if prefix is not None: diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py index 3cabb1d5..a5166d4d 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py @@ -77,12 +77,22 @@ def
add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFWav2Vec2.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ResNet1dXVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + @staticmethod def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") + HFWav2Vec2.add_finetune_args(parser, prefix="hf_feats") ResNet1dXVector.add_finetune_args(parser, prefix="xvector") if prefix is not None: diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index d75a257b..3fed7143 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -83,6 +83,9 @@ def _fuse_hid_feats(self, hid_feats): return feats + def compute_prototype_affinity(self): + return self.xvector.compute_prototype_affinity() + def update_loss_margin(self, epoch): """Updates the value of the margin in AAM/AM-softmax losses given the epoch number @@ -114,14 +117,22 @@ def rebuild_output_layer( num_subcenters=num_subcenters, ) - def forward_feats(self, x, x_lengths, return_feat_layers=None): + def forward_feats( + self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False + ): return_hid_states = ( False if return_feat_layers is None and self.feat_fusion_method == "last" else True ) with self._hf_context: - hf_output = self.hf_feats(x, x_lengths, return_hid_states=return_hid_states) + hf_output = self.hf_feats( + x, + x_lengths, + return_hid_states=return_hid_states, + chunk_length=chunk_length, + detach_chunks=detach_chunks, + ) feat_lengths = hf_output["hidden_states_lengths"] if return_hid_states: hid_feats = hf_output["hidden_states"] @@ -203,7 +214,8 @@ def extract_embed( x, x_lengths=None, vad_samples=None, - chunk_length=0, + hf_chunk_length=0, + xvec_chunk_length=0, embed_layer=None, detach_chunks=False, ): @@ -211,8 +223,15 @@ def extract_embed( if vad_samples is not None: x, x_lengths = remove_silence(x, x_lengths) - feats, _, feat_lengths = self.forward_feats(x, x_lengths) - xvec_chunk_length = int(chunk_length * feats.size(-1) // x.size(-1)) + feats, _, feat_lengths = self.forward_feats( + x, x_lengths, chunk_length=hf_chunk_length, detach_chunks=detach_chunks + ) + xvec_chunk_length = int( + xvec_chunk_length + * self.hf_feats.sample_frequency + * feats.size(-1) + // x.size(-1) + ) return self.xvector.extract_embed( feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks ) @@ -329,6 +348,11 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + def change_config(self, hf_feats, xvector): + logging.info("changing hf wav2xvector config") + self.hf_feats.change_config(**hf_feats) + self.xvector.change_config(**xvector) + @staticmethod def add_class_args(parser, prefix=None, skip=set()): diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py index efac4e50..2f4b66ce 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py @@ -73,12 +73,22 @@ def add_class_args(parser, prefix=None): if prefix is not 
None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFWavLM.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ResNet1dXVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + @staticmethod def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") + HFWavLM.add_finetune_args(parser, prefix="hf_feats") ResNet1dXVector.add_finetune_args(parser, prefix="xvector") if prefix is not None: diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index 83c95222..c7a77f3e 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -57,6 +57,9 @@ def rebuild_output_layer( num_subcenters=num_subcenters, ) + def compute_prototype_affinity(self): + return self.xvector.compute_prototype_affinity() + def forward( self, x, diff --git a/hyperion/torch/models/xvectors/efficient_net_xvector.py b/hyperion/torch/models/xvectors/efficient_net_xvector.py index 21eb9dbe..df5965cd 100644 --- a/hyperion/torch/models/xvectors/efficient_net_xvector.py +++ b/hyperion/torch/models/xvectors/efficient_net_xvector.py @@ -204,6 +204,17 @@ def get_config(self): config.update(base_config) return config + def change_config( + self, override_dropouts=False, dropout_rate=0, drop_connect_rate=0, **kwargs + ): + xvec_args = XVector.filter_finetune_args(**kwargs) + xvec_args["override_dropouts"] = False + super().change_config(**xvec_args) + + if override_dropouts: + self.encoder_net.change_dropouts(dropout_rate, drop_connect_rate) + self.classif_net.change_dropouts(dropout_rate) + @classmethod def load(cls, file_path=None, cfg=None, state_dict=None): @@ -215,6 +226,7 @@ def load(cls, file_path=None, cfg=None, state_dict=None): return model + @staticmethod def filter_args(**kwargs): base_args = XVector.filter_args(**kwargs) @@ -236,6 +248,25 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='xvector options') add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = XVector.filter_finetune_args(**kwargs) + child_args = EN.filter_finetune_args(**kwargs) + + base_args.update(child_args) + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + EN.add_finetune_args(parser) + XVector.add_finetune_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py index e4495182..5957c9f5 100644 --- a/hyperion/torch/models/xvectors/resnet1d_xvector.py +++ b/hyperion/torch/models/xvectors/resnet1d_xvector.py @@ -81,6 +81,38 @@ def get_config(self): config.update(base_config) return config + def change_config( + self, + resnet_enc, + override_dropouts=False, + dropout_rate=0, + num_classes=None, + loss_type="arc-softmax", + cos_scale=64, + margin=0.3, + margin_warmup_epochs=10, + intertop_k=5, + intertop_margin=0, + num_subcenters=2, + ): + super().change_config( + False, + dropout_rate, + num_classes, + loss_type, + cos_scale, 
+ margin, + margin_warmup_epochs, + intertop_k, + intertop_margin, + num_subcenters, + ) + if override_dropouts: + logging.info("changing x-vector head dropouts") + self.classif_net.change_dropouts(dropout_rate) + + self.encoder_net.change_config(**resnet_enc) + @classmethod def load(cls, file_path=None, cfg=None, state_dict=None): @@ -118,6 +150,26 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='xvector options') add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = XVector.filter_finetune_args(**kwargs) + child_args = Encoder.filter_finetune_args(**kwargs["resnet_enc"]) + base_args["resnet_enc"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_finetune_args(parser) + Encoder.add_finetune_args( + parser, prefix="resnet_enc", skip=set(["head_channels"]) + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/resnet_xvector.py b/hyperion/torch/models/xvectors/resnet_xvector.py index 99385cae..fe88ff57 100644 --- a/hyperion/torch/models/xvectors/resnet_xvector.py +++ b/hyperion/torch/models/xvectors/resnet_xvector.py @@ -191,6 +191,7 @@ def load(cls, file_path=None, cfg=None, state_dict=None): return model + @staticmethod def filter_args(**kwargs): base_args = XVector.filter_args(**kwargs) @@ -210,6 +211,26 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='xvector options') add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + + base_args = XVector.filter_finetune_args(**kwargs) + child_args = RNF.filter_finetune_args(**kwargs) + + base_args.update(child_args) + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_finetune_args(parser) + RNF.add_finetune_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/spinenet_xvector.py b/hyperion/torch/models/xvectors/spinenet_xvector.py index 676952da..1e616570 100644 --- a/hyperion/torch/models/xvectors/spinenet_xvector.py +++ b/hyperion/torch/models/xvectors/spinenet_xvector.py @@ -219,8 +219,8 @@ def load(cls, file_path=None, cfg=None, state_dict=None): return model + @staticmethod def filter_args(**kwargs): - base_args = XVector.filter_args(**kwargs) child_args = SNF.filter_args(**kwargs) @@ -240,3 +240,23 @@ def add_class_args(parser, prefix=None): outer_parser.add_argument("--" + prefix,
action=ActionParser(parser=parser)) add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = XVector.filter_finetune_args(**kwargs) + child_args = SNF.filter_finetune_args(**kwargs) + + base_args.update(child_args) + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_finetune_args(parser) + SNF.add_finetune_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/tdnn_xvector.py b/hyperion/torch/models/xvectors/tdnn_xvector.py index 7816c7ea..f28dc9b3 100644 --- a/hyperion/torch/models/xvectors/tdnn_xvector.py +++ b/hyperion/torch/models/xvectors/tdnn_xvector.py @@ -153,8 +153,8 @@ def load(cls, file_path=None, cfg=None, state_dict=None): return model + @staticmethod def filter_args(**kwargs): - base_args = XVector.filter_args(**kwargs) child_args = TF.filter_args(**kwargs) @@ -172,6 +172,25 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='xvector options') add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = XVector.filter_finetune_args(**kwargs) + child_args = TF.filter_finetune_args(**kwargs) + + base_args.update(child_args) + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_finetune_args(parser) + TF.add_finetune_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/transformer_xvector_v1.py b/hyperion/torch/models/xvectors/transformer_xvector_v1.py index 742fadc8..b3428783 100644 --- a/hyperion/torch/models/xvectors/transformer_xvector_v1.py +++ b/hyperion/torch/models/xvectors/transformer_xvector_v1.py @@ -365,3 +365,51 @@ def add_class_args(parser, prefix=None): # help='xvector options') add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + """Filters arguments corresponding to TransformerXVector + from args dictionary + + Args: + kwargs: args dictionary + + Returns: + args dictionary + """ + base_args = XVector.filter_finetune_args(**kwargs) + + valid_args = ( + "pos_dropout_rate", + "att_dropout_rate", + ) + + child_args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + base_args.update(child_args) + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + """Adds TransformerXVector config parameters for finetuning to argparser + + Args: + parser: argparse object + prefix: prefix string to add to the argument names + """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_finetune_args(parser) + parser.add_argument( + "--pos-dropout-rate", + default=0.1, + type=float, + help="positional encoder dropout", + ) + parser.add_argument( + "--att-dropout-rate", default=0, type=float, help="self-att dropout" + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index c35e6a4a..197ef5a9 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -4,7 +4,7 @@ """ import logging from enum import Enum -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo from typing import Optional import torch @@ -531,6 +531,36 @@ def load(cls, file_path=None, cfg=None, state_dict=None): return model + def change_config( + self, + override_dropouts=False, + dropout_rate=0, + num_classes=None, + loss_type="arc-softmax", + cos_scale=64, + margin=0.3, + margin_warmup_epochs=10, + intertop_k=5, + intertop_margin=0.0,
+ num_subcenters=2, + ): + logging.info("changing x-vector config") + self.rebuild_output_layer( + num_classes=num_classes, + loss_type=loss_type, + cos_scale=cos_scale, + margin=margin, + margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, + ) + + if override_dropouts: + logging.info("overriding x-vector dropouts") + self.encoder_net.change_dropouts(dropout_rate) + self.classif_net.change_dropouts(dropout_rate) + def rebuild_output_layer( self, num_classes=None, @@ -547,6 +577,7 @@ def rebuild_output_layer( ): # if we change the number of classes or the loss-type # we need to reinitiate the last layer + logging.info("rebuilding output layer") self.classif_net.rebuild_output_layer( num_classes, loss_type, cos_scale, margin, margin_warmup_epochs ) @@ -602,6 +633,9 @@ def _train(self, train_mode: str): else: raise ValueError(f"invalid train_mode={train_mode}") + def compute_prototype_affinity(self): + return self.classif_net.compute_prototype_affinity() + @staticmethod def valid_train_modes(): return ["full", "frozen", "ft-embed-affine"] @@ -850,9 +884,26 @@ def add_finetune_args(parser, prefix=None): help="number of subcenters in subcenter losses", ) + try: + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model." + ), + ) + except: + pass + + try: + parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") + except: + pass + if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='xvector finetune opts') add_argparse_args = add_class_args add_argparse_finetune_args = add_finetune_args diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index 6a886e44..adfeceb3 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -5,6 +5,7 @@ from jsonargparse import ArgumentParser, ActionParser +import torch import torch.nn as nn from torch.nn import Linear @@ -310,6 +311,14 @@ def extract_embed(self, x, embed_layer=0): y = self.fc_blocks[l](x) return y + def compute_prototype_affinity(self): + if self.loss_type != "softmax": + return self.output.compute_prototype_affinity() + + kernel = self.output.weight # (num_classes, feat_dim) + kernel = kernel / torch.linalg.norm(kernel, 2, dim=1, keepdim=True) + return torch.mm(kernel, kernel.transpose(0, 1)) + def get_config(self): hid_act = AF.get_config(self.fc_blocks[0].activation) diff --git a/hyperion/torch/narchs/efficient_net.py b/hyperion/torch/narchs/efficient_net.py index 8a71d6f4..273fa183 100644 --- a/hyperion/torch/narchs/efficient_net.py +++ b/hyperion/torch/narchs/efficient_net.py @@ -4,7 +4,7 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import torch import torch.nn as nn @@ -395,6 +395,17 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + def change_dropouts(self, dropout_rate, drop_connect_rate): + super().change_dropouts(dropout_rate) + from ..layers import DropConnect2d + + for module in self.modules(): + if isinstance(module, DropConnect2d): + module.p *= drop_connect_rate / self.drop_connect_rate + + self.drop_connect_rate = drop_connect_rate + self.dropout_rate = dropout_rate + 
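+    # (editor's sketch) the rescaling above is proportional: if the pretrained
+    # model was built with drop_connect_rate=0.2 and a given block uses p=0.15,
+    # calling change_dropouts(dropout_rate, drop_connect_rate=0.1) halves that
+    # block's probability to p = 0.15 * (0.1 / 0.2) = 0.075.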
@staticmethod def filter_args(**kwargs): @@ -424,7 +435,6 @@ ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - return args @staticmethod @@ -590,6 +600,53 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='efficientnet options') add_argparse_args = add_class_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + try: + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model." + ), + ) + except: + pass + + parser.add_argument( + "--drop-connect-rate", + default=0.2, + type=float, + help="layer drop probability", + ) + + try: + parser.add_argument( + "--dropout-rate", default=0, type=float, help="dropout probability" + ) + except: + pass + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + + valid_args = ( + "out_units", + "override_dropouts", + "drop_connect_rate", + "dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args diff --git a/hyperion/torch/narchs/resnet1d_encoder.py index 2044f528..794f8144 100644 --- a/hyperion/torch/narchs/resnet1d_encoder.py +++ b/hyperion/torch/narchs/resnet1d_encoder.py @@ -3,8 +3,9 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import math +import logging import numpy as np @@ -518,6 +519,22 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + def change_config(self, override_dropouts, dropout_rate, drop_connect_rate): + if override_dropouts: + logging.info("changing resnet1d dropouts") + self.change_dropouts(dropout_rate, drop_connect_rate) + + def change_dropouts(self, dropout_rate, drop_connect_rate): + super().change_dropouts(dropout_rate) + from ..layers import DropConnect1d + + for module in self.modules(): + if isinstance(module, DropConnect1d): + module.p *= drop_connect_rate / self.drop_connect_rate + + self.drop_connect_rate = drop_connect_rate + self.dropout_rate = dropout_rate + @staticmethod def filter_args(**kwargs): if "wo_norm" in kwargs: @@ -791,6 +808,55 @@ def add_class_args(parser, prefix=None, skip=set(["in_feats"])): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='ResNet1d encoder options') add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + + valid_args = ( + "override_dropouts", + "drop_connect_rate", + "dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None, skip=set([])): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + try: + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model."
+ ), + ) + except: + pass + + try: + parser.add_argument( + "--dropout-rate", default=0, type=float, help="dropout probability" + ) + except: + pass + + try: + parser.add_argument( + "--drop-connect-rate", + default=0, + type=float, + help="layer drop probability", + ) + except: + pass + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/resnet2d_encoder.py index a72cabac..b27e883d 100644 --- a/hyperion/torch/narchs/resnet2d_encoder.py +++ b/hyperion/torch/narchs/resnet2d_encoder.py @@ -5,6 +5,7 @@ import math from jsonargparse import ArgumentParser, ActionParser +import logging import torch import torch.nn as nn @@ -352,6 +353,22 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + def change_config(self, override_dropouts, dropout_rate, drop_connect_rate): + if override_dropouts: + logging.info("changing resnet2d dropouts") + self.change_dropouts(dropout_rate, drop_connect_rate) + + def change_dropouts(self, dropout_rate, drop_connect_rate): + super().change_dropouts(dropout_rate) + from ..layers import DropConnect2d + + for module in self.modules(): + if isinstance(module, DropConnect2d): + module.p *= drop_connect_rate / self.drop_connect_rate + + self.drop_connect_rate = drop_connect_rate + self.dropout_rate = dropout_rate + @staticmethod def filter_args(**kwargs): diff --git a/hyperion/torch/narchs/resnet_factory.py index bd58cd2b..645b7f2b 100644 --- a/hyperion/torch/narchs/resnet_factory.py +++ b/hyperion/torch/narchs/resnet_factory.py @@ -3,7 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo from .resnet import * @@ -308,6 +308,41 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='ResNet options') add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + valid_args = ( + "override_dropouts", + "dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + try: + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model."
+ ), + ) + except: + pass + + try: + parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") + except: + pass + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/spinenet_factory.py b/hyperion/torch/narchs/spinenet_factory.py index 02e36244..9e94a1be 100644 --- a/hyperion/torch/narchs/spinenet_factory.py +++ b/hyperion/torch/narchs/spinenet_factory.py @@ -2,7 +2,7 @@ Copyright 2020 Magdalena Rybicka Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo from .spinenet import * @@ -266,3 +266,40 @@ def add_class_args(parser, prefix=None): outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + + valid_args = ( + "override_dropouts", + "dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + try: + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model." + ), + ) + except: + pass + + try: + parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") + except: + pass + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/tdnn_factory.py b/hyperion/torch/narchs/tdnn_factory.py index 584e9243..6a9e6010 100644 --- a/hyperion/torch/narchs/tdnn_factory.py +++ b/hyperion/torch/narchs/tdnn_factory.py @@ -3,7 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo from .tdnn import TDNNV1 from .etdnn import ETDNNV1 @@ -264,3 +264,40 @@ def add_class_args(parser, prefix=None): # help='TDNN options') add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + + valid_args = ( + "override_dropouts", + "dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + try: + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model." 
+ ), + ) + except: + pass + + try: + parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") + except: + pass + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/transformer_encoder_v1.py b/hyperion/torch/narchs/transformer_encoder_v1.py index f1a5b26c..d2949c12 100644 --- a/hyperion/torch/narchs/transformer_encoder_v1.py +++ b/hyperion/torch/narchs/transformer_encoder_v1.py @@ -125,9 +125,6 @@ def __init__( if self.norm_before: self.norm = nn.LayerNorm(d_model) - # def _make_in_layer(self, in_layer_type, in_feats, d_model, - # dropout_rate, pos_dropout_rate, - # padding_idx, time_dim): def _make_in_layer(self): in_feats = self.in_feats @@ -240,6 +237,31 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + def change_dropouts(self, pos_dropout_rate, att_dropout_rate, ff_dropout_rate): + + assert pos_dropout_rate == 0 or self.pos_dropout_rate > 0 + assert att_dropout_rate == 0 or self.att_dropout_rate > 0 + assert ff_dropout_rate == 0 or self.ff_dropout_rate > 0 + + for module in self.modules(): + if isinstance(module, PosEncoder): + for layer in module.modules(): + if isinstance(layer, nn.Dropout): + layer.p = pos_dropout_rate + + elif isinstance(module, EBlock): + for layer in module.modules(): + if isinstance(layer, nn.Dropout): + layer.p = ff_dropout_rate + + for layer in module.self_attn.modules(): + if isinstance(layer, nn.Dropout): + layer.p = att_dropout_rate + + self.pos_dropout_rate = pos_dropout_rate + self.att_dropout_rate = att_dropout_rate + self.ff_dropout_rate = ff_dropout_rate + def in_context(self): return (self.att_context, self.att_context) diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index af3a305c..1d01e02b 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -51,6 +51,16 @@ def unfreeze(self): for param in self.parameters(): param.requires_grad = True + def change_dropouts(self, dropout_rate): + """Changes all dropout rates of the model.""" + for module in self.modules(): + if isinstance(module, nn.modules.dropout._DropoutNd): + module.p = dropout_rate + + if hasattr(self, "dropout_rate"): + assert dropout_rate == 0 or self.dropout_rate > 0 + self.dropout_rate = dropout_rate + @property def train_mode(self): return self._train_mode diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py index 82ce70bd..ba331573 100644 --- a/hyperion/torch/tpm/hf/hf_hubert.py +++ b/hyperion/torch/tpm/hf/hf_hubert.py @@ -131,6 +131,10 @@ class HFHubert(HFWav2VecBase): and uses the ones passed as arguments. override_spec_augment (`bool` defaults to False): if True, it ingnores the spec. augment. configuration in the pretrained model and uses the ones passed in the arguments. + left_encoder_context (`int`): past context frames used by the transformer encoder when the signal is evaluated + chunk by chunk, if it is too long to fit in GPU. + right_encoder_context: (`int`): future context frames used by the transformer encoder. + sample_frequency: (`int`) waveform sample frequency used to train the model. 
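+      Note: when the signal is evaluated chunk by chunk, these encoder contexts are
+      added on top of the receptive field of the convolutional feature encoder
+      (see `feature_encoder_context` in `HFWav2VecBase`).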
""" def __init__( @@ -175,6 +179,9 @@ def __init__( ignore_pretrained: bool = False, override_dropouts: bool = False, override_spec_augment: bool = False, + left_encoder_context: int = 16, + right_encoder_context: int = 16, + sample_frequency: int = 16000, ): super().__init__( @@ -189,6 +196,9 @@ def __init__( ignore_pretrained=ignore_pretrained, override_dropouts=override_dropouts, override_spec_augment=override_spec_augment, + left_encoder_context=left_encoder_context, + right_encoder_context=right_encoder_context, + sample_frequency=sample_frequency, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -216,7 +226,9 @@ def __init__( ) ddp_wait_for_all_procs() self.hf_model.config.layerdrop = 0.0 - self.change_hyperparams( + self.change_config( + override_dropouts=self.override_dropouts, + override_spec_augment=self.override_spec_augment, hidden_dropout=hidden_dropout, activation_dropout=activation_dropout, attention_dropout=attention_dropout, @@ -571,3 +583,110 @@ def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + args_base = HFWav2VecBase.filter_args(**kwargs) + valid_args = ( + "hidden_dropout", + "activation_dropout", + "attention_dropout", + "feat_proj_dropout", + "apply_spec_augment", + "mask_time_prob", + "mask_time_length", + "mask_time_min_masks", + "mask_feature_prob", + "mask_feature_length", + "mask_feature_min_masks", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args.update(args_base) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2VecBase.add_finetune_args(parser) + parser.add_argument( + "--hidden-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "fully connected layers in the embeddings, encoder, and pooler" + ), + ) + parser.add_argument( + "--activation-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "intermediate layer in feedforward transformer layers" + ), + ) + parser.add_argument( + "--attention-dropout", + default=0.1, + type=float, + help=("the dropout ratio for the attention probabilities"), + ) + parser.add_argument( + "--apply-spec-augment", + default=True, + action=ActionYesNo, + help=( + "whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder" + ), + ) + parser.add_argument( + "--mask-time-prob", + default=0.05, + type=float, + help=( + "percentage (between 0 and 1) of all feature vectors along the time axis which will be masked" + ), + ) + parser.add_argument( + "--mask-time-length", + default=10, + type=int, + help=("length of vector span along the time axis"), + ) + parser.add_argument( + "--mask-time-min-masks", + default=2, + type=int, + help=( + "the minimum number of masks of length `mask_time_length` generated along the time axis" + ), + ) + parser.add_argument( + "--mask-feature-prob", + default=0.0, + type=float, + help=( + "percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked" + ), + ) + parser.add_argument( + "--mask-feature-length", + default=10, + type=int, + help=(" length of vector span along the feature axis"), + ) + parser.add_argument( + "--mask-feature-min-masks", + default=0, + type=int, + help=( + "The minimum number of masks of length `mask_feature_length` 
generated along the feature axis" + ), + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index e91fe8c4..579574a1 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -144,6 +144,10 @@ class HFWav2Vec2(HFWav2VecBase): and uses the ones passed as arguments. override_spec_augment (`bool` defaults to False): if True, it ingnores the spec. augment. configuration in the pretrained model and uses the ones passed in the arguments. + left_encoder_context (`int`): past context frames used by the transformer encoder when the signal is evaluated + chunk by chunk, if it is too long to fit in GPU. + right_encoder_context: (`int`): future context frames used by the transformer encoder. + sample_frequency: (`int`) waveform sample frequency used to train the model. """ def __init__( @@ -193,6 +197,9 @@ def __init__( ignore_pretrained: bool = False, override_dropouts: bool = False, override_spec_augment: bool = False, + left_encoder_context: int = 16, + right_encoder_context: int = 16, + sample_frequency: int = 16000, ): super().__init__( @@ -207,6 +214,9 @@ def __init__( ignore_pretrained=ignore_pretrained, override_dropouts=override_dropouts, override_spec_augment=override_spec_augment, + left_encoder_context=left_encoder_context, + right_encoder_context=right_encoder_context, + sample_frequency=sample_frequency, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -234,7 +244,9 @@ def __init__( ) ddp_wait_for_all_procs() self.hf_model.config.layerdrop = 0.0 - self.change_hyperparams( + self.change_config( + override_dropouts=self.override_dropouts, + override_spec_augment=self.override_spec_augment, hidden_dropout=hidden_dropout, activation_dropout=activation_dropout, attention_dropout=attention_dropout, @@ -316,7 +328,7 @@ def change_dropouts( self.hf_model.feature_projection.dropout.p = feat_proj_dropout for module in self.hf_model.encoder.modules(): if isinstance(module, nn.Dropout): - t.p = hidden_dropout + module.p = hidden_dropout for module in self.hf_model.encoder.modules(): if isinstance(module, t.Wav2Vec2Attention): @@ -667,6 +679,113 @@ def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + @staticmethod + def filter_finetune_args(**kwargs): + args_base = HFWav2VecBase.filter_args(**kwargs) + valid_args = ( + "hidden_dropout", + "activation_dropout", + "attention_dropout", + "feat_proj_dropout", + "apply_spec_augment", + "mask_time_prob", + "mask_time_length", + "mask_time_min_masks", + "mask_feature_prob", + "mask_feature_length", + "mask_feature_min_masks", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args.update(args_base) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2VecBase.add_finetune_args(parser) + parser.add_argument( + "--hidden-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "fully connected layers in the embeddings, encoder, and pooler" + ), + ) + parser.add_argument( + "--activation-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "intermediate layer in feedforward transformer layers" + ), + ) + parser.add_argument( + "--attention-dropout", 
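+            # defaults below are assumed to mirror the corresponding fields of the
+            # HuggingFace Wav2Vec2 config; check the transformers docs for exact values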
+            default=0.1,
+            type=float,
+            help=("the dropout ratio for the attention probabilities"),
+        )
+        parser.add_argument(
+            "--apply-spec-augment",
+            default=True,
+            action=ActionYesNo,
+            help=(
+                "whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder"
+            ),
+        )
+        parser.add_argument(
+            "--mask-time-prob",
+            default=0.05,
+            type=float,
+            help=(
+                "percentage (between 0 and 1) of all feature vectors along the time axis which will be masked"
+            ),
+        )
+        parser.add_argument(
+            "--mask-time-length",
+            default=10,
+            type=int,
+            help=("length of vector span along the time axis"),
+        )
+        parser.add_argument(
+            "--mask-time-min-masks",
+            default=2,
+            type=int,
+            help=(
+                "the minimum number of masks of length `mask_time_length` generated along the time axis"
+            ),
+        )
+        parser.add_argument(
+            "--mask-feature-prob",
+            default=0.0,
+            type=float,
+            help=(
+                "percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked"
+            ),
+        )
+        parser.add_argument(
+            "--mask-feature-length",
+            default=10,
+            type=int,
+            help=("length of vector span along the feature axis"),
+        )
+        parser.add_argument(
+            "--mask-feature-min-masks",
+            default=0,
+            type=int,
+            help=(
+                "the minimum number of masks of length `mask_feature_length` generated along the feature axis"
+            ),
+        )
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
+
+    """
+    Things I think I don't need:
+      feat_quantizer_dropout (`float`, defaults to 0.0): the dropout probability
+      for quantized feature encoder states.
diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py
index 87f19835..1dceed1c 100644
--- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py
+++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py
@@ -49,6 +49,10 @@ class HFWav2VecBase(TorchModel):
         and uses the ones passed as arguments.
       override_spec_augment (`bool` defaults to False): if True, it ignores the spec. augment.
         configuration in the pretrained model and uses the ones passed in the arguments.
+      left_encoder_context (`int`): past context frames used by the transformer encoder when the signal is evaluated
+        chunk by chunk, if it is too long to fit in GPU.
+      right_encoder_context (`int`): future context frames used by the transformer encoder.
+      sample_frequency (`int`): waveform sample frequency used to train the model.
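+
+      A minimal usage sketch (the subclass and checkpoint name are illustrative,
+      not prescribed by this base class):
+
+      >>> model = HFWav2Vec2(pretrained_model_path="facebook/wav2vec2-base")
+      >>> out = model(x, x_lengths, chunk_length=120.0)  # chunked forward
+      >>> h = out["last_hidden_state"]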
""" def __init__( @@ -64,6 +69,9 @@ def __init__( ignore_pretrained: bool = False, override_dropouts: bool = False, override_spec_augment: bool = False, + left_encoder_context: int = 16, + right_encoder_context: int = 16, + sample_frequency: int = 16000, ): super().__init__() self.pretrained_model_path = pretrained_model_path @@ -75,6 +83,8 @@ def __init__( self.ignore_pretrained = ignore_pretrained self.override_dropouts = override_dropouts self.override_spec_augment = override_spec_augment + self.right_encoder_context = right_encoder_context + self.left_encoder_context = left_encoder_context if pretrained_model_path is not None and not ignore_pretrained: rank = ddp_get_rank() @@ -130,9 +140,14 @@ def __init__( ddp_wait_for_all_procs() normalize_input = feature_extractor.do_normalize use_input_attention_mask = feature_extractor.return_attention_mask + sample_frequency = feature_extractor.sampling_rate self.normalize_input = normalize_input self.use_input_attention_mask = use_input_attention_mask + self.sample_frequency = sample_frequency + + self._feature_encoder_context = None + self._frame_shift = None def __deepcopy__(self, memo): """Reimplementation of deepcopy for Hugging Face models. @@ -149,11 +164,65 @@ def __deepcopy__(self, memo): new_obj.to(device) return new_obj - def change_hyperparams(self, **kwargs): - if self.override_spec_augment: + @property + def feature_encoder_context(self): + if self._feature_encoder_context is not None: + return self._feature_encoder_context + + total_context = 0 + total_stride = 1 + for kernel, stride in zip( + self.hf_model.config.conv_kernel, self.hf_model.config.conv_stride + ): + total_context += total_stride * (kernel - 1) / 2 + total_stride *= stride + + self._feature_encoder_context = (int(total_context + 0.5), int(total_context)) + return self._feature_encoder_context + + @property + def frame_shift(self): + if self._frame_shift is not None: + return self._frame_shift + + total_stride = 1 + for stride in self.hf_model.config.conv_stride: + total_stride *= stride + + self._frame_shift = total_stride + return total_stride + + @property + def context(self): + left, right = self.feature_encoder_context + left += self.left_encoder_context + right += self.right_encoder_context + return left, right + + def max_out_length(self, max_in_length): + return self.hf_model._get_feat_extract_output_lengths(max_in_length).item() + # left_context, right_context = self.feature_encoder_context + # max_in_length = max_in_length - left_context - right_context + # return max_in_length // self.frame_shift + + def out_lengths(self, in_lengths): + return self.hf_model._get_feat_extract_output_lengths(in_lengths) + # left_context, right_context = self.feature_encoder_context + # in_lengths = in_lengths - left_context - right_context + # return torch.div(in_lengths, self.frame_shift, rounding_mode="floor") + + def out_shape(self, in_shape): + out_length = self.max_out_length(in_shape[1]) + C = self.hf_model.config.hidden_size + return (in_shape[0], out_length, C) + + def change_config(self, override_dropouts, override_spec_augment, **kwargs): + if override_spec_augment: + logging.info("overriding speech augment") self.change_spec_augment(**kwargs) - if self.override_dropouts: + if override_dropouts: + logging.info("overriding hf model dropouts") self.change_dropouts(**kwargs) def change_spec_augment( @@ -217,6 +286,51 @@ def forward( x_lengths: Optional[torch.LongTensor] = None, return_attentions: bool = False, return_hid_states: bool = False, + chunk_length: float = 
0,
+        detach_chunks: bool = True,
+    ):
+        r"""Forward function for wav2vec style models; long utterances that do
+        not fit in GPU memory are evaluated chunk by chunk.
+
+        Args:
+          x: input audio of shape = (batch, sequence_length).
+          x_lengths: lengths of the audio waveforms in samples with shape = (batch,).
+          return_attentions: whether or not to return the attentions tensors of
+            all attention layers.
+          return_hid_states: whether or not to return the hidden states of all layers.
+          chunk_length: chunk length in seconds.
+          detach_chunks: whether to detach the output of each chunk from the
+            autograd graph to save memory.
+
+        Returns:
+          Dictionary with:
+            last_hidden_state: sequence of hidden-states at the output of the last
+              layer of the model (torch.FloatTensor of shape
+              (batch_size, sequence_length, hidden_size)).
+            extract_features: sequence of extracted feature vectors of the last
+              convolutional layer of the model (torch.FloatTensor of shape
+              (batch_size, sequence_length, conv_dim[-1])).
+            hidden_states: hidden-states of the model at the output of each layer
+              plus the initial embedding outputs (tuple(torch.FloatTensor)).
+            attentions: attentions weights after the attention softmax, used to
+              compute the weighted average in the self-attention heads
+              (tuple(torch.FloatTensor)).
+        """
+        if chunk_length == 0 or x.size(1) < chunk_length * self.sample_frequency:
+            return self.forward_impl(x, x_lengths, return_attentions, return_hid_states)
+        else:
+            return self.forward_long_impl(
+                x,
+                x_lengths,
+                return_attentions,
+                return_hid_states,
+                chunk_length,
+                detach_chunks,
+            )
+
+    def forward_impl(
+        self,
+        x: torch.Tensor,
+        x_lengths: Optional[torch.LongTensor] = None,
+        return_attentions: bool = False,
+        return_hid_states: bool = False,
     ):
         r"""Forward function for wav2vec style models.
 
         Args:
@@ -259,6 +373,143 @@ def forward(
 
         return output
 
+    def forward_long_impl(
+        self,
+        x: torch.Tensor,
+        x_lengths: Optional[torch.LongTensor] = None,
+        return_attentions: bool = False,
+        return_hid_states: bool = False,
+        chunk_length: float = 120.0,
+        detach_chunks: bool = True,
+    ):
+        r"""Forward function for long utterances that do not fit in GPU memory.
+
+        Args:
+          x: input audio of shape = (batch, sequence_length).
+          x_lengths: lengths of the audio waveforms in samples with shape = (batch,).
+          return_attentions: whether or not to return the attentions tensors of
+            all attention layers.
+          return_hid_states: whether or not to return the hidden states of all layers.
+          chunk_length: chunk length in seconds.
+          detach_chunks: whether to detach the output of each chunk from the
+            autograd graph to save memory.
+
+        Returns:
+          Dictionary with:
+            last_hidden_state: sequence of hidden-states at the output of the last
+              layer of the model (torch.FloatTensor of shape
+              (batch_size, sequence_length, hidden_size)).
+            extract_features: sequence of extracted feature vectors of the last
+              convolutional layer of the model (torch.FloatTensor of shape
+              (batch_size, sequence_length, conv_dim[-1])).
+            hidden_states: hidden-states of the model at the output of each layer
+              plus the initial embedding outputs (tuple(torch.FloatTensor)).
+            attentions: attentions weights after the attention softmax, used to
+              compute the weighted average in the self-attention heads
+              (tuple(torch.FloatTensor)).
+        """
+        # output0 = self.forward_impl(x, x_lengths)
+        # mol0 = output0.last_hidden_state.size(1)
+        logging.debug("evaluating long utterance chunk by chunk")
+        max_in_length = x.size(-1)
+        x, x_mask = self._preprocess(x, x_lengths)
+        # we transform the chunk length from seconds to samples,
+        # making sure that the chunk_length corresponds to an integer number of output samples.
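+        # illustrative arithmetic (assuming sample_frequency=16000 and
+        # frame_shift=320, the product of the conv strides of typical
+        # wav2vec2-style feature encoders): chunk_length=120.0 s gives
+        # 120 * 16000 // 320 = 6000 output frames per chunk, and the chunk is
+        # rounded to 6000 * 320 = 1,920,000 samples.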
+ chunk_frames = int(chunk_length * self.sample_frequency) // self.frame_shift + chunk_length = chunk_frames * self.frame_shift + num_chunks = (x.size(1) + chunk_length - 1) // chunk_length + left_context, right_context = self.context + max_out_length = self.max_out_length(x.size(1)) + start = 0 + outputs = [] + for i in range(num_chunks): + if i < num_chunks - 1: + start_i = max(start - left_context, 0) + else: + # last chunk has special treatment, we forward pass + # a chunk with chunk_length size ending at the end. + # but we will just use the output frames that don't overlap + # with the second last chunk. + start_i = max(x.size(1) - chunk_length - left_context, 0) + + stop_i = min(start + chunk_length + right_context, x.size(1)) + x_i = x[:, start_i:stop_i] + x_mask_i = None if x_mask is None else x_mask[start_i:stop_i] + output_i = self.hf_model( + x_i, + x_mask_i, + output_attentions=return_attentions, + output_hidden_states=return_hid_states, + ) + + if i < num_chunks - 1: + start_out_i = max( + output_i.last_hidden_state.size(1) + - chunk_frames + - self.right_encoder_context, + 0, + ) + stop_out_i = start_out_i + chunk_frames + else: + # we just use the frames that do not overlap + # with the second last chunk + remaining_frames = max_out_length - i * chunk_frames + start_out_i = -remaining_frames + stop_out_i = output_i.last_hidden_state.size(1) + + output_i.last_hidden_state = output_i.last_hidden_state[ + :, start_out_i:stop_out_i + ] + if detach_chunks: + output_i.last_hidden_state.detach_() + + if return_hid_states: + output_i.hidden_states = [ + h[:, start_out_i:stop_out_i] for h in output_i.hidden_states + ] + if detach_chunks: + output_i.hidden_states = [ + h.detach() for h in output_i.hidden_states + ] + + outputs.append(output_i) + start += chunk_length + + # concatenate outputs from different chunks + output = outputs[0] + output.last_hidden_state = torch.cat( + [o.last_hidden_state for o in outputs], dim=1 + ) + if return_hid_states: + hidden_states = [] + for j in range(len(outputs[0].hidden_states)): + hidden_states_j = torch.cat( + [o.hidden_states[j] for o in outputs], dim=1 + ) + hidden_states.append(hidden_states_j) + output.hidden_states = hidden_states + + if return_attentions: + attentions = [] + for j in range(len(outputs[0].attentions)): + attentions_j = [o.attentions[j] for o in outputs] + attentions.append(attentions_j) + output.attentions = attentions + + feat_lengths = ( + None + if x_lengths is None + else scale_seq_lengths(x_lengths, max_out_length, max_in_length) + ) + output["hidden_states_lengths"] = feat_lengths + # print( + # "lens", + # mol0, + # max_out_length, + # output.last_hidden_state.size(1), + # output.hidden_states[0].size(1), + # flush=True, + # ) + return output + def get_config(self): """Returns the configuration arguments for the object in a dictionary.""" @@ -274,6 +525,9 @@ def get_config(self): "ignore_pretrained": self.ignore_pretrained, "override_dropouts": self.override_dropouts, "override_spec_augment": self.override_spec_augment, + "left_encoder_context": self.left_encoder_context, + "right_encoder_context": self.right_encoder_context, + "sample_frequency": self.sample_frequency, } base_config = super().get_config() @@ -298,6 +552,9 @@ def filter_args(**kwargs): "ignore_pretrained", "override_dropouts", "override_spec_augment", + "left_encoder_context", + "right_encoder_context", + "sample_frequency", ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) return args @@ -383,5 +640,61 @@ def add_class_args(parser, 
prefix=None, skip=set()): "arguments instead of the defaults in the pretrained model." ), ) + parser.add_argument( + "--left-encoder-context", + default=16, + type=int, + help=( + "past context frames used by the transformer encoder " + "when the signal is evaluated chunk by chunk." + ), + ) + parser.add_argument( + "--right-encoder-context", + default=16, + type=int, + help=( + "future context frames used by the transformer encoder " + "when the signal is evaluated chunk by chunk." + ), + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + valid_args = ( + "override_dropouts", + "override_spec_augment", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model." + ), + ) + parser.add_argument( + "--override-spec-augment", + default=False, + action=ActionYesNo, + help=( + "whether to use the spec augment config. passed in the " + "arguments instead of the defaults in the pretrained model." + ), + ) + if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py index c75cb6e8..15b8248d 100644 --- a/hyperion/torch/tpm/hf/hf_wavlm.py +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -144,6 +144,10 @@ class HFWavLM(HFWav2VecBase): and uses the ones passed as arguments. override_spec_augment (`bool` defaults to False): if True, it ingnores the spec. augment. configuration in the pretrained model and uses the ones passed in the arguments. + left_encoder_context (`int`): past context frames used by the transformer encoder when the signal is evaluated + chunk by chunk, if it is too long to fit in GPU. + right_encoder_context: (`int`): future context frames used by the transformer encoder. + sample_frequency: (`int`) waveform sample frequency used to train the model. 
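+      Note: when a pretrained checkpoint is loaded, `sample_frequency` is read from
+      the feature extractor configuration, which overrides the value passed here.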
""" def __init__( @@ -193,6 +197,9 @@ def __init__( ignore_pretrained: bool = False, override_dropouts: bool = False, override_spec_augment: bool = False, + left_encoder_context: int = 16, + right_encoder_context: int = 16, + sample_frequency: int = 16000, ): super().__init__( @@ -207,6 +214,9 @@ def __init__( ignore_pretrained=ignore_pretrained, override_dropouts=override_dropouts, override_spec_augment=override_spec_augment, + left_encoder_context=left_encoder_context, + right_encoder_context=right_encoder_context, + sample_frequency=sample_frequency, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -234,7 +244,9 @@ def __init__( ) ddp_wait_for_all_procs() self.hf_model.config.layerdrop = 0.0 - self.change_hyperparams( + self.change_config( + override_dropouts=self.override_dropouts, + override_spec_augment=self.override_spec_augment, hidden_dropout=hidden_dropout, activation_dropout=activation_dropout, attention_dropout=attention_dropout, @@ -640,3 +652,110 @@ def add_class_args(parser, prefix=None, skip=set()): ) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + args_base = HFWav2VecBase.filter_args(**kwargs) + valid_args = ( + "hidden_dropout", + "activation_dropout", + "attention_dropout", + "feat_proj_dropout", + "apply_spec_augment", + "mask_time_prob", + "mask_time_length", + "mask_time_min_masks", + "mask_feature_prob", + "mask_feature_length", + "mask_feature_min_masks", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args.update(args_base) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2VecBase.add_finetune_args(parser) + parser.add_argument( + "--hidden-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "fully connected layers in the embeddings, encoder, and pooler" + ), + ) + parser.add_argument( + "--activation-dropout", + default=0.1, + type=float, + help=( + "the dropout probability for all " + "intermediate layer in feedforward transformer layers" + ), + ) + parser.add_argument( + "--attention-dropout", + default=0.1, + type=float, + help=("the dropout ratio for the attention probabilities"), + ) + parser.add_argument( + "--apply-spec-augment", + default=True, + action=ActionYesNo, + help=( + "whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder" + ), + ) + parser.add_argument( + "--mask-time-prob", + default=0.05, + type=float, + help=( + "percentage (between 0 and 1) of all feature vectors along the time axis which will be masked" + ), + ) + parser.add_argument( + "--mask-time-length", + default=10, + type=int, + help=("length of vector span along the time axis"), + ) + parser.add_argument( + "--mask-time-min-masks", + default=2, + type=int, + help=( + "the minimum number of masks of length `mask_time_length` generated along the time axis" + ), + ) + parser.add_argument( + "--mask-feature-prob", + default=0.0, + type=float, + help=( + "percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked" + ), + ) + parser.add_argument( + "--mask-feature-length", + default=10, + type=int, + help=(" length of vector span along the feature axis"), + ) + parser.add_argument( + "--mask-feature-min-masks", + default=0, + type=int, + help=( + "The minimum number of masks of length 
`mask_feature_length` generated along the feature axis" + ), + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/utils/eval_utils.py b/hyperion/torch/utils/eval_utils.py index e8fa9c86..f1ae1edb 100644 --- a/hyperion/torch/utils/eval_utils.py +++ b/hyperion/torch/utils/eval_utils.py @@ -8,12 +8,8 @@ def eval_nnet_by_chunks(x, nnet, chunk_length=0, detach_chunks=True, time_dim=-1): - # model_device = next(nnet.parameters()).device - # print(device, model_device, x.device) - # assume time is the last dimension device = None if nnet.device == x.device else nnet.device - T = x.shape[time_dim] if T <= chunk_length or chunk_length == 0: if device is not None: @@ -183,170 +179,3 @@ def eval_nnet_overlap_add( y = y.transpose(0, time_dim) / count return y - - -# """ -# Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -# """ - -# import math -# import torch - -# def eval_nnet_by_chunks(x, nnet, chunk_length=0, device=None, time_dim=-1): -# # model_device = next(nnet.parameters()).device -# # print(device, model_device, x.device) -# #assume time is the last dimension -# T = x.shape[time_dim] -# if T <= chunk_length or chunk_length == 0: -# if device is not None: -# x = x.to(device) -# return nnet(x) #.detach() - -# try: -# left_context, right_context = nnet.in_context() -# except: -# left_context = right_context = 0 - -# in_shape = x.shape -# chunk_shift_in = chunk_length - left_context - right_context - -# try: -# out_shape = nnet.out_shape(in_shape) -# T_out = out_shape[time_dim] -# r = float(T_out)/T -# except: -# out_shape = None - - -# num_chunks = int(math.ceil((T-chunk_length)/chunk_shift_in+1)) -# #move time dimension to dim 0 -# x = x.transpose(0, time_dim) -# y = None -# tbeg_in = 0 -# tbeg_out = 0 -# for i in range(num_chunks): -# tend_in = min(tbeg_in + chunk_length, x.shape[0]) -# #get slice and move back time dimension to last dim -# x_i = x[tbeg_in:tend_in].transpose(0, time_dim) -# if device is not None: -# x_i = x_i.to(device) - -# y_i = nnet(x_i).detach() -# chunk_length_out = y_i.shape[time_dim] -# if out_shape is None: -# # infer chunk_shift in the output -# r = float(chunk_length_out)/chunk_length - -# # infer total output length -# T_out = int(r * T) -# out_shape = list(y_i.shape) -# out_shape[time_dim] = T_out - -# if y is None: -# right_context_out = int(math.floor(r*right_context)) -# left_context_out = int(math.floor(r*left_context)) -# chunk_shift_out = chunk_length_out - right_context_out - left_context_out -# # create output tensor -# y = torch.zeros(out_shape) -# #move time dimension to dim 0 -# y = y.transpose(0, time_dim) - -# y_i = y_i.transpose(0, time_dim) - -# if i == 0: -# tend_out = min(tbeg_out + chunk_length_out, T_out) -# y[tbeg_out:tend_out] = y_i -# tbeg_out =+ (chunk_length_out - right_context_out) -# else: -# tend_out = min(int(round(tbeg_out)) + chunk_length_out - left_context_out, T_out) -# dt = tend_out - tbeg_out -# if dt > 0: -# #print('eu', tbeg_out, tend_out, left_context_out,left_context_out+dt, T_out, chunk_length, chunk_length_out, tbeg_in, tend_in) -# y[tbeg_out:tend_out] = y_i[left_context_out:left_context_out+dt] -# tbeg_out += chunk_shift_out - -# tbeg_in += chunk_shift_in - -# # put time dimension back in his place -# y = y.transpose(0, time_dim) - -# return y - - -# def eval_nnet_overlap_add(x, nnet, chunk_length=0, chunk_overlap=None, device=None, time_dim=-1): - -# #assume time is 
the last dimension -# T = x.shape[time_dim] -# if T <= chunk_length or chunk_length == 0: -# if device is not None: -# x = x.to(device) -# return nnet(x).detach() - -# if chunk_overlap is None: -# #infer chunk overlap from network input context -# try: -# left_context, right_context = nnet.in_context() -# except: -# left_context = right_context = 0 - -# chunk_overlap = left_context + right_context - - -# in_shape = x.shape -# chunk_shift_in = chunk_length - chunk_overlap - -# try: -# out_shape = nnet.out_shape(in_shape) -# T_out = out_shape[time_dim] -# r = float(T_out)/T -# except: -# out_shape = None - - -# num_chunks = int(math.ceil((T-chunk_length)/chunk_shift_in+1)) -# #move time dimension to dim 0 -# x = x.transpose(0, time_dim) -# y = None -# N = None -# tbeg_in = 0 -# tbeg_out = 0 -# for i in range(num_chunks): -# tend_in = min(tbeg_in + chunk_length, x.shape[0]) -# #get slice and move back time dimension to last dim -# x_i = x[tbeg_in:tend_in].transpose(0, time_dim) -# if device is not None: -# x_i = x_i.to(device) - -# y_i = nnet(x_i).detach() -# chunk_length_out = y_i.shape[time_dim] -# if out_shape is None: -# # infer chunk_shift in the output -# r = float(chunk_length_out)/chunk_length - -# # infer total output length -# T_out = int(r * T) -# out_shape = list(y_i.shape) -# out_shape[time_dim] = T_out - -# if y is None: -# chunk_shift_out = r*chunk_shift_in -# # create output tensor -# y = torch.zeros(out_shape) -# #move time dimension to dim 0 -# y = y.transpose(0, time_dim) -# count = torch.zeros(T_out) - -# y_i = y_i.transpose(0, time_dim) - -# tend_out = min(int(round(tbeg_out)) + chunk_length_out, T_out) -# dt = tend_out - tbeg_out -# y[tbeg_out:tend_out] += y_i[:dt] -# count[tbeg_out:tend_out] += 1 -# tbeg_out += chunk_shift_out -# tbeg_in += chunk_shift_in - -# # put time dimension back in his place and normalize -# y = y.transpose(0, time_dim)/count - -# return y From 4825d393cf1af165699eac7e9ac54cf224bb6daa Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 8 Jul 2022 17:43:09 -0400 Subject: [PATCH 020/154] added configs for w2v2 xlsr --- ...c2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml | 45 +++++++++++++++++ ...c2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml | 49 ++++++++++++++++++ ...c2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml | 50 +++++++++++++++++++ ...vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml | 45 +++++++++++++++++ ...vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml | 49 ++++++++++++++++++ ...vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml | 50 +++++++++++++++++++ .../conf/wav2vec2xlsr300m_ecapatdnn512x3.yaml | 40 +++++++++++++++ .../conf/wav2vec2xlsr53_ecapatdnn512x3.yaml | 40 +++++++++++++++ ...fig_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh | 49 ++++++++++++++++++ ...onfig_wav2vec2xlr53_ecapatdnn512x3_v1.0.sh | 49 ++++++++++++++++++ 10 files changed, 466 insertions(+) create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml create mode 100644 egs/voxceleb/v2/conf/wav2vec2xlsr300m_ecapatdnn512x3.yaml create mode 100644 egs/voxceleb/v2/conf/wav2vec2xlsr53_ecapatdnn512x3.yaml create 
mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2xlr53_ecapatdnn512x3_v1.0.sh diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..e1d1b1ea --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wav2vec2xlsr300m_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml new file mode 100644 index 00000000..1298a056 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml @@ -0,0 +1,49 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5.5e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 4.4e-3 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 20 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml new file mode 100644 index 00000000..2867cfef --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml @@ -0,0 +1,50 @@ +data: + train: + dataset: + max_chunk_length: 6.0 + min_chunk_length: 6.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 16 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2.3e-4 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 2e-4 + 
warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 2 + eff_batch_size: 192 + train_mode: full + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml new file mode 100644 index 00000000..247f8a7c --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: wav2vec2xlsr53_ecapatdnn512x3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml new file mode 100644 index 00000000..1298a056 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml @@ -0,0 +1,49 @@ +data: + train: + dataset: + max_chunk_length: 3.0 + min_chunk_length: 3.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5.5e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 4.4e-3 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 20 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml new file mode 100644 index 00000000..2867cfef --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml @@ -0,0 +1,50 @@ +data: + train: + dataset: + max_chunk_length: 6.0 + min_chunk_length: 6.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 16 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + max_chunk_length: 4.0 + min_chunk_length: 4.0 + aug_cfg: conf/reverb_noise_aug.yaml + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2.3e-4 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 2e-4 + warmup_steps: 6000 + 
update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 2 + eff_batch_size: 192 + train_mode: full + + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/wav2vec2xlsr300m_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wav2vec2xlsr300m_ecapatdnn512x3.yaml new file mode 100644 index 00000000..1cc7df4c --- /dev/null +++ b/egs/voxceleb/v2/conf/wav2vec2xlsr300m_ecapatdnn512x3.yaml @@ -0,0 +1,40 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-xls-r-300m +xvector: + resnet_enc: + in_feats: 1024 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wav2vec2xlsr53_ecapatdnn512x3.yaml b/egs/voxceleb/v2/conf/wav2vec2xlsr53_ecapatdnn512x3.yaml new file mode 100644 index 00000000..1975bada --- /dev/null +++ b/egs/voxceleb/v2/conf/wav2vec2xlsr53_ecapatdnn512x3.yaml @@ -0,0 +1,40 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-large-xlsr-53 +xvector: + resnet_enc: + in_feats: 1024 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..9225389a --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + 
plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2xlr53_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2xlr53_ecapatdnn512x3_v1.0.sh new file mode 100644 index 00000000..014a5d03 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wav2vec2xlr53_ecapatdnn512x3_v1.0.sh @@ -0,0 +1,49 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr53 + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + From 9a3a16837c077aecc83c50ca8a9e974265e74855 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 22 Jul 2022 17:53:20 -0400 Subject: [PATCH 021/154] added hard negative mining --- ...c2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml | 3 +- ...fig_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh | 1 + hyperion/bin/finetune_wav2vec2xvector.py | 17 +++++++++++ hyperion/torch/data/weighted_seq_sampler.py | 30 ++++++++++++------- 4 files changed, 40 insertions(+), 11 deletions(-) diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml index 2867cfef..fb264a53 100644 --- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml @@ -8,6 +8,7 @@ data: sampler: batch_size: 16 iters_per_epoch: 6 + num_hard_prototypes: 8 data_loader: num_workers: 8 val: @@ -43,7 +44,7 @@ trainer: update_lr_on_opt_step: true use_amp: true log_interval: 1000 - epochs: 2 + epochs: 5 eff_batch_size: 192 train_mode: full diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh index 9225389a..8144f6eb 100644 --- a/egs/voxceleb/v2/global_conf/config_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh +++ b/egs/voxceleb/v2/global_conf/config_wav2vec2xlr300m_ecapatdnn512x3_v1.0.sh @@ -33,6 +33,7 @@ nnet_s3_args="" nnet_s3_name=${nnet_name}.s3 nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py index f2c740da..25722b35 100755 --- a/hyperion/bin/finetune_wav2vec2xvector.py +++ 
b/hyperion/bin/finetune_wav2vec2xvector.py @@ -95,6 +95,22 @@ def init_model(num_classes, in_model_file, rank, **kwargs): return model +def init_hard_prototype_mining(model, train_loader, val_loader, rank): + if not train_loader.batch_sampler.hard_prototype_mining: + return + + if rank == 0: + logging.info("setting hard prototypes") + + affinity_matrix = model.compute_prototype_affinity() + train_loader.batch_sampler.set_hard_prototypes(affinity_matrix) + + if not val_loader.batch_sampler.hard_prototype_mining: + return + + val_loader.batch_sampler.set_hard_prototypes(affinity_matrix) + + def train_model(gpu_id, args): config_logger(args.verbose) @@ -112,6 +128,7 @@ def train_model(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) model = init_model(train_loader.dataset.num_classes, **kwargs) + init_hard_prototype_mining(model, train_loader, val_loader, rank) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: diff --git a/hyperion/torch/data/weighted_seq_sampler.py b/hyperion/torch/data/weighted_seq_sampler.py index e679251b..c50d577d 100644 --- a/hyperion/torch/data/weighted_seq_sampler.py +++ b/hyperion/torch/data/weighted_seq_sampler.py @@ -43,7 +43,6 @@ def __init__( num_egs_per_utt=1, var_batch_size=False, num_hard_prototypes=0, - num_egs_per_hard_prototype=1, affinity_matrix=None, iters_per_epoch=None, ): @@ -66,7 +65,6 @@ def __init__( self.num_egs_per_utt = num_egs_per_utt self.var_batch_size = var_batch_size self.num_hard_prototypes = num_hard_prototypes - self.num_egs_per_hard_prototype = num_egs_per_hard_prototype self.batch = 0 self.rank = rank @@ -160,6 +158,7 @@ def __iter__(self): self.batch = 0 return self + @property def hard_prototype_mining(self): return self.num_hard_prototypes > 0 @@ -168,14 +167,22 @@ def set_hard_prototypes(self, affinity_matrix): self.hard_prototypes = None return - affinity_matrix[np.diag(affinity_matrix.shape[0])] = -1.0 + # affinity_matrix[np.diag(affinity_matrix.shape[0])] = -1.0 + # hard prototypes for a class are itself and k-1 closest to it. 
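+        # torch.topk keeps, for each class, the num_hard_prototypes classes with
+        # the highest affinity; with the diagonal left unmasked, each class is
+        # included as its own prototype.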
self.hard_prototypes = torch.topk( affinity_matrix, self.num_hard_prototypes, dim=-1 ).indices + def get_hard_prototypes(self, class_idx): + return self.hard_prototypes[class_idx].flatten() + def _get_utt_idx_basic(self, batch_mult=1): dataset = self.dataset num_classes_per_batch = batch_mult * self._num_classes_per_batch + if self.hard_prototype_mining: + num_classes_per_batch = int( + math.ceil(num_classes_per_batch / self.num_hard_prototypes) + ) if dataset.class_weights is None: class_idx = torch.randint( @@ -188,6 +195,9 @@ def _get_utt_idx_basic(self, batch_mult=1): replacement=True, ) + if self.hard_prototype_mining: + class_idx = self.get_hard_prototypes(class_idx) + if self.num_egs_per_class > 1: class_idx = class_idx.repeat(self.num_egs_per_class) @@ -206,6 +216,10 @@ def _get_utt_idx_seq_st_max_length(self, chunk_length, batch_mult=1): dataset = self.dataset num_classes_per_batch = batch_mult * self._num_classes_per_batch + if self.hard_prototype_mining: + num_classes_per_batch = int( + math.ceil(num_classes_per_batch / self.num_hard_prototypes) + ) # first we sample the batch classes class_weights = dataset.class_weights.clone() @@ -219,6 +233,9 @@ def _get_utt_idx_seq_st_max_length(self, chunk_length, batch_mult=1): class_weights, num_samples=num_classes_per_batch, replacement=True ) + if self.hard_prototype_mining: + class_idx = self.get_hard_prototypes(class_idx) + utt_idx = torch.zeros( (len(class_idx) * self.num_egs_per_class,), dtype=torch.long ) @@ -293,7 +310,6 @@ def filter_args(**kwargs): "num_egs_per_class", "num_egs_per_utt", "num_hard_prototypes", - "num_egs_per_hard_prototype", ) return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @@ -351,12 +367,6 @@ def add_class_args(parser, prefix=None): default=0, help=("number of hard prototype classes per batch"), ) - parser.add_argument( - "--num-egs-per-hard-prototype", - type=int, - default=1, - help=("number of samples per hard prototype class in the batch"), - ) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) From 45b5dcd309beb525712969023c26d69e40d6bdca Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 28 Jul 2022 17:55:29 -0400 Subject: [PATCH 022/154] refactorized segment samplers --- hyperion/torch/data/bucketing_seg_sampler.py | 123 ++++++ .../data/class_weighted_seg_chunk_sampler.py | 408 ++++++++++++++++++ hyperion/torch/data/hyp_sampler.py | 43 ++ hyperion/torch/data/seg_chunk_sampler.py | 148 +++++++ hyperion/torch/data/seg_sampler.py | 189 ++++++++ hyperion/torch/data/seg_sampler_factory.py | 225 ++++++++++ hyperion/torch/optim/factory.py | 2 + hyperion/utils/class_info.py | 28 ++ hyperion/utils/feature_set.py | 74 ++++ hyperion/utils/info_table.py | 383 ++++++++++++++++ hyperion/utils/recording_set.py | 61 +++ hyperion/utils/segment_set.py | 11 + 12 files changed, 1695 insertions(+) create mode 100644 hyperion/torch/data/bucketing_seg_sampler.py create mode 100644 hyperion/torch/data/class_weighted_seg_chunk_sampler.py create mode 100644 hyperion/torch/data/hyp_sampler.py create mode 100644 hyperion/torch/data/seg_chunk_sampler.py create mode 100644 hyperion/torch/data/seg_sampler.py create mode 100644 hyperion/torch/data/seg_sampler_factory.py create mode 100644 hyperion/utils/class_info.py create mode 100644 hyperion/utils/feature_set.py create mode 100644 hyperion/utils/info_table.py create mode 100644 hyperion/utils/recording_set.py create mode 100644 hyperion/utils/segment_set.py diff --git 
a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py
new file mode 100644
index 00000000..85b6772e
--- /dev/null
+++ b/hyperion/torch/data/bucketing_seg_sampler.py
@@ -0,0 +1,123 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import math
+from jsonargparse import ArgumentParser, ActionParser
+import logging
+
+import numpy as np
+
+import torch
+from .hyp_sampler import HypSampler
+from .seg_sampler import SegSampler
+import torch.distributed as dist
+
+
+class BucketingSegSampler(HypSampler):
+    def __init__(
+        self,
+        seg_set,
+        base_sampler=SegSampler,
+        num_buckets=10,
+        length_column="duration",
+        seed=1234,
+        **base_kwargs
+    ):
+        super().__init__(shuffle=False, seed=seed)
+        self.seg_set = seg_set
+        self.base_sampler = base_sampler
+        self.base_kwargs = base_kwargs
+        self.base_kwargs["seed"] = seed
+        self.num_buckets = num_buckets
+        self.length_column = length_column
+        self._create_bucket_samplers()
+        self._compute_len()
+        self.depleted_buckets = torch.zeros((num_buckets,), dtype=torch.bool)
+
+    def create_buckets(self, seg_ids, seg_lengths):
+        # sort the segments by length and cut the cumulative duration into
+        # num_buckets buckets of roughly equal total duration
+        seg_ids = np.asarray(seg_ids)
+        seg_lengths = np.asarray(seg_lengths)
+        sort_idx = np.argsort(seg_lengths)
+        sort_ids = seg_ids[sort_idx]
+        sort_lengths = seg_lengths[sort_idx]
+        cum_lengths = np.cumsum(sort_lengths)
+        bucket_length = cum_lengths[-1] / self.num_buckets
+        buckets = []
+        for i in range(self.num_buckets):
+            # take the segments whose remaining cumulative length falls
+            # inside the current bucket
+            bucket_idx = (cum_lengths <= bucket_length) & (cum_lengths > 0)
+            bucket_i = sort_ids[bucket_idx]
+            buckets.append(bucket_i)
+            cum_lengths -= bucket_length
+
+        return buckets
+
+    def _create_bucket_samplers(self):
+        buckets = self.create_buckets(
+            self.seg_set["id"], self.seg_set[self.length_column]
+        )
+        bucket_samplers = []
+        for i in range(self.num_buckets):
+            # InfoTable.filter selects the rows belonging to the bucket
+            seg_set_i = self.seg_set.filter(items=buckets[i])
+            sampler_i = self.base_sampler(seg_set_i, **self.base_kwargs)
+            bucket_samplers.append(sampler_i)
+
+        self.bucket_samplers = bucket_samplers
+
+    def _compute_len(self):
+        self._len = 0
+        for i in range(self.num_buckets):
+            self._len += len(self.bucket_samplers[i])
+
+    def __len__(self):
+        return self._len
+
+    def set_epoch(self, epoch):
+        for i in range(self.num_buckets):
+            self.bucket_samplers[i].set_epoch(epoch)
+
+    def __iter__(self):
+        super().__iter__()
+        self.depleted_buckets[:] = False
+        for i in range(self.num_buckets):
+            self.bucket_samplers[i].__iter__()
+
+        return self
+
+    def all_buckets_depleted(self):
+        return torch.all(self.depleted_buckets).item()
+
+    def __next__(self):
+
+        if self.batch == self._len or self.all_buckets_depleted():
+            raise StopIteration
+
+        while True:
+            bucket_idx = torch.randint(
+                low=0, high=self.num_buckets, size=(1,), generator=self.rng
+            ).item()
+            if self.depleted_buckets[bucket_idx]:
+                continue
+
+            bucket = self.bucket_samplers[bucket_idx]
+            try:
+                batch = next(bucket)
+                break
+            except StopIteration:
+                self.depleted_buckets[bucket_idx] = True
+                if self.all_buckets_depleted():
+                    raise StopIteration()
+
+        if self.batch == 0:
+            logging.info("batch 0 chunks=%s", str(batch[:10]))
+
+        self.batch += 1
+        return batch
+
+    @staticmethod
+    def filter_args(**kwargs):
+
+        valid_args = (
+            "num_buckets",
+            "length_column",
+            "shuffle",
+            "seed",
+        )
+
+        return dict((k, kwargs[k]) for k in valid_args if k in kwargs)
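For context, a minimal sketch of how this sampler is meant to be driven. The table
contents are made up, and the imports assume the module paths introduced in this
patch series:

import pandas as pd
from hyperion.utils.segment_set import SegmentSet
from hyperion.torch.data.bucketing_seg_sampler import BucketingSegSampler

# toy segment table; "id" and "duration" are the columns the sampler expects
seg_set = SegmentSet(
    pd.DataFrame(
        {
            "id": [f"seg{i}" for i in range(100)],
            "duration": [1.0 + 0.1 * i for i in range(100)],
        }
    )
)
sampler = BucketingSegSampler(seg_set, num_buckets=4, min_batch_size=8)
sampler.set_epoch(0)
for batch in sampler:
    # each batch comes from a single duration bucket, so its segments
    # have similar lengths and need little padding
    pass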
+""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import math +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +import logging + +import numpy as np +import pandas as pd + +import torch +from .hyp_sampler import HypSampler + + +class ClassWeightedRandomSegChunkSampler(HypSampler): + def __init__( + self, + seg_set, + class_info, + min_chunk_length, + max_chunk_length=None, + min_batch_size=1, + max_batch_size=None, + max_batch_length=None, + num_chunks_per_seg_epoch="auto", + num_segs_per_class=1, + num_chunks_per_seg=1, + num_hard_prototypes=0, + affinity_matrix=None, + class_column="class_id", + length_column="duration", + iters_per_epoch=None, + batch_size=None, + seed=1234, + ): + super().__init__(seed=seed) + self.class_column = class_column + self.length_column = length_column + self.seg_set = seg_set + self.class_info = class_info + self.min_chunk_length = min_chunk_length + self.max_chunk_length = ( + min_chunk_length if max_chunk_length is None else max_chunk_length + ) + + # computing min-batch-size + if batch_size is not None: + min_batch_size = batch_size + + min_batch_size = max(num_segs_per_class * num_chunks_per_seg, min_batch_size) + + # computing max-batch-size + if max_batch_length is None: + max_batch_size_0 = int(min_batch_size * max_chunk_length / min_chunk_length) + else: + max_batch_size_0 = int(max_batch_length / max_chunk_length) + + max_batch_size = ( + max_batch_size_0 + if max_batch_size is None + else min(max_batch_size_0, max_batch_size) + ) + + self.min_batch_size = min_batch_size + self.max_batch_size = max_batch_size + self.avg_batch_size = (min_batch_size + max_batch_size) / 2 + self.var_batch_size = self.min_batch_size != self.max_batch_size + + self.num_segs_per_class = num_segs_per_class + self.num_chunks_per_seg = num_chunks_per_seg + + self.num_hard_prototypes = num_hard_prototypes + self.batch = 0 + + # compute the number of batches / epoch + # legacy config parameter + num_chunks_per_seg_epoch = ( + iters_per_epoch if iters_per_epoch is not None else num_chunks_per_seg_epoch + ) + self._set_num_chunks_per_seg_epoch(num_chunks_per_seg_epoch) + self._compute_len() + + self._gather_class_info() + + self.set_hard_prototypes(affinity_matrix) + + logging.info( + "batches/epoch=%d min-batch-size=%d, max-batch-size=%d avg-batch-size/gpu=%.2f avg-classes/batch=%.2f samples/(seg*epoch)=%d", + self._len, + self.min_batch_size, + self.max_batch_size, + self.avg_batch_size, + self.avg_batch_size / num_segs_per_class / num_chunks_per_seg, + self.num_chunks_per_seg_epoch, + ) + + def _set_seed(self): + self.rng.manual_seed(self.seed + 10 * self.epoch + 100 * self.rank) + + def _set_num_chunks_per_seg_epoch(self, num_chunks_per_seg_epoch): + if num_chunks_per_seg_epoch == "auto": + self._compute_num_chunks_per_seg_epoch_auto() + else: + self.num_egs_per_utt_epoch = num_chunks_per_seg_epoch + + def _compute_num_chunks_per_seg_epoch_auto(self): + seg_set = self.seg_set + avg_seg_length = np.mean(seg_set[self.length_column]) + avg_chunk_length = (self.max_chunk_length + self.min_chunk_length) / 2 + self.num_chunks_per_seg_epoch = math.ceil(avg_seg_length / avg_chunk_length) + logging.debug( + "num egs per segment and epoch: %d", self.num_chunks_per_seg_epoch + ) + + def _compute_len(self): + self._len = int( + math.ceil( + self.num_chunks_per_seg_epoch + * len(self.seg_set) + / self.avg_batch_size + / self.world_size + ) + ) + + def __len__(self): + return 
+
+    def _gather_class_info(self):
+        # we get some extra info that we need for the classes.
+
+        # we need the maximum/minimum segment duration for each class.
+        max_dur = np.zeros(len(self.class_info))
+        min_dur = np.zeros(len(self.class_info))
+        for i, c in enumerate(self.class_info["id"]):
+            seg_idx = self.seg_set[self.class_column] == c
+            durs_i = self.seg_set.loc[seg_idx, self.length_column]
+            max_dur[i] = durs_i.max()
+            min_dur[i] = durs_i.min()
+
+        self.class_info.df["max_seg_duration"] = max_dur
+        self.class_info.df["min_seg_duration"] = min_dur
+
+        # classes whose longest segment is still shorter than min_chunk_length
+        # can never be sampled, so they get zero weight
+        zero_weight = self.class_info["max_seg_duration"] < self.min_chunk_length
+        if np.any(zero_weight):
+            self.class_info.loc[zero_weight, "weights"] = 0
+            self.class_info.df["weights"] /= self.class_info.df["weights"].sum()
+
+        self.var_weights = np.any(
+            self.seg_set[self.length_column] < self.max_chunk_length
+        )
+
+        self.map_idx_to_ids = self.class_info[["class_idx", "id"]]
+        self.map_idx_to_ids.set_index("class_idx", inplace=True)
+
+    @property
+    def hard_prototype_mining(self):
+        return self.num_hard_prototypes > 1
+
+    def set_hard_prototypes(self, affinity_matrix):
+        if affinity_matrix is None:
+            self.hard_prototypes = None
+            return
+
+        # hard prototypes for a class are itself and the k-1 classes
+        # closest to it in the affinity matrix.
+        self.hard_prototypes = torch.topk(
+            affinity_matrix, self.num_hard_prototypes, dim=-1
+        ).indices
+
+    def get_hard_prototypes(self, class_idx):
+        return self.hard_prototypes[class_idx].flatten()
+
+    def _sample_chunk_length(self):
+        if self.var_batch_size:
+            return (
+                torch.rand(size=(1,), generator=self.rng).item()
+                * (self.max_chunk_length - self.min_chunk_length)
+                + self.min_chunk_length
+            )
+
+        return self.min_chunk_length
+
+    def _compute_batch_size(self, chunk_length):
+        return int(self.min_batch_size * self.max_chunk_length / chunk_length)
+
+    def _compute_num_classes_per_batch(self, batch_size):
+        num_classes = batch_size / self.num_segs_per_class / self.num_chunks_per_seg
+        if self.hard_prototype_mining:
+            num_classes /= self.num_hard_prototypes
+        return int(math.ceil(num_classes))
+
+    def _get_class_weights(self, chunk_length):
+        if not self.var_weights:
+            return self.class_info["weights"].values
+
+        # get classes where all segments are shorter than
+        # chunk length and put weight to 0
+        zero_idx = self.class_info["max_seg_duration"] < chunk_length
+        if not np.any(zero_idx):
+            return self.class_info["weights"].values
+
+        class_weights = self.class_info["weights"].values.copy()
+        class_weights[zero_idx] = 0.0
+        # renormalize weights
+        class_weights /= class_weights.sum()
+        return class_weights
+
+    def _sample_classes(self, num_classes, chunk_length):
+        weights = self._get_class_weights(chunk_length)
+        row_idx = torch.multinomial(
+            torch.from_numpy(weights),
+            num_samples=num_classes,
+            replacement=True,
+            generator=self.rng,
+        )
+
+        class_ids = self.class_info.iloc[row_idx.numpy()].id.values
+        if self.hard_prototype_mining:
+            # map class ids to class indexes
+            class_idx = self.class_info.loc[class_ids, "class_idx"].values
+            class_idx = self.get_hard_prototypes(torch.from_numpy(class_idx))
+            # map back to class ids
+            class_ids = self.map_idx_to_ids.loc[class_idx.numpy(), "id"].values
+
+        return class_ids
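+
+    # Example of the reweighting done in _get_class_weights (hypothetical
+    # values): with weights [0.5, 0.3, 0.2] and the last class having
+    # max_seg_duration below the sampled chunk length, the weights become
+    # [0.5, 0.3, 0.0] and are renormalized to [0.625, 0.375, 0.0] before
+    # torch.multinomial is called.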
+
+    def _sample_segs(self, class_ids, chunk_length):
+
+        seg_ids = []
+        for c in class_ids:
+            # for each class we sample segments longer than chunk length
+            # get segments belonging to c
+            seg_mask = (self.seg_set[self.class_column] == c) & (
+                self.seg_set[self.length_column] > chunk_length
+            )
+            seg_ids_c = self.seg_set.loc[seg_mask, "id"].values
+            # sample num_segs_per_class random segments
+            sel_seg_idx_c = torch.randint(
+                low=0,
+                high=len(seg_ids_c),
+                size=(self.num_segs_per_class,),
+                generator=self.rng,
+            )
+            sel_seg_ids_c = seg_ids_c[sel_seg_idx_c.numpy()]
+            seg_ids.extend(sel_seg_ids_c)
+
+        return seg_ids
+
+    def _sample_chunks(self, seg_ids, chunk_length):
+        chunks = []
+        scale = self.seg_set.loc[seg_ids, self.length_column].values - chunk_length
+        for i in range(self.num_chunks_per_seg):
+            # random chunk start inside each selected segment
+            start = (
+                scale * torch.rand(size=(len(seg_ids),), generator=self.rng).numpy()
+            )
+            chunks_i = [(id, s, chunk_length) for id, s in zip(seg_ids, start)]
+            chunks.extend(chunks_i)
+
+        return chunks
+
+    def __next__(self):
+
+        if self.batch == self._len:
+            raise StopIteration
+
+        chunk_length = self._sample_chunk_length()
+        batch_size = self._compute_batch_size(chunk_length)
+        num_classes = self._compute_num_classes_per_batch(batch_size)
+        class_ids = self._sample_classes(num_classes, chunk_length)
+        seg_ids = self._sample_segs(class_ids, chunk_length)
+        chunks = self._sample_chunks(seg_ids, chunk_length)
+        if self.batch == 0:
+            logging.info("batch 0 uttidx=%s", str(chunks[:10]))
+
+        self.batch += 1
+        return chunks
+
+    @staticmethod
+    def filter_args(**kwargs):
+
+        valid_args = (
+            "min_chunk_length",
+            "max_chunk_length",
+            "min_batch_size",
+            "max_batch_size",
+            "max_batch_length",
+            "num_chunks_per_seg_epoch",
+            "num_segs_per_class",
+            "num_chunks_per_seg",
+            "num_hard_prototypes",
+            "class_column",
+            "length_column",
+            "iters_per_epoch",
+            "batch_size",
+            "shuffle",
+            "seed",
+        )
+
+        return dict((k, kwargs[k]) for k in valid_args if k in kwargs)
+
+    @staticmethod
+    def add_class_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        parser.add_argument(
+            "--min-chunk-length",
+            type=float,
+            default=4.0,
+            help=("minimum length of the segment chunks"),
+        )
+        parser.add_argument(
+            "--max-chunk-length",
+            type=float,
+            default=None,
+            help=("maximum length of segment chunks"),
+        )
+
+        parser.add_argument(
+            "--min-batch-size",
+            type=int,
+            default=1,
+            help=("minimum batch size per gpu"),
+        )
+        parser.add_argument(
+            "--max-batch-size",
+            type=int,
+            default=None,
+            help=(
+                "maximum batch size per gpu, if None, estimated from max_batch_length"
+            ),
+        )
+
+        parser.add_argument(
+            "--batch-size",
+            default=128,
+            type=int,
+            help=("deprecated, use min-batch-size instead"),
+        )
+
+        parser.add_argument(
+            "--max-batch-length",
+            type=float,
+            default=None,
+            help=(
+                "maximum accumulated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths"
+            ),
+        )
+
+        parser.add_argument(
+            "--iters-per-epoch",
+            default=None,
+            type=lambda x: x if (x == "auto" or x is None) else float(x),
+            help=("deprecated, use --num-chunks-per-seg-epoch instead"),
+        )
+
+        parser.add_argument(
+            "--num-chunks-per-seg-epoch",
+            default="auto",
+            type=lambda x: x if x == "auto" else float(x),
+            help=("number of times we sample a segment in each epoch"),
+        )
+
+        parser.add_argument(
+            "--num-segs-per-class",
+            type=int,
+            default=1,
+            help=("number of segments per class in batch"),
+        )
+        parser.add_argument(
+            "--num-chunks-per-seg",
+            type=int,
+            default=1,
+            help=("number of chunks per segment in batch"),
+        )
+        parser.add_argument(
+            "--num-hard-prototypes",
+            type=int,
+            default=0,
+            help=("number of hard prototype classes per batch"),
+        )
+
+        parser.add_argument(
+            "--shuffle",
+            action=ActionYesNo,
+            help="shuffles the segments or chunks at the beginning of the epoch",
+        )
+
+        parser.add_argument(
+            "--seed",
+            type=int,
+            default=1234,
+            help=("seed for sampler random number generator"),
+        )
+
+        parser.add_argument(
+            "--length-column",
+            default="duration",
+            help="which column in the segment table indicates the duration of the segment",
+        )
+        parser.add_argument(
+            "--class-column",
+            default="class_id",
+            help="which column in the segment table indicates the class of the segment",
+        )
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
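A minimal usage sketch of this sampler. The segment and class tables below are made
up for illustration, and the constructors come from the hyperion.utils classes added
later in this patch:

import pandas as pd
from hyperion.utils.segment_set import SegmentSet
from hyperion.utils.class_info import ClassInfo
from hyperion.torch.data.class_weighted_seg_chunk_sampler import (
    ClassWeightedRandomSegChunkSampler,
)

seg_set = SegmentSet(
    pd.DataFrame(
        {
            "id": ["s1", "s2", "s3", "s4"],
            "class_id": ["spk1", "spk1", "spk2", "spk2"],
            "duration": [10.0, 8.0, 12.0, 6.0],
        }
    )
)
class_info = ClassInfo(pd.DataFrame({"id": ["spk1", "spk2"]}))
sampler = ClassWeightedRandomSegChunkSampler(
    seg_set, class_info, min_chunk_length=2.0, max_chunk_length=4.0, min_batch_size=4
)
sampler.set_epoch(0)
batch = next(iter(sampler))  # list of (seg_id, start, chunk_length) tuples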
diff --git a/hyperion/torch/data/hyp_sampler.py b/hyperion/torch/data/hyp_sampler.py
new file mode 100644
index 00000000..18ae4b5d
--- /dev/null
+++ b/hyperion/torch/data/hyp_sampler.py
@@ -0,0 +1,43 @@
+import math
+from jsonargparse import ArgumentParser, ActionParser
+import logging
+
+import numpy as np
+
+import torch
+from torch.utils.data import Sampler
+import torch.distributed as dist
+
+
+class HypSampler(Sampler):
+    def __init__(self, shuffle=False, seed=1234):
+        super().__init__(None)
+        self.epoch = 0
+        self.batch = 0
+        self.shuffle = shuffle
+        self.seed = seed
+
+        if dist.is_available() and dist.is_initialized():
+            rank = dist.get_rank()
+            world_size = dist.get_world_size()
+        else:
+            rank = 0
+            world_size = 1
+
+        self.rank = rank
+        self.world_size = world_size
+        self.rng = torch.Generator()
+
+    def set_epoch(self, epoch):
+        self.epoch = epoch
+
+    def _set_seed(self):
+        if self.shuffle:
+            self.rng.manual_seed(self.seed + 10 * self.epoch)
+        else:
+            self.rng.manual_seed(self.seed)
+
+    def __iter__(self):
+        self.batch = 0
+        self._set_seed()
+        return self
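The contract for HypSampler subclasses is small: implement __len__ and __next__, and
call super().__iter__() so the per-epoch RNG seeding kicks in. A toy subclass for
illustration only (not part of the patch):

import torch
from hyperion.torch.data.hyp_sampler import HypSampler

class RandomIndexSampler(HypSampler):
    """Toy sampler that yields fixed-size batches of indices 0..n-1."""

    def __init__(self, n, batch_size=8, shuffle=True, seed=1234):
        super().__init__(shuffle=shuffle, seed=seed)
        self.n = n
        self.batch_size = batch_size
        self._len = n // batch_size

    def __len__(self):
        return self._len

    def __next__(self):
        if self.batch == self._len:
            raise StopIteration
        # self.rng is reseeded per epoch by HypSampler.__iter__
        idx = torch.randperm(self.n, generator=self.rng)[: self.batch_size]
        self.batch += 1
        return idx.tolist()

sampler = RandomIndexSampler(100, batch_size=10)
sampler.set_epoch(0)
batches = list(sampler)  # 10 batches of 10 indices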
diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py
new file mode 100644
index 00000000..a971f8ce
--- /dev/null
+++ b/hyperion/torch/data/seg_chunk_sampler.py
@@ -0,0 +1,148 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import math
+from jsonargparse import ArgumentParser, ActionParser
+import logging
+
+import numpy as np
+import pandas as pd
+
+import torch
+from .hyp_sampler import HypSampler
+from .seg_sampler import SegSampler
+import torch.distributed as dist
+
+
+class SegChunkSampler(HypSampler):
+    def __init__(
+        self,
+        seg_set,
+        min_chunk_length,
+        max_chunk_length=None,
+        base_sampler=SegSampler,
+        length_column="duration",
+        shuffle=False,
+        seed=1234,
+        **base_kwargs
+    ):
+
+        super().__init__(shuffle=shuffle, seed=seed)
+        self.seg_set = seg_set
+        self.min_chunk_length = min_chunk_length
+        self.max_chunk_length = (
+            min_chunk_length if max_chunk_length is None else max_chunk_length
+        )
+        self.avg_chunk_length = (self.max_chunk_length + self.min_chunk_length) / 2
+        self.chunk_set = None
+        self.length_column = length_column
+        self.chunk_sampler = base_sampler
+        self.base_kwargs = base_kwargs
+        self.base_kwargs["seed"] = seed
+        self.base_kwargs["shuffle"] = shuffle
+        if "subbase_sampler" in base_kwargs:
+            base_kwargs["base_sampler"] = base_kwargs.pop("subbase_sampler")
+
+        self.__iter__()
+
+    def __len__(self):
+        return len(self._seg_sampler)
+
+    @property
+    def duration_is_random(self):
+        return self.min_chunk_length != self.max_chunk_length
+
+    def get_random_duration(self):
+        if self.duration_is_random:
+            return (
+                torch.rand(size=(1,), generator=self.rng).item()
+                * (self.max_chunk_length - self.min_chunk_length)
+                + self.min_chunk_length
+            )
+        else:
+            return self.min_chunk_length
+
+    def _create_chunks(self):
+
+        chunks = []
+        for id, dur in zip(self.seg_set["id"], self.seg_set[self.length_column]):
+            if dur < self.min_chunk_length:
+                # discard too short sequences
+                continue
+
+            # making it this way, we get the same number of chunks in all epochs
+            num_chunks = math.ceil(dur / self.avg_chunk_length)
+            start = 0
+            for i in range(num_chunks - 1):
+                chunk_dur = self.get_random_duration()
+                chunk = (id, start, chunk_dur)
+                chunks.append(chunk)
+                start += chunk_dur
+
+            # special treatment for last chunk we get from the recording
+            remainder = dur - start
+            if remainder > self.max_chunk_length:
+                # here we discard part of the end
+                chunk = (id, start, self.max_chunk_length)
+            elif remainder < self.min_chunk_length:
+                # here we overlap with second last chunk
+                chunk = (id, dur - self.min_chunk_length, self.min_chunk_length)
+            else:
+                # here the last chunk is what it is left
+                chunk = (id, start, remainder)
+
+            chunks.append(chunk)
+
+        self.chunk_set = pd.DataFrame(
+            chunks, columns=["id", "chunk_start", self.length_column]
+        )
+
+    def __iter__(self):
+        super().__iter__()
+        self._create_chunks()
+        self._seg_sampler = self.chunk_sampler(self.chunk_set, **self.base_kwargs)
+        self._seg_sampler.set_epoch(self.epoch)
+        self._seg_sampler.__iter__()
+
+        return self
+
+    def __next__(self):
+
+        return next(self._seg_sampler)
+
+    @staticmethod
+    def filter_args(**kwargs):
+
+        valid_args = (
+            "min_chunk_length",
+            "max_chunk_length",
+            "length_column",
+            "shuffle",
+            "seed",
+        )
+
+        return dict((k, kwargs[k]) for k in valid_args if k in kwargs)
diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py
new file mode 100644
index 00000000..6802cc8e
--- /dev/null
+++ b/hyperion/torch/data/seg_sampler.py
@@ -0,0 +1,189 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import math
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+import logging
+
+import numpy as np
+
+import torch
+from .hyp_sampler import HypSampler
+
+
+class SegSampler(HypSampler):
+    def __init__(
+        self,
+        seg_set,
+        min_batch_size=1,
+        max_batch_size=None,
+        max_batch_length=None,
+        length_column="duration",
+        shuffle=False,
+        drop_last=False,
+        seed=1234,
+    ):
+        super().__init__(shuffle=shuffle, seed=seed)
+        self.seg_set = seg_set
+        self.min_batch_size = min_batch_size
+        self.max_batch_size = max_batch_size
+        self.max_batch_length = max_batch_length
+        self.var_batch_size = max_batch_length is not None
+        self.length_column = length_column
+        if self.var_batch_size:
+            avg_batch_size = max_batch_length / np.mean(
+                self.seg_set[self.length_column]
+            )
+        else:
+            avg_batch_size = min_batch_size
+
+        num_batches = len(self.seg_set) / avg_batch_size / self.world_size
+        if drop_last:
+            self._len = int(num_batches)
+        else:
+            self._len = int(math.ceil(num_batches))
+
+        self._permutation = None
+
+    def __len__(self):
+        return self._len
+
+    def _shuffle_segs(self):
+        self._permutation = torch.randperm(len(self.seg_set), generator=self.rng)
+
+    def __iter__(self):
+        super().__iter__()
+        if self.shuffle:
+            self._shuffle_segs()
+
+        self.start = self.rank
+        return self
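+
+    # Numeric illustration of the variable-size batching in __next__ below
+    # (made-up durations): with max_batch_length=30, segments of 4s and 5s
+    # are accepted (running max 5 x 2 = 10 <= 30), but adding a 12s segment
+    # would give 12 x 3 = 36 > 30, so the batch closes with two segments.
+    # Longer segments therefore produce proportionally smaller batches.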
+
+    def __next__(self):
+
+        if self.batch == self._len:
+            raise StopIteration
+
+        if self.var_batch_size:
+            idxs = []
+            max_length = 0
+            batch_size = 0
+            while True:
+                if self.shuffle:
+                    idx = self._permutation[self.start].item()
+                else:
+                    idx = self.start
+
+                max_length = max(
+                    max_length, self.seg_set.iloc[idx][self.length_column]
+                )
+                if max_length * (batch_size + 1) > self.max_batch_length:
+                    break
+
+                idxs.append(idx)
+                self.start = (self.start + self.world_size) % len(self.seg_set)
+                batch_size += 1
+                if (
+                    self.max_batch_size is not None
+                    and batch_size >= self.max_batch_size
+                ):
+                    break
+
+            assert len(idxs) >= self.min_batch_size
+            idx = idxs
+        else:
+            stop = min(self.start + self.min_batch_size, len(self.seg_set))
+            if self.shuffle:
+                idx = self._permutation[self.start : stop].numpy()
+            else:
+                idx = slice(self.start, stop)
+
+            self.start = stop
+
+        # use iloc here since chunk tables can repeat the same segment id
+        rows = self.seg_set.iloc[idx]
+        seg_ids = rows.id.values
+
+        if self.batch == 0:
+            logging.info("batch 0 chunks=%s", str(seg_ids[:10]))
+
+        self.batch += 1
+        if "chunk_start" in self.seg_set:
+            return [
+                (id, s, d)
+                for id, s, d in zip(
+                    seg_ids,
+                    rows["chunk_start"].values,
+                    rows[self.length_column].values,
+                )
+            ]
+
+        return seg_ids
+
+    @staticmethod
+    def filter_args(**kwargs):
+
+        valid_args = (
+            "min_batch_size",
+            "max_batch_size",
+            "max_batch_length",
+            "length_column",
+            "shuffle",
+            "drop_last",
+            "seed",
+        )
+
+        return dict((k, kwargs[k]) for k in valid_args if k in kwargs)
+
+    @staticmethod
+    def add_class_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        parser.add_argument(
+            "--min-batch-size",
+            type=int,
+            default=1,
+            help=("minimum batch size per gpu"),
+        )
+        parser.add_argument(
+            "--max-batch-size",
+            type=int,
+            default=None,
+            help=(
+                "maximum batch size per gpu, if None, estimated from max_batch_length"
+            ),
+        )
+
+        parser.add_argument(
+            "--max-batch-length",
+            type=float,
+            default=None,
+            help=(
+                "maximum accumulated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths"
+            ),
+        )
+
+        parser.add_argument(
+            "--drop-last",
+            action=ActionYesNo,
+            help="drops the last batch of the epoch",
+        )
+
+        parser.add_argument(
+            "--shuffle",
+            action=ActionYesNo,
+            help="shuffles the segments or chunks at the beginning of the epoch",
+        )
+
+        parser.add_argument(
+            "--seed",
+            type=int,
+            default=1234,
+            help=("seed for sampler random number generator"),
+        )
+
+        parser.add_argument(
+            "--length-column",
+            default="duration",
+            help="which column in the segment table indicates the duration of the segment",
+        )
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
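A quick sketch of SegSampler on its own, with a toy table (values illustrative only):

import pandas as pd
from hyperion.utils.segment_set import SegmentSet
from hyperion.torch.data.seg_sampler import SegSampler

seg_set = SegmentSet(
    pd.DataFrame({"id": [f"s{i}" for i in range(10)], "duration": [3.0] * 10})
)
sampler = SegSampler(seg_set, min_batch_size=4, shuffle=True)
sampler.set_epoch(0)
for seg_ids in sampler:
    # without a "chunk_start" column, batches are arrays of segment ids;
    # with it, batches are (id, chunk_start, duration) tuples
    print(seg_ids)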
diff --git a/hyperion/torch/data/seg_sampler_factory.py b/hyperion/torch/data/seg_sampler_factory.py
new file mode 100644
index 00000000..e3ba84f8
--- /dev/null
+++ b/hyperion/torch/data/seg_sampler_factory.py
@@ -0,0 +1,225 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+from typing import Union, Optional
+import logging
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+
+from .audio_dataset import AudioDataset
+from .feat_seq_dataset import FeatSeqDataset
+
+from .seg_sampler import SegSampler
+from .class_weighted_seg_chunk_sampler import ClassWeightedRandomSegChunkSampler
+from .seg_chunk_sampler import SegChunkSampler
+from .bucketing_seg_sampler import BucketingSegSampler
+
+sampler_dict = {
+    "class_weighted_random_seg_chunk_sampler": ClassWeightedRandomSegChunkSampler,
+    "seg_sampler": SegSampler,
+    "seg_chunk_sampler": SegChunkSampler,
+    "bucketing_seg_sampler": BucketingSegSampler,
+}
+
+
+class SegSamplerFactory(object):
+    """Factory class to create different types of samplers for
+    sequential data like audio or acoustic features.
+    """
+
+    @staticmethod
+    def create(
+        dataset: Union[AudioDataset, FeatSeqDataset],
+        sampler_type: str = "class_weighted_random_seg_chunk_sampler",
+        base_sampler_type: str = "seg_sampler",
+        subbase_sampler_type: str = "seg_sampler",
+        **kwargs,
+    ):
+        """Function that creates a sequence sampler based on a dataset, sampler_type and sampler arguments.
+
+        Args:
+          dataset: sequence dataset object containing the data info of class AudioDataset or FeatSeqDataset.
+          sampler_type: string indicating the sampler type.
+        """
+
+        sampler_class = sampler_dict[sampler_type]
+        sampler_kwargs = sampler_class.filter_args(**kwargs)
+
+        if sampler_type in ["bucketing_seg_sampler", "seg_chunk_sampler"]:
+            base_sampler_class = sampler_dict[base_sampler_type]
+            base_sampler_kwargs = base_sampler_class.filter_args(**kwargs)
+            sampler_kwargs.update(base_sampler_kwargs)
+            sampler_kwargs["base_sampler"] = base_sampler_class
+            if base_sampler_type == "bucketing_seg_sampler":
+                base_sampler_class = sampler_dict[subbase_sampler_type]
+                base_sampler_kwargs = base_sampler_class.filter_args(**kwargs)
+                sampler_kwargs.update(base_sampler_kwargs)
+
+        if sampler_type in ["class_weighted_random_seg_chunk_sampler"]:
+            sampler_kwargs["class_info"] = dataset.class_info
+
+        logging.info(f"sampler-args={sampler_kwargs}")
+
+        return sampler_class(dataset.seg_set, **sampler_kwargs)
+
+    @staticmethod
+    def filter_args(**kwargs):
+
+        valid_args = (
+            "sampler_type",
+            "num_buckets",
+            "min_chunk_length",
+            "max_chunk_length",
+            "min_batch_size",
+            "max_batch_size",
+            "max_batch_length",
+            "num_chunks_per_seg_epoch",
+            "num_segs_per_class",
+            "num_chunks_per_seg",
+            "num_hard_prototypes",
+            "class_column",
+            "length_column",
+            "iters_per_epoch",
+            "batch_size",
+            "shuffle",
+            "drop_last",
+            "seed",
+        )
+
+        return dict((k, kwargs[k]) for k in valid_args if k in kwargs)
+
+    @staticmethod
+    def add_class_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        parser.add_argument(
+            "--sampler-type",
+            choices=sampler_dict.keys(),
+            default="class_weighted_random_seg_chunk_sampler",
+            help="batch sampler type",
+        )
+
+        parser.add_argument(
+            "--base-sampler-type",
+            choices=["seg_sampler", "bucketing_seg_sampler"],
+            default="seg_sampler",
+            help="base sampler used for seg_chunk_sampler or bucketing_seg_sampler",
+        )
+
+        parser.add_argument(
+            "--min-chunk-length",
+            type=float,
+            default=4.0,
+            help=("minimum length of the segment chunks"),
+        )
+        parser.add_argument(
+            "--max-chunk-length",
+            type=float,
+            default=None,
+            help=("maximum length of segment chunks"),
+        )
+
+        parser.add_argument(
+            "--min-batch-size",
+            type=int,
+            default=1,
+            help=("minimum batch size per gpu"),
+        )
+        parser.add_argument(
+            "--max-batch-size",
+            type=int,
+            default=None,
+            help=(
+                "maximum batch size per gpu, if None, estimated from max_batch_length"
+            ),
+        )
+
+        parser.add_argument(
+            "--batch-size",
+            default=128,
+            type=int,
+            help=("deprecated, use min-batch-size instead"),
+        )
+
+        parser.add_argument(
+            "--max-batch-length",
+            type=float,
+            default=None,
+            help=(
+                "maximum accumulated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths"
+            ),
+        )
+
+        parser.add_argument(
+            "--iters-per-epoch",
+            default=None,
+            type=lambda x: x if (x == "auto" or x is None) else float(x),
+            help=("deprecated, use --num-chunks-per-seg-epoch instead"),
+        )
+
+        parser.add_argument(
+            "--num-chunks-per-seg-epoch",
+            default="auto",
+            type=lambda x: x if x == "auto" else float(x),
+            help=("number of times we sample a segment in each epoch"),
+        )
+
+        parser.add_argument(
+            "--num-segs-per-class",
+            type=int,
+            default=1,
+            help=("number of segments per class in batch"),
+        )
+        parser.add_argument(
+            "--num-chunks-per-seg",
+            type=int,
+            default=1,
+            help=("number of chunks per segment in batch"),
+        )
+        parser.add_argument(
+            "--num-hard-prototypes",
+            type=int,
+            default=0,
+            help=("number of hard prototype classes per batch"),
+        )
+
+        parser.add_argument(
+            "--drop-last",
+            action=ActionYesNo,
+            help="drops the last batch of the epoch",
+        )
+
+        parser.add_argument(
+            "--shuffle",
+            action=ActionYesNo,
+            help="shuffles the segments or chunks at the beginning of the epoch",
+        )
+        parser.add_argument(
+            "--seed",
+            type=int,
+            default=1234,
+            help=("seed for sampler random number generator"),
+        )
+
+        parser.add_argument(
+            "--length-column",
+            default="duration",
+            help="which column in the segment table indicates the duration of the segment",
+        )
+        parser.add_argument(
+            "--class-column",
+            default="class_id",
+            help="which column in the segment table indicates the class of the segment",
+        )
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
diff --git a/hyperion/torch/optim/factory.py b/hyperion/torch/optim/factory.py
index 4fa7b186..ab350098 100644
--- a/hyperion/torch/optim/factory.py
+++ b/hyperion/torch/optim/factory.py
@@ -13,6 +13,8 @@


 class OptimizerFactory(object):
+    """Factory class to create different types of optimizers."""
+
     @staticmethod
     def create(
         params,
diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py
new file mode 100644
index 00000000..2aed18c1
--- /dev/null
+++ b/hyperion/utils/class_info.py
@@ -0,0 +1,28 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+from .info_table import InfoTable
+
+
+class ClassInfo(InfoTable):
+    def __init__(self, df):
+        super().__init__(df)
+        if "class_idx" not in self.df:
+            self.add_class_idx()
+
+        if "weights" not in self.df:
+            self.add_equal_weights()
+        else:
+            # renormalize given weights so they sum to 1
+            self.df["weights"] /= self.df["weights"].sum()
+
+    def add_class_idx(self):
+        self.df["class_idx"] = [i for i in range(len(self.df))]
+
+    def add_equal_weights(self):
+        self.df["weights"] = 1 / len(self.df)
+
+    def weights(self, id):
+        return self.df.loc[id, "weights"]
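For illustration, how ClassInfo fills in defaults (table values are hypothetical):

import pandas as pd
from hyperion.utils.class_info import ClassInfo

ci = ClassInfo(pd.DataFrame({"id": ["spk1", "spk2", "spk3"]}))
print(ci.df["class_idx"].tolist())  # [0, 1, 2]
print(ci.df["weights"].tolist())    # ~[0.333, 0.333, 0.333], added as equal weights

# if weights are provided, they are renormalized to sum to 1
ci2 = ClassInfo(pd.DataFrame({"id": ["a", "b"], "weights": [2.0, 6.0]}))
print(ci2.df["weights"].tolist())   # [0.25, 0.75]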
diff --git a/hyperion/utils/feature_set.py b/hyperion/utils/feature_set.py
new file mode 100644
index 00000000..456cf99b
--- /dev/null
+++ b/hyperion/utils/feature_set.py
@@ -0,0 +1,74 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+from .info_table import InfoTable
+
+
+class FeatureSet(InfoTable):
+    def __init__(self, df):
+        super().__init__(df)
+        assert "storage_path" in df
+
+    def save(self, file_path, sep=None):
+        """Saves info table to file
+
+        Args:
+          file_path: File to write the list.
+          sep: Separator between the key and file_path in the text file.
+        """
+        file_path = Path(file_path)
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        ext = file_path.suffix
+        if ext == "":
+            # if no extension we save as kaldi feats.scp file
+            from .scp_list import SCPList
+
+            offset = self.df["storage_byte"] if "storage_byte" in self.df else None
+            range = None
+            if "start" in self.df and "num_frames" in self.df:
+                range = [
+                    np.array([s, n], dtype=np.int64)
+                    for s, n in zip(self.df["start"], self.df["num_frames"])
+                ]
+            scp = SCPList(self.df["id"], self.df["storage_path"], offset, range)
+            scp.save(file_path)
+            return
+
+        super().save(file_path, sep)
+
+    @classmethod
+    def load(cls, file_path, sep=None):
+        """Loads feature list from text file.
+
+        Args:
+          file_path: File to read the list.
+          sep: Separator between the key and file_path in the text file.
+        Returns:
+          FeatureSet object
+        """
+        file_path = Path(file_path)
+        ext = file_path.suffix
+        if ext == "":
+            # if no extension we load as kaldi feats.scp file
+            from .scp_list import SCPList
+
+            scp = SCPList.load(file_path)
+            df_dict = {"id": scp.key, "storage_path": scp.file_path}
+            df = pd.DataFrame(df_dict)
+            if scp.offset is not None:
+                df["storage_byte"] = scp.offset
+
+            if scp.range is not None:
+                df["start"] = [r[0] for r in scp.range]
+                df["num_frames"] = [r[1] for r in scp.range]
+
+            return cls(df)
+
+        return super().load(file_path, sep)
diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py
new file mode 100644
index 00000000..247001c0
--- /dev/null
+++ b/hyperion/utils/info_table.py
@@ -0,0 +1,383 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+from pathlib import Path
+import logging
+from collections import OrderedDict
+from copy import deepcopy
+
+import numpy as np
+import pandas as pd
+
+from .list_utils import split_list, split_list_group_by_key
+
+
+class InfoTable(object):
+    """This is a base class to store information about recordings, segments,
+    features, etc.
+
+    Attributes:
+      df: pandas dataframe.
+    """
+
+    def __init__(self, df):
+        self.df = df
+        assert "id" in df
+        self.df.set_index("id", drop=False, inplace=True)
+
+    def copy(self):
+        """Makes a copy of the object."""
+        return deepcopy(self)
+
+    def clone(self):
+        """Makes a copy of the object."""
+        return deepcopy(self)
+
+    def __len__(self):
+        return len(self.df)
+
+    @property
+    def iat(self):
+        return self.df.iat
+
+    @property
+    def at(self):
+        return self.df.at
+
+    @property
+    def iloc(self):
+        return self.df.iloc
+
+    @property
+    def loc(self):
+        return self.df.loc
+
+    @property
+    def __getitem__(self):
+        return self.df.__getitem__
+
+    @property
+    def __contains__(self):
+        return self.df.__contains__
+
+    def save(self, file_path, sep=None):
+        """Saves info table to file
+
+        Args:
+          file_path: File to write the list.
+          sep: Separator between the key and file_path in the text file.
+        """
+        file_path = Path(file_path)
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        ext = file_path.suffix
+        if ext == "":
+            # if no extension we save as kaldi utt2spk file
+            self.df.to_csv(file_path, sep=" ", header=False, index=False)
+            return
+
+        if sep is None:
+            sep = "\t" if ".tsv" in ext else ","
+
+        self.df.to_csv(file_path, sep=sep, index=False)
+
+    @classmethod
+    def load(cls, file_path, sep=None):
+        """Loads info table from text file.
+
+        Args:
+          file_path: File to read the list.
+          sep: Separator between the key and file_path in the text file.
+        Returns:
+          InfoTable object
+        """
+        file_path = Path(file_path)
+        ext = file_path.suffix
+        if ext == "":
+            # if no extension we load as kaldi utt2spk file
+            df = pd.read_csv(
+                file_path,
+                sep=" ",
+                header=None,
+                names=["id", "class_id"],
+                dtype={"id": str, "class_id": str},
+            )
+            return cls(df)
+
+        if sep is None:
+            sep = "\t" if ".tsv" in ext else ","
+
+        df = pd.read_csv(file_path, sep=sep)
+        return cls(df)
+
+    def sort(self, column="id", ascending=True):
+        """Sorts the table by column"""
+        self.df.sort_values(by=column, inplace=True, ascending=ascending)
+
+    def split(self, idx, num_parts, group_by=None):
+        """Splits the table into num_parts and returns part idx.
+
+        Args:
+          idx: Part to return from 1 to num_parts.
+          num_parts: Number of parts to split the list.
+          group_by: All the lines with the same value in column
+            group_by go to the same part.
+
+        Returns:
+          Sub InfoTable object
+        """
+        if group_by is None:
+            _, idx1 = split_list(self.df["id"], idx, num_parts)
+        else:
+            _, idx1 = split_list_group_by_key(
+                self.df[group_by], idx, num_parts
+            )
+
+        df = self.df.iloc[idx1]
+        return self.__class__(df)
+
+    @classmethod
+    def merge(cls, tables):
+        """Merges several tables.
+
+        Args:
+          tables: List of InfoTables.
+
+        Returns:
+          InfoTable object concatenating the tables.
+        """
+        df_list = [table.df for table in tables]
+        df = pd.concat(df_list)
+        return cls(df)
+
+    def filter(self, items=None, iindex=None, columns=None, by="id", keep=True):
+        assert (
+            items is None or iindex is None
+        ), "items and iindex cannot be not None at the same time"
+        df = self.df
+
+        if not keep:
+            # invert the selection
+            if items is not None:
+                items = np.setdiff1d(df[by], items)
+            elif iindex is not None:
+                iindex = np.setdiff1d(np.arange(len(df)), iindex)
+
+            if columns is not None:
+                columns = np.setdiff1d(df.columns, columns)
+
+        if items is not None:
+            if by != "id":
+                missing = [v for v in items if v not in df[by].values]
+                if missing:
+                    raise Exception(f"{missing} not found in table")
+                items = [True if v in items else False for v in df[by]]
+
+            if columns is None:
+                df = df.loc[items]
+            else:
+                df = df.loc[items, columns]
+        else:
+            if iindex is not None:
+                df = self.df.iloc[iindex]
+
+            if columns is not None:
+                df = df[columns]
+
+        return self.__class__(df)
+
+    def __eq__(self, other):
+        """Equal operator"""
+        if self.df.shape[0] == 0 and other.df.shape[0] == 0:
+            return True
+        eq = self.df.equals(other.df)
+        return eq
+
+    def __ne__(self, other):
+        """Non-equal operator"""
+        return not self.__eq__(other)
+
+    def __cmp__(self, other):
+        """Comparison operator"""
+        if self.__eq__(other):
+            return 0
+        return 1
+
+    # def __len__(self):
+    #     """Returns the number of elements in the list."""
+    #     return len(self.df)
+
+    # def _create_dict(self):
+    #     """Creates dictionary that returns the position of
+    #     a segment in the list.
+ # """ + # self.key_to_index = OrderedDict( + # (k, i) for i, k in enumerate(self.utt_info.index) + # ) + + # def get_index(self, key): + # """Returns the position of key in the list.""" + # if self.key_to_index is None: + # self._create_dict() + # return self.key_to_index[key] + + # def __contains__(self, id): + # """Returns True if the list contains the key""" + # return id in self.df.index + + # def __getitem__(self, id): + # """It allows to acces the data in the list by key or index like in + # a ditionary, e.g.: + # If input is a string key: + # utt2spk = Utt2Info(info) + # spk_id = utt2spk['data1'] + # If input is an index: + # key, spk_id = utt2spk[0] + + # Args: + # key: String key or integer index. + # Returns: + # If key is a string: + # info corresponding to key + # If key is the index in the key list: + # key, info given index + # """ + # if isinstance(id, str): + # row = np.array(self.utt_info.loc[key])[1:] + # if len(row) == 1: + # return row[0] + # else: + # return row + # else: + # row = np.array(self.utt_info.iloc[key]) + # if len(row) == 2: + # return row[0], row[1] + # else: + # return row[0], row[1:] + + # def sort(self, field=0): + # """Sorts the list by key""" + # if field == 0: + # self.utt_info.sort_index(ascending=True, inplace=True) + # else: + # idx = np.argsort(self.utt_info[field]) + # self.utt_info = self.utt_info.iloc[idx] + # self.key_to_index = None + + # @classmethod + # def load(cls, file_path, sep=" ", dtype={0: np.str, 1: np.str}): + # """Loads utt2info list from text file. + + # Args: + # file_path: File to read the list. + # sep: Separator between the key and file_path in the text file. + # dtype: Dictionary with the dtypes of each column. + # Returns: + # Utt2Info object + # """ + # df = pd.read_csv(file_path, sep=sep, header=None, dtype=dtype) + # df = df.rename(index=str, columns={0: "key"}) + # return cls(df) + + # def split(self, idx, num_parts, group_by_field=0): + # """Splits SCPList into num_parts and return part idx. + + # Args: + # idx: Part to return from 1 to num_parts. + # num_parts: Number of parts to split the list. + # group_by_field: All the lines with the same value in column + # groub_by_field go to the same part + + # Returns: + # Sub Utt2Info object + # """ + # if group_by_field == 0: + # key, idx1 = split_list(self.utt_info["key"], idx, num_parts) + # else: + # key, idx1 = split_list_group_by_key( + # self.utt_info[group_by_field], idx, num_parts + # ) + + # utt_info = self.utt_info.iloc[idx1] + # return Utt2Info(utt_info) + + + + # def filter(self, filter_key, keep=True): + # """Removes elements from Utt2Info object by key + + # Args: + # filter_key: List with the keys of the elements to keep or remove. + # keep: If True, we keep the elements in filter_key; + # if False, we remove the elements in filter_key; + + # Returns: + # Utt2Info object. + # """ + # if not keep: + # filter_key = np.setdiff1d(self.utt_info["key"], filter_key) + # utt_info = self.utt_info.loc[filter_key] + # return Utt2Info(utt_info) + + # def filter_info(self, filter_key, field=1, keep=True): + # """Removes elements of Utt2Info by info value + + # Args: + # filter_key: List with the file_path of the elements to keep or remove. + # field: Field number corresponding to the info to filter + # keep: If True, we keep the elements in filter_key; + # if False, we remove the elements in filter_key; + + # Returns: + # Utt2Info object. 
+ # """ + # if not keep: + # filter_key = np.setdiff1d(self.utt_info[field], filter_key) + # f, _ = ismember(filter_key, self.utt_info[field]) + # if not np.all(f): + # for k in filter_key[f == False]: + # logging.error("info %s not found in field %d" % (k, field)) + # raise Exception("not all keys were found in field %d" % (field)) + + # f, _ = ismember(self.utt_info[field], filter_key) + # utt_info = self.utt_info.iloc[f] + # return Utt2Info(utt_info) + + # def filter_index(self, index, keep=True): + # """Removes elements of Utt2Info by index + + # Args: + # filter_key: List with the index of the elements to keep or remove. + # keep: If True, we keep the elements in filter_key; + # if False, we remove the elements in filter_key; + + # Returns: + # Utt2Info object. + # """ + + # if not keep: + # index = np.setdiff1d(np.arange(len(self.key), dtype=np.int64), index) + + # utt_info = self.utt_info.iloc[index] + # return Utt2Info(utt_info) + + def shuffle(self, seed=1024, rng=None): + """Shuffles the elements of the list. + + Args: + seed: Seed for random number generator. + rng: numpy random number generator object. + + Returns: + Index used to shuffle the list. + """ + if rng is None: + rng = np.random.RandomState(seed=seed) + index = np.arange(len(self.df)) + rng.shuffle(index) + self.df = self.df.iloc[index] + return index + + \ No newline at end of file diff --git a/hyperion/utils/recording_set.py b/hyperion/utils/recording_set.py new file mode 100644 index 00000000..ad6f65f6 --- /dev/null +++ b/hyperion/utils/recording_set.py @@ -0,0 +1,61 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from pathlib import Path + +import numpy as np +import pandas as pd + +from .info_table import InfoTable + + +class RecordingSet(InfoTable): + def __init__(self, df): + super().__init__(df) + assert "storage_path" in df + + def save(self, file_path, sep=None): + """Saves info table to file + + Args: + file_path: File to write the list. + sep: Separator between the key and file_path in the text file. + """ + file_path = Path(file_path) + file_path.parent.mkdir(parents=True, exist_ok=True) + ext = file_path.suffix + if ext == "": + # if no extension we save as kaldi feats.scp file + from .scp_list import SCPList + + scp = SCPList(self.df["id"], self.df["storage_path"]) + scp.save(file_path) + return + + super().save(file_path, sep) + + @classmethod + def load(cls, file_path, sep=None): + """Loads utt2info list from text file. + + Args: + file_path: File to read the list. + sep: Separator between the key and file_path in the text file. 
+ Returns: + RecordingSet object + """ + file_path = Path(file_path) + ext = file_path.suffix + if ext == "": + # if no extension we load as kaldi feats.scp file + from .scp_list import SCPList + + scp = SCPList.load(file_path) + df_dict = {"id": scp.key, "storage_path": scp.file_path} + df = pd.DataFrame(df_dict) + + return cls(df) + + return super().load(file_path, sep) diff --git a/hyperion/utils/segment_set.py b/hyperion/utils/segment_set.py new file mode 100644 index 00000000..4332dea3 --- /dev/null +++ b/hyperion/utils/segment_set.py @@ -0,0 +1,11 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from .info_table import InfoTable + + +class SegmentSet(InfoTable): + def __init__(self, df): + super().__init__(df) From 67c1bb8864439eb43a1c1764cc49cb0086ee7e17 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 30 Aug 2022 04:20:36 -0400 Subject: [PATCH 023/154] new seq sampler works --- apps.txt | 83 +---- .../v1.1/conf/train_data_default.yaml | 15 +- egs/voxceleb/v1.1/conf/val_data_default.yaml | 15 +- ...statsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh | 2 +- egs/voxceleb/v1.1/run_011_train_xvector.sh | 6 +- hyp_utils/conda_env.sh | 1 + hyperion/bin/extract_xvectors_from_wav.py | 2 +- hyperion/bin/train_xvector_from_wav.py | 79 +---- hyperion/helpers/trial_data_reader.py | 17 +- hyperion/io/audio_reader.py | 2 +- .../classifiers/binary_logistic_regression.py | 2 +- hyperion/np/score_norm/adapt_s_norm.py | 273 +++++++++++++-- hyperion/np/score_norm/score_norm.py | 7 +- hyperion/np/score_norm/t_norm.py | 18 +- hyperion/np/score_norm/z_norm.py | 18 +- hyperion/torch/data/__init__.py | 3 +- hyperion/torch/data/audio_dataset.py | 326 +++++++++++++++++- .../data/class_weighted_seg_chunk_sampler.py | 117 +++++-- hyperion/torch/data/seg_chunk_sampler.py | 50 ++- hyperion/torch/data/seg_sampler.py | 54 +-- hyperion/torch/data/seg_sampler_factory.py | 20 +- hyperion/torch/lr_schedulers/factory.py | 4 +- hyperion/torch/lr_schedulers/triangular_lr.py | 4 +- hyperion/torch/trainers/torch_trainer.py | 15 +- hyperion/utils/class_info.py | 51 ++- hyperion/utils/feature_set.py | 4 +- hyperion/utils/info_table.py | 17 +- hyperion/utils/recording_set.py | 4 +- hyperion/utils/segment_set.py | 6 + hyperion/utils/trial_ndx.py | 2 +- hyperion/utils/trial_scores.py | 2 +- 31 files changed, 904 insertions(+), 315 deletions(-) diff --git a/apps.txt b/apps.txt index 4bf4a173..837c064b 100644 --- a/apps.txt +++ b/apps.txt @@ -1,69 +1,14 @@ -apply-mvn-select-frames.py -compute-energy-vad.py -compute-mfcc-feats.py -copy-feats.py -eval-cos-1vs1.py -eval-linear-gbe-up.py -eval-linear-gbe.py -eval-linear-svmc.py -eval-logistic-regression.py -eval-plda-1vs1.py -eval-plda-nvs1.py -make-babble-noise-audio-files.py -merge-h5-files.py -pack-audio-files.py -pack-wav-rirs.py -plot-vector-hist.py -plot-vector-tsne.py -preprocess-audio-files.py -rttm-to-bin-vad.py -segments-to-bin-vad.py -torch-adv-finetune-xvec-from-wav.py -torch-adv-finetune-xvec.py -torch-compute-mfcc-feats.py -torch-eval-vae.py -torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py -torch-eval-xvec-cosine-scoring-from-adv-test-wav.py -torch-eval-xvec-cosine-scoring-from-art-test-wav.py -torch-eval-xvec-cosine-scoring-from-test-wav.py -torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py -torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py -torch-eval-xvec-logits-from-wav.py -torch-extract-xvectors-from-wav-with-rttm.py 
-torch-extract-xvectors-from-wav.py -torch-extract-xvectors-slidwin-from-wav.py -torch-extract-xvectors-slidwin.py -torch-extract-xvectors-vae-preproc.py -torch-extract-xvectors.py -torch-finetune-xvec-dfr-from-wav.py -torch-finetune-xvec-dfr.py -torch-finetune-xvec-from-wav.py -torch-finetune-xvec.py -torch-generate-adv-attacks-xvector-classif.py -torch-generate-adv-attacks-xvector-verif.py -torch-train-dvae.py -torch-train-efficientnet-xvec-from-wav.py -torch-train-efficientnet-xvec.py -torch-train-resnet-xvec-from-wav.py -torch-train-resnet-xvec.py -torch-train-spinenet-xvec-from-wav.py -torch-train-tdnn-xvec-from-wav.py -torch-train-tdnn-xvec.py -torch-train-transformer-xvec-v1-from-wav.py -torch-train-transformer-xvec-v1.py -torch-train-vae.py -torch-train-vq-dvae.py -torch-train-vq-vae.py -torch-train-xvec-from-wav.py -train-cw-up.py -train-cw.py -train-gaussianizer.py -train-lda.py -train-linear-gbe-up.py -train-linear-gbe.py -train-linear-svmc.py -train-logistic-regression.py -train-mvn.py -train-nda.py -train-pca.py -train-plda.py +compute_energy_vad.py +extract_wav2vec2xvectors.py +extract_xvectors_from_wav.py +finetune_wav2vec2xvector.py +finetune_xvector_dfr_from_feats.py +finetune_xvector_dfr_from_wav.py +finetune_xvector_from_feats.py +finetune_xvector_from_wav.py +make_babble_noise_audio_files.py +pack_wav_rirs.py +preprocess_audio_files.py +train_wav2vec2xvector.py +train_xvector_from_feats.py +train_xvector_from_wav.py diff --git a/egs/voxceleb/v1.1/conf/train_data_default.yaml b/egs/voxceleb/v1.1/conf/train_data_default.yaml index 451ffa35..acd088e6 100644 --- a/egs/voxceleb/v1.1/conf/train_data_default.yaml +++ b/egs/voxceleb/v1.1/conf/train_data_default.yaml @@ -1,10 +1,17 @@ dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id sampler: + sampler_type: class_weighted_random_seg_chunk_sampler batch_size: 32 - iters_per_epoch: 6 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id data_loader: num_workers: 8 \ No newline at end of file diff --git a/egs/voxceleb/v1.1/conf/val_data_default.yaml b/egs/voxceleb/v1.1/conf/val_data_default.yaml index 451ffa35..acd088e6 100644 --- a/egs/voxceleb/v1.1/conf/val_data_default.yaml +++ b/egs/voxceleb/v1.1/conf/val_data_default.yaml @@ -1,10 +1,17 @@ dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id sampler: + sampler_type: class_weighted_random_seg_chunk_sampler batch_size: 32 - iters_per_epoch: 6 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id data_loader: num_workers: 8 \ No newline at end of file diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh index 3cd4b108..ecd076c8 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -25,7 +25,7 @@ margin=0.3 nnet_num_epochs=70 xvec_train_base_cfg=conf/train_ecapatdnn_xvec_default.yaml 
-xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu" +xvec_train_args="--data.train.sampler.min-batch-size $batch_size_1gpu --data.val.sampler.min-batch-size $batch_size_1gpu" nnet_name=${feat_type}_ecapatdnn512x3_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 diff --git a/egs/voxceleb/v1.1/run_011_train_xvector.sh b/egs/voxceleb/v1.1/run_011_train_xvector.sh index 17d50722..883c729b 100755 --- a/egs/voxceleb/v1.1/run_011_train_xvector.sh +++ b/egs/voxceleb/v1.1/run_011_train_xvector.sh @@ -47,11 +47,11 @@ if [ $stage -le 1 ]; then train_xvector_from_wav.py $nnet_type --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ - --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ - --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ --data.val.dataset.audio-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ - --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_dir $args \ --num-gpus $ngpu \ diff --git a/hyp_utils/conda_env.sh b/hyp_utils/conda_env.sh index 283a7a49..0a8f7a41 100755 --- a/hyp_utils/conda_env.sh +++ b/hyp_utils/conda_env.sh @@ -68,6 +68,7 @@ if [ $num_gpus -gt 0 ];then echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters if [ $num_gpus -gt 1 ];then + # export CUDA_LAUNCH_BLOCKING=1 [[ $(type -P "$torchrun") ]] && command="torchrun" \ || command="python -m torch.distributed.run" command="$command --nproc_per_node=$num_gpus --standalone --nnodes=1" diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py index 48c23687..e9746897 100755 --- a/hyperion/bin/extract_xvectors_from_wav.py +++ b/hyperion/bin/extract_xvectors_from_wav.py @@ -208,7 +208,7 @@ def extract_xvectors( writer.write([key], [y]) if write_num_frames_spec is not None: keys.append(key) - info.append(str(x.shape[1])) + info.append(str(x.shape[-1])) t8 = time.time() read_time = t2 - t1 diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py index 5eb871db..0e074977 100755 --- a/hyperion/bin/train_xvector_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -22,7 +22,9 @@ from hyperion.torch.utils import ddp from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.data import AudioDataset as AD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler + +# from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.models import ResNetXVector as RXVec @@ -46,19 +48,21 @@ def init_data(partition, rank, num_gpus, **kwargs): kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) - sampler_args = Sampler.filter_args(**kwargs["sampler"]) + sampler_args = kwargs["sampler"] if rank == 0: logging.info("{} audio dataset args={}".format(partition, ad_args)) logging.info("{} sampler args={}".format(partition, sampler_args)) logging.info("init %s dataset", partition) - 
ad_args["is_val"] = partition == "val" + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val dataset = AD(**ad_args) if rank == 0: logging.info("init %s samplers", partition) - sampler = Sampler(dataset, **sampler_args) + sampler = SegSamplerFactory.create(dataset, **sampler_args) if rank == 0: logging.info("init %s dataloader", partition) @@ -72,49 +76,6 @@ def init_data(partition, rank, num_gpus, **kwargs): return data_loader -# def init_data( -# audio_path, -# train_list, -# val_list, -# train_aug_cfg, -# val_aug_cfg, -# num_workers, -# num_gpus, -# rank, -# **kwargs -# ): - -# ad_args = AD.filter_args(**kwargs) -# sampler_args = Sampler.filter_args(**kwargs) -# if rank == 0: -# logging.info("audio dataset args={}".format(ad_args)) -# logging.info("sampler args={}".format(sampler_args)) -# logging.info("init datasets") - -# train_data = AD(audio_path, train_list, aug_cfg=train_aug_cfg, **ad_args) -# val_data = AD(audio_path, val_list, aug_cfg=val_aug_cfg, is_val=True, **ad_args) - -# if rank == 0: -# logging.info("init samplers") -# train_sampler = Sampler(train_data, **sampler_args) -# val_sampler = Sampler(val_data, **sampler_args) - -# num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) -# largs = ( -# {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} -# ) - -# train_loader = torch.utils.data.DataLoader( -# train_data, batch_sampler=train_sampler, **largs -# ) - -# test_loader = torch.utils.data.DataLoader( -# val_data, batch_sampler=val_sampler, **largs -# ) - -# return train_loader, test_loader - - def init_feats(rank, **kwargs): feat_args = AF.filter_args(**kwargs["feats"]) if rank == 0: @@ -154,7 +115,7 @@ def train_xvec(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) feat_extractor = init_feats(**kwargs) - model = init_xvector(train_loader.dataset.num_classes, **kwargs) + model = init_xvector(list(train_loader.dataset.num_classes.values())[0], **kwargs) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: @@ -180,14 +141,9 @@ def make_parser(xvec_class): parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") - # parser.add_argument("--audio-path", required=True) - # parser.add_argument("--train-list", required=True) - # parser.add_argument("--val-list", required=True) AD.add_class_args(train_parser, prefix="dataset", skip={}) - Sampler.add_class_args(train_parser, prefix="sampler") - # parser.add_argument("--train-aug-cfg", default=None) - # parser.add_argument("--val-aug-cfg", default=None) + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", type=int, @@ -197,7 +153,7 @@ def make_parser(xvec_class): val_parser = ArgumentParser(prog="") AD.add_class_args(val_parser, prefix="dataset", skip={}) - Sampler.add_class_args(val_parser, prefix="sampler") + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", type=int, @@ -209,14 +165,11 @@ def make_parser(xvec_class): data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) parser.link_arguments( - "data.train.dataset.class_file", "data.val.dataset.class_file" + "data.train.dataset.class_files", "data.val.dataset.class_files" ) parser.link_arguments( "data.train.data_loader.num_workers", 
"data.val.data_loader.num_workers" ) - parser.link_arguments( - "data.train.sampler.batch_size", "data.val.sampler.batch_size" - ) AF.add_class_args(parser, prefix="feats") xvec_class.add_class_args(parser, prefix="model") @@ -225,12 +178,6 @@ def make_parser(xvec_class): ) ddp.add_ddp_args(parser) parser.add_argument("--seed", type=int, default=1123581321, help="random seed") - # parser.add_argument( - # "--resume", - # action="store_true", - # default=False, - # help="resume training from checkpoint", - # ) parser.add_argument( "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int ) @@ -241,11 +188,9 @@ def make_parser(xvec_class): if __name__ == "__main__": parser = ArgumentParser(description="Train XVector from audio files") - parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() - for k, v in xvec_dict.items(): parser_k = make_parser(v) subcommands.add_subcommand(k, parser_k) diff --git a/hyperion/helpers/trial_data_reader.py b/hyperion/helpers/trial_data_reader.py index e6da5b7d..219ee6ce 100644 --- a/hyperion/helpers/trial_data_reader.py +++ b/hyperion/helpers/trial_data_reader.py @@ -12,7 +12,7 @@ from ..io import RandomAccessDataReaderFactory as DRF from ..utils.utt2info import Utt2Info -from ..utils import TrialNdx, TrialKey +from ..utils import TrialNdx, TrialKey # , SparseTrialNdx, SparseTrialKey from ..np.transforms import TransformList @@ -34,6 +34,7 @@ def __init__( num_seg_parts=1, eval_set="enroll-test", tlist_sep=" ", + sparse=False, ): self.r = DRF.create(v_file) @@ -45,10 +46,16 @@ def __init__( test = Utt2Info.load(test_file, sep=tlist_sep) ndx = None if ndx_file is not None: - try: - ndx = TrialNdx.load(ndx_file) - except: - ndx = TrialKey.load(ndx_file).to_ndx() + if sparse: + try: + ndx = TrialNdx.load(ndx_file) + except: + ndx = TrialKey.load(ndx_file).to_ndx() + else: + try: + ndx = TrialNdx.load(ndx_file) + except: + ndx = TrialKey.load(ndx_file).to_ndx() ndx, enroll = TrialNdx.parse_eval_set(ndx, enroll, test, eval_set) if num_model_parts > 1 or num_seg_parts > 1: diff --git a/hyperion/io/audio_reader.py b/hyperion/io/audio_reader.py index c6bdeab8..043ae778 100644 --- a/hyperion/io/audio_reader.py +++ b/hyperion/io/audio_reader.py @@ -184,7 +184,7 @@ def _read_segment(self, segment, time_offset=0, time_dur=0): if s_beg >= num_samples_i: raise Exception( "segment %s tbeg=%.2f (num_sample=%d) longer that wav file %s (num_samples=%d)" - % (key, tbeg, sbeg, file_id, num_samples_i) + % (file_id, t_beg, s_beg, file_id, num_samples_i) ) s_end = int(t_end * fs_i) diff --git a/hyperion/np/classifiers/binary_logistic_regression.py b/hyperion/np/classifiers/binary_logistic_regression.py index c144105f..82a84529 100644 --- a/hyperion/np/classifiers/binary_logistic_regression.py +++ b/hyperion/np/classifiers/binary_logistic_regression.py @@ -91,7 +91,7 @@ def __init__( verbose=verbose, warm_start=warm_start, multi_class="ovr", - lr_seed=1024, + lr_seed=lr_seed, **kwargs ) diff --git a/hyperion/np/score_norm/adapt_s_norm.py b/hyperion/np/score_norm/adapt_s_norm.py index b213d653..46d1fc14 100644 --- a/hyperion/np/score_norm/adapt_s_norm.py +++ b/hyperion/np/score_norm/adapt_s_norm.py @@ -21,10 +21,17 @@ class AdaptSNorm(ScoreNorm): std_floor: floor for standard deviations. 
""" - def __init__(self, nbest=100, nbest_discard=0, **kwargs): + def __init__( + self, + nbest=100, + nbest_discard=0, + nbest_sel_method="highest-other-side", + **kwargs, + ): super().__init__(*kwargs) self.nbest = nbest self.nbest_discard = nbest_discard + self.nbest_sel_method = nbest_sel_method def predict( self, @@ -33,6 +40,7 @@ def predict( scores_enr_coh, mask_coh_test=None, mask_enr_coh=None, + return_stats=False, ): """Normalizes the scores. @@ -59,50 +67,267 @@ def predict( if mask_enr_coh is not None: scores_enr_coh[mask_enr_coh == False] = 0 + if self.nbest_sel_method == "highest-other-side": + return self._norm_highest_other_side( + scores, + scores_coh_test, + scores_enr_coh, + mask_coh_test, + mask_enr_coh, + return_stats, + nbest, + ) + elif self.nbest_sel_method == "highest-same-side": + return self._norm_highest_same_side( + scores, + scores_coh_test, + scores_enr_coh, + mask_coh_test, + mask_enr_coh, + return_stats, + nbest, + ) + # best_idx = np.flipud(np.argsort(scores_coh_test, axis=0))[ + # self.nbest_discard : self.nbest_discard + nbest + # ] + # elif self.nbest_sel_method == "highest-same-side": + # best_idx = np.fliplr(np.argsort(scores_enr_coh, axis=1))[ + # :, self.nbest_discard : self.nbest_discard + nbest + # ].T + else: + raise Exception(f"invalid cohort selection method {self.nbest_sel_method}") + + # scores_z_norm = np.zeros_like(scores) + # for i in range(scores.shape[1]): + # best_idx_i = best_idx[:, i] + + # best_scores_i = scores_enr_coh[:, best_idx_i] + # mu_z = np.mean(best_scores_i, axis=1, keepdims=True) + + # if mask_enr_coh is None: + # s_z = np.std(best_scores_i, axis=1, keepdims=True) + # else: + # norm = np.mean(mask_enr_coh[:, best_idx_i], axis=1, keepdims=True) + # mu_z /= norm + # s_z = np.sqrt( + # np.mean(best_scores_i ** 2, axis=1, keepdims=True) / norm + # - mu_z ** 2 + # ) + + # s_z = np.clip(s_z, a_min=1e-5, a_max=None) + # if not self.norm_var: + # s_z = 1.0 + + # scores_z_norm[:, i] = (scores[:, i] - mu_z.T) / s_z.T + + # if self.nbest_sel_method == "highest-other-side": + # best_idx = np.fliplr(np.argsort(scores_enr_coh, axis=1))[ + # :, self.nbest_discard : self.nbest_discard + nbest + # ] + # elif self.nbest_sel_method == "highest-same-side": + # best_idx = np.flipud(np.argsort(scores_coh_test, axis=0))[ + # self.nbest_discard : self.nbest_discard + nbest + # ].T + # else: + # raise Exception(f"invalid cohort selection method {self.nbest_sel_method}") + + # scores_t_norm = np.zeros_like(scores) + # for i in range(scores.shape[0]): + # best_idx_i = best_idx[i] + # best_scores_i = scores_coh_test[best_idx_i, :] + # mu_t = np.mean(best_scores_i, axis=0, keepdims=True) + + # if mask_coh_test is None: + # s_t = np.std(best_scores_i[best_idx_i, :], axis=0, keepdims=True) + # else: + # norm = np.mean(mask_coh_test[best_idx_i, :], axis=0, keepdims=True) + # mu_t /= norm + # s_t = np.sqrt( + # np.mean(best_scores_i[best_idx_i, :] ** 2, axis=0, keepdims=True) + # / norm + # - mu_z ** 2 + # ) + + # s_t = np.clip(s_t, a_min=1e-5, a_max=None) + # if not self.norm_var: + # s_t = 1.0 + + # scores_t_norm[i, :] = (scores[i, :] - mu_t) / s_t + + # scores_norm = (scores_z_norm + scores_t_norm) / np.sqrt(2) + + # if return_stats: + # return scores_norm, mu_z, s_z, mu_t, s_t + # else: + # return scores_norm + + def _norm_highest_other_side( + self, + scores, + scores_coh_test, + scores_enr_coh, + mask_coh_test, + mask_enr_coh, + return_stats, + nbest, + ): + + if return_stats: + mu_z = np.zeros_like(scores) + mu_t = np.zeros_like(scores) + if 
self.norm_var:
+                s_z = np.zeros_like(scores)
+                s_t = np.zeros_like(scores)
+            else:
+                s_z = s_t = 1.0
+
+        scores_z_norm = np.zeros_like(scores)
         best_idx = np.flipud(np.argsort(scores_coh_test, axis=0))[
             self.nbest_discard : self.nbest_discard + nbest
         ]
-        scores_z_norm = np.zeros_like(scores)
         for i in range(scores.shape[1]):
             best_idx_i = best_idx[:, i]
-            mu_z = np.mean(scores_enr_coh[:, best_idx_i], axis=1, keepdims=True)
+            best_scores_i = scores_enr_coh[:, best_idx_i]
+            mu_z_i = np.mean(best_scores_i, axis=1, keepdims=False)
 
             if mask_enr_coh is None:
-                s_z = np.std(scores_enr_coh[:, best_idx_i], axis=1, keepdims=True)
+                s_z_i = np.std(best_scores_i, axis=1, keepdims=False)
             else:
-                norm = np.mean(mask_enr_coh[:, best_idx_i], axis=1, keepdims=True)
-                mu_z /= norm
-                s_z = np.sqrt(
-                    np.mean(scores_enr_coh[:, best_idx_i] ** 2, axis=1, keepdims=True)
-                    / norm
-                    - mu_z ** 2
+                norm = np.mean(mask_enr_coh[:, best_idx_i], axis=1, keepdims=False)
+                mu_z_i /= norm
+                s_z_i = np.sqrt(
+                    np.mean(best_scores_i ** 2, axis=1, keepdims=False) / norm
+                    - mu_z_i ** 2
                 )
 
-            s_z = np.clip(s_z, a_min=1e-5, a_max=None)
-            scores_z_norm[:, i] = (scores[:, i] - mu_z.T) / s_z.T
+            s_z_i = np.clip(s_z_i, a_min=1e-5, a_max=None)
+            if not self.norm_var:
+                s_z_i = 1.0
+
+            scores_z_norm[:, i] = (scores[:, i] - mu_z_i) / s_z_i
+            if return_stats:
+                mu_z[:, i] = mu_z_i
+                if self.norm_var:
+                    s_z[:, i] = s_z_i
+
+        scores_t_norm = np.zeros_like(scores)
         best_idx = np.fliplr(np.argsort(scores_enr_coh, axis=1))[
             :, self.nbest_discard : self.nbest_discard + nbest
         ]
-        scores_t_norm = np.zeros_like(scores)
         for i in range(scores.shape[0]):
             best_idx_i = best_idx[i]
+            best_scores_i = scores_coh_test[best_idx_i, :]
+            mu_t_i = np.mean(best_scores_i, axis=0, keepdims=False)
+
+            if mask_coh_test is None:
+                s_t_i = np.std(best_scores_i, axis=0, keepdims=False)
+            else:
+                norm = np.mean(mask_coh_test[best_idx_i, :], axis=0, keepdims=False)
+                mu_t_i /= norm
+                s_t_i = np.sqrt(
+                    np.mean(best_scores_i ** 2, axis=0, keepdims=False) / norm
+                    - mu_t_i ** 2
+                )
+
+            s_t_i = np.clip(s_t_i, a_min=1e-5, a_max=None)
+            if not self.norm_var:
+                s_t_i = 1.0
 
-            mu_z = np.mean(scores_coh_test[best_idx_i, :], axis=0, keepdims=True)
+            scores_t_norm[i, :] = (scores[i, :] - mu_t_i) / s_t_i
+            if return_stats:
+                mu_t[i, :] = mu_t_i
+                if self.norm_var:
+                    s_t[i, :] = s_t_i
+
+        scores_norm = (scores_z_norm + scores_t_norm) / np.sqrt(2)
+        if return_stats:
+            return scores_norm, mu_z, s_z, mu_t, s_t
+        else:
+            return scores_norm
+
+    def _norm_highest_same_side(
+        self,
+        scores,
+        scores_coh_test,
+        scores_enr_coh,
+        mask_coh_test,
+        mask_enr_coh,
+        return_stats,
+        nbest,
+    ):
+
+        if return_stats:
+            mu_z = np.zeros_like(scores)
+            mu_t = np.zeros_like(scores)
+            if self.norm_var:
+                s_z = np.zeros_like(scores)
+                s_t = np.zeros_like(scores)
+            else:
+                s_z = s_t = 1.0
+
+        best_idx = np.fliplr(np.argsort(scores_enr_coh, axis=1))[
+            :, self.nbest_discard : self.nbest_discard + nbest
+        ]
+
+        scores_z_norm = np.zeros_like(scores)
+        for i in range(scores.shape[0]):
+            best_idx_i = best_idx[i]
+            best_scores_i = scores_enr_coh[:, best_idx_i]
+            mu_z_i = np.mean(best_scores_i, axis=1, keepdims=False)
-            if mask_coh_test is None:
-                s_z = np.std(scores_coh_test[best_idx_i, :], axis=0, keepdims=True)
+            if mask_enr_coh is None:
+                s_z_i = np.std(best_scores_i, axis=1, keepdims=False)
             else:
-                norm = np.mean(mask_coh_test[best_idx_i, :], axis=0, keepdims=True)
-                mu_z /= norm
-                s_z = np.sqrt(
-                    np.mean(scores_coh_test[best_idx_i, :] ** 2, axis=0, keepdims=True)
-                    / norm
-                    - mu_z ** 2
+                norm = np.mean(mask_enr_coh[:, best_idx_i], axis=1, keepdims=False)
+                mu_z_i /= norm
+                s_z_i = np.sqrt(
+                    np.mean(best_scores_i ** 2, axis=1, keepdims=False) / norm
+                    - mu_z_i ** 2
                 )
 
-            s_z = np.clip(s_z, a_min=1e-5, a_max=None)
-            scores_t_norm[i, :] = (scores[i, :] - mu_z) / s_z
+            s_z_i = np.clip(s_z_i, a_min=1e-5, a_max=None)
+            if not self.norm_var:
+                s_z_i = 1.0
+
+            scores_z_norm[:, i] = (scores[:, i] - mu_z_i) / s_z_i
+            if return_stats:
+                mu_z[:, i] = mu_z_i
+                if self.norm_var:
+                    s_z[:, i] = s_z_i
+
+        best_idx = np.flipud(np.argsort(scores_coh_test, axis=0))[
+            self.nbest_discard : self.nbest_discard + nbest
+        ]
+        scores_t_norm = np.zeros_like(scores)
+        for i in range(scores.shape[1]):
+            best_idx_i = best_idx[:, i]
 
-        return (scores_z_norm + scores_t_norm) / np.sqrt(2)
+            best_scores_i = scores_coh_test[best_idx_i, :]
+            mu_t_i = np.mean(best_scores_i, axis=0, keepdims=False)
+
+            if mask_coh_test is None:
+                s_t_i = np.std(best_scores_i, axis=0, keepdims=False)
+            else:
+                norm = np.mean(mask_coh_test[best_idx_i, :], axis=0, keepdims=False)
+                mu_t_i /= norm
+                s_t_i = np.sqrt(
+                    np.mean(best_scores_i ** 2, axis=0, keepdims=False) / norm
+                    - mu_t_i ** 2
+                )
+
+            s_t_i = np.clip(s_t_i, a_min=1e-5, a_max=None)
+            if not self.norm_var:
+                s_t_i = 1.0
+
+            scores_t_norm[i, :] = (scores[i, :] - mu_t_i) / s_t_i
+            if return_stats:
+                mu_t[i, :] = mu_t_i
+                if self.norm_var:
+                    s_t[i, :] = s_t_i
+
+        scores_norm = (scores_z_norm + scores_t_norm) / np.sqrt(2)
+        if return_stats:
+            return scores_norm, mu_z, s_z, mu_t, s_t
+        else:
+            return scores_norm

diff --git a/hyperion/np/score_norm/score_norm.py b/hyperion/np/score_norm/score_norm.py
index e2fa1814..9b40c7d7 100644
--- a/hyperion/np/score_norm/score_norm.py
+++ b/hyperion/np/score_norm/score_norm.py
@@ -15,14 +15,15 @@ class ScoreNorm(NPModel):
       std_floor: floor for standard deviations.
""" - def __init__(self, std_floor=1e-5, **kwargs): + def __init__(self, norm_var=True, std_floor=1e-5, **kwargs): super().__init__(*kwargs) + self.norm_var = norm_var self.std_floor = std_floor def forward(self, **kwargs): """Overloads predict function.""" return self.predict(**kwargs) - def __call__(self, *kwargs): + def __call__(self, *args, **kwargs): """Overloads predict function.""" - return self.predict(**kwargs) + return self.predict(*args, **kwargs) diff --git a/hyperion/np/score_norm/t_norm.py b/hyperion/np/score_norm/t_norm.py index ac87c8ac..a5a80def 100644 --- a/hyperion/np/score_norm/t_norm.py +++ b/hyperion/np/score_norm/t_norm.py @@ -24,16 +24,22 @@ def predict(self, scores, scores_coh_test, mask=None): """ if mask is None: mu_t = np.mean(scores_coh_test, axis=0, keepdims=True) - s_t = np.std(scores_coh_test, axis=0, keepdims=True) + if self.norm_var: + s_t = np.std(scores_coh_test, axis=0, keepdims=True) else: scores_coh_test[mask == False] = 0 n_t = np.mean(mask, axis=0, keepdims=True) mu_t = np.mean(scores_coh_test, axis=0, keepdims=True) / n_t - s_t = np.sqrt( - np.mean(scores_coh_test ** 2, axis=0, keepdims=True) / n_t - mu_t ** 2 - ) - - s_t[s_t < self.std_floor] = self.std_floor + if self.norm_var: + s_t = np.sqrt( + np.mean(scores_coh_test ** 2, axis=0, keepdims=True) / n_t + - mu_t ** 2 + ) + + if self.norm_var: + s_t[s_t < self.std_floor] = self.std_floor + else: + s_t = 1.0 scores_norm = (scores - mu_t) / s_t return scores_norm diff --git a/hyperion/np/score_norm/z_norm.py b/hyperion/np/score_norm/z_norm.py index 98189e06..7b9e32d8 100644 --- a/hyperion/np/score_norm/z_norm.py +++ b/hyperion/np/score_norm/z_norm.py @@ -25,16 +25,22 @@ def predict(self, scores, scores_enr_coh, mask=None): """ if mask is None: mu_z = np.mean(scores_enr_coh, axis=1, keepdims=True) - s_z = np.std(scores_enr_coh, axis=1, keepdims=True) + if self.norm_var: + s_z = np.std(scores_enr_coh, axis=1, keepdims=True) else: scores_enr_coh[mask == False] = 0 n_z = np.mean(mask, axis=1, keepdims=True) mu_z = np.mean(scores_enr_coh, axis=1, keepdims=True) / n_z - s_z = np.sqrt( - np.mean(scores_enr_coh ** 2, axis=1, keepdims=True) / n_z - mu_z ** 2 - ) - - s_z[s_z < self.std_floor] = self.std_floor + if self.norm_var: + s_z = np.sqrt( + np.mean(scores_enr_coh ** 2, axis=1, keepdims=True) / n_z + - mu_z ** 2 + ) + + if self.norm_var: + s_z[s_z < self.std_floor] = self.std_floor + else: + s_z = 1.0 scores_norm = (scores - mu_z) / s_z return scores_norm diff --git a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index 4deb3f25..752cf0f5 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -10,4 +10,5 @@ from .audio_dataset import AudioDataset # samplers -from .weighted_seq_sampler import ClassWeightedSeqSampler +# from .weighted_seq_sampler import ClassWeightedSeqSampler +from .seg_sampler_factory import SegSamplerFactory diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index f86ad0a2..8875676f 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -4,7 +4,7 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionYesNo, ArgumentParser, ActionParser import time import math @@ -21,8 +21,10 @@ from torch.utils.data import Dataset import torch.distributed as dist +from hyperion.np import augment -class AudioDataset(Dataset): + +class AudioDataset1(Dataset): def __init__( self, audio_file, @@ -443,3 +445,323 @@ def 
add_class_args(parser, prefix=None, skip={"audio_file", "key_file"}): # help='audio dataset options') add_argparse_args = add_class_args + + +from ...utils.class_info import ClassInfo +from ...utils.segment_set import SegmentSet + + +class AudioDataset(Dataset): + def __init__( + self, + audio_file, + segments_file, + class_names=None, + class_files=None, + time_durs_file=None, + aug_cfgs=None, + num_augs=1, + return_segment_info=None, + return_orig=False, + wav_scale=2 ** 15 - 1, + is_val=False, + ): + + super().__init__() + try: + rank = dist.get_rank() + world_size = dist.get_world_size() + except: + rank = 0 + world_size = 1 + + self.rank = rank + self.world_size = world_size + self.epoch = 0 + + if rank == 0: + logging.info("opening audio reader %s", audio_file) + + self.r = AR(audio_file, wav_scale=wav_scale) + + if rank == 0: + logging.info("loading segments file %s" % segments_file) + + self.seg_set = SegmentSet.load(segments_file) + if rank == 0: + logging.info("dataset contains %d seqs" % len(self.seg_set)) + + self.is_val = is_val + if time_durs_file is not None: + if rank == 0: + logging.info("loading durations file %s" % time_durs_file) + + time_durs = SegmentSet.load(time_durs_file) + self.seg_set["duration"] = time_durs.loc[ + self.seg_set["id"] + ].class_id.values.astype(np.float, copy=False) + else: + assert "duration" in self.seg_set + + logging.info("loading class-info files") + self._load_class_infos(class_names, class_files, is_val) + + self.return_segment_info = ( + [] if return_segment_info is None else return_segment_info + ) + self.return_orig = return_orig + + self.num_augs = num_augs + self._create_augmenters(aug_cfgs) + + def _load_class_infos(self, class_names, class_files, is_val): + self.class_info = {} + if class_names is None: + assert class_files is None + return + + assert len(class_names) == len(class_files) + for name, file in zip(class_names, class_files): + assert ( + name in self.seg_set + ), f"class_name {name} not present in the segment set" + if self.rank == 0: + logging.info("loading class-info file %s" % file) + table = ClassInfo.load(file) + self.class_info[name] = table + if not is_val: + # check that all classes are present in the training segments + class_ids = table["id"] + segment_class_ids = self.seg_set[name].unique() + for c_id in class_ids: + if c_id not in segment_class_ids: + logging.warning( + "%s class: %s not present in dataset", name, c_id + ) + + def _create_augmenters(self, aug_cfgs): + self.augmenters = [] + self.reverb_context = 0 + if aug_cfgs is None: + return + + for aug_cfg in aug_cfgs: + logging.info(f"loading augmentation={aug_cfg}") + augmenter = SpeechAugment.create( + aug_cfg, random_seed=112358 + 1000 * self.rank + ) + self.augmenters.append(augmenter) + self.reverb_context = max(augmenter.max_reverb_context, self.reverb_context) + + def set_epoch(self, epoch): + self.epoch = epoch + + @property + def wav_scale(self): + return self.r.wav_scale + + @property + def num_seqs(self): + return len(self.seg_set) + + def __len__(self): + return self.num_seqs + + @property + def seq_lengths(self): + return self.seg_set["duration"] + + @property + def total_length(self): + return np.sum(self.seq_lengths) + + @property + def min_seq_length(self): + return np.min(self.seq_lengths) + + @property + def max_seq_length(self): + return np.max(self.seq_lengths) + + @property + def num_classes(self): + return {k: t.num_classes for k, t in self.class_info.items()} + + def _parse_segment_item(self, segment): + if isinstance(segment, 
(tuple, list)): + seg_id, start, duration = segment + assert duration <= self.seg_set.loc[seg_id].duration + else: + seg_id, start, duration = segment, 0, 0 + + if "start" in self.seg_set: + start += self.seg_set.loc[seg_id].start + + return seg_id, start, duration + + def _read_audio(self, seg_id, start, duration): + # how much extra audio we need to load to + # calculate the reverb of the first part of the audio + reverb_context = min(self.reverb_context, start) + start -= reverb_context + read_duration = duration + reverb_context + + # read audio + recording_id = self.seg_set.recording_ids(seg_id) + x, fs = self.r.read([recording_id], time_offset=start, time_durs=read_duration) + return x[0], fs[0] + + def _apply_augs(self, x, num_samples, reverb_context_samples): + x_augs = [] + # for each type of augmentation + for i, augmenter in enumerate(self.augmenters): + # we do n_augs per augmentation type + for j in range(self.num_augs): + # augment x + x_aug, aug_info = augmenter(x) + # remove the extra left context used to compute the reverberation. + x_aug = x_aug[reverb_context_samples : len(x)] + x_augs.append(x_aug) + + return x_augs + + def _get_segment_info(self, seg_id): + r = [] + # converts the class_ids to integers + for info_name in self.return_segment_info: + seg_info = self.seg_set.loc[seg_id, info_name] + if info_name in self.class_info: + # if the type of information is a class-id + # we use the class information table to + # convert from id to integer + class_info = self.class_info[info_name] + idx = class_info.loc[seg_info, "class_idx"] + seg_info = idx + + r.append(seg_info) + + return r + + def __getitem__(self, segment): + + seg_id, start, duration = self._parse_segment_item(segment) + x, fs = self._read_audio(seg_id, start, duration) + if self.augmenters: + # augmentations + num_samples = int(duration * fs) + reverb_context_samples = len(x) - num_samples + x_augs = self._apply_augs(x, num_samples, reverb_context_samples) + r = x_augs + + # add original non augmented audio + if self.return_orig: + x_orig = x[reverb_context_samples:] + r.append(x_orig) + + else: + r = [x] + + # adds the segment labels + seg_info = self._get_segment_info(seg_id) + r.extend(seg_info) + + return (*r,) + + @staticmethod + def filter_args(**kwargs): + + ar_args = AR.filter_args(**kwargs) + valid_args = ( + "audio_file", + "segments_file", + "aug_cfgs", + "num_augs", + "class_names", + "class_files", + "return_segment_info", + "return_orig", + "time_durs_file", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args.update(ar_args) + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip={}): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + if "audio_file" not in skip: + parser.add_argument( + "--audio-file", + required=True, + help=("audio manifest file"), + ) + + if "segments_file" not in skip: + parser.add_argument( + "--segments-file", + required=True, + help=("segments manifest file"), + ) + + parser.add_argument( + "--class-names", + default=None, + nargs="+", + help=( + "list with the names of the types of classes in the datasets, e.g., speaker, language" + ), + ) + + parser.add_argument( + "--class-files", + default=None, + nargs="+", + help=("list of class info files"), + ) + + parser.add_argument( + "--time-durs-file", + default=None, + help=( + "segment to duration in secs file, if durations are not in segments_file" + ), + ) + + parser.add_argument( + "--aug-cfgs", + default=None, + nargs="+", + 
help=("augmentation configuration file."), + ) + + parser.add_argument( + "--num-augs", + default=1, + help=("number of augmentations per segment and augmentation type"), + ) + parser.add_argument( + "--return-segment-info", + default=None, + nargs="+", + help=( + "list of columns of the segment file which should be returned as supervisions" + ), + ) + parser.add_argument( + "--return-orig", + default=False, + action=ActionYesNo, + help=( + "when using augmentation, whether or not to return also the original audio" + ), + ) + + AR.add_class_args(parser) + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + # help='audio dataset options') + + add_argparse_args = add_class_args diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 1a9f98b8..91d592bc 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -27,17 +27,20 @@ def __init__( num_chunks_per_seg_epoch="auto", num_segs_per_class=1, num_chunks_per_seg=1, + weight_exponent=1.0, + weight_mode="custom", num_hard_prototypes=0, affinity_matrix=None, - class_column="class_id", - length_column="duration", + class_name="class_id", + length_name="duration", + shuffle=False, iters_per_epoch=None, batch_size=None, seed=1234, ): - super().__init__(seed=seed) - self.class_column = class_column - self.length_column = length_column + super().__init__(shuffle=shuffle, seed=seed) + self.class_name = class_name + self.length_name = length_name self.seg_set = seg_set self.class_info = class_info self.min_chunk_length = min_chunk_length @@ -71,6 +74,9 @@ def __init__( self.num_segs_per_class = num_segs_per_class self.num_chunks_per_seg = num_chunks_per_seg + self.weight_exponent = weight_exponent + self.weight_mode = weight_mode + self.num_hard_prototypes = num_hard_prototypes self.batch = 0 @@ -83,6 +89,7 @@ def __init__( self._compute_len() self._gather_class_info() + self._set_class_weights() self.set_hard_prototypes(affinity_matrix) @@ -97,17 +104,20 @@ def __init__( ) def _set_seed(self): - self.rng.manual_seed(self.seed + 10 * self.epoch + 100 * self.rank) + if self.shuffle: + self.rng.manual_seed(self.seed + 10 * self.epoch + 100 * self.rank) + else: + self.rng.manual_seed(self.seed + 100 * self.rank) def _set_num_chunks_per_seg_epoch(self, num_chunks_per_seg_epoch): if num_chunks_per_seg_epoch == "auto": self._compute_num_chunks_per_seg_epoch_auto() else: - self.num_egs_per_utt_epoch = num_chunks_per_seg_epoch + self.num_chunks_per_seg_epoch = num_chunks_per_seg_epoch def _compute_num_chunks_per_seg_epoch_auto(self): seg_set = self.seg_set - avg_seg_length = np.mean(seg_set[self.length_column]) + avg_seg_length = np.mean(seg_set[self.length_name]) avg_chunk_length = (self.max_chunk_length + self.min_chunk_length) / 2 self.num_chunks_per_seg_epoch = math.ceil(avg_seg_length / avg_chunk_length) logging.debug( @@ -133,27 +143,42 @@ def _gather_class_info(self): # we need the maximum/minimum segment duration for each class. 
max_dur = np.zeros(len(self.class_info))
         min_dur = np.zeros(len(self.class_info))
+        total_dur = np.zeros(len(self.class_info))
         for i, c in enumerate(self.class_info["id"]):
-            seg_idx = self.seg_set[self.class_column] == c
-            durs_i = self.seg_set.loc[seg_idx]
-            max_dur[i] = durs_i.max()
-            min_dur[i] = durs_i.min()
+            seg_idx = self.seg_set[self.class_name] == c
+            if seg_idx.sum() > 0:
+                durs_i = self.seg_set.loc[seg_idx, self.length_name]
+                max_dur[i] = durs_i.max()
+                min_dur[i] = durs_i.min()
+                total_dur[i] = durs_i.sum()
+            else:
+                max_dur[i] = min_dur[i] = total_dur[i] = 0
 
         self.class_info["max_seg_duration"] = max_dur
         self.class_info["min_seg_duration"] = min_dur
+        self.class_info["total_duration"] = total_dur
+
+        self.map_idx_to_ids = self.class_info[["class_idx", "id"]]
+        self.map_idx_to_ids.set_index("class_idx", inplace=True)
+
+    def _set_class_weights(self):
+        if self.weight_mode == "uniform":
+            self.class_info.set_uniform_weights()
+        elif self.weight_mode == "dataset-prior":
+            weights = self.class_info["total_duration"].values
+            self.class_info.set_weights(weights)
+
+        if self.weight_exponent != 1.0:
+            self.class_info.exp_weights(self.weight_exponent)
 
         zero_weight = self.class_info["min_seg_duration"] < self.min_chunk_length
         if np.any(zero_weight):
-            self.class_info.loc[zero_weight, "weights"] = 0
-            self.class_info.loc["weights"] /= self.class_info["weights"].sum()
+            self.class_info.set_zero_weight(zero_weight)
 
         self.var_weights = np.any(
-            self.seg_set[self.length_column] < self.max_chunk_length
+            self.seg_set[self.length_name] < self.max_chunk_length
         )
 
-        self.map_idx_to_ids = self.class_info[["class_idx", "ids"]]
-        self.map_idx_to_ids.set_index("class_idx", inplace=True)
-
     @property
     def hard_prototype_mining(self):
         return self.num_hard_prototypes > 1
@@ -186,14 +211,14 @@ def _compute_batch_size(self, chunk_length):
         return int(self.min_batch_size * self.max_chunk_length / chunk_length)
 
     def _compute_num_classes_per_batch(self, batch_size):
-        num_classes = batch_size / self.num_segs_per_class / self.num_egs_per_utt
+        num_classes = batch_size / self.num_segs_per_class / self.num_chunks_per_seg
         if self.hard_prototype_mining:
             num_classes /= self.num_hard_prototypes
         return int(math.ceil(num_classes))
 
     def _get_class_weights(self, chunk_length):
         if not self.var_weights:
-            return self.class_info["weights"].values
+            return torch.as_tensor(self.class_info["weights"].values)
 
         # get classes where all segments are shorter than
         # chunk length and put weight to 0
@@ -205,7 +230,7 @@ def _get_class_weights(self, chunk_length):
         class_weights[zero_idx] = 0.0
         # renormalize weights
         class_weights /= class_weights.sum()
-        return class_weights
+        return torch.as_tensor(class_weights)
 
     def _sample_classes(self, num_classes, chunk_length):
         weights = self._get_class_weights(chunk_length)
@@ -213,7 +238,8 @@ def _sample_classes(self, num_classes, chunk_length):
             weights,
             num_samples=num_classes,
             replacement=True,
-        )
+            generator=self.rng,
+        ).numpy()
 
         class_ids = self.class_info.iloc[row_idx].id.values
         if self.hard_prototype_mining:
@@ -231,29 +257,34 @@ def _sample_segs(self, class_ids, chunk_length):
         for c in class_ids:
             # for each class we sample segments longer than chunk length
             # get segments belonging to c
-            seg_mask = (self.seg_set[self.class_column] == c) & (
-                self.seg_set[self.length_column] > chunk_length
+            seg_mask = (self.seg_set[self.class_name] == c) & (
+                self.seg_set[self.length_name] >= chunk_length
             )
-            seg_ids_c = self.seg_set.loc[seg_mask, "id"]
+            seg_ids_c = self.seg_set.loc[seg_mask, "id"].values
 
             # sample num_segs_per_class random segments
+            if len(seg_ids_c) == 0:
+                logging.warning(
+                    "no segments of class=%s longer than chunk-length=%.2f: %s",
+                    c,
+                    chunk_length,
+                    self.class_info.loc[c],
+                )
             sel_seg_idx_c = torch.randint(
                 low=0,
                 high=len(seg_ids_c),
                 size=(self.num_segs_per_class,),
                 generator=self.rng,
-            )
-            sel_seg_ids_c = seg_ids_c[sel_seg_idx_c]
+            ).numpy()
+            sel_seg_ids_c = list(seg_ids_c[sel_seg_idx_c])
             seg_ids.extend(sel_seg_ids_c)
 
         return seg_ids
 
     def _sample_chunks(self, seg_ids, chunk_length):
         chunks = []
-        scale = self.seg_set.loc[seg_ids, self.length_column] - chunk_length
+        scale = (
+            torch.as_tensor(self.seg_set.loc[seg_ids, self.length_name].values)
+            - chunk_length
+        )
         for i in range(self.num_chunks_per_seg):
             start = scale * torch.rand(size=(len(seg_ids),), generator=self.rng)
-            chunks_i = [(id, s, chunk_length) for id, s in zip(seg_ids, start)]
-            chunks.expand(chunks_i)
+            chunks_i = [(id, s.item(), chunk_length) for id, s in zip(seg_ids, start)]
+            chunks.extend(chunks_i)
 
         return chunks
 
@@ -263,7 +294,7 @@ def __next__(self):
             raise StopIteration
 
         chunk_length = self._sample_chunk_length()
-        batch_size = self._compute_batch_size()
+        batch_size = self._compute_batch_size(chunk_length)
        num_classes = self._compute_num_classes_per_batch(batch_size)
         class_ids = self._sample_classes(num_classes, chunk_length)
         seg_ids = self._sample_segs(class_ids, chunk_length)
@@ -286,9 +317,11 @@ def filter_args(**kwargs):
             "num_chunks_per_seg_epoch",
             "num_segs_per_class",
             "num_chunks_per_seg",
+            "weight_exponent",
+            "weight_mode",
             "num_hard_prototypes",
-            "class_column",
-            "length_column",
+            "class_name",
+            "length_name",
             "iters_per_epoch",
             "batch_size",
             "shuffle",
@@ -373,6 +406,20 @@ def add_class_args(parser, prefix=None):
             default=1,
             help=("number of chunks per segment in batch"),
         )
+
+        parser.add_argument(
+            "--weight-exponent",
+            default=1.0,
+            type=float,
+            help=("exponent for class weights"),
+        )
+        parser.add_argument(
+            "--weight-mode",
+            default="custom",
+            choices=["custom", "uniform", "dataset-prior"],
+            help=("method used to define the class weights"),
+        )
+
         parser.add_argument(
             "--num-hard-prototypes",
             type=int,
@@ -394,12 +441,12 @@ def add_class_args(parser, prefix=None):
         )
 
         parser.add_argument(
-            "--length-column",
+            "--length-name",
             default="duration",
             help="which column in the segment table indicates the duration of the segment",
         )
         parser.add_argument(
-            "--class-column",
+            "--class-name",
             default="class_id",
             help="which column in the segment table indicates the class of the segment",
         )

diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py
index a971f8ce..2f5cc610 100644
--- a/hyperion/torch/data/seg_chunk_sampler.py
+++ b/hyperion/torch/data/seg_chunk_sampler.py
@@ -11,6 +11,7 @@
 import pandas as pd
 
 import torch
+from ...utils.segment_set import SegmentSet
 from .hyp_sampler import HypSampler
 from .seg_sampler import SegSampler
 import torch.distributed as dist
@@ -23,10 +24,10 @@ def __init__(
         min_chunk_length,
         max_chunk_length=None,
         base_sampler=SegSampler,
-        length_column="duration",
+        length_name="duration",
         shuffle=False,
         seed=1234,
-        **base_kwargs
+        **base_kwargs,
     ):
         super().__init__(shuffle=shuffle, seed=seed)
@@ -37,15 +38,17 @@ def __init__(
         )
         self.avg_chunk_length = (max_chunk_length + min_chunk_length) / 2
         self.chunk_set = None
-        self.length_column = length_column
+        self.length_name = length_name
         self.chunk_sampler = base_sampler
+        if "subbase_sampler" in base_kwargs:
+            base_kwargs["base_sampler"] = base_kwargs.pop("subbase_sampler")
+
         self.base_kwargs = base_kwargs
         self.base_kwargs["seed"]
= seed self.base_kwargs["shuffle"] = shuffle - if "subbase_sampler" in base_kwargs: - base_kwargs["base_sampler"] = base_kwargs.pop("subbase_sampler") self.__iter__() + self.avg_batch_size = self._seg_sampler.avg_batch_size def __len__(self): return len(self._seg_sampler) @@ -78,7 +81,7 @@ def get_random_duration(self): def _create_chunks(self): chunks = [] - for id, len in zip(self.seg_set["id"], self.seg_set[self.length_column]): + for id, len in zip(self.seg_set["id"], self.seg_set[self.length_name]): if len < self.min_chunk_length: # discard too short sequences continue @@ -88,51 +91,46 @@ def _create_chunks(self): start = 0 for i in range(num_chunks - 1): dur = self.get_random_duration() - chunk = (id, start, dur) + chunk = (f"{id}-{i}", id, start, dur) chunks.append(chunk) start += dur # special treatment for last chunk we get from the recording remainder = len - start + chunk_id = f"{id}-{num_chunks - 1}" if remainder > self.max_chunk_length: # here we discard part of the end - chunk = (id, start, self.max_chunk_length) + chunk = (chunk_id, id, start, self.max_chunk_length) elif remainder < self.min_chunk_length: # here we overlap with second last chunk - chunk = (id, len - self.min_chunk_length, self.min_chunk_length) + chunk = ( + chunk_id, + id, + len - self.min_chunk_length, + self.min_chunk_length, + ) else: # here the last chunk is what it is left - chunk = (id, start, remainder) + chunk = (chunk_id, id, start, remainder) chunks.append(chunk) - self.chunk_set = pd.DataFrame( - chunks, columns=["id", "chunk_start", self.length_column] + chunk_set = pd.DataFrame( + chunks, columns=["id", "seg_id", "chunk_start", self.length_name] ) + self.chunk_set = SegmentSet(chunk_set) def __iter__(self): super().__iter__() self._create_chunks() - self._seg_sampler = SegSampler(self.chunk_set, self._base_kwargs) + self._seg_sampler = SegSampler(self.chunk_set, **self.base_kwargs) self._seg_sampler.set_epoch(self.epoch) self._seg_sampler.__iter__() return self def __next__(self): - return next(self._seg_sampler) - # if self.batch == self._len: - # raise StopIteration - - # start = (self.batch -1)*self.batch_size - # chunks = self.chunks[start:start+self.batch_size] - - # if self.batch == 0: - # logging.info("batch 0 chunks=%s", str(chunks[:10])) - - # self.batch +=1 - # return chunks @staticmethod def filter_args(**kwargs): @@ -140,7 +138,7 @@ def filter_args(**kwargs): valid_args = ( "min_chunk_length", "max_chunk_length", - "length_column", + "length_name", "shuffle", "seed", ) diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 6802cc8e..73319dca 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -20,7 +20,7 @@ def __init__( min_batch_size=1, max_batch_size=None, max_batch_length=None, - length_column="duration", + length_name="duration", shuffle=False, drop_last=False, seed=1234, @@ -31,31 +31,31 @@ def __init__( self.max_batch_size = max_batch_size self.max_batch_length = max_batch_length self.var_batch_size = max_batch_length is not None - self.length_column = length_column + self.length_name = length_name if self.var_batch_size: avg_batch_size = max_batch_length / torch.mean( - self.seg_set[self.length_column] + self.seg_set[self.length_name] ) else: avg_batch_size = min_batch_size - len = len(self.seg_set) / avg_batch_size / self.world_size + self.avg_batch_size = avg_batch_size + + num_batches = len(self.seg_set) / avg_batch_size / self.world_size if drop_last: - self._len = int(len) + self._len = 
int(num_batches) else: - self._len = int(math.ceil(len)) + self._len = int(math.ceil(num_batches)) self._permutation = None - @property - def seg_set(self): - return self.dataset - def __len__(self): return self._len def _shuffle_segs(self): - self._permutation = torch.randperm(len(self.seg_set), generator=self.rng) + self._permutation = torch.randperm( + len(self.seg_set), generator=self.rng + ).numpy() def __iter__(self): super().__iter__() @@ -95,25 +95,31 @@ def __next__(self): assert len(idxs) > self.min_batch_size else: - stop = min(self.start + self.min_batch_size, len(self.seg_set)) + stop = min( + self.start + self.world_size * self.min_batch_size, len(self.seg_set) + ) if self.shuffle: - idx = self._permutation[self.start : stop] + idx = self._permutation[self.start : stop : self.world_size] else: - idx = slice(self.start, stop) - self.start + idx = slice(self.start, stop, self.world_size) + + self.start += self.world_size * self.min_batch_size - seg_ids = self.seg_set.iloc[idx].id + if "chunk_start" in self.seg_set: + chunks = self.seg_set.iloc[idx] + seg_ids = [ + (id, s, d) + for id, s, d in zip( + chunks.seg_id, chunks.chunk_start, chunks[self.length_name] + ) + ] + else: + seg_ids = self.seg_set.iloc[idx].id if self.batch == 0: logging.info("batch 0 chunks=%s", str(seg_ids[:10])) self.batch += 1 - if "chunk_start" in self.seg_set: - chunks = self.seg_set.loc[ - seg_ids, ["chunk_start", self.length_column] - ].values - return [(id, chunk[0], chunk[1]) for id, chunk in zip(seg_ids, chunks)] - return seg_ids @staticmethod @@ -123,7 +129,7 @@ def filter_args(**kwargs): "min_batch_size", "max_batch_size", "max_batch_length", - "length_column", + "length_name", "shuffle", "drop_last", "seed", @@ -181,7 +187,7 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - "--length-column", + "--length-name", default="duration", help="which column in the segment table indicates the duration of the file", ) diff --git a/hyperion/torch/data/seg_sampler_factory.py b/hyperion/torch/data/seg_sampler_factory.py index e3ba84f8..f09095e6 100644 --- a/hyperion/torch/data/seg_sampler_factory.py +++ b/hyperion/torch/data/seg_sampler_factory.py @@ -56,7 +56,11 @@ def create( sampler_kwargs.update(base_sampler_kwargs) if sampler_type in ["class_weighted_random_seg_chunk_sampler"]: - sampler_kwargs["class_info"] = dataset.class_info + try: + class_name = sampler_kwargs["class_name"] + except: + class_name = "class_id" + sampler_kwargs["class_info"] = dataset.class_info[class_name] logging.info(f"sampler-args={sampler_kwargs}") @@ -77,8 +81,8 @@ def filter_args(**kwargs): "num_segs_per_class", "num_chunks_per_seg", "num_hard_prototypes", - "class_column", - "length_column", + "class_name", + "length_name", "iters_per_epoch", "batch_size", "shuffle", @@ -115,12 +119,6 @@ def add_class_args(parser, prefix=None): help=("minimum length of the segment chunks"), ) - parser.add_argument( - "--min-chunk-length", - type=float, - default=4.0, - help=("minimum length of the segment chunks"), - ) parser.add_argument( "--max-chunk-length", type=float, @@ -211,12 +209,12 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - "--length-column", + "--length-name", default="duration", help="which column in the segment table indicates the duration of the segment", ) parser.add_argument( - "--class-column", + "--class-name", default="class_id", help="which column in the segment table indicates the class of the segment", ) diff --git a/hyperion/torch/lr_schedulers/factory.py 
b/hyperion/torch/lr_schedulers/factory.py
index 1a542bf2..3fef6e93 100644
--- a/hyperion/torch/lr_schedulers/factory.py
+++ b/hyperion/torch/lr_schedulers/factory.py
@@ -133,7 +133,7 @@ def create(
                 update_lr_on_opt_step=update_lr_on_opt_step,
             )
 
-        if lrsch_type == "cos_lr":
+        if lrsch_type == "triangular":
             return TriangularLR(
                 optimizer,
                 t,
@@ -251,7 +251,7 @@ def add_class_args(parser, prefix=None):
         )
         parser.add_argument(
             "--gamma",
-            default=1 / 100,
+            default=1.0,
             type=float,
             help=("LR decay rate for each restart in cos/triangular lr"),
         )

diff --git a/hyperion/torch/lr_schedulers/triangular_lr.py b/hyperion/torch/lr_schedulers/triangular_lr.py
index c2b66c42..f2578e1d 100644
--- a/hyperion/torch/lr_schedulers/triangular_lr.py
+++ b/hyperion/torch/lr_schedulers/triangular_lr.py
@@ -84,9 +84,9 @@ def get_lr(self, step):
         )
 
         alpha = self.gamma ** self.num_restarts
-        x = math.abs(2 * x / self.T - 1)
+        x = abs(2 * x / self.T - 1)
         return [
-            eta_min + (alpha * eta_max - eta_min) * math.max(0, 1 - x)
+            eta_min + (alpha * eta_max - eta_min) * max(0, 1 - x)
             for eta_max, eta_min in zip(self.base_lrs, self.min_lrs)
         ]

diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py
index 4e29dab5..5f573904 100644
--- a/hyperion/torch/trainers/torch_trainer.py
+++ b/hyperion/torch/trainers/torch_trainer.py
@@ -207,6 +207,17 @@ def __init__(
             self.optimizer, swa_lr=self.swa_lr, anneal_epochs=self.swa_anneal_epochs
         )
 
+    def set_epoch(self, data_loader):
+        try:
+            data_loader.dataset.set_epoch(self.cur_epoch)
+        except AttributeError:
+            logging.warning("dataset doesn't have set_epoch member function")
+
+        try:
+            data_loader.batch_sampler.set_epoch(self.cur_epoch)
+        except AttributeError:
+            logging.warning("sampler doesn't have set_epoch member function")
+
     def fit(self, train_data, val_data=None):
         """Training function, it performs the training and validation epochs
 
@@ -223,7 +234,7 @@ def fit(self, train_data, val_data=None):
         val_logs = {}
         self.loggers.on_train_begin(epochs=self.epochs)
         for epoch in range(self.cur_epoch, self.epochs):
-
+            self.set_epoch(train_data)
             self.loggers.on_epoch_begin(epoch, batches=len(train_data))
             if self.lr_scheduler is not None:
                 # this is needed by cosine scheduler
@@ -232,6 +243,7 @@ def fit(self, train_data, val_data=None):
 
             logs = self.train_epoch(train_data)
             if val_data is not None:
+                self.set_epoch(val_data)
                 val_logs = self.validation_epoch(val_data)
                 logs.update(val_logs)
 
@@ -262,7 +274,6 @@ def fit(self, train_data, val_data=None):
             self.save_swa_model(logs)
 
     def set_train_mode(self):
-        # self.model.train_mode = self.train_mode
         self.model.set_train_mode(self.train_mode)
 
     def train_epoch(self, data_loader):

diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py
index 2aed18c1..ff98c7c5 100644
--- a/hyperion/utils/class_info.py
+++ b/hyperion/utils/class_info.py
@@ -2,6 +2,10 @@
  Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
 
 from .info_table import InfoTable
 
@@ -13,16 +17,57 @@ def __init__(self, df):
             self.add_class_idx()
 
         if "weights" not in self.df:
-            self.add_equal_weights()
+            self.set_uniform_weights()
         else:
-            self.df['weights'] /= self.df['weigths'].sum()
+            self.df["weights"] /= self.df["weights"].sum()
 
     def add_class_idx(self):
         self.df["class_idx"] = [i for i in range(len(self.df))]
 
-    def add_equal_weights(self):
+    def set_uniform_weights(self):
         self.df["weights"] = 1 / len(self.df)
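 
+    # The weight helpers below re-normalize self.df["weights"] after every
+    # update, so that the class weights always sum to 1.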
+ def set_weights(self, weights): + self.df["weights"] = weights / weights.sum() + + def exp_weights(self, x): + weights = self.df["weights"] ** x + self.set_weights(weights) + + def set_zero_weight(self, id): + self.df.loc[id, "weights"] = 0 + self.df["weights"] /= self.df["weights"].sum() + @property def weights(self, id): return self.df.loc[id, "weights"] + + @property + def num_classes(self): + return self.df["class_idx"].values.max() + 1 + + @classmethod + def load(cls, file_path, sep=None): + """Loads utt2info list from text file. + + Args: + file_path: File to read the list. + sep: Separator between the key and file_path in the text file. + dtype: Dictionary with the dtypes of each column. + Returns: + Utt2Info object + """ + file_path = Path(file_path) + ext = file_path.suffix + if ext == "": + # if no extension we load as kaldi utt2spk file + df = pd.read_csv( + file_path, + sep=" ", + header=None, + names=["id"], + dtype={"id": np.str}, + ) + return cls(df) + + return super().load(file_path, sep) diff --git a/hyperion/utils/feature_set.py b/hyperion/utils/feature_set.py index 456cf99b..986a21b9 100644 --- a/hyperion/utils/feature_set.py +++ b/hyperion/utils/feature_set.py @@ -26,7 +26,7 @@ def save(self, file_path, sep=None): file_path = Path(file_path) file_path.parent.mkdir(parents=True, exist_ok=True) ext = file_path.suffix - if ext == "": + if ext == ".scp": # if no extension we save as kaldi feats.scp file from .scp_list import SCPList @@ -55,7 +55,7 @@ def load(cls, file_path, sep=None): """ file_path = Path(file_path) ext = file_path.suffix - if ext == "": + if ext == ".scp": # if no extension we load as kaldi feats.scp file from .scp_list import SCPList diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 247001c0..25632941 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -24,7 +24,7 @@ class InfoTable(object): def __init__(self, df): self.df = df - assert "id" in df + assert "id" in df, f"info_table={df}" self.df.set_index("id", drop=False, inplace=True) def copy(self): @@ -59,6 +59,10 @@ def loc(self): def __getitem__(self): return self.df.__getitem__ + @property + def __setitem__(self): + return self.df.__setitem__ + @property def __contains__(self): return self.df.__contains__ @@ -73,7 +77,7 @@ def save(self, file_path, sep=None): file_path = Path(file_path) file_path.parent.mkdir(parents=True, exist_ok=True) ext = file_path.suffix - if ext == "": + if ext in ["", ".scp"]: # if no extension we save as kaldi utt2spk file self.df.to_csv(file_path, sep=" ", header=False, index=False) return @@ -96,7 +100,7 @@ def load(cls, file_path, sep=None): """ file_path = Path(file_path) ext = file_path.suffix - if ext == "": + if ext in ["", ".scp"]: # if no extension we load as kaldi utt2spk file df = pd.read_csv( file_path, @@ -105,11 +109,12 @@ def load(cls, file_path, sep=None): names=["id", "class_id"], dtype={"id": np.str, "class_id": np.str}, ) + else: + if sep is None: + sep = "\t" if ".tsv" in ext else "," - if sep is None: - sep = "\t" if ".tsv" in ext else "," + df = pd.read_csv(file_path, sep=sep) - df = pd.read_csv(file_path, sep=sep) return cls(df) def sort(self, column="id", ascending=True): diff --git a/hyperion/utils/recording_set.py b/hyperion/utils/recording_set.py index ad6f65f6..9695cef3 100644 --- a/hyperion/utils/recording_set.py +++ b/hyperion/utils/recording_set.py @@ -26,7 +26,7 @@ def save(self, file_path, sep=None): file_path = Path(file_path) file_path.parent.mkdir(parents=True, exist_ok=True) ext 
= file_path.suffix - if ext == "": + if ext == ".scp": # if no extension we save as kaldi feats.scp file from .scp_list import SCPList @@ -48,7 +48,7 @@ def load(cls, file_path, sep=None): """ file_path = Path(file_path) ext = file_path.suffix - if ext == "": + if ext == ".scp": # if no extension we load as kaldi feats.scp file from .scp_list import SCPList diff --git a/hyperion/utils/segment_set.py b/hyperion/utils/segment_set.py index 4332dea3..f9da69fa 100644 --- a/hyperion/utils/segment_set.py +++ b/hyperion/utils/segment_set.py @@ -9,3 +9,9 @@ class SegmentSet(InfoTable): def __init__(self, df): super().__init__(df) + + def recording_ids(self, ids): + if "recording_id" in self.df: + return self.df.loc[ids, "recording_id"] + + return ids diff --git a/hyperion/utils/trial_ndx.py b/hyperion/utils/trial_ndx.py index 783f39c4..58a36aa7 100644 --- a/hyperion/utils/trial_ndx.py +++ b/hyperion/utils/trial_ndx.py @@ -320,7 +320,7 @@ def __ne__(self, other): def __cmp__(self, other): """Comparison operator""" - if self.__eq__(oher): + if self.__eq__(other): return 0 return 1 diff --git a/hyperion/utils/trial_scores.py b/hyperion/utils/trial_scores.py index 19e17190..164b39df 100644 --- a/hyperion/utils/trial_scores.py +++ b/hyperion/utils/trial_scores.py @@ -402,7 +402,7 @@ def __ne__(self, other): def __cmp__(self, other): """Comparison operator""" - if self.__eq__(oher): + if self.__eq__(other): return 0 return 1 From 80a24987a71047f7195b806f0920752b6795f04c Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Sun, 4 Sep 2022 17:53:57 -0400 Subject: [PATCH 024/154] modified sparse scores --- hyperion/utils/sparse_trial_scores.py | 41 +++++++++++++++------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/hyperion/utils/sparse_trial_scores.py b/hyperion/utils/sparse_trial_scores.py index d269c629..0684c57e 100644 --- a/hyperion/utils/sparse_trial_scores.py +++ b/hyperion/utils/sparse_trial_scores.py @@ -34,7 +34,7 @@ class SparseTrialScores(TrialScores): """ def __init__(self, model_set=None, seg_set=None, scores=None, score_mask=None): - super(SparseTrialScores, self).__init__(model_set, seg_set, scores, score_mask) + super().__init__(model_set, seg_set, scores, score_mask) def save_h5(self, file_path): raise NotImplementedError() @@ -123,7 +123,7 @@ def validate(self): assert len(np.unique(self.seg_set)) == len(self.seg_set) if self.scores is None: self.scores = sparse.csr_matrix( - (len(model_set), len(seg_set)), dtype=float_cpu() + (len(self.model_set), len(self.seg_set)), dtype=float_cpu() ) else: assert self.scores.shape == (len(self.model_set), len(self.seg_set)) @@ -165,21 +165,6 @@ def filter(self, model_set, seg_set, keep=True, raise_missing=True): if raise_missing: raise Exception("some scores were not computed") - # model_set = self.model_set[mod_idx] - # set_set = self.seg_set[seg_idx] - # ix = np.ix_(mod_idx, seg_idx) - - # logging.info('hola1') - # new_src = [[self.scores[r,c], i, j] for i,r in enumerate(mod_idx) for j,c in enumerate(seg_idx) if self.score_mask[r,c]] - # logging.info('hola2') - # new_data = np.array([r[0] for r in new_src], dtype=float_cpu()) - # new_row = np.array([r[1] for r in new_src], dtype=np.int) - # new_col = np.array([r[2] for r in new_src], dtype=np.int) - # logging.info('hola3') - # shape = (len(model_set), len(seg_set)) - # scores = sparse.coo_matrix((new_data, (new_row, new_col)), shape=shape).tocsr() - # score_mask = sparse.coo_matrix((np.ones(new_data.shape, dtype=np.bool), (new_row, new_col)), shape=shape).tocsr() - num_mod = 
len(model_set)
         num_seg = len(seg_set)
         shape = (num_mod, num_seg)
@@ -288,9 +273,29 @@ def get_tar_non(self, key):
             non = np.array(scr.scores[non_mask])[0]
         return tar, non
 
+    def get_valid_scores(self, ndx=None):
+        if ndx is None:
+            scr = self
+        else:
+            scr = self.align_with_ndx(ndx)
+
+        scores = np.array(scr.scores[scr.score_mask])[0]
+        return scores
+
+    def set_valid_scores(self, scores, ndx=None):
+        if ndx is not None:
+            scr = self.align_with_ndx(ndx)
+            self.model_set = scr.model_set
+            self.seg_set = scr.seg_set
+            self.scores = scr.scores
+            self.score_mask = scr.score_mask
+
+        self.scores[self.score_mask] = scores
+
     @classmethod
     def from_trial_scores(cls, scr):
-        scores = sparse.csr_matrix(scr.scores)
+        scores = scr.scores * scr.score_mask
+        scores = sparse.csr_matrix(scores)
         score_mask = sparse.csr_matrix(scr.score_mask)
         scores.eliminate_zeros()
         score_mask.eliminate_zeros()

From 9f1f73b30ed615ebfcc56c809106dd138c60e7c3 Mon Sep 17 00:00:00 2001
From: neillu23
Date: Mon, 3 Oct 2022 19:45:43 -0400
Subject: [PATCH 025/154] software structure for transducer

---
 hyperion/bin/train_wav2vec2transducer.py      | 199 +++++++++
 hyperion/bin/train_wav2vec2xvector.py         |  23 +-
 hyperion/torch/data/__init__.py               |   2 +-
 hyperion/torch/models/__init__.py             |   3 +
 hyperion/torch/models/transducer/__init__.py  |   7 +
 .../torch/models/transducer/transducer.py     | 126 ++++++
 .../torch/models/wav2transducer/__init__.py   |   7 +
 .../wav2transducer/hf_wav2transducer.py       | 387 ++++++++++++++++++
 .../wav2transducer/hf_wav2vec2_transducer.py  | 387 ++++++++++++++++++
 hyperion/torch/trainers/__init__.py           |   3 +
 hyperion/torch/trainers/transducer_trainer.py | 160 ++++++++
 11 files changed, 1294 insertions(+), 10 deletions(-)
 create mode 100755 hyperion/bin/train_wav2vec2transducer.py
 create mode 100644 hyperion/torch/models/transducer/__init__.py
 create mode 100644 hyperion/torch/models/transducer/transducer.py
 create mode 100644 hyperion/torch/models/wav2transducer/__init__.py
 create mode 100644 hyperion/torch/models/wav2transducer/hf_wav2transducer.py
 create mode 100644 hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py
 create mode 100644 hyperion/torch/trainers/transducer_trainer.py

diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py
new file mode 100755
index 00000000..7f6fffef
--- /dev/null
+++ b/hyperion/bin/train_wav2vec2transducer.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import sys
+import os
+from pathlib import Path
+from jsonargparse import (
+    ArgumentParser,
+    ActionConfigFile,
+    ActionParser,
+    namespace_to_dict,
+)
+import time
+import logging
+import multiprocessing
+
+import numpy as np
+
+import torch
+import torch.nn as nn
+
+from hyperion.hyp_defs import config_logger, set_float_cpu
+from hyperion.torch.utils import ddp
+from hyperion.torch.trainers import TransducerTrainer as Trainer
+from hyperion.torch.data import AudioDataset as AD
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.metrics import CategoricalAccuracy
+from hyperion.torch.models import HFWav2Vec2Transducer
+
+model_dict = {
+    "hf_wav2vec2transducer": HFWav2Vec2Transducer,
+}
+
+
+def init_data(partition, rank, num_gpus, **kwargs):
+
+    kwargs = kwargs["data"][partition]
+    ad_args = AD.filter_args(**kwargs["dataset"])
+    sampler_args = Sampler.filter_args(**kwargs["sampler"])
+    if rank == 0:
+        logging.info("{} audio dataset
args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + ad_args["is_val"] = partition == "val" + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = Sampler(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_model(num_classes, rank, model_class, **kwargs): + model_args = model_class.filter_args(**kwargs["model"]) + if rank == 0: + logging.info("model network args={}".format(model_args)) + # TODO: check model_args + model_args["transducer"]["num_classes"] = num_classes + model = model_class(**model_args) + if rank == 0: + logging.info("model={}".format(model)) + return model + + +def train_model(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + model = init_model(train_loader.dataset.num_classes, **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(model_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + train_parser = ArgumentParser(prog="") + AD.add_class_args(train_parser, prefix="dataset", skip={}) + Sampler.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + Sampler.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + + parser.add_argument("--data.train.dataset.class_file", action=ActionParser(parser=data_parser)) + parser.add_argument("--data.val.dataset.class_file", action=ActionParser(parser=data_parser)) + parser.add_argument("--data.train.data_loader.num_workers", action=ActionParser(parser=data_parser)) + parser.add_argument("--data.val.data_loader.num_workers", action=ActionParser(parser=data_parser)) + # parser.link_arguments( + # "data.train.dataset.class_file", "data.val.dataset.class_file" + # ) + # parser.link_arguments( + # "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + # ) 
+ # parser.link_arguments( + # "data.train.sampler.batch_size", "data.val.sampler.batch_size" + # ) + + model_class.add_class_args(parser, prefix="model") + Trainer.add_class_args( + parser, prefix="trainer", train_modes=model_class.valid_train_modes() + ) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train Wav2Vec2Transducer model from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + + for k, v in model_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + model_type = args.subcommand + args_sc = vars(args)[model_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.model_class = model_dict[model_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_model(gpu_id, args_sc) diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index e92b9a1a..08913605 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -143,15 +143,20 @@ def make_parser(model_class): data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) - parser.link_arguments( - "data.train.dataset.class_file", "data.val.dataset.class_file" - ) - parser.link_arguments( - "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" - ) - parser.link_arguments( - "data.train.sampler.batch_size", "data.val.sampler.batch_size" - ) + + parser.add_argument("--data.train.dataset.class_file", action=ActionParser(parser=data_parser)) + parser.add_argument("--data.val.dataset.class_file", action=ActionParser(parser=data_parser)) + parser.add_argument("--data.train.data_loader.num_workers", action=ActionParser(parser=data_parser)) + parser.add_argument("--data.val.data_loader.num_workers", action=ActionParser(parser=data_parser)) + # parser.link_arguments( + # "data.train.dataset.class_file", "data.val.dataset.class_file" + # ) + # parser.link_arguments( + # "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + # ) + # parser.link_arguments( + # "data.train.sampler.batch_size", "data.val.sampler.batch_size" + # ) model_class.add_class_args(parser, prefix="model") Trainer.add_class_args( diff --git a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index 752cf0f5..aebcfe8a 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -10,5 +10,5 @@ from .audio_dataset import AudioDataset # samplers -# from .weighted_seq_sampler import ClassWeightedSeqSampler +from .weighted_seq_sampler import ClassWeightedSeqSampler from .seg_sampler_factory import SegSamplerFactory diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index e953f58c..5a1368e2 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -18,5 +18,8 @@ 
 HFWavLM2ResNet1dXVector,
 )
+
+from .wav2transducer import HFWav2Vec2Transducer
+
 from .vae.vae import VAE
 from .vae.vq_vae import VQVAE
diff --git a/hyperion/torch/models/transducer/__init__.py b/hyperion/torch/models/transducer/__init__.py
new file mode 100644
index 00000000..20372911
--- /dev/null
+++ b/hyperion/torch/models/transducer/__init__.py
@@ -0,0 +1,7 @@
+"""
+ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+
+"""
+
+from .transducer import Transducer
diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py
new file mode 100644
index 00000000..8305248c
--- /dev/null
+++ b/hyperion/torch/models/transducer/transducer.py
@@ -0,0 +1,126 @@
+# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Note we use `rnnt_loss` from torchaudio, which exists only in
+torchaudio >= v0.10.0, which in turn requires torch >= v1.10.0.
+"""
+import k2
+import torch
+import torch.nn as nn
+import torchaudio
+import torchaudio.functional
+from encoder_interface import EncoderInterface
+
+from icefall.utils import add_sos
+
+
+class Transducer(nn.Module):
+    """It implements https://arxiv.org/pdf/1211.3711.pdf
+    "Sequence Transduction with Recurrent Neural Networks"
+    """
+
+    def __init__(
+        self,
+        encoder: EncoderInterface,
+        decoder: nn.Module,
+        joiner: nn.Module,
+    ):
+        """
+        Args:
+          encoder:
+            It is the transcription network in the paper. It accepts
+            two inputs: `x` of (N, T, C) and `x_lens` of shape (N,).
+            It returns two tensors: `logits` of shape (N, T, C) and
+            `logit_lens` of shape (N,).
+          decoder:
+            It is the prediction network in the paper. Its input shape
+            is (N, U) and its output shape is (N, U, C). It should contain
+            one attribute: `blank_id`.
+          joiner:
+            It has two inputs with shapes: (N, T, C) and (N, U, C). Its
+            output shape is (N, T, U, C). Note that its output contains
+            unnormalized probs, i.e., not processed by log-softmax.
+        """
+        super().__init__()
+        assert isinstance(encoder, EncoderInterface)
+        assert hasattr(decoder, "blank_id")
+
+        self.encoder = encoder
+        self.decoder = decoder
+        self.joiner = joiner
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        x_lens: torch.Tensor,
+        y: k2.RaggedTensor,
+    ) -> torch.Tensor:
+        """
+        Args:
+          x:
+            A 3-D tensor of shape (N, T, C).
+          x_lens:
+            A 1-D tensor of shape (N,). It contains the number of frames in `x`
+            before padding.
+          y:
+            A ragged tensor with 2 axes [utt][label]. It contains labels of each
+            utterance.
+        Returns:
+          Returns the transducer loss.
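(The forward body below computes this loss with `torchaudio.functional.rnnt_loss`. A minimal, self-contained sketch of that call, with all shapes and sizes invented for illustration:)

    import torch
    import torchaudio.functional as taf

    N, T, U, C = 2, 10, 3, 5  # batch, frames, label length, vocab size (toy values)
    logits = torch.randn(N, T, U + 1, C, requires_grad=True)  # joiner-style output
    targets = torch.randint(1, C, (N, U), dtype=torch.int32)  # labels, no blanks
    logit_lengths = torch.full((N,), T, dtype=torch.int32)
    target_lengths = torch.full((N,), U, dtype=torch.int32)
    loss = taf.rnnt_loss(
        logits, targets, logit_lengths, target_lengths, blank=0, reduction="sum"
    )
    loss.backward()  # scalar loss; gradients flow back to the logits

(The real forward() additionally derives the target lengths from the ragged label tensor, prepends the blank symbol as SOS for the prediction network, and zero-pads the targets.)
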
+ """ + assert x.ndim == 3, x.shape + assert x_lens.ndim == 1, x_lens.shape + assert y.num_axes == 2, y.num_axes + + assert x.size(0) == x_lens.size(0) == y.dim0 + + encoder_out, x_lens = self.encoder(x, x_lens) + assert torch.all(x_lens > 0) + + # Now for the decoder, i.e., the prediction network + row_splits = y.shape.row_splits(1) + y_lens = row_splits[1:] - row_splits[:-1] + + blank_id = self.decoder.blank_id + sos_y = add_sos(y, sos_id=blank_id) + + sos_y_padded = sos_y.pad(mode="constant", padding_value=blank_id) + sos_y_padded = sos_y_padded.to(torch.int64) + + decoder_out, _ = self.decoder(sos_y_padded) + + logits = self.joiner(encoder_out, decoder_out) + + # rnnt_loss requires 0 padded targets + # Note: y does not start with SOS + y_padded = y.pad(mode="constant", padding_value=0) + + assert hasattr(torchaudio.functional, "rnnt_loss"), ( + f"Current torchaudio version: {torchaudio.__version__}\n" + "Please install a version >= 0.10.0" + ) + + loss = torchaudio.functional.rnnt_loss( + logits=logits, + targets=y_padded, + logit_lengths=x_lens, + target_lengths=y_lens, + blank=blank_id, + reduction="sum", + ) + + return loss diff --git a/hyperion/torch/models/wav2transducer/__init__.py b/hyperion/torch/models/wav2transducer/__init__.py new file mode 100644 index 00000000..5346bc78 --- /dev/null +++ b/hyperion/torch/models/wav2transducer/__init__.py @@ -0,0 +1,7 @@ +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +""" + +from .hf_wav2vec2_transducer import HFWav2Vec2Transducer diff --git a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py new file mode 100644 index 00000000..3fed7143 --- /dev/null +++ b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py @@ -0,0 +1,387 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import contextlib +from jsonargparse import ArgumentParser, ActionParser + +import torch +import torch.nn as nn + +# import torch.nn.functional as nnf + +from ...torch_model import TorchModel +from ...utils import remove_silence + + +class HFWav2XVector(TorchModel): + """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. + + Attributes: + hf_feats: hugging face model wrapper object. + xvector: x-vector model object. + feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to + the wav2vec "num_layers". + feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more + than one layer is used. 
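(For intuition, the default "weighted-avg" fusion described above is a softmax-normalized, learned convex combination of the hidden layers. A standalone sketch, with layer count and dimensions assumed for illustration:)

    import torch
    import torch.nn as nn

    L, B, T, D = 13, 2, 50, 768  # layers, batch, frames, hidden size (assumed)
    hid_feats = [torch.randn(B, T, D) for _ in range(L)]
    feat_fuser = nn.Parameter(torch.zeros(L))      # zeros -> uniform weights at init
    stacked = torch.stack(hid_feats, dim=-1)       # (B, T, D, L)
    weights = nn.functional.softmax(feat_fuser, dim=-1)
    fused = torch.sum(stacked * weights, dim=-1)   # (B, T, D)
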
+ """ + + def __init__( + self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg" + ): + + super().__init__() + self.hf_feats = hf_feats + self.xvector = xvector + self.feat_fusion_start = feat_fusion_start + self.feat_fusion_method = feat_fusion_method + self._hf_context = contextlib.nullcontext() + self._make_fuser() + + def _make_fuser(self): + if self.feat_fusion_method == "last": + self.feat_fuser = None + return + + num_layers = self.hf_feats.num_encoder_layers + 1 - self.feat_fusion_start + layer_dim = self.hf_feats.hidden_size + if self.feat_fusion_method == "weighted-avg": + self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) + elif self.feat_fusion_method == "linear": + self.feat_fuser = nn.Linear(num_layers, 1, bias=False) + self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers + elif self.feat_fusion_method == "cat": + self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) + + def _fuse_hid_feats(self, hid_feats): + """Fuses the hidden features from the Wav2Vec model. + + Args: + hid_feats: list of hidden features Tensors from Wav2Vec model. + + Returns: + Tensor of fused features (batch, channels, time) + """ + if len(hid_feats) == 1: + # There is only one layer of features + return hid_feats[0] + + hid_feats = hid_feats[self.feat_fusion_start :] + if self.feat_fusion_method == "weighted-avg": + hid_feats = torch.stack(hid_feats, dim=-1) + norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) + feats = torch.sum(hid_feats * norm_weights, dim=-1) + elif self.feat_fusion_method == "linear": + hid_feats = torch.stack(hid_feats, dim=-1) + feats = self.feat_fuser(hid_feats).squeeze(dim=-1) + elif self.feat_fusion_method == "cat": + hid_feats = torch.cat(hid_feats, dim=-1) + feats = self.feat_fuser(hid_feats) + elif self.feat_fusion_method == "last": + feats = hid_feats[-1] + + return feats + + def compute_prototype_affinity(self): + return self.xvector.compute_prototype_affinity() + + def update_loss_margin(self, epoch): + """Updates the value of the margin in AAM/AM-softmax losses + given the epoch number + + Args: + epoch: epoch which is about to start + """ + self.xvector.update_loss_margin(epoch) + + def rebuild_output_layer( + self, + num_classes=None, + loss_type="arc-softmax", + cos_scale=64, + margin=0.3, + margin_warmup_epochs=10, + intertop_k=5, + intertop_margin=0.0, + num_subcenters=2, + ): + self.xvector.rebuild_output_layer( + num_classes=num_classes, + loss_type=loss_type, + cos_scale=cos_scale, + margin=margin, + margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, + ) + + def forward_feats( + self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False + ): + return_hid_states = ( + False + if return_feat_layers is None and self.feat_fusion_method == "last" + else True + ) + with self._hf_context: + hf_output = self.hf_feats( + x, + x_lengths, + return_hid_states=return_hid_states, + chunk_length=chunk_length, + detach_chunks=detach_chunks, + ) + feat_lengths = hf_output["hidden_states_lengths"] + if return_hid_states: + hid_feats = hf_output["hidden_states"] + feats = self._fuse_hid_feats(hid_feats) + else: + hid_feats = None + feats = hf_output["last_hidden_state"] + + feats = feats.transpose(1, 2) + if return_feat_layers is not None: + # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time) + # as the hidden features of the x-vector encoder. 
+            hid_feats = [
+                f.transpose(1, 2)
+                for i, f in enumerate(hid_feats)
+                if i in return_feat_layers
+            ]
+        else:
+            hid_feats = None
+
+        return feats, hid_feats, feat_lengths
+
+    def forward(
+        self,
+        x,
+        x_lengths=None,
+        y=None,
+        return_feat_layers=None,
+        return_enc_layers=None,
+        return_classif_layers=None,
+        return_logits=True,
+    ):
+        """Forward function. It returns the logit posteriors of the classes.
+        It can also return the hidden representations in the wav2vec feature extractor,
+        the x-vector encoder and the
+        classification head. In this case the output variable is a dictionary.
+
+        Args:
+          x: input features tensor with shape=(batch, in_feats, time)
+          x_lengths: time lengths of the features with shape=(batch,)
+          y: target classes torch.long tensor with shape=(batch,)
+          return_feat_layers: list of integers indicating, which wav2vec layers
+            we should return. If None, no wav2vec layers are returned.
+          return_enc_layers: list of integers indicating, which encoder layers
+            we should return. If None, no encoder layers are returned.
+          return_classif_layers: list of integers indicating, which classification head layers
+            we should return. If None, no head layers are returned.
+          return_logits: if True, it adds the logits to the output dictionary.
+        Returns:
+          Tensor with class logits with shape=(batch, num_classes) or
+          Dictionary with "logits", "h_enc" (list of hidden encoder layers),
+          "h_classif" (list of hidden classification head layers), "h_feats" (wav2vec features)
+        """
+        feats, hid_feats, feat_lengths = self.forward_feats(
+            x, x_lengths, return_feat_layers
+        )
+        output = self.xvector(
+            feats,
+            feat_lengths,
+            y,
+            return_enc_layers=return_enc_layers,
+            return_classif_layers=return_classif_layers,
+            return_logits=return_logits,
+        )
+
+        if not return_feat_layers:
+            return output
+
+        if not isinstance(output, dict):
+            # if the xvector just returned the logits, we put them into a dictionary
+            # to append the hid feats later.
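(Usage-wise, the method returns a plain logits tensor unless hidden layers are requested, in which case everything comes back in a dictionary. A hedged sketch, assuming `model` is a constructed instance and `x`, `x_lengths` are batched waveforms:)

    out = model(x, x_lengths, return_feat_layers=[2, 5, 8])
    logits = out["logits"]    # (batch, num_classes)
    w2v_hid = out["h_feats"]  # list of (batch, channels, time) wav2vec layers
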
+ output["logits"] = output + + output["h_feats"] = hid_feats + return output + + def extract_embed( + self, + x, + x_lengths=None, + vad_samples=None, + hf_chunk_length=0, + xvec_chunk_length=0, + embed_layer=None, + detach_chunks=False, + ): + + if vad_samples is not None: + x, x_lengths = remove_silence(x, x_lengths) + + feats, _, feat_lengths = self.forward_feats( + x, x_lengths, chunk_length=hf_chunk_length, detach_chunks=detach_chunks + ) + xvec_chunk_length = int( + xvec_chunk_length + * self.hf_feats.sample_frequency + * feats.size(-1) + // x.size(-1) + ) + return self.xvector.extract_embed( + feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks + ) + + def freeze_feat_fuser(self): + if self.feat_fuser is None: + return + + if self.feat_fusion_method == "weighted-avg": + self.feat_fuser.requires_grad = False + return + + for param in self.feat_fuser.parameters(): + param.requires_grad = False + + def freeze_hf_feats(self): + self.hf_feats.freeze() + + def freeze_hf_feature_encoder(self): + self.hf_feats.freeze_feature_encoder() + + def set_train_mode(self, mode): + if mode == self._train_mode: + return + + if mode == "full": + self.unfreeze() + elif mode == "frozen": + self.freeze() + elif mode == "ft-embed-affine": + self.unfreeze() + self.freeze_feat_fuser() + self.freeze_hf_feats() + self.xvector.freeze_preembed_layers() + elif mode in ["ft-xvector", "ft-xvector-nograd"]: + self.unfreeze() + self.freeze_hf_feats() + self.freeze_feat_fuser() + elif mode in ["hf-feats-frozen", "hf-feats-frozen-nograd"]: + self.unfreeze() + self.freeze_hf_feats() + elif mode == "hf-feat-extractor-frozen": + self.unfreeze() + self.freeze_hf_feature_encoder() + else: + raise ValueError(f"invalid train_mode={mode}") + + logging.info("train mode set to %s", mode) + + if "nograd" in mode: + logging.info("using torch.no_grad for hf_feats") + self._hf_context = torch.no_grad() + else: + self._hf_context = contextlib.nullcontext() + + self._train_mode = mode + + def _train(self, train_mode: str): + + if train_mode in ["full", "frozen"]: + super()._train(train_mode) + elif train_mode == "ft-embed-affine": + self.hf_feats.train() + self.xvector._train("ft-embed_affine") + elif train_mode in [ + "ft-xvector", + "hf-feats-frozen", + "ft-xvector-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", + ]: + self.hf_feats.train() + self.xvector._train("full") + else: + raise ValueError(f"invalid train_mode={train_mode}") + + @staticmethod + def valid_train_modes(): + return [ + "full", + "frozen", + "ft-embed-affine", + "ft-xvector", + "hf-feats-frozen", + "ft-xvector-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", + ] + + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "hf_feats", + "xvector", + "feat_fusion_start", + "feat_fusion_method", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + def get_config(self): + + hf_cfg = self.hf_feats.get_config() + xvec_cfg = self.xvector.get_config() + del hf_cfg["class_name"] + del xvec_cfg["class_name"] + config = { + "hf_feats": hf_cfg, + "xvector": xvec_cfg, + "feat_fusion_start": self.feat_fusion_start, + "feat_fusion_method": self.feat_fusion_method, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def change_config(self, hf_feats, xvector): + logging.info("changing hf wav2xvector config") + self.hf_feats.change_config(**hf_feats) + self.xvector.change_config(**xvector) + + @staticmethod + def 
add_class_args(parser, prefix=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--feat-fusion-start", + default=0, + type=int, + help=( + "the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" + "the wav2vec num_layers" + ), + ) + parser.add_argument( + "--feat-fusion-method", + default="weighted-avg", + choices=["weighted-avg", "linear", "cat", "last"], + help=( + "method to fuse the hidden layers from the wav2vec model " + "in [weighted-avg, cat]" + ), + ) + + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + help="xvector options", + ) diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py new file mode 100644 index 00000000..e83dcb8c --- /dev/null +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py @@ -0,0 +1,387 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import contextlib +from jsonargparse import ArgumentParser, ActionParser + +import torch +import torch.nn as nn + +# import torch.nn.functional as nnf + +from ...torch_model import TorchModel +from ...utils import remove_silence + + +class HFWav2Vec2Transducer(HFWav2Transducer): + """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. + + Attributes: + hf_feats: hugging face model wrapper object. + xvector: x-vector model object. + feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to + the wav2vec "num_layers". + feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more + than one layer is used. + """ + + def __init__( + self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg" + ): + + super().__init__() + self.hf_feats = hf_feats + self.xvector = xvector + self.feat_fusion_start = feat_fusion_start + self.feat_fusion_method = feat_fusion_method + self._hf_context = contextlib.nullcontext() + self._make_fuser() + + def _make_fuser(self): + if self.feat_fusion_method == "last": + self.feat_fuser = None + return + + num_layers = self.hf_feats.num_encoder_layers + 1 - self.feat_fusion_start + layer_dim = self.hf_feats.hidden_size + if self.feat_fusion_method == "weighted-avg": + self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) + elif self.feat_fusion_method == "linear": + self.feat_fuser = nn.Linear(num_layers, 1, bias=False) + self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers + elif self.feat_fusion_method == "cat": + self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) + + def _fuse_hid_feats(self, hid_feats): + """Fuses the hidden features from the Wav2Vec model. + + Args: + hid_feats: list of hidden features Tensors from Wav2Vec model. 
+
+        Returns:
+          Tensor of fused features (batch, channels, time)
+        """
+        if len(hid_feats) == 1:
+            # There is only one layer of features
+            return hid_feats[0]
+
+        hid_feats = hid_feats[self.feat_fusion_start :]
+        if self.feat_fusion_method == "weighted-avg":
+            hid_feats = torch.stack(hid_feats, dim=-1)
+            norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1)
+            feats = torch.sum(hid_feats * norm_weights, dim=-1)
+        elif self.feat_fusion_method == "linear":
+            hid_feats = torch.stack(hid_feats, dim=-1)
+            feats = self.feat_fuser(hid_feats).squeeze(dim=-1)
+        elif self.feat_fusion_method == "cat":
+            hid_feats = torch.cat(hid_feats, dim=-1)
+            feats = self.feat_fuser(hid_feats)
+        elif self.feat_fusion_method == "last":
+            feats = hid_feats[-1]
+
+        return feats
+
+    def compute_prototype_affinity(self):
+        return self.xvector.compute_prototype_affinity()
+
+    def update_loss_margin(self, epoch):
+        """Updates the value of the margin in AAM/AM-softmax losses
+        given the epoch number
+
+        Args:
+          epoch: epoch which is about to start
+        """
+        self.xvector.update_loss_margin(epoch)
+
+    def rebuild_output_layer(
+        self,
+        num_classes=None,
+        loss_type="arc-softmax",
+        cos_scale=64,
+        margin=0.3,
+        margin_warmup_epochs=10,
+        intertop_k=5,
+        intertop_margin=0.0,
+        num_subcenters=2,
+    ):
+        self.xvector.rebuild_output_layer(
+            num_classes=num_classes,
+            loss_type=loss_type,
+            cos_scale=cos_scale,
+            margin=margin,
+            margin_warmup_epochs=margin_warmup_epochs,
+            intertop_k=intertop_k,
+            intertop_margin=intertop_margin,
+            num_subcenters=num_subcenters,
+        )
+
+    def forward_feats(
+        self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False
+    ):
+        return_hid_states = (
+            False
+            if return_feat_layers is None and self.feat_fusion_method == "last"
+            else True
+        )
+        with self._hf_context:
+            hf_output = self.hf_feats(
+                x,
+                x_lengths,
+                return_hid_states=return_hid_states,
+                chunk_length=chunk_length,
+                detach_chunks=detach_chunks,
+            )
+        feat_lengths = hf_output["hidden_states_lengths"]
+        if return_hid_states:
+            hid_feats = hf_output["hidden_states"]
+            feats = self._fuse_hid_feats(hid_feats)
+        else:
+            hid_feats = None
+            feats = hf_output["last_hidden_state"]
+
+        feats = feats.transpose(1, 2)
+        if return_feat_layers is not None:
+            # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time)
+            # as the hidden features of the x-vector encoder.
+            hid_feats = [
+                f.transpose(1, 2)
+                for i, f in enumerate(hid_feats)
+                if i in return_feat_layers
+            ]
+        else:
+            hid_feats = None
+
+        return feats, hid_feats, feat_lengths
+
+    def forward(
+        self,
+        x,
+        x_lengths=None,
+        y=None,
+        return_feat_layers=None,
+        return_enc_layers=None,
+        return_classif_layers=None,
+        return_logits=True,
+    ):
+        """Forward function. It returns the logit posteriors of the classes.
+        It can also return the hidden representations in the wav2vec feature extractor,
+        the x-vector encoder and the
+        classification head. In this case the output variable is a dictionary.
+
+        Args:
+          x: input features tensor with shape=(batch, in_feats, time)
+          x_lengths: time lengths of the features with shape=(batch,)
+          y: target classes torch.long tensor with shape=(batch,)
+          return_feat_layers: list of integers indicating, which wav2vec layers
+            we should return. If None, no wav2vec layers are returned.
+          return_enc_layers: list of integers indicating, which encoder layers
+            we should return. If None, no encoder layers are returned.
+          return_classif_layers: list of integers indicating, which classification head layers
+            we should return. If None, no head layers are returned.
+          return_logits: if True, it adds the logits to the output dictionary.
+        Returns:
+          Tensor with class logits with shape=(batch, num_classes) or
+          Dictionary with "logits", "h_enc" (list of hidden encoder layers),
+          "h_classif" (list of hidden classification head layers), "h_feats" (wav2vec features)
+        """
+        feats, hid_feats, feat_lengths = self.forward_feats(
+            x, x_lengths, return_feat_layers
+        )
+        output = self.xvector(
+            feats,
+            feat_lengths,
+            y,
+            return_enc_layers=return_enc_layers,
+            return_classif_layers=return_classif_layers,
+            return_logits=return_logits,
+        )
+
+        if not return_feat_layers:
+            return output
+
+        if not isinstance(output, dict):
+            # if the xvector just returned the logits, we put them into a dictionary
+            # to append the hid feats later.
+            output["logits"] = output
+
+        output["h_feats"] = hid_feats
+        return output
+
+    def extract_embed(
+        self,
+        x,
+        x_lengths=None,
+        vad_samples=None,
+        hf_chunk_length=0,
+        xvec_chunk_length=0,
+        embed_layer=None,
+        detach_chunks=False,
+    ):
+
+        if vad_samples is not None:
+            x, x_lengths = remove_silence(x, x_lengths)
+
+        feats, _, feat_lengths = self.forward_feats(
+            x, x_lengths, chunk_length=hf_chunk_length, detach_chunks=detach_chunks
+        )
+        xvec_chunk_length = int(
+            xvec_chunk_length
+            * self.hf_feats.sample_frequency
+            * feats.size(-1)
+            // x.size(-1)
+        )
+        return self.xvector.extract_embed(
+            feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks
+        )
+
+    def freeze_feat_fuser(self):
+        if self.feat_fuser is None:
+            return
+
+        if self.feat_fusion_method == "weighted-avg":
+            self.feat_fuser.requires_grad = False
+            return
+
+        for param in self.feat_fuser.parameters():
+            param.requires_grad = False
+
+    def freeze_hf_feats(self):
+        self.hf_feats.freeze()
+
+    def freeze_hf_feature_encoder(self):
+        self.hf_feats.freeze_feature_encoder()
+
+    def set_train_mode(self, mode):
+        if mode == self._train_mode:
+            return
+
+        if mode == "full":
+            self.unfreeze()
+        elif mode == "frozen":
+            self.freeze()
+        elif mode == "ft-embed-affine":
+            self.unfreeze()
+            self.freeze_feat_fuser()
+            self.freeze_hf_feats()
+            self.xvector.freeze_preembed_layers()
+        elif mode in ["ft-xvector", "ft-xvector-nograd"]:
+            self.unfreeze()
+            self.freeze_hf_feats()
+            self.freeze_feat_fuser()
+        elif mode in ["hf-feats-frozen", "hf-feats-frozen-nograd"]:
+            self.unfreeze()
+            self.freeze_hf_feats()
+        elif mode == "hf-feat-extractor-frozen":
+            self.unfreeze()
+            self.freeze_hf_feature_encoder()
+        else:
+            raise ValueError(f"invalid train_mode={mode}")
+
+        logging.info("train mode set to %s", mode)
+
+        if "nograd" in mode:
+            logging.info("using torch.no_grad for hf_feats")
+            self._hf_context = torch.no_grad()
+        else:
+            self._hf_context = contextlib.nullcontext()
+
+        self._train_mode = mode
+
+    def _train(self, train_mode: str):
+
+        if train_mode in ["full", "frozen"]:
+            super()._train(train_mode)
+        elif train_mode == "ft-embed-affine":
+            self.hf_feats.train()
+            self.xvector._train("ft-embed_affine")
+        elif train_mode in [
+            "ft-xvector",
+            "hf-feats-frozen",
+            "ft-xvector-nograd",
+            "hf-feats-frozen-nograd",
+            "hf-feat-extractor-frozen",
+        ]:
+            self.hf_feats.train()
+            self.xvector._train("full")
+        else:
+            raise ValueError(f"invalid train_mode={train_mode}")
+
+    @staticmethod
+    def valid_train_modes():
+        return [
+            "full",
+            "frozen",
+            "ft-embed-affine",
+            "ft-xvector",
+            "hf-feats-frozen",
+            "ft-xvector-nograd",
"hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", + ] + + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "hf_feats", + "xvector", + "feat_fusion_start", + "feat_fusion_method", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + def get_config(self): + + hf_cfg = self.hf_feats.get_config() + xvec_cfg = self.xvector.get_config() + del hf_cfg["class_name"] + del xvec_cfg["class_name"] + config = { + "hf_feats": hf_cfg, + "xvector": xvec_cfg, + "feat_fusion_start": self.feat_fusion_start, + "feat_fusion_method": self.feat_fusion_method, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def change_config(self, hf_feats, xvector): + logging.info("changing hf wav2xvector config") + self.hf_feats.change_config(**hf_feats) + self.xvector.change_config(**xvector) + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--feat-fusion-start", + default=0, + type=int, + help=( + "the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" + "the wav2vec num_layers" + ), + ) + parser.add_argument( + "--feat-fusion-method", + default="weighted-avg", + choices=["weighted-avg", "linear", "cat", "last"], + help=( + "method to fuse the hidden layers from the wav2vec model " + "in [weighted-avg, cat]" + ), + ) + + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + help="xvector options", + ) diff --git a/hyperion/torch/trainers/__init__.py b/hyperion/torch/trainers/__init__.py index 8fef7df5..593cfa1f 100644 --- a/hyperion/torch/trainers/__init__.py +++ b/hyperion/torch/trainers/__init__.py @@ -5,6 +5,9 @@ from .torch_trainer import TorchTrainer + +from .transducer_trainer import TransducerTrainer + from .xvector_trainer import XVectorTrainer from .xvector_trainer_deep_feat_reg import XVectorTrainerDeepFeatReg from .xvector_adv_trainer import XVectorAdvTrainer diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py new file mode 100644 index 00000000..a67da181 --- /dev/null +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -0,0 +1,160 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import os +from collections import OrderedDict as ODict + +import logging + +import torch +import torch.nn as nn + +from ..utils import MetricAcc +from .torch_trainer import TorchTrainer +from torch.distributed.elastic.multiprocessing.errors import record + + +class TransducerTrainer(TorchTrainer): + """Trainer to train ASR style models. + + Attributes: + model: ASR model object. + optim: pytorch optimizer object or options dict + epochs: max. number of epochs + exp_path: experiment output path + cur_epoch: current epoch + grad_acc_steps: gradient accumulation steps to simulate larger batch size. + device: cpu/gpu device + metrics: extra metrics to compute besides cxe. + lrsched: learning rate scheduler object or options dict + loggers: LoggerList object, loggers write training progress to std. output and file. + If None, it uses default loggers. 
+ ddp: if True use distributed data parallel training + ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) + loss: if None, it uses cross-entropy + train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] + use_amp: uses mixed precision training. + log_interval: number of optim. steps between log outputs + use_tensorboard: use tensorboard logger + use_wandb: use wandb logger + wandb: wandb dictionary of options + grad_clip: norm to clip gradients, if 0 there is no clipping + grad_clip_norm: norm type to clip gradients + swa_start: epoch to start doing swa + swa_lr: SWA learning rate + swa_anneal_epochs: SWA learning rate anneal epochs + cpu_offload: CPU offload of gradients when using fully sharded ddp + """ + + def __init__( + self, + model, + optim={}, + epochs=100, + exp_path="./train", + cur_epoch=0, + grad_acc_steps=1, + eff_batch_size=None, + device=None, + metrics=None, + lrsched=None, + loggers=None, + ddp=False, + ddp_type="ddp", + loss=None, + train_mode="full", + use_amp=False, + log_interval=10, + use_tensorboard=False, + use_wandb=False, + wandb={}, + grad_clip=0, + grad_clip_norm=2, + swa_start=0, + swa_lr=1e-3, + swa_anneal_epochs=10, + cpu_offload=False, + ): + + if loss is None: + # TODO: Check and Modify loss + loss = nn.CrossEntropyLoss() + super().__init__( + model, + loss, + optim, + epochs, + exp_path, + cur_epoch=cur_epoch, + grad_acc_steps=grad_acc_steps, + eff_batch_size=eff_batch_size, + device=device, + metrics=metrics, + lrsched=lrsched, + loggers=loggers, + ddp=ddp, + ddp_type=ddp_type, + train_mode=train_mode, + use_amp=use_amp, + log_interval=log_interval, + use_tensorboard=use_tensorboard, + use_wandb=use_wandb, + wandb=wandb, + grad_clip=grad_clip, + grad_clip_norm=grad_clip_norm, + swa_start=swa_start, + swa_lr=swa_lr, + swa_anneal_epochs=swa_anneal_epochs, + cpu_offload=cpu_offload, + ) + + @record + def train_epoch(self, data_loader): + """Training epoch loop + + Args: + data_loader: pytorch data loader returning features and class labels. 
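(The epoch loop below interleaves gradient accumulation with the optimizer steps. Reduced to its skeleton — `model`, `loss_fn`, `optimizer`, and `loader` are assumed to exist:)

    grad_acc_steps = 4  # illustrative value
    for batch, (data, target) in enumerate(loader):
        if batch % grad_acc_steps == 0:
            optimizer.zero_grad()
        # scale so the accumulated gradients match one large-batch step
        loss = loss_fn(model(data), target) / grad_acc_steps
        loss.backward()
        if (batch + 1) % grad_acc_steps == 0:
            optimizer.step()
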
+ """ + + self.model.update_loss_margin(self.cur_epoch) + + metric_acc = MetricAcc(device=self.device) + batch_metrics = ODict() + self.model.train() + for batch, (data, target) in enumerate(data_loader): + self.loggers.on_batch_begin(batch) + + if batch % self.grad_acc_steps == 0: + self.optimizer.zero_grad() + # TODO: Check and Modify data, target + data, target = data.to(self.device), target.to(self.device) + batch_size = data.shape[0] + + with self.amp_autocast(): + output = self.model(data, y=target) + loss = self.loss(output, target).mean() / self.grad_acc_steps + + if self.use_amp: + self.grad_scaler.scale(loss).backward() + else: + loss.backward() + + if (batch + 1) % self.grad_acc_steps == 0: + if self.lr_scheduler is not None and not self.in_swa: + self.lr_scheduler.on_opt_step() + self.update_model() + + batch_metrics["loss"] = loss.item() * self.grad_acc_steps + for k, metric in self.metrics.items(): + batch_metrics[k] = metric(output, target) + + metric_acc.update(batch_metrics, batch_size) + logs = metric_acc.metrics + logs["lr"] = self._get_lr() + self.loggers.on_batch_end(logs=logs, batch_size=batch_size) + + logs = metric_acc.metrics + logs = ODict(("train_" + k, v) for k, v in logs.items()) + logs["lr"] = self._get_lr() + return logs From b8ffac41c60b13872025caf7e6dfd9dfe1d87347 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Mon, 3 Oct 2022 19:55:35 -0400 Subject: [PATCH 026/154] Librispeech recognition recipe --- egs/librispeech/v1/cmd.sh | 28 +++++ egs/librispeech/v1/conf/fbank80_16k.yaml | 7 ++ .../conf/wav2vec2base960h_ecapatdnn512x2.yaml | 37 ++++++ egs/librispeech/v1/datapath.sh | 22 ++++ egs/librispeech/v1/feats | 1 + ...nn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh | 55 +++++++++ egs/librispeech/v1/hyp_utils | 1 + egs/librispeech/v1/local/data_prep.sh | 85 ++++++++++++++ egs/librispeech/v1/path.sh | 5 + egs/librispeech/v1/run_001_prepare_data.sh | 52 ++++++++ egs/librispeech/v1/run_003_compute_fbank.sh | 67 +++++++++++ egs/librispeech/v1/run_004_compute_bpe.sh | 84 +++++++++++++ .../v1/run_010_prepare_asr_train_data.sh | 42 +++++++ egs/librispeech/v1/run_011_train_asr.sh | 111 ++++++++++++++++++ egs/librispeech/v1/run_030_inference.sh | 74 ++++++++++++ egs/librispeech/v1/run_040_eval_wer.sh | 103 ++++++++++++++++ egs/librispeech/v1/steps | 1 + egs/librispeech/v1/steps_be | 1 + egs/librispeech/v1/steps_pyfe | 1 + egs/librispeech/v1/utils | 1 + egs/librispeech/v1/xvectors | 1 + 21 files changed, 779 insertions(+) create mode 100755 egs/librispeech/v1/cmd.sh create mode 100644 egs/librispeech/v1/conf/fbank80_16k.yaml create mode 100644 egs/librispeech/v1/conf/wav2vec2base960h_ecapatdnn512x2.yaml create mode 100644 egs/librispeech/v1/datapath.sh create mode 120000 egs/librispeech/v1/feats create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh create mode 120000 egs/librispeech/v1/hyp_utils create mode 100755 egs/librispeech/v1/local/data_prep.sh create mode 100755 egs/librispeech/v1/path.sh create mode 100755 egs/librispeech/v1/run_001_prepare_data.sh create mode 100755 egs/librispeech/v1/run_003_compute_fbank.sh create mode 100755 egs/librispeech/v1/run_004_compute_bpe.sh create mode 100755 egs/librispeech/v1/run_010_prepare_asr_train_data.sh create mode 100755 egs/librispeech/v1/run_011_train_asr.sh create mode 100755 egs/librispeech/v1/run_030_inference.sh create mode 100755 egs/librispeech/v1/run_040_eval_wer.sh create mode 120000 egs/librispeech/v1/steps create mode 120000 egs/librispeech/v1/steps_be 
create mode 120000 egs/librispeech/v1/steps_pyfe create mode 120000 egs/librispeech/v1/utils create mode 120000 egs/librispeech/v1/xvectors diff --git a/egs/librispeech/v1/cmd.sh b/egs/librispeech/v1/cmd.sh new file mode 100755 index 00000000..71f3bae0 --- /dev/null +++ b/egs/librispeech/v1/cmd.sh @@ -0,0 +1,28 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +if [ "$(hostname -d)" == "cm.gemini" ];then + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 10G" + #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" + export cuda_eval_cmd="$train_cmd" +fi + + + diff --git a/egs/librispeech/v1/conf/fbank80_16k.yaml b/egs/librispeech/v1/conf/fbank80_16k.yaml new file mode 100644 index 00000000..88bae69e --- /dev/null +++ b/egs/librispeech/v1/conf/fbank80_16k.yaml @@ -0,0 +1,7 @@ +sample_frequency: 16000 +frame_length: 25 +low_freq: 20 +high_freq: 7600 +num_filters: 80 +snip_edges: false +use_energy: false diff --git a/egs/librispeech/v1/conf/wav2vec2base960h_ecapatdnn512x2.yaml b/egs/librispeech/v1/conf/wav2vec2base960h_ecapatdnn512x2.yaml new file mode 100644 index 00000000..85964372 --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2base960h_ecapatdnn512x2.yaml @@ -0,0 +1,37 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h +xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +feat_fusion_start: 2 +feat_fusion_method: weighted-avg diff --git a/egs/librispeech/v1/datapath.sh b/egs/librispeech/v1/datapath.sh new file mode 100644 index 00000000..4c7987ef --- /dev/null +++ b/egs/librispeech/v1/datapath.sh @@ -0,0 +1,22 @@ +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + 
librispeech_root=/export/corpora5/LibriSpeech + musan_root=/export/corpora5/JHU/musan +elif [ "$(hostname --domain)" == "cm.gemini" ];then + # voxceleb1_root=/expscratch/dsnyder/VoxCeleb1 #voxceleb1 v1 + # voxceleb1_root=/exp/jvillalba/corpora/voxceleb1 #voxceleb1 v2 + # voxceleb2_root=/expscratch/dgromero/corpora-open/vox2 + # musan_root=/expscratch/dgromero/corpora-open/musan + echo "Put your database paths here" + exit 1 +else + echo "Put your database paths here" + exit 1 +fi + + diff --git a/egs/librispeech/v1/feats b/egs/librispeech/v1/feats new file mode 120000 index 00000000..7b9d122a --- /dev/null +++ b/egs/librispeech/v1/feats @@ -0,0 +1 @@ +hyp_utils/feats \ No newline at end of file diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh new file mode 100644 index 00000000..942fb336 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh @@ -0,0 +1,55 @@ +# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 + +# hugging face model +hf_model_name=wav2vec2base + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size +dropout=0 +embed_dim=256 +lr=0.05 +s=30 +margin_warmup=20 +margin=0.3 +nnet_num_epochs=70 + + +lr=0.001 +#lr=0.005 +xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml +xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 20000 --trainer.lrsched.hold-steps 20000 --trainer.lrsched.min-lr 1e-6 --trainer.epochs 75 --model conf/wav2vec2base_specaug5_ecapatdnn512x2.yaml --data.train.dataset.max-chunk-length 2 --data.train.dataset.min-chunk-length 2" + +nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v12 #v1 + +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0060.pth +nnet=$nnet_dir/swa_model_ep0076.pth +nnet=$nnet_dir/model_ep0060.pth +nnet=$nnet_dir/model_ep0030.pth +nnet=$nnet_dir/model_ep0040.pth +nnet=$nnet_dir/model_ep0020.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=6 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/librispeech/v1/hyp_utils b/egs/librispeech/v1/hyp_utils new file mode 120000 index 00000000..f6d1eb7a --- /dev/null +++ b/egs/librispeech/v1/hyp_utils @@ -0,0 +1 @@ +../../../hyp_utils \ No newline at end of file diff --git a/egs/librispeech/v1/local/data_prep.sh b/egs/librispeech/v1/local/data_prep.sh new file mode 100755 index 00000000..c903d45b --- /dev/null +++ b/egs/librispeech/v1/local/data_prep.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +# Copyright 2014 Vassil Panayotov +# 2014 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0 + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean" + exit 1 +fi + +src=$1 +dst=$2 + +# all utterances are FLAC compressed +if ! which flac >&/dev/null; then + echo "Please install 'flac' on ALL worker nodes!" + exit 1 +fi + +spk_file=$src/../SPEAKERS.TXT + +mkdir -p $dst || exit 1 + +[ ! 
-d $src ] && echo "$0: no such directory $src" && exit 1 +[ ! -f $spk_file ] && echo "$0: expected file $spk_file to exist" && exit 1 + + +wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp +trans=$dst/text; [[ -f "$trans" ]] && rm $trans +utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk +spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender + +for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do + reader=$(basename $reader_dir) + if ! [ $reader -eq $reader ]; then # not integer. + echo "$0: unexpected subdirectory name $reader" + exit 1 + fi + + reader_gender=$(egrep "^$reader[ ]+\|" $spk_file | awk -F'|' '{gsub(/[ ]+/, ""); print tolower($2)}') + if [ "$reader_gender" != 'm' ] && [ "$reader_gender" != 'f' ]; then + echo "Unexpected gender: '$reader_gender'" + exit 1 + fi + + for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do + chapter=$(basename $chapter_dir) + if ! [ "$chapter" -eq "$chapter" ]; then + echo "$0: unexpected chapter-subdirectory name $chapter" + exit 1 + fi + + find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ + awk -v "dir=$chapter_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1 + + chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt + [ ! -f $chapter_trans ] && echo "$0: expected file $chapter_trans to exist" && exit 1 + cat $chapter_trans >>$trans + + # NOTE: For now we are using per-chapter utt2spk. That is each chapter is considered + # to be a different speaker. This is done for simplicity and because we want + # e.g. the CMVN to be calculated per-chapter + awk -v "reader=$reader" -v "chapter=$chapter" '{printf "%s %s-%s\n", $1, reader, chapter}' \ + <$chapter_trans >>$utt2spk || exit 1 + + # reader -> gender map (again using per-chapter granularity) + echo "${reader}-${chapter} $reader_gender" >>$spk2gender + done +done + +spk2utt=$dst/spk2utt +utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1 + +ntrans=$(wc -l <$trans) +nutt2spk=$(wc -l <$utt2spk) +! [ "$ntrans" -eq "$nutt2spk" ] && \ + echo "Inconsistent #transcripts($ntrans) and #utt2spk($nutt2spk)" && exit 1 + +utils/validate_data_dir.sh --no-feats $dst || exit 1 + +echo "$0: successfully prepared data in $dst" + +exit 0 diff --git a/egs/librispeech/v1/path.sh b/egs/librispeech/v1/path.sh new file mode 100755 index 00000000..6994fdab --- /dev/null +++ b/egs/librispeech/v1/path.sh @@ -0,0 +1,5 @@ + +export HYP_ROOT=$(readlink -f `pwd -P`/../../..) +export TOOLS_ROOT=$HYP_ROOT/tools + +. $TOOLS_ROOT/path.sh diff --git a/egs/librispeech/v1/run_001_prepare_data.sh b/egs/librispeech/v1/run_001_prepare_data.sh new file mode 100755 index 00000000..c6c15692 --- /dev/null +++ b/egs/librispeech/v1/run_001_prepare_data.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. ./datapath.sh + + +nj=6 + +mkdir -p data + + +if [ ${stage} -le 1 ]; then + ### Task dependent. You have to make data the following preparation part by yourself. + ### But you can utilize Kaldi recipes in most cases + echo "stage 0: Data preparation" + for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do + # use underscore-separated names in data directories. 
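(The prep script above ends by inverting utt2spk into spk2utt with Kaldi's utt2spk_to_spk2utt.pl; the conversion is just a map inversion, sketched here in Python with an illustrative path:)

    from collections import defaultdict

    spk2utt = defaultdict(list)
    with open("data/dev_clean/utt2spk") as f:  # path is illustrative
        for line in f:
            utt, spk = line.split()
            spk2utt[spk].append(utt)
    with open("data/dev_clean/spk2utt", "w") as f:
        for spk, utts in sorted(spk2utt.items()):
            f.write(f"{spk} {' '.join(utts)}\n")
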
+ local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_} + done +fi + +# if [ $stage -le 1 ]; then +# echo "Stage 1: Prepare LibriSpeech manifest" +# # We assume that you have downloaded the LibriSpeech corpus +# # to $librispeech_root +# mkdir -p data/manifests +# if [ ! -e data/manifests/.librispeech.done ]; then +# lhotse prepare librispeech -j $nj $librispeech_root data/manifests +# touch data/manifests/.librispeech.done +# fi +# fi + +# if [ $stage -le 2 ]; then +# echo "Stage 2: Prepare musan manifest" +# # We assume that you have downloaded the musan corpus +# # to $musan_root +# mkdir -p data/manifests +# if [ ! -e data/manifests/.musan.done ]; then +# lhotse prepare musan $musan_root data/manifests +# touch data/manifests/.musan.done +# fi +# fi diff --git a/egs/librispeech/v1/run_003_compute_fbank.sh b/egs/librispeech/v1/run_003_compute_fbank.sh new file mode 100755 index 00000000..0f5966a8 --- /dev/null +++ b/egs/librispeech/v1/run_003_compute_fbank.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e +nodes=fs01 +storage_name=$(date +'%m_%d_%H_%M') +fbankdir=`pwd`/exp/fbank + +stage=1 +config_file=default_config.sh +feat_vers="numpy" + +. parse_options.sh || exit 1; + +if [ "$feat_vers" == "kaldi" ];then + make_fbank=steps/make_fbank.sh + fbank_cfg=conf/fbank80_16k.conf +else + fbank_cfg=conf/fbank80_16k.yaml + if [ "$feat_vers" == "numpy" ];then + make_fbank=steps_pyfe/make_fbank.sh + else + make_fbank=steps_pyfe/make_torch_fbank.sh + fi +fi + + +# Make filterbanks +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $fbankdir/storage ]; then + dir_name=$USER/hyp-data/librispeech/v1/$storage_name/fbank/storage + if [ "$nodes" == "b0" ];then + utils/create_split_dir.pl \ + utils/create_split_dir.pl \ + /export/b{04,05,06,07}/$dir_name $fbankdir/storage + elif [ "$nodes" == "b1" ];then + utils/create_split_dir.pl \ + /export/b{14,15,16,17}/$dir_name $fbankdir/storage + elif [ "$nodes" == "c0" ];then + utils/create_split_dir.pl \ + /export/c{06,07,08,09}/$dir_name $fbankdir/storage + elif [ "$nodes" == "fs01" ];then + utils/create_split_dir.pl \ + /export/fs01/$dir_name $fbankdir/storage + else + echo "we don't distribute data between multiple machines" + fi + fi +fi + +if [ $stage -le 2 ];then + for name in dev_clean test_clean dev_other test_other train_clean_100 train_clean_360 train_other_500; + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 40 ? $num_spk:40)) + $make_fbank --write-utt2num-frames true --fbank-config $fbank_cfg --nj $nj --cmd "$train_cmd" \ + data/${name} exp/make_fbank/$name $fbankdir + utils/fix_data_dir.sh data/${name} + done + +fi + diff --git a/egs/librispeech/v1/run_004_compute_bpe.sh b/egs/librispeech/v1/run_004_compute_bpe.sh new file mode 100755 index 00000000..571205a8 --- /dev/null +++ b/egs/librispeech/v1/run_004_compute_bpe.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e +nodes=fs01 +storage_name=$(date +'%m_%d_%H_%M') + + +dl_dir=$PWD/download + +stage=2 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. $config_file + + +if [ $stage -le 1 ]; then + echo "Stage 1: Download LM" + mkdir -p $dl_dir/lm + if [ ! 
-e $dl_dir/lm/.done ]; then + ./local/download_lm.py --out-dir=$dl_dir/lm + touch $dl_dir/lm/.done + fi +fi + +if [ $stage -le 2 ]; then + echo "Stage 2: Prepare phone based lang" + lang_dir=data/lang_phone + mkdir -p $lang_dir + + (echo '!SIL SIL'; echo ' SPN'; echo ' SPN'; ) | + cat - $dl_dir/lm/librispeech-lexicon.txt | + sort | uniq > $lang_dir/lexicon.txt + + if [ ! -f $lang_dir/L_disambig.pt ]; then + ./local/prepare_lang.py --lang-dir $lang_dir + fi +fi + + +if [ $stage -le 3 ]; then + echo "Stage 3: Prepare BPE based lang" + + for vocab_size in ${vocab_sizes[@]}; do + lang_dir=data/lang_bpe_${vocab_size} + mkdir -p $lang_dir + # We reuse words.txt from phone based lexicon + # so that the two can share G.pt later. + cp data/lang_phone/words.txt $lang_dir + + if [ ! -f $lang_dir/transcript_words.txt ]; then + echo "Generate data for BPE training" + files=$( + find "$dl_dir/LibriSpeech/train-clean-100" -name "*.trans.txt" + find "$dl_dir/LibriSpeech/train-clean-360" -name "*.trans.txt" + find "$dl_dir/LibriSpeech/train-other-500" -name "*.trans.txt" + ) + for f in ${files[@]}; do + cat $f | cut -d " " -f 2- + done > $lang_dir/transcript_words.txt + fi + + if [ ! -f $lang_dir/bpe.model ]; then + ./local/train_bpe_model.py \ + --lang-dir $lang_dir \ + --vocab-size $vocab_size \ + --transcript $lang_dir/transcript_words.txt + fi + + if [ ! -f $lang_dir/L_disambig.pt ]; then + ./local/prepare_lang_bpe.py --lang-dir $lang_dir + + echo "Validating $lang_dir/lexicon.txt" + ./local/validate_bpe_lexicon.py \ + --lexicon $lang_dir/lexicon.txt \ + --bpe-model $lang_dir/bpe.model + fi + done +fi diff --git a/egs/librispeech/v1/run_010_prepare_asr_train_data.sh b/egs/librispeech/v1/run_010_prepare_asr_train_data.sh new file mode 100755 index 00000000..5936fbf4 --- /dev/null +++ b/egs/librispeech/v1/run_010_prepare_asr_train_data.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. $config_file + +if [ $stage -le 2 ]; then + # This script preprocess audio for x-vector training + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ + --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') --use-bin-vad true \ + data/${nnet_data} data/${nnet_data}_proc_audio_no_sil exp/${nnet_data}_proc_audio_no_sil + hyp_utils/kaldi/utils/fix_data_dir.sh data/${nnet_data}_proc_audio_no_sil + +fi + +if [ $stage -le 3 ]; then + # Now, we remove files with less than 4s + hyp_utils/remove_short_audios.sh --min-len 4 data/${nnet_data}_proc_audio_no_sil + + # We also want several utterances per speaker. Now we'll throw out speakers + # with fewer than 4 utterances. + hyp_utils/remove_spk_few_utts.sh --min-num-utts 4 data/${nnet_data}_proc_audio_no_sil + +fi + +if [ $stage -le 4 ]; then + # Prepare train and validation lists for x-vectors + local/make_train_lists_sup_embed_with_augm.sh \ + data/${nnet_data}_proc_audio_no_sil \ + data/${nnet_data}_proc_audio_no_sil/lists_xvec +fi + +exit diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh new file mode 100755 index 00000000..dc4e1dee --- /dev/null +++ b/egs/librispeech/v1/run_011_train_asr.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. 
./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +list_dir=data/${nnet_data}_proc_audio_no_sil + +#add extra args from the command line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" +fi +if [ "$use_tb" == "true" ];then + extra_args="$extra_args --trainer.use-tensorboard" +fi + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v2 --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" +fi + + +# Network Training +if [ $stage -le 1 ]; then + + mkdir -p $nnet_s1_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --trainer.exp-path $nnet_s1_dir $args \ + --num-gpus $ngpu + +fi + +if [ $stage -le 2 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" + fi + + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --in-model-file $nnet_s1 \ + --trainer.exp-path $nnet_s2_dir $args \ + --num-gpus $ngpu \ + +fi + +if [ $stage -le 3 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)" + fi + + mkdir -p $nnet_s3_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s3_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ + --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --in-model-file $nnet_s2 \ + --trainer.exp-path $nnet_s3_dir $args \ + --num-gpus $ngpu \ + +fi + diff --git a/egs/librispeech/v1/run_030_inference.sh b/egs/librispeech/v1/run_030_inference.sh new file mode 100755 index 00000000..67122f85 --- /dev/null +++ b/egs/librispeech/v1/run_030_inference.sh @@ -0,0 +1,74 
@@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=2 +config_file=default_config.sh +use_gpu=false +nnet_stage=3 +hf_chunk_length=120 #seconds +xvec_chunk_length=120 #seconds +. parse_options.sh || exit 1; +. $config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu true --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length" + xvec_cmd="$cuda_eval_cmd --mem 6G" +else + xvec_cmd="$train_cmd --mem 12G" +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +fi + +xvector_dir=exp/xvectors/$nnet_name + +if [ $stage -le 1 ]; then + # Extract xvectors for training LDA/PLDA + for name in voxceleb2cat_train + do + if [ $plda_num_augs -eq 0 ]; then + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ + --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ + $nnet data/${name} \ + $xvector_dir/${name} + else + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd" --nj 300 ${xvec_args} \ + --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ + --aug-config $plda_aug_config --num-augs $plda_num_augs \ + $nnet data/${name} \ + $xvector_dir/${name}_augx${plda_num_augs} \ + data/${name}_augx${plda_num_augs} + fi + done +fi + +if [ $stage -le 2 ]; then + # Extracts x-vectors for evaluation + for name in voxceleb1_test + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 100 ? $num_spk:100)) + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd" --nj $nj ${xvec_args} \ + $nnet data/$name \ + $xvector_dir/$name + done +fi + +exit diff --git a/egs/librispeech/v1/run_040_eval_wer.sh b/egs/librispeech/v1/run_040_eval_wer.sh new file mode 100755 index 00000000..ac561344 --- /dev/null +++ b/egs/librispeech/v1/run_040_eval_wer.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +# By default we evaluate the nnet after finetuning stage 3 and only with cosine scoring +stage=3 +config_file=default_config.sh +nnet_stage=3 + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +fi + +plda_label=${plda_type}y${plda_y_dim}_v1 +be_name=lda${lda_dim}_${plda_label}_${plda_data} + +xvector_dir=exp/xvectors/$nnet_name +be_dir=exp/be/$nnet_name/$be_name +score_dir=exp/scores/$nnet_name/${be_name} +score_plda_dir=$score_dir/plda +score_cosine_dir=exp/scores/$nnet_name/cosine + +if [ $stage -le 1 ]; then + echo "Train PLDA on Voxceleb2" + steps_be/train_be_v1.sh \ + --cmd "$train_cmd" \ + --lda_dim $lda_dim \ + --plda_type $plda_type \ + --y_dim $plda_y_dim --z_dim $plda_z_dim \ + $xvector_dir/$plda_data/xvector.scp \ + data/$plda_data \ + $be_dir & + + wait +fi + + +if [ $stage -le 2 ];then + + echo "Eval Voxceleb 1 with LDA+CentWhiten+LNorm+PLDA" + steps_be/eval_be_v1.sh \ + --cmd "$train_cmd" --plda_type $plda_type \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $be_dir/lda_lnorm.h5 \ + $be_dir/plda.h5 \ + $score_plda_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + +fi + +score_plda_dir=$score_cosine_dir + +if [ $stage -le 3 ];then + + echo "Eval Voxceleb 1 with Cosine scoring" + steps_be/eval_be_cos.sh --cmd "$train_cmd" \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $score_plda_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + +fi + + +exit + diff --git a/egs/librispeech/v1/steps b/egs/librispeech/v1/steps new file mode 120000 index 00000000..aede39fe --- /dev/null +++ b/egs/librispeech/v1/steps @@ -0,0 +1 @@ +hyp_utils/kaldi/steps \ No newline at end of file diff --git a/egs/librispeech/v1/steps_be b/egs/librispeech/v1/steps_be new file mode 120000 index 00000000..b2098c2a --- /dev/null +++ b/egs/librispeech/v1/steps_be @@ -0,0 +1 @@ +../v1/steps_be \ No newline at end of file diff --git a/egs/librispeech/v1/steps_pyfe b/egs/librispeech/v1/steps_pyfe new file mode 120000 index 00000000..7b9d122a --- /dev/null +++ b/egs/librispeech/v1/steps_pyfe @@ -0,0 +1 @@ +hyp_utils/feats \ No newline at end of file diff --git a/egs/librispeech/v1/utils b/egs/librispeech/v1/utils new file mode 120000 index 00000000..3d590a1d --- /dev/null +++ b/egs/librispeech/v1/utils @@ -0,0 +1 @@ +hyp_utils/kaldi/utils \ No newline at end of file diff --git a/egs/librispeech/v1/xvectors b/egs/librispeech/v1/xvectors new file mode 120000 index 00000000..af66a94d --- /dev/null +++ b/egs/librispeech/v1/xvectors @@ -0,0 +1 @@ +hyp_utils/xvectors \ No newline at end of file From 09354a45d0afa6de79099193db1d1a23fe2d70eb Mon Sep 17 00:00:00 2001 From: neillu23 Date: Mon, 3 Oct 2022 20:25:26 -0400 Subject: [PATCH 027/154] discard the change of train_wav2vec2xvector --- hyperion/bin/train_wav2vec2xvector.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index 08913605..8c30faaf 100755 --- 
a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -144,19 +144,15 @@ def make_parser(model_class): data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.train.dataset.class_file", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.val.dataset.class_file", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.train.data_loader.num_workers", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.val.data_loader.num_workers", action=ActionParser(parser=data_parser)) - # parser.link_arguments( - # "data.train.dataset.class_file", "data.val.dataset.class_file" - # ) - # parser.link_arguments( - # "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" - # ) - # parser.link_arguments( - # "data.train.sampler.batch_size", "data.val.sampler.batch_size" - # ) + parser.link_arguments( + "data.train.dataset.class_file", "data.val.dataset.class_file" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + parser.link_arguments( + "data.train.sampler.batch_size", "data.val.sampler.batch_size" + ) model_class.add_class_args(parser, prefix="model") Trainer.add_class_args( From ac99960641cbb8e021fdf8bbfbd3b45512621235 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 5 Oct 2022 08:51:26 -0400 Subject: [PATCH 028/154] improved utils --- egs/voxceleb/v1/steps_be/eval-be-v1.py | 4 +- egs/voxceleb/v1/steps_be/eval-be-v2.py | 4 +- .../preprocess_audios_for_nnet_train.sh | 2 +- .../data/class_weighted_seg_chunk_sampler.py | 40 ++++++++++++++----- hyperion/torch/data/seg_sampler_factory.py | 24 +++++++++++ hyperion/utils/__init__.py | 7 +++- hyperion/utils/class_info.py | 2 +- hyperion/utils/feature_set.py | 6 ++- hyperion/utils/info_table.py | 21 ++++++++++ hyperion/utils/recording_set.py | 2 +- 10 files changed, 95 insertions(+), 17 deletions(-) diff --git a/egs/voxceleb/v1/steps_be/eval-be-v1.py b/egs/voxceleb/v1/steps_be/eval-be-v1.py index f7d26390..da77f8f3 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-v1.py +++ b/egs/voxceleb/v1/steps_be/eval-be-v1.py @@ -20,6 +20,7 @@ import numpy as np from hyperion.hyp_defs import float_cpu, config_logger +from hyperion.utils.list_utils import ismember from hyperion.utils import TrialNdx, TrialScores from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F @@ -78,7 +79,8 @@ def eval_plda( if num_model_parts > 1 or num_seg_parts > 1: score_file = "%s-%03d-%03d" % (score_file, model_part_idx, seg_part_idx) logging.info("saving scores to %s" % (score_file)) - s = TrialScores(enroll, ndx.seg_set, scores, score_mask=ndx.trial_mask) + f, loc = ismember(enroll, ndx.model_set) + s = TrialScores(enroll, ndx.seg_set, scores, score_mask=ndx.trial_mask[loc]) s.save_txt(score_file) diff --git a/egs/voxceleb/v1/steps_be/eval-be-v2.py b/egs/voxceleb/v1/steps_be/eval-be-v2.py index d5cd6a55..413ca313 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-v2.py +++ b/egs/voxceleb/v1/steps_be/eval-be-v2.py @@ -18,6 +18,7 @@ import numpy as np from hyperion.hyp_defs import float_cpu, config_logger +from hyperion.utils.list_utils import ismember from hyperion.utils import TrialNdx, TrialScores from hyperion.utils.math import cosine_scoring from hyperion.helpers import TrialDataReader as TDR @@ -72,7 +73,8 @@ def eval_plda( if num_model_parts > 1 or num_seg_parts > 1: 
score_file = "%s-%03d-%03d" % (score_file, model_part_idx, seg_part_idx) logging.info("saving scores to %s" % (score_file)) - s = TrialScores(enroll, ndx.seg_set, scores, score_mask=ndx.trial_mask) + f, loc = ismember(enroll, ndx.model_set) + s = TrialScores(enroll, ndx.seg_set, scores, score_mask=ndx.trial_mask[loc]) s.save_txt(score_file) diff --git a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh index 1a1fd7ad..7c35b234 100755 --- a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh +++ b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh @@ -45,7 +45,7 @@ mkdir -p $data_out output_dir=$(utils/make_absolute.sh $dir) if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $output_dir/storage ]; then - dir_name=$USER/hyp-data/xvectors/$storage_name/xvector_audio/storage + dir_name=$USER/hyp-data/$storage_name/xvector_audio/storage if [ "$nodes" == "b0" ];then utils/create_split_dir.pl \ utils/create_split_dir.pl \ diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 91d592bc..7dfb8a35 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -29,6 +29,7 @@ def __init__( num_chunks_per_seg=1, weight_exponent=1.0, weight_mode="custom", + seg_weight_mode="uniform", num_hard_prototypes=0, affinity_matrix=None, class_name="class_id", @@ -76,6 +77,7 @@ def __init__( self.weight_exponent = weight_exponent self.weight_mode = weight_mode + self.seg_weight_mode = seg_weight_mode self.num_hard_prototypes = num_hard_prototypes self.batch = 0 @@ -164,7 +166,7 @@ def _gather_class_info(self): def _set_class_weights(self): if self.weight_mode == "uniform": self.class_info.set_uniform_weights() - elif self.weight_mode == "dataset-prior": + elif self.weight_mode == "data-prior": weights = self.class_info["total_duration"].values self.class_info.set_weights(self, weights) @@ -264,12 +266,24 @@ def _sample_segs(self, class_ids, chunk_length): # sample num_segs_per_class random segments if len(seg_ids_c) == 0: print(chunk_length, c, self.class_info.loc[c], flush=True) - sel_seg_idx_c = torch.randint( - low=0, - high=len(seg_ids_c), - size=(self.num_segs_per_class,), - generator=self.rng, - ).numpy() + if self.seg_weight_mode == "uniform": + sel_seg_idx_c = torch.randint( + low=0, + high=len(seg_ids_c), + size=(self.num_segs_per_class,), + generator=self.rng, + ).numpy() + elif self.seg_weight_mode == "data-prior": + weights = self.seg_set.loc[seg_mask, self.length_name].values + weights /= weights.sum() + sel_seg_idx_c = torch.multinomial( + torch.from_numpy(weights), + num_samples=self.num_segs_per_class, + replacement=True, + generator=self.rng, + ).numpy() + else: + raise ValueError("unknown seg-weight-mode=%s", self.seg_weight_mode) sel_seg_ids_c = list(seg_ids_c[sel_seg_idx_c]) seg_ids.extend(sel_seg_ids_c) @@ -319,6 +333,7 @@ def filter_args(**kwargs): "num_chunks_per_seg", "weight_exponent", "weight_mode", + "seg_weight_mode", "num_hard_prototypes", "class_name", "length_name", @@ -416,8 +431,15 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--weight-mode", default="custom", - choices=["custom", "uniform", "dataset-prior"], - help=("exponent for class weights"), + choices=["custom", "uniform", "data-prior"], + help=("method to get the class weights"), + ) + + parser.add_argument( + "--seg-weight-mode", + default="uniform", + choices=["uniform", "data-prior"], + 
help=("method to sample segments given a class"), ) parser.add_argument( diff --git a/hyperion/torch/data/seg_sampler_factory.py b/hyperion/torch/data/seg_sampler_factory.py index f09095e6..3093a532 100644 --- a/hyperion/torch/data/seg_sampler_factory.py +++ b/hyperion/torch/data/seg_sampler_factory.py @@ -80,6 +80,9 @@ def filter_args(**kwargs): "num_chunks_per_seg_epoch", "num_segs_per_class", "num_chunks_per_seg", + "weight_mode", + "weight_exponent", + "seg_weight_mode", "num_hard_prototypes", "class_name", "length_name", @@ -183,6 +186,27 @@ def add_class_args(parser, prefix=None): default=1, help=("number of chunks per segment in batch"), ) + + parser.add_argument( + "--weight-exponent", + default=1.0, + type=float, + help=("exponent for class weights"), + ) + parser.add_argument( + "--weight-mode", + default="custom", + choices=["custom", "uniform", "data-prior"], + help=("method to get the class weights"), + ) + + parser.add_argument( + "--seg-weight-mode", + default="uniform", + choices=["uniform", "data-prior"], + help=("method to sample segments given a class"), + ) + parser.add_argument( "--num-hard-prototypes", type=int, diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index bfd81028..251361ae 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -10,7 +10,12 @@ from .sparse_trial_scores import SparseTrialScores from .scp_list import SCPList from .utt2info import Utt2Info -from .ext_segment_list import ExtSegmentList + +# from .ext_segment_list import ExtSegmentList from .segment_list import SegmentList from .kaldi_matrix import KaldiMatrix, KaldiCompressedMatrix from .rttm import RTTM +from .recording_set import RecordingSet +from .class_info import ClassInfo +from .segment_set import SegmentSet +from .feature_set import FeatureSet diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py index ff98c7c5..f1eaf665 100644 --- a/hyperion/utils/class_info.py +++ b/hyperion/utils/class_info.py @@ -19,7 +19,7 @@ def __init__(self, df): if "weights" not in self.df: self.set_uniform_weights() else: - self.df["weights"] /= self.df["weigths"].sum() + self.df["weights"] /= self.df["weights"].sum() def add_class_idx(self): self.df["class_idx"] = [i for i in range(len(self.df))] diff --git a/hyperion/utils/feature_set.py b/hyperion/utils/feature_set.py index 986a21b9..2b2f0aaf 100644 --- a/hyperion/utils/feature_set.py +++ b/hyperion/utils/feature_set.py @@ -30,14 +30,16 @@ def save(self, file_path, sep=None): # if no extension we save as kaldi feats.scp file from .scp_list import SCPList - offset = self.df["storage_byte"] if "storage_byte" is not None else None + offset = self.df["storage_byte"] if "storage_byte" in self.df else None range = None if "start" and "num_frames" in self.df: range = [ np.array([s, n], dtype=np.int64) for s, n in self.df[["start", "num_frames"]] ] - scp = SCPList(self.df["id"], self.df["storage_path"], offset, range) + scp = SCPList( + self.df["id"].values, self.df["storage_path"].values, offset, range + ) scp.save(file_path) return diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 25632941..61033d16 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -385,4 +385,25 @@ def shuffle(self, seed=1024, rng=None): self.df = self.df.iloc[index] return index + def set_index(self, keys, inplace=True): + if inplace: + self.df.set_index(keys, drop=False, inplace=True) + return + + df = self.df.set_index(keys, drop=False, inplace=False) + return type(self)(df) 
+
+    def reset_index(self):
+        self.df.set_index("id", drop=False, inplace=True)
+
+    def get_loc(self, keys):
+        loc = self.df.index.get_loc(keys)
+        if isinstance(loc, int):
+            return loc
+        elif isinstance(loc, np.ndarray) and loc.dtype == bool:
+            return np.nonzero(loc)[0]
+        else:
+            return list(range(loc.start, loc.stop, loc.step))
+
\ No newline at end of file
diff --git a/hyperion/utils/recording_set.py b/hyperion/utils/recording_set.py
index 9695cef3..8346315c 100644
--- a/hyperion/utils/recording_set.py
+++ b/hyperion/utils/recording_set.py
@@ -30,7 +30,7 @@ def save(self, file_path, sep=None):
         # if no extension we save as kaldi feats.scp file
         from .scp_list import SCPList
 
-        scp = SCPList(self.df["id"], self.df["storage_path"])
+        scp = SCPList(self.df["id"].values, self.df["storage_path"].values)
         scp.save(file_path)
         return
 

From 42daf5cee831f9b117bc53d40244f19b4a721891 Mon Sep 17 00:00:00 2001
From: neillu23
Date: Thu, 13 Oct 2022 22:51:05 -0400
Subject: [PATCH 029/154] update data preparation

---
 egs/librispeech/v1/local/prepare_lang.py      | 413 ++++++++
 egs/librispeech/v1/local/prepare_lang_bpe.py  | 261 +++++
 egs/librispeech/v1/local/train_bpe_model.py   |  97 ++
 hyperion/torch/models/__init__.py             |   2 +-
 .../models/transducer/encoder_interface.py    |  43 +
 .../torch/models/transducer/transducer.py     |   4 +-
 .../wav2transducer/hf_wav2transducer.py       |   2 +-
 .../wav2transducer/hf_wav2vec2_transducer.py  |   1 +
 hyperion/utils/lexicon.py                     | 277 +++++
 hyperion/utils/utils.py                       | 978 ++++++++++++++++++
 10 files changed, 2074 insertions(+), 4 deletions(-)
 create mode 100755 egs/librispeech/v1/local/prepare_lang.py
 create mode 100755 egs/librispeech/v1/local/prepare_lang_bpe.py
 create mode 100755 egs/librispeech/v1/local/train_bpe_model.py
 create mode 100644 hyperion/torch/models/transducer/encoder_interface.py
 create mode 100644 hyperion/utils/lexicon.py
 create mode 100644 hyperion/utils/utils.py

diff --git a/egs/librispeech/v1/local/prepare_lang.py b/egs/librispeech/v1/local/prepare_lang.py
new file mode 100755
index 00000000..74e09629
--- /dev/null
+++ b/egs/librispeech/v1/local/prepare_lang.py
@@ -0,0 +1,413 @@
+#!/usr/bin/env python3
+# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This script takes as input a lexicon file "data/lang_phone/lexicon.txt"
+consisting of words and tokens (i.e., phones) and does the following:
+
+1. Add disambiguation symbols to the lexicon and generate lexicon_disambig.txt
+
+2. Generate tokens.txt, the token table mapping a token to a unique integer.
+
+3. Generate words.txt, the word table mapping a word to a unique integer.
+
+4. Generate L.pt, in k2 format. It can be loaded by
+
+     d = torch.load("L.pt")
+     lexicon = k2.Fsa.from_dict(d)
+
+5. Generate L_disambig.pt, in k2 format.
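+
+Usage (an illustrative sketch; the flag is defined in get_args below)::
+
+    ./local/prepare_lang.py --lang-dir data/lang_phone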
+""" +import argparse +import math +from collections import defaultdict +from pathlib import Path +from typing import Any, Dict, List, Tuple + +import k2 +import torch + +from hyperion.utils.lexicon import read_lexicon, write_lexicon +from hyperion.utils.utils import str2bool + +Lexicon = List[Tuple[str, List[str]]] + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--lang-dir", + type=str, + help="""Input and output directory. + It should contain a file lexicon.txt. + Generated files by this script are saved into this directory. + """, + ) + + parser.add_argument( + "--debug", + type=str2bool, + default=False, + help="""True for debugging, which will generate + a visualization of the lexicon FST. + + Caution: If your lexicon contains hundreds of thousands + of lines, please set it to False! + """, + ) + + return parser.parse_args() + + +def write_mapping(filename: str, sym2id: Dict[str, int]) -> None: + """Write a symbol to ID mapping to a file. + + Note: + No need to implement `read_mapping` as it can be done + through :func:`k2.SymbolTable.from_file`. + + Args: + filename: + Filename to save the mapping. + sym2id: + A dict mapping symbols to IDs. + Returns: + Return None. + """ + with open(filename, "w", encoding="utf-8") as f: + for sym, i in sym2id.items(): + f.write(f"{sym} {i}\n") + + +def get_tokens(lexicon: Lexicon) -> List[str]: + """Get tokens from a lexicon. + + Args: + lexicon: + It is the return value of :func:`read_lexicon`. + Returns: + Return a list of unique tokens. + """ + ans = set() + for _, tokens in lexicon: + ans.update(tokens) + sorted_ans = sorted(list(ans)) + return sorted_ans + + +def get_words(lexicon: Lexicon) -> List[str]: + """Get words from a lexicon. + + Args: + lexicon: + It is the return value of :func:`read_lexicon`. + Returns: + Return a list of unique words. + """ + ans = set() + for word, _ in lexicon: + ans.add(word) + sorted_ans = sorted(list(ans)) + return sorted_ans + + +def add_disambig_symbols(lexicon: Lexicon) -> Tuple[Lexicon, int]: + """It adds pseudo-token disambiguation symbols #1, #2 and so on + at the ends of tokens to ensure that all pronunciations are different, + and that none is a prefix of another. + + See also add_lex_disambig.pl from kaldi. + + Args: + lexicon: + It is returned by :func:`read_lexicon`. + Returns: + Return a tuple with two elements: + + - The output lexicon with disambiguation symbols + - The ID of the max disambiguation symbol that appears + in the lexicon + """ + + # (1) Work out the count of each token-sequence in the + # lexicon. + count = defaultdict(int) + for _, tokens in lexicon: + count[" ".join(tokens)] += 1 + + # (2) For each left sub-sequence of each token-sequence, note down + # that it exists (for identifying prefixes of longer strings). + issubseq = defaultdict(int) + for _, tokens in lexicon: + tokens = tokens.copy() + tokens.pop() + while tokens: + issubseq[" ".join(tokens)] = 1 + tokens.pop() + + # (3) For each entry in the lexicon: + # if the token sequence is unique and is not a + # prefix of another word, no disambig symbol. + # Else output #1, or #2, #3, ... if the same token-seq + # has already been assigned a disambig symbol. 
+ ans = [] + + # We start with #1 since #0 has its own purpose + first_allowed_disambig = 1 + max_disambig = first_allowed_disambig - 1 + last_used_disambig_symbol_of = defaultdict(int) + + for word, tokens in lexicon: + tokenseq = " ".join(tokens) + assert tokenseq != "" + if issubseq[tokenseq] == 0 and count[tokenseq] == 1: + ans.append((word, tokens)) + continue + + cur_disambig = last_used_disambig_symbol_of[tokenseq] + if cur_disambig == 0: + cur_disambig = first_allowed_disambig + else: + cur_disambig += 1 + + if cur_disambig > max_disambig: + max_disambig = cur_disambig + last_used_disambig_symbol_of[tokenseq] = cur_disambig + tokenseq += f" #{cur_disambig}" + ans.append((word, tokenseq.split())) + return ans, max_disambig + + +def generate_id_map(symbols: List[str]) -> Dict[str, int]: + """Generate ID maps, i.e., map a symbol to a unique ID. + + Args: + symbols: + A list of unique symbols. + Returns: + A dict containing the mapping between symbols and IDs. + """ + return {sym: i for i, sym in enumerate(symbols)} + + +def add_self_loops( + arcs: List[List[Any]], disambig_token: int, disambig_word: int +) -> List[List[Any]]: + """Adds self-loops to states of an FST to propagate disambiguation symbols + through it. They are added on each state with non-epsilon output symbols + on at least one arc out of the state. + + See also fstaddselfloops.pl from Kaldi. One difference is that + Kaldi uses OpenFst style FSTs and it has multiple final states. + This function uses k2 style FSTs and it does not need to add self-loops + to the final state. + + The input label of a self-loop is `disambig_token`, while the output + label is `disambig_word`. + + Args: + arcs: + A list-of-list. The sublist contains + `[src_state, dest_state, label, aux_label, score]` + disambig_token: + It is the token ID of the symbol `#0`. + disambig_word: + It is the word ID of the symbol `#0`. + + Return: + Return new `arcs` containing self-loops. + """ + states_needs_self_loops = set() + for arc in arcs: + src, dst, ilabel, olabel, score = arc + if olabel != 0: + states_needs_self_loops.add(src) + + ans = [] + for s in states_needs_self_loops: + ans.append([s, s, disambig_token, disambig_word, 0]) + + return arcs + ans + + +def lexicon_to_fst( + lexicon: Lexicon, + token2id: Dict[str, int], + word2id: Dict[str, int], + sil_token: str = "SIL", + sil_prob: float = 0.5, + need_self_loops: bool = False, +) -> k2.Fsa: + """Convert a lexicon to an FST (in k2 format) with optional silence at + the beginning and end of each word. + + Args: + lexicon: + The input lexicon. See also :func:`read_lexicon` + token2id: + A dict mapping tokens to IDs. + word2id: + A dict mapping words to IDs. + sil_token: + The silence token. + sil_prob: + The probability for adding a silence at the beginning and end + of the word. + need_self_loops: + If True, add self-loop to states with non-epsilon output symbols + on at least one arc out of the state. The input label for this + self loop is `token2id["#0"]` and the output label is `word2id["#0"]`. + Returns: + Return an instance of `k2.Fsa` representing the given lexicon. + """ + assert sil_prob > 0.0 and sil_prob < 1.0 + # CAUTION: we use score, i.e, negative cost. + sil_score = math.log(sil_prob) + no_sil_score = math.log(1.0 - sil_prob) + + start_state = 0 + loop_state = 1 # words enter and leave from here + sil_state = 2 # words terminate here when followed by silence; this state + # has a silence transition to loop_state. 
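+    # Illustrative sketch of the topology built below (arcs written as
+    # input/output, weights are log-probabilities):
+    #   start --eps/eps--> loop                      score log(1 - sil_prob)
+    #   start --eps/eps--> sil --SIL/eps--> loop     score log(sil_prob)
+    # Each word's token arcs leave loop_state and end in loop_state
+    # (no trailing silence) or sil_state (trailing silence).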
+    next_state = 3  # the next un-allocated state, will be incremented as we go.
+    arcs = []
+
+    assert token2id["<eps>"] == 0
+    assert word2id["<eps>"] == 0
+
+    eps = 0
+
+    sil_token = token2id[sil_token]
+
+    arcs.append([start_state, loop_state, eps, eps, no_sil_score])
+    arcs.append([start_state, sil_state, eps, eps, sil_score])
+    arcs.append([sil_state, loop_state, sil_token, eps, 0])
+
+    for word, tokens in lexicon:
+        assert len(tokens) > 0, f"{word} has no pronunciations"
+        cur_state = loop_state
+
+        word = word2id[word]
+        tokens = [token2id[i] for i in tokens]
+
+        for i in range(len(tokens) - 1):
+            w = word if i == 0 else eps
+            arcs.append([cur_state, next_state, tokens[i], w, 0])
+
+            cur_state = next_state
+            next_state += 1
+
+        # now for the last token of this word
+        # It has two out-going arcs, one to the loop state,
+        # the other one to the sil_state.
+        i = len(tokens) - 1
+        w = word if i == 0 else eps
+        arcs.append([cur_state, loop_state, tokens[i], w, no_sil_score])
+        arcs.append([cur_state, sil_state, tokens[i], w, sil_score])
+
+    if need_self_loops:
+        disambig_token = token2id["#0"]
+        disambig_word = word2id["#0"]
+        arcs = add_self_loops(
+            arcs,
+            disambig_token=disambig_token,
+            disambig_word=disambig_word,
+        )
+
+    final_state = next_state
+    arcs.append([loop_state, final_state, -1, -1, 0])
+    arcs.append([final_state])
+
+    arcs = sorted(arcs, key=lambda arc: arc[0])
+    arcs = [[str(i) for i in arc] for arc in arcs]
+    arcs = [" ".join(arc) for arc in arcs]
+    arcs = "\n".join(arcs)
+
+    fsa = k2.Fsa.from_str(arcs, acceptor=False)
+    return fsa
+
+
+def main():
+    args = get_args()
+    lang_dir = Path(args.lang_dir)
+    lexicon_filename = lang_dir / "lexicon.txt"
+    sil_token = "SIL"
+    sil_prob = 0.5
+
+    lexicon = read_lexicon(lexicon_filename)
+    tokens = get_tokens(lexicon)
+    words = get_words(lexicon)
+
+    lexicon_disambig, max_disambig = add_disambig_symbols(lexicon)
+
+    for i in range(max_disambig + 1):
+        disambig = f"#{i}"
+        assert disambig not in tokens
+        tokens.append(f"#{i}")
+
+    assert "<eps>" not in tokens
+    tokens = ["<eps>"] + tokens
+
+    assert "<eps>" not in words
+    assert "#0" not in words
+    assert "<s>" not in words
+    assert "</s>" not in words
+
+    words = ["<eps>"] + words + ["#0", "<s>", "</s>"]
+
+    token2id = generate_id_map(tokens)
+    word2id = generate_id_map(words)
+
+    write_mapping(lang_dir / "tokens.txt", token2id)
+    write_mapping(lang_dir / "words.txt", word2id)
+    write_lexicon(lang_dir / "lexicon_disambig.txt", lexicon_disambig)
+
+    L = lexicon_to_fst(
+        lexicon,
+        token2id=token2id,
+        word2id=word2id,
+        sil_token=sil_token,
+        sil_prob=sil_prob,
+    )
+
+    L_disambig = lexicon_to_fst(
+        lexicon_disambig,
+        token2id=token2id,
+        word2id=word2id,
+        sil_token=sil_token,
+        sil_prob=sil_prob,
+        need_self_loops=True,
+    )
+    torch.save(L.as_dict(), lang_dir / "L.pt")
+    torch.save(L_disambig.as_dict(), lang_dir / "L_disambig.pt")
+
+    if args.debug:
+        labels_sym = k2.SymbolTable.from_file(lang_dir / "tokens.txt")
+        aux_labels_sym = k2.SymbolTable.from_file(lang_dir / "words.txt")
+
+        L.labels_sym = labels_sym
+        L.aux_labels_sym = aux_labels_sym
+        L.draw(f"{lang_dir / 'L.svg'}", title="L.pt")
+
+        L_disambig.labels_sym = labels_sym
+        L_disambig.aux_labels_sym = aux_labels_sym
+        L_disambig.draw(f"{lang_dir / 'L_disambig.svg'}", title="L_disambig.pt")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/librispeech/v1/local/prepare_lang_bpe.py b/egs/librispeech/v1/local/prepare_lang_bpe.py
new file mode 100755
index 00000000..d8cee8ed
--- /dev/null
+++ b/egs/librispeech/v1/local/prepare_lang_bpe.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
+
+"""
+
+This script takes as input `lang_dir`, which should contain::
+
+    - lang_dir/bpe.model,
+    - lang_dir/words.txt
+
+and generates the following files in the directory `lang_dir`:
+
+    - lexicon.txt
+    - lexicon_disambig.txt
+    - L.pt
+    - L_disambig.pt
+    - tokens.txt
+"""
+
+import argparse
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+import k2
+import sentencepiece as spm
+import torch
+from prepare_lang import (
+    Lexicon,
+    add_disambig_symbols,
+    add_self_loops,
+    write_lexicon,
+    write_mapping,
+)
+
+from hyperion.utils.utils import str2bool
+
+
+def lexicon_to_fst_no_sil(
+    lexicon: Lexicon,
+    token2id: Dict[str, int],
+    word2id: Dict[str, int],
+    need_self_loops: bool = False,
+) -> k2.Fsa:
+    """Convert a lexicon to an FST (in k2 format).
+
+    Args:
+      lexicon:
+        The input lexicon. See also :func:`read_lexicon`
+      token2id:
+        A dict mapping tokens to IDs.
+      word2id:
+        A dict mapping words to IDs.
+      need_self_loops:
+        If True, add self-loop to states with non-epsilon output symbols
+        on at least one arc out of the state. The input label for this
+        self loop is `token2id["#0"]` and the output label is `word2id["#0"]`.
+    Returns:
+      Return an instance of `k2.Fsa` representing the given lexicon.
+    """
+    loop_state = 0  # words enter and leave from here
+    next_state = 1  # the next un-allocated state, will be incremented as we go
+
+    arcs = []
+
+    # The blank symbol <blk> is defined in local/train_bpe_model.py
+    assert token2id["<blk>"] == 0
+    assert word2id["<eps>"] == 0
+
+    eps = 0
+
+    for word, pieces in lexicon:
+        assert len(pieces) > 0, f"{word} has no pronunciations"
+        cur_state = loop_state
+
+        word = word2id[word]
+        pieces = [token2id[i] for i in pieces]
+
+        for i in range(len(pieces) - 1):
+            w = word if i == 0 else eps
+            arcs.append([cur_state, next_state, pieces[i], w, 0])
+
+            cur_state = next_state
+            next_state += 1
+
+        # now for the last piece of this word
+        i = len(pieces) - 1
+        w = word if i == 0 else eps
+        arcs.append([cur_state, loop_state, pieces[i], w, 0])
+
+    if need_self_loops:
+        disambig_token = token2id["#0"]
+        disambig_word = word2id["#0"]
+        arcs = add_self_loops(
+            arcs,
+            disambig_token=disambig_token,
+            disambig_word=disambig_word,
+        )
+
+    final_state = next_state
+    arcs.append([loop_state, final_state, -1, -1, 0])
+    arcs.append([final_state])
+
+    arcs = sorted(arcs, key=lambda arc: arc[0])
+    arcs = [[str(i) for i in arc] for arc in arcs]
+    arcs = [" ".join(arc) for arc in arcs]
+    arcs = "\n".join(arcs)
+
+    fsa = k2.Fsa.from_str(arcs, acceptor=False)
+    return fsa
+
+
+def generate_lexicon(
+    model_file: str, words: List[str]
+) -> Tuple[Lexicon, Dict[str, int]]:
+    """Generate a lexicon from a BPE model.
+
+    Args:
+      model_file:
+        Path to a sentencepiece model.
+      words:
+        A list of strings representing words.
+    Returns:
+      Return a tuple with two elements:
+        - A dict whose keys are words and values are the corresponding
+          word pieces.
+        - A dict representing the token symbol, mapping from tokens to IDs.
+    """
+    sp = spm.SentencePieceProcessor()
+    sp.load(str(model_file))
+
+    # Convert word to word piece IDs instead of word piece strings
+    # to avoid OOV tokens.
+    words_pieces_ids: List[List[int]] = sp.encode(words, out_type=int)
+
+    # Now convert word piece IDs back to word piece strings.
+    words_pieces: List[List[str]] = [
+        sp.id_to_piece(ids) for ids in words_pieces_ids
+    ]
+
+    lexicon = []
+    for word, pieces in zip(words, words_pieces):
+        lexicon.append((word, pieces))
+
+    # The OOV word is <UNK>
+    lexicon.append(("<UNK>", [sp.id_to_piece(sp.unk_id())]))
+
+    token2id: Dict[str, int] = dict()
+    for i in range(sp.vocab_size()):
+        token2id[sp.id_to_piece(i)] = i
+
+    return lexicon, token2id
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--lang-dir",
+        type=str,
+        help="""Input and output directory.
+        It should contain the bpe.model and words.txt
+        """,
+    )
+
+    parser.add_argument(
+        "--debug",
+        type=str2bool,
+        default=False,
+        help="""True for debugging, which will generate
+        a visualization of the lexicon FST.
+
+        Caution: If your lexicon contains hundreds of thousands
+        of lines, please set it to False!
+
+        See "test/test_bpe_lexicon.py" for usage.
+        """,
+    )
+
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+    lang_dir = Path(args.lang_dir)
+    model_file = lang_dir / "bpe.model"
+
+    word_sym_table = k2.SymbolTable.from_file(lang_dir / "words.txt")
+
+    words = word_sym_table.symbols
+
+    excluded = ["<eps>", "!SIL", "<SPOKEN_NOISE>", "<UNK>", "#0", "<s>", "</s>"]
+    for w in excluded:
+        if w in words:
+            words.remove(w)
+
+    lexicon, token_sym_table = generate_lexicon(model_file, words)
+
+    lexicon_disambig, max_disambig = add_disambig_symbols(lexicon)
+
+    next_token_id = max(token_sym_table.values()) + 1
+    for i in range(max_disambig + 1):
+        disambig = f"#{i}"
+        assert disambig not in token_sym_table
+        token_sym_table[disambig] = next_token_id
+        next_token_id += 1
+
+    word_sym_table.add("#0")
+    word_sym_table.add("<s>")
+    word_sym_table.add("</s>")
+
+    write_mapping(lang_dir / "tokens.txt", token_sym_table)
+
+    write_lexicon(lang_dir / "lexicon.txt", lexicon)
+    write_lexicon(lang_dir / "lexicon_disambig.txt", lexicon_disambig)
+
+    L = lexicon_to_fst_no_sil(
+        lexicon,
+        token2id=token_sym_table,
+        word2id=word_sym_table,
+    )
+
+    L_disambig = lexicon_to_fst_no_sil(
+        lexicon_disambig,
+        token2id=token_sym_table,
+        word2id=word_sym_table,
+        need_self_loops=True,
+    )
+    torch.save(L.as_dict(), lang_dir / "L.pt")
+    torch.save(L_disambig.as_dict(), lang_dir / "L_disambig.pt")
+
+    if args.debug:
+        labels_sym = k2.SymbolTable.from_file(lang_dir / "tokens.txt")
+        aux_labels_sym = k2.SymbolTable.from_file(lang_dir / "words.txt")
+
+        L.labels_sym = labels_sym
+        L.aux_labels_sym = aux_labels_sym
+        L.draw(f"{lang_dir / 'L.svg'}", title="L.pt")
+
+        L_disambig.labels_sym = labels_sym
+        L_disambig.aux_labels_sym = aux_labels_sym
+        L_disambig.draw(f"{lang_dir / 'L_disambig.svg'}", title="L_disambig.pt")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/librispeech/v1/local/train_bpe_model.py b/egs/librispeech/v1/local/train_bpe_model.py
new file mode 100755
index 00000000..42aba957
--- /dev/null
+++ b/egs/librispeech/v1/local/train_bpe_model.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# You can install sentencepiece via:
+#
+#  pip install sentencepiece
+#
+# Due to an issue reported in
+# https://github.com/google/sentencepiece/pull/642#issuecomment-857972030
+#
+# Please install a version >=0.1.96
+
+import argparse
+import shutil
+from pathlib import Path
+
+import sentencepiece as spm
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--lang-dir",
+        type=str,
+        help="""Input and output directory.
+        The generated bpe.model is saved to this directory.
+        """,
+    )
+
+    parser.add_argument(
+        "--transcript",
+        type=str,
+        help="Training transcript.",
+    )
+
+    parser.add_argument(
+        "--vocab-size",
+        type=int,
+        help="Vocabulary size for BPE training",
+    )
+
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+    vocab_size = args.vocab_size
+    lang_dir = Path(args.lang_dir)
+
+    model_type = "unigram"
+
+    model_prefix = f"{lang_dir}/{model_type}_{vocab_size}"
+    train_text = args.transcript
+    character_coverage = 1.0
+    input_sentence_size = 100000000
+
+    user_defined_symbols = ["<blk>", "<sos/eos>"]
+    unk_id = len(user_defined_symbols)
+    # Note: unk_id is fixed to 2.
+    # If you change it, you should also change other
+    # places that are using it.
+
+    model_file = Path(model_prefix + ".model")
+    if not model_file.is_file():
+        spm.SentencePieceTrainer.train(
+            input=train_text,
+            vocab_size=vocab_size,
+            model_type=model_type,
+            model_prefix=model_prefix,
+            input_sentence_size=input_sentence_size,
+            character_coverage=character_coverage,
+            user_defined_symbols=user_defined_symbols,
+            unk_id=unk_id,
+            bos_id=-1,
+            eos_id=-1,
+        )
+
+    shutil.copyfile(model_file, f"{lang_dir}/bpe.model")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py
index 5a1368e2..44ff171d 100644
--- a/hyperion/torch/models/__init__.py
+++ b/hyperion/torch/models/__init__.py
@@ -19,7 +19,7 @@
 )
 
 
-from .transducer import HFWav2Vec2Transducer
+from .wav2transducer import HFWav2Vec2Transducer
 
 from .vae.vae import VAE
 from .vae.vq_vae import VQVAE
diff --git a/hyperion/torch/models/transducer/encoder_interface.py b/hyperion/torch/models/transducer/encoder_interface.py
new file mode 100644
index 00000000..257facce
--- /dev/null
+++ b/hyperion/torch/models/transducer/encoder_interface.py
@@ -0,0 +1,43 @@
+# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Tuple + +import torch +import torch.nn as nn + + +class EncoderInterface(nn.Module): + def forward( + self, x: torch.Tensor, x_lens: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + x: + A tensor of shape (batch_size, input_seq_len, num_features) + containing the input features. + x_lens: + A tensor of shape (batch_size,) containing the number of frames + in `x` before padding. + Returns: + Return a tuple containing two tensors: + - encoder_out, a tensor of (batch_size, out_seq_len, output_dim) + containing unnormalized probabilities, i.e., the output of a + linear layer. + - encoder_out_lens, a tensor of shape (batch_size,) containing + the number of frames in `encoder_out` before padding. + """ + raise NotImplementedError("Please implement it in a subclass") diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index 8305248c..ff12ef18 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -23,9 +23,9 @@ import torch.nn as nn import torchaudio import torchaudio.functional -from encoder_interface import EncoderInterface +from .encoder_interface import EncoderInterface -from icefall.utils import add_sos +from hyperion.utils.utils import add_sos class Transducer(nn.Module): diff --git a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py index 3fed7143..1e038f17 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py @@ -15,7 +15,7 @@ from ...utils import remove_silence -class HFWav2XVector(TorchModel): +class HFWav2Transducer(TorchModel): """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. Attributes: diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py index e83dcb8c..e2c6e1be 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py @@ -13,6 +13,7 @@ from ...torch_model import TorchModel from ...utils import remove_silence +from .hf_wav2transducer import HFWav2Transducer class HFWav2Vec2Transducer(HFWav2Transducer): diff --git a/hyperion/utils/lexicon.py b/hyperion/utils/lexicon.py new file mode 100644 index 00000000..80bd7c1e --- /dev/null +++ b/hyperion/utils/lexicon.py @@ -0,0 +1,277 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) +# +# See ../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import logging
+import re
+import sys
+from pathlib import Path
+from typing import List, Tuple
+
+import k2
+import torch
+
+
+def read_lexicon(filename: str) -> List[Tuple[str, List[str]]]:
+    """Read a lexicon from `filename`.
+
+    Each line in the lexicon contains "word p1 p2 p3 ...".
+    That is, the first field is a word and the remaining
+    fields are tokens. Fields are separated by space(s).
+
+    Args:
+      filename:
+        Path to the lexicon.txt
+
+    Returns:
+      A list of tuples, e.g., [('w', ['p1', 'p2']), ('w1', ['p3', 'p4'])]
+    """
+    ans = []
+
+    with open(filename, "r", encoding="utf-8") as f:
+        whitespace = re.compile("[ \t]+")
+        for line in f:
+            a = whitespace.split(line.strip(" \t\r\n"))
+            if len(a) == 0:
+                continue
+
+            if len(a) < 2:
+                logging.info(
+                    f"Found bad line {line} in lexicon file {filename}"
+                )
+                logging.info(
+                    "Every line is expected to contain at least 2 fields"
+                )
+                sys.exit(1)
+            word = a[0]
+            if word == "<eps>":
+                logging.info(
+                    f"Found bad line {line} in lexicon file {filename}"
+                )
+                logging.info("<eps> should not be a valid word")
+                sys.exit(1)
+
+            tokens = a[1:]
+            ans.append((word, tokens))
+
+    return ans
+
+
+def write_lexicon(filename: str, lexicon: List[Tuple[str, List[str]]]) -> None:
+    """Write a lexicon to a file.
+
+    Args:
+      filename:
+        Path to the lexicon file to be generated.
+      lexicon:
+        It can be the return value of :func:`read_lexicon`.
+    """
+    with open(filename, "w", encoding="utf-8") as f:
+        for word, tokens in lexicon:
+            f.write(f"{word} {' '.join(tokens)}\n")
+
+
+def convert_lexicon_to_ragged(
+    filename: str, word_table: k2.SymbolTable, token_table: k2.SymbolTable
+) -> k2.RaggedTensor:
+    """Read a lexicon and convert it to a ragged tensor.
+
+    The ragged tensor has two axes: [word][token].
+
+    Caution:
+      We assume that each word has a unique pronunciation.
+
+    Args:
+      filename:
+        Filename of the lexicon. It has a format that can be read
+        by :func:`read_lexicon`.
+      word_table:
+        The word symbol table.
+      token_table:
+        The token symbol table.
+    Returns:
+      A k2 ragged tensor with two axes [word][token].
+    """
+    disambig_id = word_table["#0"]
+    # We reuse the same words.txt from the phone based lexicon
+    # so that we can share the same G.fst. Here, we have to
+    # exclude some words present only in the phone based lexicon.
+    excluded_words = ["<eps>", "!SIL", "<SPOKEN_NOISE>"]
+
+    # epsilon is not a word, but it occupies a position
+    #
+    row_splits = [0]
+    token_ids_list = []
+
+    lexicon_tmp = read_lexicon(filename)
+    lexicon = dict(lexicon_tmp)
+    if len(lexicon_tmp) != len(lexicon):
+        raise RuntimeError(
+            "It's assumed that each word has a unique pronunciation"
+        )
+
+    for i in range(disambig_id):
+        w = word_table[i]
+        if w in excluded_words:
+            row_splits.append(row_splits[-1])
+            continue
+        tokens = lexicon[w]
+        token_ids = [token_table[k] for k in tokens]
+
+        row_splits.append(row_splits[-1] + len(token_ids))
+        token_ids_list.extend(token_ids)
+
+    cached_tot_size = row_splits[-1]
+    row_splits = torch.tensor(row_splits, dtype=torch.int32)
+
+    shape = k2.ragged.create_ragged_shape2(
+        row_splits,
+        None,
+        cached_tot_size,
+    )
+    values = torch.tensor(token_ids_list, dtype=torch.int32)
+
+    return k2.RaggedTensor(shape, values)
+
+
+class Lexicon(object):
+    """Phone based lexicon."""
+
+    def __init__(
+        self,
+        lang_dir: Path,
+        disambig_pattern: str = re.compile(r"^#\d+$"),
+    ):
+        """
+        Args:
+          lang_dir:
+            Path to the lang directory. It is expected to contain the following
+            files:
+              - tokens.txt
+              - words.txt
+              - L.pt
+            The above files are produced by the script `prepare.sh`. You
+            should have run that before running the training code.
+          disambig_pattern:
+            It contains the pattern for disambiguation symbols.
+        """
+        lang_dir = Path(lang_dir)
+        self.token_table = k2.SymbolTable.from_file(lang_dir / "tokens.txt")
+        self.word_table = k2.SymbolTable.from_file(lang_dir / "words.txt")
+
+        if (lang_dir / "Linv.pt").exists():
+            logging.info(f"Loading pre-compiled {lang_dir}/Linv.pt")
+            L_inv = k2.Fsa.from_dict(torch.load(lang_dir / "Linv.pt"))
+        else:
+            logging.info("Converting L.pt to Linv.pt")
+            L = k2.Fsa.from_dict(torch.load(lang_dir / "L.pt"))
+            L_inv = k2.arc_sort(L.invert())
+            torch.save(L_inv.as_dict(), lang_dir / "Linv.pt")
+
+        # We save L_inv instead of L because it will be used to intersect with
+        # transcript FSAs, both of whose labels are word IDs.
+        self.L_inv = L_inv
+        self.disambig_pattern = disambig_pattern
+
+    @property
+    def tokens(self) -> List[int]:
+        """Return a list of token IDs excluding those from
+        disambiguation symbols.
+
+        Caution:
+          0 is not a token ID so it is excluded from the return value.
+        """
+        symbols = self.token_table.symbols
+        ans = []
+        for s in symbols:
+            if not self.disambig_pattern.match(s):
+                ans.append(self.token_table[s])
+        if 0 in ans:
+            ans.remove(0)
+        ans.sort()
+        return ans
+
+
+class UniqLexicon(Lexicon):
+    def __init__(
+        self,
+        lang_dir: Path,
+        uniq_filename: str = "uniq_lexicon.txt",
+        disambig_pattern: str = re.compile(r"^#\d+$"),
+    ):
+        """
+        Refer to the help information in Lexicon.__init__.
+
+        uniq_filename: It is assumed to be inside the given `lang_dir`.
+
+        Each word in the lexicon is assumed to have a unique pronunciation.
+        """
+        lang_dir = Path(lang_dir)
+        super().__init__(lang_dir=lang_dir, disambig_pattern=disambig_pattern)
+
+        self.ragged_lexicon = convert_lexicon_to_ragged(
+            filename=lang_dir / uniq_filename,
+            word_table=self.word_table,
+            token_table=self.token_table,
+        )
+        # TODO: should we move it to a certain device ?
+
+    def texts_to_token_ids(
+        self, texts: List[str], oov: str = "<UNK>"
+    ) -> k2.RaggedTensor:
+        """
+        Args:
+          texts:
+            A list of transcripts. Each transcript contains space(s)
+            separated words. An example of `texts` is::
+
+                ['HELLO k2', 'HELLO icefall']
+          oov:
+            The OOV word. If a word in `texts` is not in the lexicon, it is
+            replaced with `oov`.
+        Returns:
+          Return a ragged int tensor with 2 axes [utterance][token_id]
+        """
+        oov_id = self.word_table[oov]
+
+        word_ids_list = []
+        for text in texts:
+            word_ids = []
+            for word in text.split():
+                if word in self.word_table:
+                    word_ids.append(self.word_table[word])
+                else:
+                    word_ids.append(oov_id)
+            word_ids_list.append(word_ids)
+        ragged_indexes = k2.RaggedTensor(word_ids_list, dtype=torch.int32)
+        ans = self.ragged_lexicon.index(ragged_indexes)
+        ans = ans.remove_axis(ans.num_axes - 2)
+        return ans
+
+    def words_to_token_ids(self, words: List[str]) -> k2.RaggedTensor:
+        """Convert a list of words to a ragged tensor containing token IDs.
+
+        We assume there are no OOVs in "words".
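+
+        Example (an illustrative sketch; `uniq_lexicon` is a hypothetical
+        UniqLexicon instance)::
+
+            ragged = uniq_lexicon.words_to_token_ids(["HELLO", "WORLD"])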
+ """ + word_ids = [self.word_table[w] for w in words] + word_ids = torch.tensor(word_ids, dtype=torch.int32) + + ragged, _ = self.ragged_lexicon.index( + indexes=word_ids, + axis=0, + need_value_indexes=False, + ) + return ragged diff --git a/hyperion/utils/utils.py b/hyperion/utils/utils.py new file mode 100644 index 00000000..1663fb03 --- /dev/null +++ b/hyperion/utils/utils.py @@ -0,0 +1,978 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang +# Mingshuang Luo) +# +# See ../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import collections +import logging +import os +import re +import subprocess +from collections import defaultdict +from contextlib import contextmanager +from datetime import datetime +from pathlib import Path +from typing import Dict, Iterable, List, TextIO, Tuple, Union + +import k2 +import k2.version +import kaldialign +import sentencepiece as spm +import torch +import torch.distributed as dist +import torch.nn as nn +from torch.utils.tensorboard import SummaryWriter + +from hyperion.utils.checkpoint import average_checkpoints + +Pathlike = Union[str, Path] + + +# Pytorch issue: https://github.com/pytorch/pytorch/issues/47379 +# Fixed: https://github.com/pytorch/pytorch/pull/49853 +# The fix was included in v1.9.0 +# https://github.com/pytorch/pytorch/releases/tag/v1.9.0 +def is_jit_tracing(): + if torch.jit.is_scripting(): + return False + elif torch.jit.is_tracing(): + return True + return False + + +@contextmanager +def get_executor(): + # We'll either return a process pool or a distributed worker pool. + # Note that this has to be a context manager because we might use multiple + # context manager ("with" clauses) inside, and this way everything will + # free up the resources at the right time. + try: + # If this is executed on the CLSP grid, we will try to use the + # Grid Engine to distribute the tasks. + # Other clusters can also benefit from that, provided a + # cluster-specific wrapper. + # (see https://github.com/pzelasko/plz for reference) + # + # The following must be installed: + # $ pip install dask distributed + # $ pip install git+https://github.com/pzelasko/plz + name = subprocess.check_output("hostname -f", shell=True, text=True) + if name.strip().endswith(".clsp.jhu.edu"): + import plz + from distributed import Client + + with plz.setup_cluster() as cluster: + cluster.scale(80) + yield Client(cluster) + return + except Exception: + pass + # No need to return anything - compute_and_store_features + # will just instantiate the pool itself. 
+ yield None + + +def str2bool(v): + """Used in argparse.ArgumentParser.add_argument to indicate + that a type is a bool type and user can enter + + - yes, true, t, y, 1, to represent True + - no, false, f, n, 0, to represent False + + See https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse # noqa + """ + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise argparse.ArgumentTypeError("Boolean value expected.") + + +def setup_logger( + log_filename: Pathlike, + log_level: str = "info", + use_console: bool = True, +) -> None: + """Setup log level. + + Args: + log_filename: + The filename to save the log. + log_level: + The log level to use, e.g., "debug", "info", "warning", "error", + "critical" + use_console: + True to also print logs to console. + """ + now = datetime.now() + date_time = now.strftime("%Y-%m-%d-%H-%M-%S") + if dist.is_available() and dist.is_initialized(): + world_size = dist.get_world_size() + rank = dist.get_rank() + formatter = f"%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] ({rank}/{world_size}) %(message)s" # noqa + log_filename = f"{log_filename}-{date_time}-{rank}" + else: + formatter = ( + "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + ) + log_filename = f"{log_filename}-{date_time}" + + os.makedirs(os.path.dirname(log_filename), exist_ok=True) + + level = logging.ERROR + if log_level == "debug": + level = logging.DEBUG + elif log_level == "info": + level = logging.INFO + elif log_level == "warning": + level = logging.WARNING + elif log_level == "critical": + level = logging.CRITICAL + + logging.basicConfig( + filename=log_filename, + format=formatter, + level=level, + filemode="w", + ) + if use_console: + console = logging.StreamHandler() + console.setLevel(level) + console.setFormatter(logging.Formatter(formatter)) + logging.getLogger("").addHandler(console) + + +class AttributeDict(dict): + def __getattr__(self, key): + if key in self: + return self[key] + raise AttributeError(f"No such attribute '{key}'") + + def __setattr__(self, key, value): + self[key] = value + + def __delattr__(self, key): + if key in self: + del self[key] + return + raise AttributeError(f"No such attribute '{key}'") + + +def encode_supervisions( + supervisions: dict, subsampling_factor: int +) -> Tuple[torch.Tensor, List[str]]: + """ + Encodes Lhotse's ``batch["supervisions"]`` dict into + a pair of torch Tensor, and a list of transcription strings. + + The supervision tensor has shape ``(batch_size, 3)``. + Its second dimension contains information about sequence index [0], + start frames [1] and num frames [2]. + + The batch items might become re-ordered during this operation -- the + returned tensor and list of strings are guaranteed to be consistent with + each other. + """ + supervision_segments = torch.stack( + ( + supervisions["sequence_idx"], + supervisions["start_frame"] // subsampling_factor, + supervisions["num_frames"] // subsampling_factor, + ), + 1, + ).to(torch.int32) + + indices = torch.argsort(supervision_segments[:, 2], descending=True) + supervision_segments = supervision_segments[indices] + texts = supervisions["text"] + texts = [texts[idx] for idx in indices] + + return supervision_segments, texts + + +def get_texts( + best_paths: k2.Fsa, return_ragged: bool = False +) -> Union[List[List[int]], k2.RaggedTensor]: + """Extract the texts (as word IDs) from the best-path FSAs. 
+ Args: + best_paths: + A k2.Fsa with best_paths.arcs.num_axes() == 3, i.e. + containing multiple FSAs, which is expected to be the result + of k2.shortest_path (otherwise the returned values won't + be meaningful). + return_ragged: + True to return a ragged tensor with two axes [utt][word_id]. + False to return a list-of-list word IDs. + Returns: + Returns a list of lists of int, containing the label sequences we + decoded. + """ + if isinstance(best_paths.aux_labels, k2.RaggedTensor): + # remove 0's and -1's. + aux_labels = best_paths.aux_labels.remove_values_leq(0) + # TODO: change arcs.shape() to arcs.shape + aux_shape = best_paths.arcs.shape().compose(aux_labels.shape) + + # remove the states and arcs axes. + aux_shape = aux_shape.remove_axis(1) + aux_shape = aux_shape.remove_axis(1) + aux_labels = k2.RaggedTensor(aux_shape, aux_labels.values) + else: + # remove axis corresponding to states. + aux_shape = best_paths.arcs.shape().remove_axis(1) + aux_labels = k2.RaggedTensor(aux_shape, best_paths.aux_labels) + # remove 0's and -1's. + aux_labels = aux_labels.remove_values_leq(0) + + assert aux_labels.num_axes == 2 + if return_ragged: + return aux_labels + else: + return aux_labels.tolist() + + +def get_alignments(best_paths: k2.Fsa, kind: str) -> List[List[int]]: + """Extract labels or aux_labels from the best-path FSAs. + + Args: + best_paths: + A k2.Fsa with best_paths.arcs.num_axes() == 3, i.e. + containing multiple FSAs, which is expected to be the result + of k2.shortest_path (otherwise the returned values won't + be meaningful). + kind: + Possible values are: "labels" and "aux_labels". Caution: When it is + "labels", the resulting alignments contain repeats. + Returns: + Returns a list of lists of int, containing the token sequences we + decoded. For `ans[i]`, its length equals to the number of frames + after subsampling of the i-th utterance in the batch. + + Example: + When `kind` is `labels`, one possible alignment example is (with + repeats):: + + c c c blk a a blk blk t t t blk blk + + If `kind` is `aux_labels`, the above example changes to:: + + c blk blk blk a blk blk blk t blk blk blk blk + + """ + assert kind in ("labels", "aux_labels") + # arc.shape() has axes [fsa][state][arc], we remove "state"-axis here + token_shape = best_paths.arcs.shape().remove_axis(1) + # token_shape has axes [fsa][arc] + tokens = k2.RaggedTensor( + token_shape, getattr(best_paths, kind).contiguous() + ) + tokens = tokens.remove_values_eq(-1) + return tokens.tolist() + + +def save_alignments( + alignments: Dict[str, List[int]], + subsampling_factor: int, + filename: str, +) -> None: + """Save alignments to a file. + + Args: + alignments: + A dict containing alignments. Keys of the dict are utterances and + values are the corresponding framewise alignments after subsampling. + subsampling_factor: + The subsampling factor of the model. + filename: + Path to save the alignments. + Returns: + Return None. + """ + ali_dict = { + "subsampling_factor": subsampling_factor, + "alignments": alignments, + } + torch.save(ali_dict, filename) + + +def load_alignments(filename: str) -> Tuple[int, Dict[str, List[int]]]: + """Load alignments from a file. + + Args: + filename: + Path to the file containing alignment information. + The file should be saved by :func:`save_alignments`. + Returns: + Return a tuple containing: + - subsampling_factor: The subsampling_factor used to compute + the alignments. + - alignments: A dict containing utterances and their corresponding + framewise alignment, after subsampling. 
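+
+    Round-trip sketch (``ali.pt`` is a hypothetical path)::
+
+        save_alignments(ali, subsampling_factor=4, filename="ali.pt")
+        subsampling_factor, ali2 = load_alignments("ali.pt")
+        assert subsampling_factor == 4 and ali2 == ali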
+    """
+    ali_dict = torch.load(filename)
+    subsampling_factor = ali_dict["subsampling_factor"]
+    alignments = ali_dict["alignments"]
+    return subsampling_factor, alignments
+
+
+def store_transcripts(
+    filename: Pathlike, texts: Iterable[Tuple[str, str, str]]
+) -> None:
+    """Save predicted results and reference transcripts to a file.
+
+    Args:
+      filename:
+        File to save the results to.
+      texts:
+        An iterable of tuples. The first element is the cut_id, the second is
+        the reference transcript and the third element is the predicted result.
+    Returns:
+      Return None.
+    """
+    with open(filename, "w") as f:
+        for cut_id, ref, hyp in texts:
+            print(f"{cut_id}:\tref={ref}", file=f)
+            print(f"{cut_id}:\thyp={hyp}", file=f)
+
+
+def write_error_stats(
+    f: TextIO,
+    test_set_name: str,
+    results: List[Tuple[str, List[str], List[str]]],
+    enable_log: bool = True,
+) -> float:
+    """Write statistics based on predicted results and reference transcripts.
+
+    It will write the following to the given file:
+
+        - WER
+        - number of insertions, deletions, substitutions, corrects and total
+          reference words. For example::
+
+              Errors: 23 insertions, 57 deletions, 212 substitutions, over 2606
+              reference words (2337 correct)
+
+        - The difference between the reference transcript and predicted result.
+          An instance is given below::
+
+              THE ASSOCIATION OF (EDISON->ADDISON) ILLUMINATING COMPANIES
+
+          The above example shows that the reference word is `EDISON`,
+          but it is predicted as `ADDISON` (a substitution error).
+
+          Another example is::
+
+              FOR THE FIRST DAY (SIR->*) I THINK
+
+          The reference word `SIR` is missing in the predicted
+          results (a deletion error).
+
+    Args:
+      f:
+        An opened text stream to write the statistics to.
+      test_set_name:
+        Name of the test set; used only in the log message and header.
+      results:
+        An iterable of tuples. The first element is the cut_id, the second is
+        the reference transcript and the third element is the predicted result.
+      enable_log:
+        If True, also print detailed WER to the console.
+        Otherwise, it is written only to the given file.
+    Returns:
+      Return the total word error rate (in percent) as a float.
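+
+    Usage sketch (the file name is hypothetical)::
+
+        with open("errs-test-clean.txt", "w") as f:
+            wer = write_error_stats(f, "test-clean", results)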
+ """ + subs: Dict[Tuple[str, str], int] = defaultdict(int) + ins: Dict[str, int] = defaultdict(int) + dels: Dict[str, int] = defaultdict(int) + + # `words` stores counts per word, as follows: + # corr, ref_sub, hyp_sub, ins, dels + words: Dict[str, List[int]] = defaultdict(lambda: [0, 0, 0, 0, 0]) + num_corr = 0 + ERR = "*" + for cut_id, ref, hyp in results: + ali = kaldialign.align(ref, hyp, ERR) + for ref_word, hyp_word in ali: + if ref_word == ERR: + ins[hyp_word] += 1 + words[hyp_word][3] += 1 + elif hyp_word == ERR: + dels[ref_word] += 1 + words[ref_word][4] += 1 + elif hyp_word != ref_word: + subs[(ref_word, hyp_word)] += 1 + words[ref_word][1] += 1 + words[hyp_word][2] += 1 + else: + words[ref_word][0] += 1 + num_corr += 1 + ref_len = sum([len(r) for _, r, _ in results]) + sub_errs = sum(subs.values()) + ins_errs = sum(ins.values()) + del_errs = sum(dels.values()) + tot_errs = sub_errs + ins_errs + del_errs + tot_err_rate = "%.2f" % (100.0 * tot_errs / ref_len) + + if enable_log: + logging.info( + f"[{test_set_name}] %WER {tot_errs / ref_len:.2%} " + f"[{tot_errs} / {ref_len}, {ins_errs} ins, " + f"{del_errs} del, {sub_errs} sub ]" + ) + + print(f"%WER = {tot_err_rate}", file=f) + print( + f"Errors: {ins_errs} insertions, {del_errs} deletions, " + f"{sub_errs} substitutions, over {ref_len} reference " + f"words ({num_corr} correct)", + file=f, + ) + print( + "Search below for sections starting with PER-UTT DETAILS:, " + "SUBSTITUTIONS:, DELETIONS:, INSERTIONS:, PER-WORD STATS:", + file=f, + ) + + print("", file=f) + print("PER-UTT DETAILS: corr or (ref->hyp) ", file=f) + for cut_id, ref, hyp in results: + ali = kaldialign.align(ref, hyp, ERR) + combine_successive_errors = True + if combine_successive_errors: + ali = [[[x], [y]] for x, y in ali] + for i in range(len(ali) - 1): + if ali[i][0] != ali[i][1] and ali[i + 1][0] != ali[i + 1][1]: + ali[i + 1][0] = ali[i][0] + ali[i + 1][0] + ali[i + 1][1] = ali[i][1] + ali[i + 1][1] + ali[i] = [[], []] + ali = [ + [ + list(filter(lambda a: a != ERR, x)), + list(filter(lambda a: a != ERR, y)), + ] + for x, y in ali + ] + ali = list(filter(lambda x: x != [[], []], ali)) + ali = [ + [ + ERR if x == [] else " ".join(x), + ERR if y == [] else " ".join(y), + ] + for x, y in ali + ] + + print( + f"{cut_id}:\t" + + " ".join( + ( + ref_word + if ref_word == hyp_word + else f"({ref_word}->{hyp_word})" + for ref_word, hyp_word in ali + ) + ), + file=f, + ) + + print("", file=f) + print("SUBSTITUTIONS: count ref -> hyp", file=f) + + for count, (ref, hyp) in sorted( + [(v, k) for k, v in subs.items()], reverse=True + ): + print(f"{count} {ref} -> {hyp}", file=f) + + print("", file=f) + print("DELETIONS: count ref", file=f) + for count, ref in sorted([(v, k) for k, v in dels.items()], reverse=True): + print(f"{count} {ref}", file=f) + + print("", file=f) + print("INSERTIONS: count hyp", file=f) + for count, hyp in sorted([(v, k) for k, v in ins.items()], reverse=True): + print(f"{count} {hyp}", file=f) + + print("", file=f) + print( + "PER-WORD STATS: word corr tot_errs count_in_ref count_in_hyp", file=f + ) + for _, word, counts in sorted( + [(sum(v[1:]), k, v) for k, v in words.items()], reverse=True + ): + (corr, ref_sub, hyp_sub, ins, dels) = counts + tot_errs = ref_sub + hyp_sub + ins + dels + ref_count = corr + ref_sub + dels + hyp_count = corr + hyp_sub + ins + + print(f"{word} {corr} {tot_errs} {ref_count} {hyp_count}", file=f) + return float(tot_err_rate) + + +class MetricsTracker(collections.defaultdict): + def __init__(self): + # Passing the 
type 'int' to the base-class constructor + # makes undefined items default to int() which is zero. + # This class will play a role as metrics tracker. + # It can record many metrics, including but not limited to loss. + super(MetricsTracker, self).__init__(int) + + def __add__(self, other: "MetricsTracker") -> "MetricsTracker": + ans = MetricsTracker() + for k, v in self.items(): + ans[k] = v + for k, v in other.items(): + ans[k] = ans[k] + v + return ans + + def __mul__(self, alpha: float) -> "MetricsTracker": + ans = MetricsTracker() + for k, v in self.items(): + ans[k] = v * alpha + return ans + + def __str__(self) -> str: + ans_frames = "" + ans_utterances = "" + for k, v in self.norm_items(): + norm_value = "%.4g" % v + if "utt_" not in k: + ans_frames += str(k) + "=" + str(norm_value) + ", " + else: + ans_utterances += str(k) + "=" + str(norm_value) + if k == "utt_duration": + ans_utterances += " frames, " + elif k == "utt_pad_proportion": + ans_utterances += ", " + else: + raise ValueError(f"Unexpected key: {k}") + frames = "%.2f" % self["frames"] + ans_frames += "over " + str(frames) + " frames. " + if ans_utterances != "": + utterances = "%.2f" % self["utterances"] + ans_utterances += "over " + str(utterances) + " utterances." + + return ans_frames + ans_utterances + + def norm_items(self) -> List[Tuple[str, float]]: + """ + Returns a list of pairs, like: + [('ctc_loss', 0.1), ('att_loss', 0.07)] + """ + num_frames = self["frames"] if "frames" in self else 1 + num_utterances = self["utterances"] if "utterances" in self else 1 + ans = [] + for k, v in self.items(): + if k == "frames" or k == "utterances": + continue + norm_value = ( + float(v) / num_frames + if "utt_" not in k + else float(v) / num_utterances + ) + ans.append((k, norm_value)) + return ans + + def reduce(self, device): + """ + Reduce using torch.distributed, which I believe ensures that + all processes get the total. + """ + keys = sorted(self.keys()) + s = torch.tensor([float(self[k]) for k in keys], device=device) + dist.all_reduce(s, op=dist.ReduceOp.SUM) + for k, v in zip(keys, s.cpu().tolist()): + self[k] = v + + def write_summary( + self, + tb_writer: SummaryWriter, + prefix: str, + batch_idx: int, + ) -> None: + """Add logging information to a TensorBoard writer. + + Args: + tb_writer: a TensorBoard writer + prefix: a prefix for the name of the loss, e.g. "train/valid_", + or "train/current_" + batch_idx: The current batch index, used as the x-axis of the plot. + """ + for k, v in self.norm_items(): + tb_writer.add_scalar(prefix + k, v, batch_idx) + + +def concat( + ragged: k2.RaggedTensor, value: int, direction: str +) -> k2.RaggedTensor: + """Prepend a value to the beginning of each sublist or append a value. + to the end of each sublist. + + Args: + ragged: + A ragged tensor with two axes. + value: + The value to prepend or append. + direction: + It can be either "left" or "right". If it is "left", we + prepend the value to the beginning of each sublist; + if it is "right", we append the value to the end of each + sublist. + + Returns: + Return a new ragged tensor, whose sublists either start with + or end with the given value. 
+ + >>> a = k2.RaggedTensor([[1, 3], [5]]) + >>> a + [ [ 1 3 ] [ 5 ] ] + >>> concat(a, value=0, direction="left") + [ [ 0 1 3 ] [ 0 5 ] ] + >>> concat(a, value=0, direction="right") + [ [ 1 3 0 ] [ 5 0 ] ] + + """ + dtype = ragged.dtype + device = ragged.device + + assert ragged.num_axes == 2, f"num_axes: {ragged.num_axes}" + pad_values = torch.full( + size=(ragged.tot_size(0), 1), + fill_value=value, + device=device, + dtype=dtype, + ) + pad = k2.RaggedTensor(pad_values) + + if direction == "left": + ans = k2.ragged.cat([pad, ragged], axis=1) + elif direction == "right": + ans = k2.ragged.cat([ragged, pad], axis=1) + else: + raise ValueError( + f'Unsupported direction: {direction}. " \ + "Expect either "left" or "right"' + ) + return ans + + +def add_sos(ragged: k2.RaggedTensor, sos_id: int) -> k2.RaggedTensor: + """Add SOS to each sublist. + + Args: + ragged: + A ragged tensor with two axes. + sos_id: + The ID of the SOS symbol. + + Returns: + Return a new ragged tensor, where each sublist starts with SOS. + + >>> a = k2.RaggedTensor([[1, 3], [5]]) + >>> a + [ [ 1 3 ] [ 5 ] ] + >>> add_sos(a, sos_id=0) + [ [ 0 1 3 ] [ 0 5 ] ] + + """ + return concat(ragged, sos_id, direction="left") + + +def add_eos(ragged: k2.RaggedTensor, eos_id: int) -> k2.RaggedTensor: + """Add EOS to each sublist. + + Args: + ragged: + A ragged tensor with two axes. + eos_id: + The ID of the EOS symbol. + + Returns: + Return a new ragged tensor, where each sublist ends with EOS. + + >>> a = k2.RaggedTensor([[1, 3], [5]]) + >>> a + [ [ 1 3 ] [ 5 ] ] + >>> add_eos(a, eos_id=0) + [ [ 1 3 0 ] [ 5 0 ] ] + + """ + return concat(ragged, eos_id, direction="right") + + +def make_pad_mask(lengths: torch.Tensor) -> torch.Tensor: + """ + Args: + lengths: + A 1-D tensor containing sentence lengths. + Returns: + Return a 2-D bool tensor, where masked positions + are filled with `True` and non-masked positions are + filled with `False`. 
+
+    >>> lengths = torch.tensor([1, 3, 2, 5])
+    >>> make_pad_mask(lengths)
+    tensor([[False, True, True, True, True],
+            [False, False, False, True, True],
+            [False, False, True, True, True],
+            [False, False, False, False, False]])
+    """
+    assert lengths.ndim == 1, lengths.ndim
+
+    max_len = lengths.max()
+    n = lengths.size(0)
+
+    expanded_lengths = torch.arange(max_len).expand(n, max_len).to(lengths)
+
+    return expanded_lengths >= lengths.unsqueeze(1)
+
+
+# Copied and modified from https://github.com/wenet-e2e/wenet/blob/main/wenet/utils/mask.py
+def subsequent_chunk_mask(
+    size: int,
+    chunk_size: int,
+    num_left_chunks: int = -1,
+    device: torch.device = torch.device("cpu"),
+) -> torch.Tensor:
+    """Create a mask for subsequent steps (size, size) with the given chunk
+    size; this is for a streaming encoder.
+    Args:
+        size (int): size of mask
+        chunk_size (int): size of chunk
+        num_left_chunks (int): number of left chunks
+            <0: use full chunk
+            >=0: use num_left_chunks
+        device (torch.device): "cpu" or "cuda" or torch.Tensor.device
+    Returns:
+        torch.Tensor: mask
+    Examples:
+        >>> subsequent_chunk_mask(4, 2)
+        [[1, 1, 0, 0],
+         [1, 1, 0, 0],
+         [1, 1, 1, 1],
+         [1, 1, 1, 1]]
+    """
+    ret = torch.zeros(size, size, device=device, dtype=torch.bool)
+    for i in range(size):
+        if num_left_chunks < 0:
+            start = 0
+        else:
+            start = max((i // chunk_size - num_left_chunks) * chunk_size, 0)
+        ending = min((i // chunk_size + 1) * chunk_size, size)
+        ret[i, start:ending] = True
+    return ret
+
+
+def l1_norm(x):
+    return torch.sum(torch.abs(x))
+
+
+def l2_norm(x):
+    # NOTE: this is the *squared* L2 norm (sum of squares).
+    return torch.sum(torch.pow(x, 2))
+
+
+def linf_norm(x):
+    return torch.max(torch.abs(x))
+
+
+def measure_weight_norms(
+    model: nn.Module, norm: str = "l2"
+) -> Dict[str, float]:
+    """
+    Compute the norms of the model's parameters.
+
+    :param model: a torch.nn.Module instance
+    :param norm: how to compute the norm. Available values: 'l1', 'l2', 'linf'
+    :return: a dict mapping from parameter's name to its norm.
+    """
+    with torch.no_grad():
+        norms = {}
+        for name, param in model.named_parameters():
+            if norm == "l1":
+                val = l1_norm(param)
+            elif norm == "l2":
+                val = l2_norm(param)
+            elif norm == "linf":
+                val = linf_norm(param)
+            else:
+                raise ValueError(f"Unknown norm type: {norm}")
+            norms[name] = val.item()
+        return norms
+
+
+def measure_gradient_norms(
+    model: nn.Module, norm: str = "l1"
+) -> Dict[str, float]:
+    """
+    Compute the norms of the gradients for each of model's parameters.
+
+    :param model: a torch.nn.Module instance
+    :param norm: how to compute the norm. Available values: 'l1', 'l2', 'linf'
+    :return: a dict mapping from parameter's name to its gradient's norm.
+    """
+    with torch.no_grad():
+        norms = {}
+        for name, param in model.named_parameters():
+            if norm == "l1":
+                val = l1_norm(param.grad)
+            elif norm == "l2":
+                val = l2_norm(param.grad)
+            elif norm == "linf":
+                val = linf_norm(param.grad)
+            else:
+                raise ValueError(f"Unknown norm type: {norm}")
+            norms[name] = val.item()
+        return norms
+
+
+def optim_step_and_measure_param_change(
+    model: nn.Module,
+    old_parameters: Dict[str, nn.parameter.Parameter],
+) -> Dict[str, float]:
+    """
+    Measure the "relative change in parameters per minibatch."
+    It is understood as the ratio between the squared L2 norm of the
+    difference between the original and updated parameters, and the squared
+    L2 norm of the original parameter. It is given by the formula:
+
+    .. math::
+
+        \begin{aligned}
+            \delta = \frac{\Vert\theta - \theta_{new}\Vert^2}{\Vert\theta\Vert^2}
+        \end{aligned}
+
+    This function is supposed to be used as follows:
+
+    .. code-block:: python
+
+        old_parameters = {
+            n: p.detach().clone() for n, p in model.named_parameters()
+        }
+
+        optimizer.step()
+
+        deltas = optim_step_and_measure_param_change(model, old_parameters)
+
+    Args:
+        model: A torch.nn.Module instance.
+        old_parameters:
+            A Dict of named_parameters before optimizer.step().
+
+    Return:
+        A Dict containing the relative change for each parameter.
+    """
+    relative_change = {}
+    with torch.no_grad():
+        for n, p_new in model.named_parameters():
+            p_orig = old_parameters[n]
+            delta = l2_norm(p_orig - p_new) / l2_norm(p_orig)
+            relative_change[n] = delta.item()
+    return relative_change
+
+
+def load_averaged_model(
+    model_dir: str,
+    model: torch.nn.Module,
+    epoch: int,
+    avg: int,
+    device: torch.device,
+):
+    """
+    Load a model whose weights are the average of the last `avg` checkpoints
+    up to and including `epoch`.
+
+    :param model_dir: a str of the experiment directory
+    :param model: a torch.nn.Module instance
+    :param epoch: the last epoch to load from
+    :param avg: how many models to average from
+    :param device: move model to this device
+
+    :return: the model with averaged weights loaded
+    """
+
+    # start cannot be negative
+    start = max(epoch - avg + 1, 0)
+    filenames = [f"{model_dir}/epoch-{i}.pt" for i in range(start, epoch + 1)]
+
+    logging.info(f"averaging {filenames}")
+    model.to(device)
+    model.load_state_dict(average_checkpoints(filenames, device=device))
+
+    return model
+
+
+def tokenize_by_bpe_model(
+    sp: spm.SentencePieceProcessor,
+    txt: str,
+) -> str:
+    """
+    Tokenize text with a BPE model. This function is from
+    https://github1s.com/wenet-e2e/wenet/blob/main/wenet/dataset/processor.py#L322-L342.
+    Args:
+        sp: spm.SentencePieceProcessor.
+        txt: str
+
+    Return:
+        A new string which includes chars and bpes.
+    """
+    tokens = []
+    # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref:
+    # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+    pattern = re.compile(r"([\u4e00-\u9fff])")
+    # Example:
+    #   txt   = "你好 ITS'S OKAY 的"
+    #   chars = ["你", "好", " ITS'S OKAY ", "的"]
+    chars = pattern.split(txt.upper())
+    mix_chars = [w for w in chars if len(w.strip()) > 0]
+    for ch_or_w in mix_chars:
+        # ch_or_w is a single CJK character (e.g., "你"), do nothing.
+        if pattern.fullmatch(ch_or_w) is not None:
+            tokens.append(ch_or_w)
+        # ch_or_w contains non-CJK characters (e.g., " IT'S OKAY "),
+        # encode ch_or_w using the BPE model.
+        else:
+            for p in sp.encode_as_pieces(ch_or_w):
+                tokens.append(p)
+    txt_with_bpe = "/".join(tokens)
+
+    return txt_with_bpe
+
+
+def display_and_save_batch(
+    batch: dict,
+    params: AttributeDict,
+    sp: spm.SentencePieceProcessor,
+) -> None:
+    """Display the batch statistics and save the batch to disk.
+
+    Args:
+        batch:
+            A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()`
+            for the content in it.
+        params:
+            Parameters for training. See :func:`get_params`.
+        sp:
+            The BPE model.
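+
+    A sketch of the intended call site (``compute_loss`` here is a
+    placeholder for whatever loss function the recipe defines)::
+
+        try:
+            loss = compute_loss(params, model, sp, batch)
+        except Exception:
+            display_and_save_batch(batch, params=params, sp=sp)
+            raise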
+ """ + from lhotse.utils import uuid4 + + filename = f"{params.exp_dir}/batch-{uuid4()}.pt" + logging.info(f"Saving batch to {filename}") + torch.save(batch, filename) + + supervisions = batch["supervisions"] + features = batch["inputs"] + + logging.info(f"features shape: {features.shape}") + + y = sp.encode(supervisions["text"], out_type=int) + num_tokens = sum(len(i) for i in y) + logging.info(f"num tokens: {num_tokens}") From 85a62dcb1de1b954486c0fa54dca38c27cc60dc8 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Sat, 15 Oct 2022 23:52:33 -0400 Subject: [PATCH 030/154] bpe data preparation and training --- .../v1/local/validate_bpe_lexicon.py | 77 +++ egs/librispeech/v1/run_004_compute_bpe.sh | 16 +- hyperion/torch/data/asr_datamodule.py | 454 ++++++++++++++++++ 3 files changed, 544 insertions(+), 3 deletions(-) create mode 100755 egs/librispeech/v1/local/validate_bpe_lexicon.py create mode 100644 hyperion/torch/data/asr_datamodule.py diff --git a/egs/librispeech/v1/local/validate_bpe_lexicon.py b/egs/librispeech/v1/local/validate_bpe_lexicon.py new file mode 100755 index 00000000..36962933 --- /dev/null +++ b/egs/librispeech/v1/local/validate_bpe_lexicon.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# Copyright 2022 Xiaomi Corp. (authors: Fangjun Kuang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This script checks that there are no OOV tokens in the BPE-based lexicon. + +Usage example: + + python3 ./local/validate_bpe_lexicon.py \ + --lexicon /path/to/lexicon.txt \ + --bpe-model /path/to/bpe.model +""" + +import argparse +from pathlib import Path +from typing import List, Tuple + +import sentencepiece as spm + +from hyperion.utils.lexicon import read_lexicon + +# Map word to word pieces +Lexicon = List[Tuple[str, List[str]]] + + +def get_args(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--lexicon", + required=True, + type=Path, + help="Path to lexicon.txt", + ) + + parser.add_argument( + "--bpe-model", + required=True, + type=Path, + help="Path to bpe.model", + ) + + return parser.parse_args() + + +def main(): + args = get_args() + assert args.lexicon.is_file(), args.lexicon + assert args.bpe_model.is_file(), args.bpe_model + + lexicon = read_lexicon(args.lexicon) + + sp = spm.SentencePieceProcessor() + sp.load(str(args.bpe_model)) + + word_pieces = set(sp.id_to_piece(list(range(sp.vocab_size())))) + for word, pieces in lexicon: + for p in pieces: + if p not in word_pieces: + raise ValueError(f"The word {word} contains an OOV token {p}") + + +if __name__ == "__main__": + main() diff --git a/egs/librispeech/v1/run_004_compute_bpe.sh b/egs/librispeech/v1/run_004_compute_bpe.sh index 571205a8..f1fa36b1 100755 --- a/egs/librispeech/v1/run_004_compute_bpe.sh +++ b/egs/librispeech/v1/run_004_compute_bpe.sh @@ -5,9 +5,19 @@ # . ./cmd.sh . ./path.sh + + set -e nodes=fs01 storage_name=$(date +'%m_%d_%H_%M') +. 
./datapath.sh + +vocab_sizes=( + # 5000 + 2000 + 1000 + 500 +) dl_dir=$PWD/download @@ -56,9 +66,9 @@ if [ $stage -le 3 ]; then if [ ! -f $lang_dir/transcript_words.txt ]; then echo "Generate data for BPE training" files=$( - find "$dl_dir/LibriSpeech/train-clean-100" -name "*.trans.txt" - find "$dl_dir/LibriSpeech/train-clean-360" -name "*.trans.txt" - find "$dl_dir/LibriSpeech/train-other-500" -name "*.trans.txt" + find "$librispeech_root/train-clean-100" -name "*.trans.txt" + find "$librispeech_root/train-clean-360" -name "*.trans.txt" + find "$librispeech_root/train-other-500" -name "*.trans.txt" ) for f in ${files[@]}; do cat $f | cut -d " " -f 2- diff --git a/hyperion/torch/data/asr_datamodule.py b/hyperion/torch/data/asr_datamodule.py new file mode 100644 index 00000000..355ccc99 --- /dev/null +++ b/hyperion/torch/data/asr_datamodule.py @@ -0,0 +1,454 @@ +# Copyright 2021 Piotr Żelasko +# Copyright 2022 Xiaomi Corporation (Author: Mingshuang Luo) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import inspect +import logging +from functools import lru_cache +from pathlib import Path +from typing import Any, Dict, Optional + +import torch +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy +from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures + CutConcatenate, + CutMix, + DynamicBucketingSampler, + K2SpeechRecognitionDataset, + PrecomputedFeatures, + SingleCutSampler, + SpecAugment, +) +from lhotse.dataset.input_strategies import ( # noqa F401 For AudioSamples + AudioSamples, + OnTheFlyFeatures, +) +from lhotse.utils import fix_random_seed +from torch.utils.data import DataLoader + +from icefall.utils import str2bool + + +class _SeedWorkers: + def __init__(self, seed: int): + self.seed = seed + + def __call__(self, worker_id: int): + fix_random_seed(self.seed + worker_id) + + +class LibriSpeechAsrDataModule: + """ + DataModule for k2 ASR experiments. + It assumes there is always one train and valid dataloader, + but there can be multiple test dataloaders (e.g. LibriSpeech test-clean + and test-other). + + It contains all the common data pipeline modules used in ASR + experiments, e.g.: + - dynamic batch size, + - bucketing samplers, + - cut concatenation, + - augmentation, + - on-the-fly feature extraction + + This class should be derived for specific corpora used in ASR tasks. + """ + + def __init__(self, args: argparse.Namespace): + self.args = args + + @classmethod + def add_arguments(cls, parser: argparse.ArgumentParser): + group = parser.add_argument_group( + title="ASR data related options", + description="These options are used for the preparation of " + "PyTorch DataLoaders from Lhotse CutSet's -- they control the " + "effective batch sizes, sampling strategies, applied data " + "augmentations, etc.", + ) + group.add_argument( + "--full-libri", + type=str2bool, + default=True, + help="When enabled, use 960h LibriSpeech. 
" + "Otherwise, use 100h subset.", + ) + group.add_argument( + "--manifest-dir", + type=Path, + default=Path("data/fbank"), + help="Path to directory with train/valid/test cuts.", + ) + group.add_argument( + "--max-duration", + type=int, + default=200.0, + help="Maximum pooled recordings duration (seconds) in a " + "single batch. You can reduce it if it causes CUDA OOM.", + ) + group.add_argument( + "--bucketing-sampler", + type=str2bool, + default=True, + help="When enabled, the batches will come from buckets of " + "similar duration (saves padding frames).", + ) + group.add_argument( + "--num-buckets", + type=int, + default=30, + help="The number of buckets for the DynamicBucketingSampler" + "(you might want to increase it for larger datasets).", + ) + group.add_argument( + "--concatenate-cuts", + type=str2bool, + default=False, + help="When enabled, utterances (cuts) will be concatenated " + "to minimize the amount of padding.", + ) + group.add_argument( + "--duration-factor", + type=float, + default=1.0, + help="Determines the maximum duration of a concatenated cut " + "relative to the duration of the longest cut in a batch.", + ) + group.add_argument( + "--gap", + type=float, + default=1.0, + help="The amount of padding (in seconds) inserted between " + "concatenated cuts. This padding is filled with noise when " + "noise augmentation is used.", + ) + group.add_argument( + "--on-the-fly-feats", + type=str2bool, + default=False, + help="When enabled, use on-the-fly cut mixing and feature " + "extraction. Will drop existing precomputed feature manifests " + "if available.", + ) + group.add_argument( + "--shuffle", + type=str2bool, + default=True, + help="When enabled (=default), the examples will be " + "shuffled for each epoch.", + ) + group.add_argument( + "--drop-last", + type=str2bool, + default=True, + help="Whether to drop last batch. Used by sampler.", + ) + group.add_argument( + "--return-cuts", + type=str2bool, + default=True, + help="When enabled, each batch will have the " + "field: batch['supervisions']['cut'] with the cuts that " + "were used to construct it.", + ) + + group.add_argument( + "--num-workers", + type=int, + default=2, + help="The number of training dataloader workers that " + "collect the batches.", + ) + + group.add_argument( + "--enable-spec-aug", + type=str2bool, + default=True, + help="When enabled, use SpecAugment for training dataset.", + ) + + group.add_argument( + "--spec-aug-time-warp-factor", + type=int, + default=80, + help="Used only when --enable-spec-aug is True. " + "It specifies the factor for time warping in SpecAugment. " + "Larger values mean more warping. " + "A value less than 1 means to disable time warp.", + ) + + group.add_argument( + "--enable-musan", + type=str2bool, + default=True, + help="When enabled, select noise from MUSAN and mix it" + "with training dataset. ", + ) + + group.add_argument( + "--input-strategy", + type=str, + default="PrecomputedFeatures", + help="AudioSamples or PrecomputedFeatures", + ) + + def train_dataloaders( + self, + cuts_train: CutSet, + sampler_state_dict: Optional[Dict[str, Any]] = None, + ) -> DataLoader: + """ + Args: + cuts_train: + CutSet for training. + sampler_state_dict: + The state dict for the training sampler. 
+ """ + transforms = [] + if self.args.enable_musan: + logging.info("Enable MUSAN") + logging.info("About to get Musan cuts") + cuts_musan = load_manifest( + self.args.manifest_dir / "musan_cuts.jsonl.gz" + ) + transforms.append( + CutMix( + cuts=cuts_musan, prob=0.5, snr=(10, 20), preserve_id=True + ) + ) + else: + logging.info("Disable MUSAN") + + if self.args.concatenate_cuts: + logging.info( + f"Using cut concatenation with duration factor " + f"{self.args.duration_factor} and gap {self.args.gap}." + ) + # Cut concatenation should be the first transform in the list, + # so that if we e.g. mix noise in, it will fill the gaps between + # different utterances. + transforms = [ + CutConcatenate( + duration_factor=self.args.duration_factor, gap=self.args.gap + ) + ] + transforms + + input_transforms = [] + if self.args.enable_spec_aug: + logging.info("Enable SpecAugment") + logging.info( + f"Time warp factor: {self.args.spec_aug_time_warp_factor}" + ) + # Set the value of num_frame_masks according to Lhotse's version. + # In different Lhotse's versions, the default of num_frame_masks is + # different. + num_frame_masks = 10 + num_frame_masks_parameter = inspect.signature( + SpecAugment.__init__ + ).parameters["num_frame_masks"] + if num_frame_masks_parameter.default == 1: + num_frame_masks = 2 + logging.info(f"Num frame mask: {num_frame_masks}") + input_transforms.append( + SpecAugment( + time_warp_factor=self.args.spec_aug_time_warp_factor, + num_frame_masks=num_frame_masks, + features_mask_size=27, + num_feature_masks=2, + frames_mask_size=100, + ) + ) + else: + logging.info("Disable SpecAugment") + + logging.info("About to create train dataset") + train = K2SpeechRecognitionDataset( + input_strategy=eval(self.args.input_strategy)(), + cut_transforms=transforms, + input_transforms=input_transforms, + return_cuts=self.args.return_cuts, + ) + + if self.args.on_the_fly_feats: + # NOTE: the PerturbSpeed transform should be added only if we + # remove it from data prep stage. + # Add on-the-fly speed perturbation; since originally it would + # have increased epoch size by 3, we will apply prob 2/3 and use + # 3x more epochs. + # Speed perturbation probably should come first before + # concatenation, but in principle the transforms order doesn't have + # to be strict (e.g. could be randomized) + # transforms = [PerturbSpeed(factors=[0.9, 1.1], p=2/3)] + transforms # noqa + # Drop feats to be on the safe side. + train = K2SpeechRecognitionDataset( + cut_transforms=transforms, + input_strategy=OnTheFlyFeatures( + Fbank(FbankConfig(num_mel_bins=80)) + ), + input_transforms=input_transforms, + return_cuts=self.args.return_cuts, + ) + + if self.args.bucketing_sampler: + logging.info("Using DynamicBucketingSampler.") + train_sampler = DynamicBucketingSampler( + cuts_train, + max_duration=self.args.max_duration, + shuffle=self.args.shuffle, + num_buckets=self.args.num_buckets, + drop_last=self.args.drop_last, + ) + else: + logging.info("Using SingleCutSampler.") + train_sampler = SingleCutSampler( + cuts_train, + max_duration=self.args.max_duration, + shuffle=self.args.shuffle, + ) + logging.info("About to create train dataloader") + + if sampler_state_dict is not None: + logging.info("Loading sampler state dict") + train_sampler.load_state_dict(sampler_state_dict) + + # 'seed' is derived from the current random state, which will have + # previously been set in the main process. 
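+        # _SeedWorkers adds worker_id to this seed, so every dataloader
+        # worker draws a distinct but reproducible augmentation stream.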
+ seed = torch.randint(0, 100000, ()).item() + worker_init_fn = _SeedWorkers(seed) + + train_dl = DataLoader( + train, + sampler=train_sampler, + batch_size=None, + num_workers=self.args.num_workers, + persistent_workers=False, + worker_init_fn=worker_init_fn, + ) + + return train_dl + + def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader: + transforms = [] + if self.args.concatenate_cuts: + transforms = [ + CutConcatenate( + duration_factor=self.args.duration_factor, gap=self.args.gap + ) + ] + transforms + + logging.info("About to create dev dataset") + if self.args.on_the_fly_feats: + validate = K2SpeechRecognitionDataset( + cut_transforms=transforms, + input_strategy=OnTheFlyFeatures( + Fbank(FbankConfig(num_mel_bins=80)) + ), + return_cuts=self.args.return_cuts, + ) + else: + validate = K2SpeechRecognitionDataset( + cut_transforms=transforms, + return_cuts=self.args.return_cuts, + ) + valid_sampler = DynamicBucketingSampler( + cuts_valid, + max_duration=self.args.max_duration, + shuffle=False, + ) + logging.info("About to create dev dataloader") + valid_dl = DataLoader( + validate, + sampler=valid_sampler, + batch_size=None, + num_workers=2, + persistent_workers=False, + ) + + return valid_dl + + def test_dataloaders(self, cuts: CutSet) -> DataLoader: + logging.debug("About to create test dataset") + test = K2SpeechRecognitionDataset( + input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + if self.args.on_the_fly_feats + else eval(self.args.input_strategy)(), + return_cuts=self.args.return_cuts, + ) + sampler = DynamicBucketingSampler( + cuts, + max_duration=self.args.max_duration, + shuffle=False, + ) + logging.debug("About to create test dataloader") + test_dl = DataLoader( + test, + batch_size=None, + sampler=sampler, + num_workers=self.args.num_workers, + ) + return test_dl + + @lru_cache() + def train_clean_100_cuts(self) -> CutSet: + logging.info("About to get train-clean-100 cuts") + return load_manifest_lazy( + self.args.manifest_dir / "librispeech_cuts_train-clean-100.jsonl.gz" + ) + + @lru_cache() + def train_clean_360_cuts(self) -> CutSet: + logging.info("About to get train-clean-360 cuts") + return load_manifest_lazy( + self.args.manifest_dir / "librispeech_cuts_train-clean-360.jsonl.gz" + ) + + @lru_cache() + def train_other_500_cuts(self) -> CutSet: + logging.info("About to get train-other-500 cuts") + return load_manifest_lazy( + self.args.manifest_dir / "librispeech_cuts_train-other-500.jsonl.gz" + ) + + @lru_cache() + def dev_clean_cuts(self) -> CutSet: + logging.info("About to get dev-clean cuts") + return load_manifest_lazy( + self.args.manifest_dir / "librispeech_cuts_dev-clean.jsonl.gz" + ) + + @lru_cache() + def dev_other_cuts(self) -> CutSet: + logging.info("About to get dev-other cuts") + return load_manifest_lazy( + self.args.manifest_dir / "librispeech_cuts_dev-other.jsonl.gz" + ) + + @lru_cache() + def test_clean_cuts(self) -> CutSet: + logging.info("About to get test-clean cuts") + return load_manifest_lazy( + self.args.manifest_dir / "librispeech_cuts_test-clean.jsonl.gz" + ) + + @lru_cache() + def test_other_cuts(self) -> CutSet: + logging.info("About to get test-other cuts") + return load_manifest_lazy( + self.args.manifest_dir / "librispeech_cuts_test-other.jsonl.gz" + ) From 7e61cb0730cb06a0361840d5633ae8200f47b0c6 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Sun, 16 Oct 2022 02:39:05 -0400 Subject: [PATCH 031/154] remove redundent file --- hyperion/torch/data/asr_datamodule.py | 454 -------------------------- 1 file 
changed, 454 deletions(-) delete mode 100644 hyperion/torch/data/asr_datamodule.py diff --git a/hyperion/torch/data/asr_datamodule.py b/hyperion/torch/data/asr_datamodule.py deleted file mode 100644 index 355ccc99..00000000 --- a/hyperion/torch/data/asr_datamodule.py +++ /dev/null @@ -1,454 +0,0 @@ -# Copyright 2021 Piotr Żelasko -# Copyright 2022 Xiaomi Corporation (Author: Mingshuang Luo) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import inspect -import logging -from functools import lru_cache -from pathlib import Path -from typing import Any, Dict, Optional - -import torch -from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy -from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures - CutConcatenate, - CutMix, - DynamicBucketingSampler, - K2SpeechRecognitionDataset, - PrecomputedFeatures, - SingleCutSampler, - SpecAugment, -) -from lhotse.dataset.input_strategies import ( # noqa F401 For AudioSamples - AudioSamples, - OnTheFlyFeatures, -) -from lhotse.utils import fix_random_seed -from torch.utils.data import DataLoader - -from icefall.utils import str2bool - - -class _SeedWorkers: - def __init__(self, seed: int): - self.seed = seed - - def __call__(self, worker_id: int): - fix_random_seed(self.seed + worker_id) - - -class LibriSpeechAsrDataModule: - """ - DataModule for k2 ASR experiments. - It assumes there is always one train and valid dataloader, - but there can be multiple test dataloaders (e.g. LibriSpeech test-clean - and test-other). - - It contains all the common data pipeline modules used in ASR - experiments, e.g.: - - dynamic batch size, - - bucketing samplers, - - cut concatenation, - - augmentation, - - on-the-fly feature extraction - - This class should be derived for specific corpora used in ASR tasks. - """ - - def __init__(self, args: argparse.Namespace): - self.args = args - - @classmethod - def add_arguments(cls, parser: argparse.ArgumentParser): - group = parser.add_argument_group( - title="ASR data related options", - description="These options are used for the preparation of " - "PyTorch DataLoaders from Lhotse CutSet's -- they control the " - "effective batch sizes, sampling strategies, applied data " - "augmentations, etc.", - ) - group.add_argument( - "--full-libri", - type=str2bool, - default=True, - help="When enabled, use 960h LibriSpeech. " - "Otherwise, use 100h subset.", - ) - group.add_argument( - "--manifest-dir", - type=Path, - default=Path("data/fbank"), - help="Path to directory with train/valid/test cuts.", - ) - group.add_argument( - "--max-duration", - type=int, - default=200.0, - help="Maximum pooled recordings duration (seconds) in a " - "single batch. 
You can reduce it if it causes CUDA OOM.", - ) - group.add_argument( - "--bucketing-sampler", - type=str2bool, - default=True, - help="When enabled, the batches will come from buckets of " - "similar duration (saves padding frames).", - ) - group.add_argument( - "--num-buckets", - type=int, - default=30, - help="The number of buckets for the DynamicBucketingSampler" - "(you might want to increase it for larger datasets).", - ) - group.add_argument( - "--concatenate-cuts", - type=str2bool, - default=False, - help="When enabled, utterances (cuts) will be concatenated " - "to minimize the amount of padding.", - ) - group.add_argument( - "--duration-factor", - type=float, - default=1.0, - help="Determines the maximum duration of a concatenated cut " - "relative to the duration of the longest cut in a batch.", - ) - group.add_argument( - "--gap", - type=float, - default=1.0, - help="The amount of padding (in seconds) inserted between " - "concatenated cuts. This padding is filled with noise when " - "noise augmentation is used.", - ) - group.add_argument( - "--on-the-fly-feats", - type=str2bool, - default=False, - help="When enabled, use on-the-fly cut mixing and feature " - "extraction. Will drop existing precomputed feature manifests " - "if available.", - ) - group.add_argument( - "--shuffle", - type=str2bool, - default=True, - help="When enabled (=default), the examples will be " - "shuffled for each epoch.", - ) - group.add_argument( - "--drop-last", - type=str2bool, - default=True, - help="Whether to drop last batch. Used by sampler.", - ) - group.add_argument( - "--return-cuts", - type=str2bool, - default=True, - help="When enabled, each batch will have the " - "field: batch['supervisions']['cut'] with the cuts that " - "were used to construct it.", - ) - - group.add_argument( - "--num-workers", - type=int, - default=2, - help="The number of training dataloader workers that " - "collect the batches.", - ) - - group.add_argument( - "--enable-spec-aug", - type=str2bool, - default=True, - help="When enabled, use SpecAugment for training dataset.", - ) - - group.add_argument( - "--spec-aug-time-warp-factor", - type=int, - default=80, - help="Used only when --enable-spec-aug is True. " - "It specifies the factor for time warping in SpecAugment. " - "Larger values mean more warping. " - "A value less than 1 means to disable time warp.", - ) - - group.add_argument( - "--enable-musan", - type=str2bool, - default=True, - help="When enabled, select noise from MUSAN and mix it" - "with training dataset. ", - ) - - group.add_argument( - "--input-strategy", - type=str, - default="PrecomputedFeatures", - help="AudioSamples or PrecomputedFeatures", - ) - - def train_dataloaders( - self, - cuts_train: CutSet, - sampler_state_dict: Optional[Dict[str, Any]] = None, - ) -> DataLoader: - """ - Args: - cuts_train: - CutSet for training. - sampler_state_dict: - The state dict for the training sampler. - """ - transforms = [] - if self.args.enable_musan: - logging.info("Enable MUSAN") - logging.info("About to get Musan cuts") - cuts_musan = load_manifest( - self.args.manifest_dir / "musan_cuts.jsonl.gz" - ) - transforms.append( - CutMix( - cuts=cuts_musan, prob=0.5, snr=(10, 20), preserve_id=True - ) - ) - else: - logging.info("Disable MUSAN") - - if self.args.concatenate_cuts: - logging.info( - f"Using cut concatenation with duration factor " - f"{self.args.duration_factor} and gap {self.args.gap}." - ) - # Cut concatenation should be the first transform in the list, - # so that if we e.g. 
mix noise in, it will fill the gaps between - # different utterances. - transforms = [ - CutConcatenate( - duration_factor=self.args.duration_factor, gap=self.args.gap - ) - ] + transforms - - input_transforms = [] - if self.args.enable_spec_aug: - logging.info("Enable SpecAugment") - logging.info( - f"Time warp factor: {self.args.spec_aug_time_warp_factor}" - ) - # Set the value of num_frame_masks according to Lhotse's version. - # In different Lhotse's versions, the default of num_frame_masks is - # different. - num_frame_masks = 10 - num_frame_masks_parameter = inspect.signature( - SpecAugment.__init__ - ).parameters["num_frame_masks"] - if num_frame_masks_parameter.default == 1: - num_frame_masks = 2 - logging.info(f"Num frame mask: {num_frame_masks}") - input_transforms.append( - SpecAugment( - time_warp_factor=self.args.spec_aug_time_warp_factor, - num_frame_masks=num_frame_masks, - features_mask_size=27, - num_feature_masks=2, - frames_mask_size=100, - ) - ) - else: - logging.info("Disable SpecAugment") - - logging.info("About to create train dataset") - train = K2SpeechRecognitionDataset( - input_strategy=eval(self.args.input_strategy)(), - cut_transforms=transforms, - input_transforms=input_transforms, - return_cuts=self.args.return_cuts, - ) - - if self.args.on_the_fly_feats: - # NOTE: the PerturbSpeed transform should be added only if we - # remove it from data prep stage. - # Add on-the-fly speed perturbation; since originally it would - # have increased epoch size by 3, we will apply prob 2/3 and use - # 3x more epochs. - # Speed perturbation probably should come first before - # concatenation, but in principle the transforms order doesn't have - # to be strict (e.g. could be randomized) - # transforms = [PerturbSpeed(factors=[0.9, 1.1], p=2/3)] + transforms # noqa - # Drop feats to be on the safe side. - train = K2SpeechRecognitionDataset( - cut_transforms=transforms, - input_strategy=OnTheFlyFeatures( - Fbank(FbankConfig(num_mel_bins=80)) - ), - input_transforms=input_transforms, - return_cuts=self.args.return_cuts, - ) - - if self.args.bucketing_sampler: - logging.info("Using DynamicBucketingSampler.") - train_sampler = DynamicBucketingSampler( - cuts_train, - max_duration=self.args.max_duration, - shuffle=self.args.shuffle, - num_buckets=self.args.num_buckets, - drop_last=self.args.drop_last, - ) - else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( - cuts_train, - max_duration=self.args.max_duration, - shuffle=self.args.shuffle, - ) - logging.info("About to create train dataloader") - - if sampler_state_dict is not None: - logging.info("Loading sampler state dict") - train_sampler.load_state_dict(sampler_state_dict) - - # 'seed' is derived from the current random state, which will have - # previously been set in the main process. 
- seed = torch.randint(0, 100000, ()).item() - worker_init_fn = _SeedWorkers(seed) - - train_dl = DataLoader( - train, - sampler=train_sampler, - batch_size=None, - num_workers=self.args.num_workers, - persistent_workers=False, - worker_init_fn=worker_init_fn, - ) - - return train_dl - - def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader: - transforms = [] - if self.args.concatenate_cuts: - transforms = [ - CutConcatenate( - duration_factor=self.args.duration_factor, gap=self.args.gap - ) - ] + transforms - - logging.info("About to create dev dataset") - if self.args.on_the_fly_feats: - validate = K2SpeechRecognitionDataset( - cut_transforms=transforms, - input_strategy=OnTheFlyFeatures( - Fbank(FbankConfig(num_mel_bins=80)) - ), - return_cuts=self.args.return_cuts, - ) - else: - validate = K2SpeechRecognitionDataset( - cut_transforms=transforms, - return_cuts=self.args.return_cuts, - ) - valid_sampler = DynamicBucketingSampler( - cuts_valid, - max_duration=self.args.max_duration, - shuffle=False, - ) - logging.info("About to create dev dataloader") - valid_dl = DataLoader( - validate, - sampler=valid_sampler, - batch_size=None, - num_workers=2, - persistent_workers=False, - ) - - return valid_dl - - def test_dataloaders(self, cuts: CutSet) -> DataLoader: - logging.debug("About to create test dataset") - test = K2SpeechRecognitionDataset( - input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) - if self.args.on_the_fly_feats - else eval(self.args.input_strategy)(), - return_cuts=self.args.return_cuts, - ) - sampler = DynamicBucketingSampler( - cuts, - max_duration=self.args.max_duration, - shuffle=False, - ) - logging.debug("About to create test dataloader") - test_dl = DataLoader( - test, - batch_size=None, - sampler=sampler, - num_workers=self.args.num_workers, - ) - return test_dl - - @lru_cache() - def train_clean_100_cuts(self) -> CutSet: - logging.info("About to get train-clean-100 cuts") - return load_manifest_lazy( - self.args.manifest_dir / "librispeech_cuts_train-clean-100.jsonl.gz" - ) - - @lru_cache() - def train_clean_360_cuts(self) -> CutSet: - logging.info("About to get train-clean-360 cuts") - return load_manifest_lazy( - self.args.manifest_dir / "librispeech_cuts_train-clean-360.jsonl.gz" - ) - - @lru_cache() - def train_other_500_cuts(self) -> CutSet: - logging.info("About to get train-other-500 cuts") - return load_manifest_lazy( - self.args.manifest_dir / "librispeech_cuts_train-other-500.jsonl.gz" - ) - - @lru_cache() - def dev_clean_cuts(self) -> CutSet: - logging.info("About to get dev-clean cuts") - return load_manifest_lazy( - self.args.manifest_dir / "librispeech_cuts_dev-clean.jsonl.gz" - ) - - @lru_cache() - def dev_other_cuts(self) -> CutSet: - logging.info("About to get dev-other cuts") - return load_manifest_lazy( - self.args.manifest_dir / "librispeech_cuts_dev-other.jsonl.gz" - ) - - @lru_cache() - def test_clean_cuts(self) -> CutSet: - logging.info("About to get test-clean cuts") - return load_manifest_lazy( - self.args.manifest_dir / "librispeech_cuts_test-clean.jsonl.gz" - ) - - @lru_cache() - def test_other_cuts(self) -> CutSet: - logging.info("About to get test-other cuts") - return load_manifest_lazy( - self.args.manifest_dir / "librispeech_cuts_test-other.jsonl.gz" - ) From d8efa4f77885b4784783a482ce07787041efae08 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 17 Oct 2022 14:16:43 -0400 Subject: [PATCH 032/154] added script for tsne plots of lre --- hyperion/bin/plot-vector-tsne.py | 209 ----------- 
hyperion/bin/plot_embedding_tsne.py | 345 ++++++++++++++++++ hyperion/bin/plot_embedding_tsne_per_class.py | 198 ++++++++++ .../apply-mvn-select-frames.py | 0 .../compute-mfcc-feats.py | 0 hyperion/{bin => bin_deprec2}/copy-feats.py | 0 .../{bin => bin_deprec2}/eval-cos-1vs1.py | 0 .../eval-linear-gbe-up.py | 0 .../{bin => bin_deprec2}/eval-linear-gbe.py | 0 .../{bin => bin_deprec2}/eval-linear-svmc.py | 0 .../eval-logistic-regression.py | 0 .../{bin => bin_deprec2}/eval-plda-1vs1.py | 0 .../{bin => bin_deprec2}/eval-plda-nvs1.py | 0 .../{bin => bin_deprec2}/merge-h5-files.py | 0 .../{bin => bin_deprec2}/pack-audio-files.py | 0 .../{bin => bin_deprec2}/plot-vector-hist.py | 0 .../{bin => bin_deprec2}/rttm-to-bin-vad.py | 0 .../segments-to-bin-vad.py | 0 .../torch-adv-finetune-xvec-from-wav.py | 0 .../torch-adv-finetune-xvec.py | 0 .../torch-compute-mfcc-feats.py | 0 .../{bin => bin_deprec2}/torch-eval-vae.py | 0 ...osine-scoring-from-adv-test-wav-wavegan.py | 0 ...l-xvec-cosine-scoring-from-adv-test-wav.py | 0 ...l-xvec-cosine-scoring-from-art-test-wav.py | 0 ...-eval-xvec-cosine-scoring-from-test-wav.py | 0 ...sine-scoring-from-transfer-adv-test-wav.py | 0 ...sine-scoring-from-transfer-art-test-wav.py | 0 .../torch-eval-xvec-logits-from-wav.py | 0 ...rch-extract-xvectors-from-wav-with-rttm.py | 0 ...torch-extract-xvectors-slidwin-from-wav.py | 0 .../torch-extract-xvectors-slidwin.py | 0 .../torch-extract-xvectors-vae-preproc.py | 0 .../torch-extract-xvectors.py | 0 ...ch-generate-adv-attacks-xvector-classif.py | 0 ...orch-generate-adv-attacks-xvector-verif.py | 0 .../torch-train-dc1d-ae.py | 0 .../{bin => bin_deprec2}/torch-train-dvae.py | 0 .../torch-train-efficientnet-xvec-from-wav.py | 0 .../torch-train-efficientnet-xvec.py | 0 .../torch-train-resnet-xvec-from-wav.py | 0 .../torch-train-resnet-xvec.py | 0 .../torch-train-resnet1d-xvec-from-wav.py | 0 .../torch-train-spinenet-xvec-from-wav.py | 0 .../torch-train-tdnn-xvec-from-wav.py | 0 .../torch-train-tdnn-xvec.py | 0 ...orch-train-transformer-xvec-v1-from-wav.py | 0 .../torch-train-transformer-xvec-v1.py | 0 .../{bin => bin_deprec2}/torch-train-vae.py | 0 .../torch-train-vq-dvae.py | 0 .../torch-train-vq-vae.py | 0 hyperion/{bin => bin_deprec2}/train-cw-up.py | 0 hyperion/{bin => bin_deprec2}/train-cw.py | 0 .../train-gaussianizer.py | 0 hyperion/{bin => bin_deprec2}/train-lda.py | 0 .../train-linear-gbe-up.py | 0 .../{bin => bin_deprec2}/train-linear-gbe.py | 0 .../{bin => bin_deprec2}/train-linear-svmc.py | 0 .../train-logistic-regression.py | 0 hyperion/{bin => bin_deprec2}/train-mvn.py | 0 hyperion/{bin => bin_deprec2}/train-nda.py | 0 hyperion/{bin => bin_deprec2}/train-pca.py | 0 hyperion/{bin => bin_deprec2}/train-plda.py | 0 .../data/class_weighted_seg_chunk_sampler.py | 106 +++++- hyperion/utils/info_table.py | 3 + 65 files changed, 643 insertions(+), 218 deletions(-) delete mode 100755 hyperion/bin/plot-vector-tsne.py create mode 100755 hyperion/bin/plot_embedding_tsne.py create mode 100755 hyperion/bin/plot_embedding_tsne_per_class.py rename hyperion/{bin => bin_deprec2}/apply-mvn-select-frames.py (100%) rename hyperion/{bin => bin_deprec2}/compute-mfcc-feats.py (100%) rename hyperion/{bin => bin_deprec2}/copy-feats.py (100%) rename hyperion/{bin => bin_deprec2}/eval-cos-1vs1.py (100%) rename hyperion/{bin => bin_deprec2}/eval-linear-gbe-up.py (100%) rename hyperion/{bin => bin_deprec2}/eval-linear-gbe.py (100%) rename hyperion/{bin => bin_deprec2}/eval-linear-svmc.py (100%) rename hyperion/{bin => 
bin_deprec2}/eval-logistic-regression.py (100%) rename hyperion/{bin => bin_deprec2}/eval-plda-1vs1.py (100%) rename hyperion/{bin => bin_deprec2}/eval-plda-nvs1.py (100%) rename hyperion/{bin => bin_deprec2}/merge-h5-files.py (100%) rename hyperion/{bin => bin_deprec2}/pack-audio-files.py (100%) rename hyperion/{bin => bin_deprec2}/plot-vector-hist.py (100%) rename hyperion/{bin => bin_deprec2}/rttm-to-bin-vad.py (100%) rename hyperion/{bin => bin_deprec2}/segments-to-bin-vad.py (100%) rename hyperion/{bin => bin_deprec2}/torch-adv-finetune-xvec-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-adv-finetune-xvec.py (100%) rename hyperion/{bin => bin_deprec2}/torch-compute-mfcc-feats.py (100%) rename hyperion/{bin => bin_deprec2}/torch-eval-vae.py (100%) rename hyperion/{bin => bin_deprec2}/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py (100%) rename hyperion/{bin => bin_deprec2}/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-eval-xvec-cosine-scoring-from-art-test-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-eval-xvec-cosine-scoring-from-test-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-eval-xvec-logits-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-extract-xvectors-from-wav-with-rttm.py (100%) rename hyperion/{bin => bin_deprec2}/torch-extract-xvectors-slidwin-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-extract-xvectors-slidwin.py (100%) rename hyperion/{bin => bin_deprec2}/torch-extract-xvectors-vae-preproc.py (100%) rename hyperion/{bin => bin_deprec2}/torch-extract-xvectors.py (100%) rename hyperion/{bin => bin_deprec2}/torch-generate-adv-attacks-xvector-classif.py (100%) rename hyperion/{bin => bin_deprec2}/torch-generate-adv-attacks-xvector-verif.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-dc1d-ae.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-dvae.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-efficientnet-xvec-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-efficientnet-xvec.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-resnet-xvec-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-resnet-xvec.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-resnet1d-xvec-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-spinenet-xvec-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-tdnn-xvec-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-tdnn-xvec.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-transformer-xvec-v1-from-wav.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-transformer-xvec-v1.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-vae.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-vq-dvae.py (100%) rename hyperion/{bin => bin_deprec2}/torch-train-vq-vae.py (100%) rename hyperion/{bin => bin_deprec2}/train-cw-up.py (100%) rename hyperion/{bin => bin_deprec2}/train-cw.py (100%) rename hyperion/{bin => bin_deprec2}/train-gaussianizer.py (100%) rename hyperion/{bin => bin_deprec2}/train-lda.py (100%) rename hyperion/{bin => bin_deprec2}/train-linear-gbe-up.py (100%) rename hyperion/{bin => bin_deprec2}/train-linear-gbe.py (100%) rename hyperion/{bin => 
bin_deprec2}/train-linear-svmc.py (100%) rename hyperion/{bin => bin_deprec2}/train-logistic-regression.py (100%) rename hyperion/{bin => bin_deprec2}/train-mvn.py (100%) rename hyperion/{bin => bin_deprec2}/train-nda.py (100%) rename hyperion/{bin => bin_deprec2}/train-pca.py (100%) rename hyperion/{bin => bin_deprec2}/train-plda.py (100%) diff --git a/hyperion/bin/plot-vector-tsne.py b/hyperion/bin/plot-vector-tsne.py deleted file mode 100755 index c4c30302..00000000 --- a/hyperion/bin/plot-vector-tsne.py +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env python -""" - Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" - -import sys -import os -import argparse -import time -import logging - -import numpy as np -import matplotlib - -matplotlib.use("Agg") -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D as plt3d - -from sklearn.manifold import TSNE - -from hyperion.hyp_defs import config_logger -from hyperion.io import DataWriterFactory as DWF -from hyperion.helpers import VectorClassReader as VCR -from hyperion.np.transforms import TransformList, PCA - -colors = ["b", "g", "r", "c", "m", "y", "k"] -markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"] - - -def plot_vector_tsne( - iv_file, - v_list, - preproc_file, - output_path, - save_embed, - output_dim, - perplexity, - exag, - lr, - num_iter, - init_method, - rng_seed, - verbose, - pca_dim, - max_classes, - **kwargs -): - - if preproc_file is not None: - preproc = TransformList.load(preproc_file) - else: - preproc = None - - vr_args = VCR.filter_args(**kwargs) - vcr = VCR(iv_file, v_list, preproc, **vr_args) - - x, class_ids = vcr.read() - - t1 = time.time() - - if pca_dim > 0: - pca = PCA(pca_dim=pca_dim) - pca.fit(x) - x = pca.predict(x) - - if not os.path.exists(output_path): - os.makedirs(ouput_path) - - tsne_obj = lambda n: TSNE( - n_components=n, - perplexity=perplexity, - early_exaggeration=exag, - learning_rate=lr, - n_iter=num_iter, - init=init_method, - random_state=rng_seed, - verbose=verbose, - ) - - if max_classes > 0: - index = class_ids < max_classes - x = x[index] - class_ids = class_ids[index] - - if output_dim > 3: - tsne = tsne_obj(output_dim) - y = tsne.fit_transform(x) - - if save_embed: - h5_file = "%s/embed_%dd.h5" % (output_path, ouput_dim) - hw = DWF.create(h5_file) - hw.write(vcr.u2c.key, y) - - tsne = tsne_obj(2) - y = tsne.fit_transform(x) - if save_embed: - h5_file = "%s/embed_2d.h5" % output_path - hw = DWF.create(h5_file) - hw.write(vcr.u2c.key, y) - - fig_file = "%s/tsne_2d.pdf" % (output_path) - # plt.scatter(y[:,0], y[:,1], c=class_ids, marker='x') - - color_marker = [(c, m) for m in markers for c in colors] - for c in np.unique(class_ids): - idx = class_ids == c - plt.scatter( - y[idx, 0], - y[idx, 1], - c=color_marker[c][0], - marker=color_marker[c][1], - label=vcr.class_names[c], - ) - - plt.legend() - plt.grid(True) - plt.show() - plt.savefig(fig_file) - plt.clf() - - # if max_classes > 0: - # fig_file = '%s/tsne_2d_n%d.pdf' % (output_path, max_classes) - # index = class_ids < max_classes - # plt.scatter(y[index,0], y[index,1], c=class_ids[index], marker='x') - # plt.grid(True) - # plt.show() - # plt.savefig(fig_file) - # plt.clf() - - tsne = tsne_obj(3) - y = tsne.fit_transform(x) - if save_embed: - h5_file = "%s/embed_3d.h5" % output_path - hw = DWF.create(h5_file) - hw.write(vcr.u2c.key, y) - - fig_file = "%s/tsne_3d.pdf" % (output_path) - fig = plt.figure() - ax = fig.add_subplot(111, 
projection="3d") - # ax.scatter(y[:,0], y[:,1], y[:,2], c=class_ids, marker='x') - for c in np.unique(class_ids): - idx = class_ids == c - ax.scatter( - y[idx, 0], - y[idx, 1], - y[idx, 2], - c=color_marker[c][0], - marker=color_marker[c][1], - label=vcr.class_names[c], - ) - - plt.grid(True) - plt.show() - plt.savefig(fig_file) - plt.clf() - - # if max_classes > 0: - # fig_file = '%s/tsne_3d_n%d.pdf' % (output_path, max_classes) - # index = class_ids < max_classes - # ax = fig.add_subplot(111, projection='3d') - # ax.scatter(y[index,0], y[index,1], y[index,2], c=class_ids[index], marker='x') - # plt.grid(True) - # plt.show() - # plt.savefig(fig_file) - # plt.clf() - - logging.info("Elapsed time: %.2f s." % (time.time() - t1)) - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - fromfile_prefix_chars="@", - description="Plots TSNE embeddings", - ) - - parser.add_argument("--iv-file", dest="iv_file", required=True) - parser.add_argument("--v-list", dest="v_list", required=True) - parser.add_argument("--preproc-file", dest="preproc_file", default=None) - - VCR.add_argparse_args(parser) - - parser.add_argument("--output-path", dest="output_path", required=True) - parser.add_argument( - "--save-embed", dest="save_embed", default=False, action="store_true" - ) - - parser.add_argument("--output-dim", dest="output_dim", type=int, default=3) - parser.add_argument("--perplexity", dest="perplexity", type=float, default=30) - parser.add_argument("--exag", dest="exag", type=float, default=12) - parser.add_argument("--lr", dest="lr", type=float, default=200) - parser.add_argument("--num-iter", dest="num_iter", type=int, default=1000) - parser.add_argument( - "--init-method", dest="init_method", default="pca", choices=["random", "pca"] - ) - parser.add_argument("--rng-seed", dest="rng_seed", type=int, default=1024) - parser.add_argument("--pca-dim", dest="pca_dim", type=int, default=50) - parser.add_argument("--max-classes", dest="max_classes", type=int, default=10) - parser.add_argument( - "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int - ) - - args = parser.parse_args() - config_logger(args.verbose) - logging.debug(args) - - plot_vector_tsne(**vars(args)) diff --git a/hyperion/bin/plot_embedding_tsne.py b/hyperion/bin/plot_embedding_tsne.py new file mode 100755 index 00000000..e514252f --- /dev/null +++ b/hyperion/bin/plot_embedding_tsne.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np +import pandas as pd +import matplotlib + +import matplotlib.pyplot as plt + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.transforms import PCA, SklTSNE, LNorm + +matplotlib.use("Agg") +colors = ["b", "g", "r", "c", "m", "y", "k"] +markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"] + +color_marker = [(c, m) for m in markers for c in colors] + + +def plot_embedding_tsne( + train_v_file, + train_list, + pca_var_r, + prob_plot, + lnorm, + title, + max_classes, + unlabeled, + plot_class_names, + output_dir, + **kwargs, +): + + output_dir = 
Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    logging.info("loading data")
+    train_segs = SegmentSet.load(train_list)
+    train_reader = DRF.create(train_v_file)
+    x_trn = train_reader.read(train_segs["id"], squeeze=True)
+    del train_reader
+    logging.info("loaded %d samples", x_trn.shape[0])
+    if lnorm:
+        x_trn = LNorm().predict(x_trn)
+
+    if pca_var_r < 1:
+        pca = PCA(pca_var_r=pca_var_r)
+        pca.fit(x_trn)
+        x_pca = pca.predict(x_trn)
+        logging.info("pca-dim=%d", x_pca.shape[1])
+    else:
+        x_pca = x_trn
+
+    tsne_args = SklTSNE.filter_args(**kwargs["tsne"])
+    tsne = SklTSNE(**tsne_args)
+    x_tsne = tsne.fit(x_pca)
+    p = np.random.rand(x_tsne.shape[0]) <= prob_plot
+    x_tsne = x_tsne[p]
+    logging.info("plots %d samples", x_tsne.shape[0])
+
+    if unlabeled:
+        plot_class_names = ["none"]
+
+    for col in plot_class_names:
+        fig_file = f"{output_dir}/train_tsne_{col}.png"
+        if not unlabeled:
+            classes = train_segs.loc[p, col]
+            classes, class_ids = np.unique(classes, return_inverse=True)
+            if max_classes is not None:
+                index = class_ids < max_classes
+                x_tsne_filtered = x_tsne[index]
+                class_ids = class_ids[index]
+            else:
+                x_tsne_filtered = x_tsne
+
+        else:
+            # all samples go into a single dummy class
+            x_tsne_filtered = x_tsne
+            class_ids = np.zeros((x_tsne.shape[0],), dtype=int)
+            classes = [None]
+
+        for c in range(np.max(class_ids) + 1):
+            idx = class_ids == c
+            if not unlabeled:
+                logging.info("plot class %s with %d samples", classes[c], np.sum(idx))
+            plt.scatter(
+                x_tsne_filtered[idx, 0],
+                x_tsne_filtered[idx, 1],
+                c=color_marker[c][0],
+                marker=color_marker[c][1],
+                label=classes[c],
+            )
+
+        if not unlabeled:
+            plt.legend()
+        plt.grid(True)
+        plt.title(title)
+        plt.savefig(fig_file)
+        plt.clf()
+
+    # fig_file = "%s/tsne_3d.pdf" % (output_dir)
+    # fig = plt.figure()
+    # ax = fig.add_subplot(111, projection="3d")
+    # # ax.scatter(y[:,0], y[:,1], y[:,2], c=class_ids, marker='x')
+    # for c in np.unique(class_ids):
+    #     idx = class_ids == c
+    #     ax.scatter(
+    #         y[idx, 0],
+    #         y[idx, 1],
+    #         y[idx, 2],
+    #         c=color_marker[c][0],
+    #         marker=color_marker[c][1],
+    #         label=vcr.class_names[c],
+    #     )
+
+    # plt.grid(True)
+    # plt.show()
+    # plt.savefig(fig_file)
+    # plt.clf()
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Projects embeddings using TSNE")
+
+    parser.add_argument("--train-v-file", required=True)
+    parser.add_argument("--train-list", required=True)
+
+    parser.add_argument("--pca-var-r", default=0.95, type=float)
+    parser.add_argument("--prob-plot", default=0.1, type=float)
+    parser.add_argument("--lnorm", default=False, action=ActionYesNo)
+    parser.add_argument("--unlabeled", default=False, action=ActionYesNo)
+    parser.add_argument(
+        "--plot-class-names",
+        default=["class_id"],
+        nargs="+",
+        help="names of the class columns we plot",
+    )
+    parser.add_argument("--title", default="")
+    SklTSNE.add_class_args(parser, prefix="tsne")
+
+    parser.add_argument(
+        "--max-classes", default=None, type=int, help="max number of classes to plot"
+    )
+    parser.add_argument("--output-dir", required=True)
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    plot_embedding_tsne(**namespace_to_dict(args))
+
+
+# #!/usr/bin/env python
+# """
+#  Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
+#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+# """
+
+# import sys
+# import os
+# from jsonargparse import (
+#     ArgumentParser,
+#     ActionConfigFile,
+# ActionParser, +# namespace_to_dict, +# ) +# import time +# import logging + +# import numpy as np +# import pandas as pd +# import matplotlib + +# import matplotlib.pyplot as plt +# from mpl_toolkits.mplot3d import Axes3D as plt3d + +# from sklearn.manifold import TSNE + +# from hyperion.hyp_defs import config_logger +# from hyperion.io import DataWriterFactory as DWF +# from hyperion.helpers import VectorClassReader as VCR +# from hyperion.np.transforms import TransformList, PCA + +# matplotlib.use("Agg") +# colors = ["b", "g", "r", "c", "m", "y", "k"] +# markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"] + + +# def plot_embedding_tsne( +# v_file, +# v_list, +# preproc_file, +# output_dir, +# save_embed, +# output_dim, +# perplexity, +# exag, +# lr, +# num_iter, +# init_method, +# rng_seed, +# verbose, +# pca_dim, +# max_classes, +# **kwargs +# ): + +# if preproc_file is not None: +# preproc = TransformList.load(preproc_file) +# else: +# preproc = None + +# vr_args = VCR.filter_args(**kwargs) +# vcr = VCR(iv_file, v_list, preproc, **vr_args) + +# x, class_ids = vcr.read() + +# t1 = time.time() + +# if pca_dim > 0: +# pca = PCA(pca_dim=pca_dim) +# pca.fit(x) +# x = pca.predict(x) + +# if not os.path.exists(output_path): +# os.makedirs(ouput_path) + +# tsne_obj = lambda n: TSNE( +# n_components=n, +# perplexity=perplexity, +# early_exaggeration=exag, +# learning_rate=lr, +# n_iter=num_iter, +# init=init_method, +# random_state=rng_seed, +# verbose=verbose, +# ) + +# if max_classes > 0: +# index = class_ids < max_classes +# x = x[index] +# class_ids = class_ids[index] + +# if output_dim > 3: +# tsne = tsne_obj(output_dim) +# y = tsne.fit_transform(x) + +# if save_embed: +# h5_file = "%s/embed_%dd.h5" % (output_path, ouput_dim) +# hw = DWF.create(h5_file) +# hw.write(vcr.u2c.key, y) + +# tsne = tsne_obj(2) +# y = tsne.fit_transform(x) +# if save_embed: +# h5_file = "%s/embed_2d.h5" % output_path +# hw = DWF.create(h5_file) +# hw.write(vcr.u2c.key, y) + +# fig_file = "%s/tsne_2d.pdf" % (output_path) +# # plt.scatter(y[:,0], y[:,1], c=class_ids, marker='x') + +# color_marker = [(c, m) for m in markers for c in colors] +# for c in np.unique(class_ids): +# idx = class_ids == c +# plt.scatter( +# y[idx, 0], +# y[idx, 1], +# c=color_marker[c][0], +# marker=color_marker[c][1], +# label=vcr.class_names[c], +# ) + +# plt.legend() +# plt.grid(True) +# plt.show() +# plt.savefig(fig_file) +# plt.clf() + +# # if max_classes > 0: +# # fig_file = '%s/tsne_2d_n%d.pdf' % (output_path, max_classes) +# # index = class_ids < max_classes +# # plt.scatter(y[index,0], y[index,1], c=class_ids[index], marker='x') +# # plt.grid(True) +# # plt.show() +# # plt.savefig(fig_file) +# # plt.clf() + +# tsne = tsne_obj(3) +# y = tsne.fit_transform(x) +# if save_embed: +# h5_file = "%s/embed_3d.h5" % output_path +# hw = DWF.create(h5_file) +# hw.write(vcr.u2c.key, y) + +# fig_file = "%s/tsne_3d.pdf" % (output_path) +# fig = plt.figure() +# ax = fig.add_subplot(111, projection="3d") +# # ax.scatter(y[:,0], y[:,1], y[:,2], c=class_ids, marker='x') +# for c in np.unique(class_ids): +# idx = class_ids == c +# ax.scatter( +# y[idx, 0], +# y[idx, 1], +# y[idx, 2], +# c=color_marker[c][0], +# marker=color_marker[c][1], +# label=vcr.class_names[c], +# ) + +# plt.grid(True) +# plt.show() +# plt.savefig(fig_file) +# plt.clf() + +# # if max_classes > 0: +# # fig_file = '%s/tsne_3d_n%d.pdf' % (output_path, max_classes) +# # index = class_ids < max_classes +# # ax = fig.add_subplot(111, projection='3d') +# # 
ax.scatter(y[index,0], y[index,1], y[index,2], c=class_ids[index], marker='x')
+# #     plt.grid(True)
+# #     plt.show()
+# #     plt.savefig(fig_file)
+# #     plt.clf()
+
+#     logging.info("Elapsed time: %.2f s." % (time.time() - t1))
diff --git a/hyperion/bin/plot_embedding_tsne_per_class.py b/hyperion/bin/plot_embedding_tsne_per_class.py
new file mode 100755
index 00000000..5730cc06
--- /dev/null
+++ b/hyperion/bin/plot_embedding_tsne_per_class.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python
+"""
+ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+import sys
+import os
+from jsonargparse import (
+    ArgumentParser,
+    ActionConfigFile,
+    ActionParser,
+    namespace_to_dict,
+    ActionYesNo,
+)
+import time
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import matplotlib
+
+import matplotlib.pyplot as plt
+
+from hyperion.hyp_defs import config_logger
+from hyperion.utils import SegmentSet
+from hyperion.utils.math import cosine_scoring
+from hyperion.io import RandomAccessDataReaderFactory as DRF
+from hyperion.np.transforms import PCA, SklTSNE, LNorm
+from hyperion.np.clustering import AHC
+
+
+matplotlib.use("Agg")
+colors = ["b", "g", "r", "c", "m", "y", "k"]
+markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"]
+
+color_marker = [(c, m) for m in markers for c in colors]
+
+
+def plot_embedding_tsne(
+    train_v_file,
+    train_list,
+    pca_var_r,
+    prob_plot,
+    lnorm,
+    title,
+    max_classes,
+    plot_class_name,
+    do_ahc,
+    cluster_tsne,
+    num_clusters,
+    ahc_thr,
+    output_dir,
+    **kwargs,
+):
+
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    logging.info("loading data")
+    train_segs = SegmentSet.load(train_list)
+    train_reader = DRF.create(train_v_file)
+    x_trn = train_reader.read(train_segs["id"], squeeze=True)
+    del train_reader
+    logging.info("loaded %d samples", x_trn.shape[0])
+    if lnorm:
+        x_trn = LNorm().predict(x_trn)
+
+    if pca_var_r < 1:
+        pca = PCA(pca_var_r=pca_var_r)
+        pca.fit(x_trn)
+        x_pca = pca.predict(x_trn)
+        logging.info("pca-dim=%d", x_pca.shape[1])
+    else:
+        x_pca = x_trn
+
+    class_ids = train_segs[plot_class_name]
+    classes, class_idx = np.unique(class_ids, return_inverse=True)
+    if max_classes is not None:
+        index = class_idx < max_classes
+        x_pca = x_pca[index]
+        class_idx = class_idx[index]
+
+    tsne_args = SklTSNE.filter_args(**kwargs["tsne"])
+    tsne = SklTSNE(**tsne_args)
+    if do_ahc:
+        ahc = AHC()
+        global_subclass_idx = np.zeros_like(class_idx)
+
+    for c in range(np.max(class_idx) + 1):
+        fig_file = f"{output_dir}/train_tsne_{plot_class_name}_{classes[c]}.png"
+        idx = class_idx == c
+        logging.info("plot class %s with %d samples", classes[c], np.sum(idx))
+        x_c = x_pca[idx]
+        x_tsne = tsne.fit(x_c)
+        if do_ahc:
+            if cluster_tsne:
+                # in the low dim space, we cannot use cosine scoring
+                x2 = np.sum(x_tsne ** 2, axis=1)[:, None]
+                d2 = x2 - 2 * np.dot(x_tsne, x_tsne.T) + x2.T
+                # clip tiny negative values caused by rounding before the sqrt
+                scores = -np.sqrt(np.maximum(d2, 0))
+            else:
+                scores = cosine_scoring(x_c, x_c)
+            ahc.fit(scores)
+            if num_clusters is None:
+                subclass_idx_c = ahc.get_flat_clusters(ahc_thr)
+            else:
+                subclass_idx_c = ahc.get_flat_clusters(num_clusters, "num_clusters")
+            global_subclass_idx[idx] = subclass_idx_c
+
+        p = np.random.rand(x_tsne.shape[0]) <= prob_plot
+        x_tsne = x_tsne[p]
+        logging.info("plots %d samples", x_tsne.shape[0])
+        if do_ahc:
+            subclass_idx_c = subclass_idx_c[p]
+            for sc in range(min(np.max(subclass_idx_c) + 1, len(color_marker))):
+                idx_sc = subclass_idx_c == sc
+                plt.scatter(
+                    x_tsne[idx_sc, 0],
+                    x_tsne[idx_sc, 1],
+                    c=color_marker[sc][0],
+                    marker=color_marker[sc][1],
+                )
+        else:
+            plt.scatter(
+                x_tsne[:, 0],
+                x_tsne[:, 1],
+                c=color_marker[0][0],
+                marker=color_marker[0][1],
+            )
+
+        # plt.legend()
+        plt.grid(True)
+        plt.title(f"{title} {classes[c]}")
+        plt.savefig(fig_file)
+        plt.clf()
+
+    if do_ahc:
+        # subclass_ids = [f"{a}-{b}" for a, b in zip(class_ids, global_subclass_idx)]
+        # _, subclass_idx = np.unique(subclass_ids, return_inverse=True)
+        # train_segs["subclass_id"] = subclass_ids
+        train_segs["subclass_idx"] = global_subclass_idx
+        train_segs.save(output_dir / "segments.csv")
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(
+        description=(
+            "Projects embeddings using TSNE, "
+            "plots a TSNE per class to discover subclusters inside the classes"
+        )
+    )
+
+    parser.add_argument("--train-v-file", required=True)
+    parser.add_argument("--train-list", required=True)
+
+    parser.add_argument("--pca-var-r", default=0.95, type=float)
+    parser.add_argument("--prob-plot", default=0.1, type=float)
+    parser.add_argument("--lnorm", default=False, action=ActionYesNo)
+    parser.add_argument(
+        "--plot-class-name",
+        default="class_id",
+        help="name of the class column we plot",
+    )
+    parser.add_argument("--title", default="")
+    SklTSNE.add_class_args(parser, prefix="tsne")
+
+    parser.add_argument(
+        "--max-classes", default=None, type=int, help="max number of classes to plot"
+    )
+    parser.add_argument(
+        "--do-ahc", default=False, action=ActionYesNo, help="Do AHC on each class"
+    )
+    parser.add_argument(
+        "--cluster-tsne",
+        default=False,
+        action=ActionYesNo,
+        help="if true, clustering is done after TSNE, otherwise after PCA",
+    )
+
+    parser.add_argument(
+        "--num-clusters",
+        default=None,
+        type=int,
+        help="if given, number of clusters for AHC; overrides ahc-thr",
+    )
+    parser.add_argument("--ahc-thr", default=0.7, type=float, help="AHC threshold")
+    parser.add_argument("--output-dir", required=True)
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    plot_embedding_tsne(**namespace_to_dict(args))
diff --git a/hyperion/bin/apply-mvn-select-frames.py b/hyperion/bin_deprec2/apply-mvn-select-frames.py
similarity index 100%
rename from hyperion/bin/apply-mvn-select-frames.py
rename to hyperion/bin_deprec2/apply-mvn-select-frames.py
diff --git a/hyperion/bin/compute-mfcc-feats.py b/hyperion/bin_deprec2/compute-mfcc-feats.py
similarity index 100%
rename from hyperion/bin/compute-mfcc-feats.py
rename to hyperion/bin_deprec2/compute-mfcc-feats.py
diff --git a/hyperion/bin/copy-feats.py b/hyperion/bin_deprec2/copy-feats.py
similarity index 100%
rename from hyperion/bin/copy-feats.py
rename to hyperion/bin_deprec2/copy-feats.py
diff --git a/hyperion/bin/eval-cos-1vs1.py b/hyperion/bin_deprec2/eval-cos-1vs1.py
similarity index 100%
rename from hyperion/bin/eval-cos-1vs1.py
rename to hyperion/bin_deprec2/eval-cos-1vs1.py
diff --git a/hyperion/bin/eval-linear-gbe-up.py b/hyperion/bin_deprec2/eval-linear-gbe-up.py
similarity index 100%
rename from hyperion/bin/eval-linear-gbe-up.py
rename to hyperion/bin_deprec2/eval-linear-gbe-up.py
diff --git a/hyperion/bin/eval-linear-gbe.py b/hyperion/bin_deprec2/eval-linear-gbe.py
similarity index 100%
rename from hyperion/bin/eval-linear-gbe.py
rename to hyperion/bin_deprec2/eval-linear-gbe.py
diff --git
a/hyperion/bin/eval-linear-svmc.py b/hyperion/bin_deprec2/eval-linear-svmc.py similarity index 100% rename from hyperion/bin/eval-linear-svmc.py rename to hyperion/bin_deprec2/eval-linear-svmc.py diff --git a/hyperion/bin/eval-logistic-regression.py b/hyperion/bin_deprec2/eval-logistic-regression.py similarity index 100% rename from hyperion/bin/eval-logistic-regression.py rename to hyperion/bin_deprec2/eval-logistic-regression.py diff --git a/hyperion/bin/eval-plda-1vs1.py b/hyperion/bin_deprec2/eval-plda-1vs1.py similarity index 100% rename from hyperion/bin/eval-plda-1vs1.py rename to hyperion/bin_deprec2/eval-plda-1vs1.py diff --git a/hyperion/bin/eval-plda-nvs1.py b/hyperion/bin_deprec2/eval-plda-nvs1.py similarity index 100% rename from hyperion/bin/eval-plda-nvs1.py rename to hyperion/bin_deprec2/eval-plda-nvs1.py diff --git a/hyperion/bin/merge-h5-files.py b/hyperion/bin_deprec2/merge-h5-files.py similarity index 100% rename from hyperion/bin/merge-h5-files.py rename to hyperion/bin_deprec2/merge-h5-files.py diff --git a/hyperion/bin/pack-audio-files.py b/hyperion/bin_deprec2/pack-audio-files.py similarity index 100% rename from hyperion/bin/pack-audio-files.py rename to hyperion/bin_deprec2/pack-audio-files.py diff --git a/hyperion/bin/plot-vector-hist.py b/hyperion/bin_deprec2/plot-vector-hist.py similarity index 100% rename from hyperion/bin/plot-vector-hist.py rename to hyperion/bin_deprec2/plot-vector-hist.py diff --git a/hyperion/bin/rttm-to-bin-vad.py b/hyperion/bin_deprec2/rttm-to-bin-vad.py similarity index 100% rename from hyperion/bin/rttm-to-bin-vad.py rename to hyperion/bin_deprec2/rttm-to-bin-vad.py diff --git a/hyperion/bin/segments-to-bin-vad.py b/hyperion/bin_deprec2/segments-to-bin-vad.py similarity index 100% rename from hyperion/bin/segments-to-bin-vad.py rename to hyperion/bin_deprec2/segments-to-bin-vad.py diff --git a/hyperion/bin/torch-adv-finetune-xvec-from-wav.py b/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py similarity index 100% rename from hyperion/bin/torch-adv-finetune-xvec-from-wav.py rename to hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py diff --git a/hyperion/bin/torch-adv-finetune-xvec.py b/hyperion/bin_deprec2/torch-adv-finetune-xvec.py similarity index 100% rename from hyperion/bin/torch-adv-finetune-xvec.py rename to hyperion/bin_deprec2/torch-adv-finetune-xvec.py diff --git a/hyperion/bin/torch-compute-mfcc-feats.py b/hyperion/bin_deprec2/torch-compute-mfcc-feats.py similarity index 100% rename from hyperion/bin/torch-compute-mfcc-feats.py rename to hyperion/bin_deprec2/torch-compute-mfcc-feats.py diff --git a/hyperion/bin/torch-eval-vae.py b/hyperion/bin_deprec2/torch-eval-vae.py similarity index 100% rename from hyperion/bin/torch-eval-vae.py rename to hyperion/bin_deprec2/torch-eval-vae.py diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py similarity index 100% rename from hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py rename to hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py similarity index 100% rename from hyperion/bin/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py rename to hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py diff --git 
a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-art-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py similarity index 100% rename from hyperion/bin/torch-eval-xvec-cosine-scoring-from-art-test-wav.py rename to hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py similarity index 100% rename from hyperion/bin/torch-eval-xvec-cosine-scoring-from-test-wav.py rename to hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py similarity index 100% rename from hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py rename to hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py diff --git a/hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py similarity index 100% rename from hyperion/bin/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py rename to hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py diff --git a/hyperion/bin/torch-eval-xvec-logits-from-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py similarity index 100% rename from hyperion/bin/torch-eval-xvec-logits-from-wav.py rename to hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py diff --git a/hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py b/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py similarity index 100% rename from hyperion/bin/torch-extract-xvectors-from-wav-with-rttm.py rename to hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py diff --git a/hyperion/bin/torch-extract-xvectors-slidwin-from-wav.py b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py similarity index 100% rename from hyperion/bin/torch-extract-xvectors-slidwin-from-wav.py rename to hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py diff --git a/hyperion/bin/torch-extract-xvectors-slidwin.py b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py similarity index 100% rename from hyperion/bin/torch-extract-xvectors-slidwin.py rename to hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py diff --git a/hyperion/bin/torch-extract-xvectors-vae-preproc.py b/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py similarity index 100% rename from hyperion/bin/torch-extract-xvectors-vae-preproc.py rename to hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py diff --git a/hyperion/bin/torch-extract-xvectors.py b/hyperion/bin_deprec2/torch-extract-xvectors.py similarity index 100% rename from hyperion/bin/torch-extract-xvectors.py rename to hyperion/bin_deprec2/torch-extract-xvectors.py diff --git a/hyperion/bin/torch-generate-adv-attacks-xvector-classif.py b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py similarity index 100% rename from hyperion/bin/torch-generate-adv-attacks-xvector-classif.py rename to hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py diff --git a/hyperion/bin/torch-generate-adv-attacks-xvector-verif.py b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py similarity index 100% rename from 
hyperion/bin/torch-generate-adv-attacks-xvector-verif.py rename to hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py diff --git a/hyperion/bin/torch-train-dc1d-ae.py b/hyperion/bin_deprec2/torch-train-dc1d-ae.py similarity index 100% rename from hyperion/bin/torch-train-dc1d-ae.py rename to hyperion/bin_deprec2/torch-train-dc1d-ae.py diff --git a/hyperion/bin/torch-train-dvae.py b/hyperion/bin_deprec2/torch-train-dvae.py similarity index 100% rename from hyperion/bin/torch-train-dvae.py rename to hyperion/bin_deprec2/torch-train-dvae.py diff --git a/hyperion/bin/torch-train-efficientnet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py similarity index 100% rename from hyperion/bin/torch-train-efficientnet-xvec-from-wav.py rename to hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py diff --git a/hyperion/bin/torch-train-efficientnet-xvec.py b/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py similarity index 100% rename from hyperion/bin/torch-train-efficientnet-xvec.py rename to hyperion/bin_deprec2/torch-train-efficientnet-xvec.py diff --git a/hyperion/bin/torch-train-resnet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py similarity index 100% rename from hyperion/bin/torch-train-resnet-xvec-from-wav.py rename to hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py diff --git a/hyperion/bin/torch-train-resnet-xvec.py b/hyperion/bin_deprec2/torch-train-resnet-xvec.py similarity index 100% rename from hyperion/bin/torch-train-resnet-xvec.py rename to hyperion/bin_deprec2/torch-train-resnet-xvec.py diff --git a/hyperion/bin/torch-train-resnet1d-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py similarity index 100% rename from hyperion/bin/torch-train-resnet1d-xvec-from-wav.py rename to hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py diff --git a/hyperion/bin/torch-train-spinenet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py similarity index 100% rename from hyperion/bin/torch-train-spinenet-xvec-from-wav.py rename to hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py diff --git a/hyperion/bin/torch-train-tdnn-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py similarity index 100% rename from hyperion/bin/torch-train-tdnn-xvec-from-wav.py rename to hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py diff --git a/hyperion/bin/torch-train-tdnn-xvec.py b/hyperion/bin_deprec2/torch-train-tdnn-xvec.py similarity index 100% rename from hyperion/bin/torch-train-tdnn-xvec.py rename to hyperion/bin_deprec2/torch-train-tdnn-xvec.py diff --git a/hyperion/bin/torch-train-transformer-xvec-v1-from-wav.py b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py similarity index 100% rename from hyperion/bin/torch-train-transformer-xvec-v1-from-wav.py rename to hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py diff --git a/hyperion/bin/torch-train-transformer-xvec-v1.py b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py similarity index 100% rename from hyperion/bin/torch-train-transformer-xvec-v1.py rename to hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py diff --git a/hyperion/bin/torch-train-vae.py b/hyperion/bin_deprec2/torch-train-vae.py similarity index 100% rename from hyperion/bin/torch-train-vae.py rename to hyperion/bin_deprec2/torch-train-vae.py diff --git a/hyperion/bin/torch-train-vq-dvae.py b/hyperion/bin_deprec2/torch-train-vq-dvae.py similarity index 
100% rename from hyperion/bin/torch-train-vq-dvae.py rename to hyperion/bin_deprec2/torch-train-vq-dvae.py diff --git a/hyperion/bin/torch-train-vq-vae.py b/hyperion/bin_deprec2/torch-train-vq-vae.py similarity index 100% rename from hyperion/bin/torch-train-vq-vae.py rename to hyperion/bin_deprec2/torch-train-vq-vae.py diff --git a/hyperion/bin/train-cw-up.py b/hyperion/bin_deprec2/train-cw-up.py similarity index 100% rename from hyperion/bin/train-cw-up.py rename to hyperion/bin_deprec2/train-cw-up.py diff --git a/hyperion/bin/train-cw.py b/hyperion/bin_deprec2/train-cw.py similarity index 100% rename from hyperion/bin/train-cw.py rename to hyperion/bin_deprec2/train-cw.py diff --git a/hyperion/bin/train-gaussianizer.py b/hyperion/bin_deprec2/train-gaussianizer.py similarity index 100% rename from hyperion/bin/train-gaussianizer.py rename to hyperion/bin_deprec2/train-gaussianizer.py diff --git a/hyperion/bin/train-lda.py b/hyperion/bin_deprec2/train-lda.py similarity index 100% rename from hyperion/bin/train-lda.py rename to hyperion/bin_deprec2/train-lda.py diff --git a/hyperion/bin/train-linear-gbe-up.py b/hyperion/bin_deprec2/train-linear-gbe-up.py similarity index 100% rename from hyperion/bin/train-linear-gbe-up.py rename to hyperion/bin_deprec2/train-linear-gbe-up.py diff --git a/hyperion/bin/train-linear-gbe.py b/hyperion/bin_deprec2/train-linear-gbe.py similarity index 100% rename from hyperion/bin/train-linear-gbe.py rename to hyperion/bin_deprec2/train-linear-gbe.py diff --git a/hyperion/bin/train-linear-svmc.py b/hyperion/bin_deprec2/train-linear-svmc.py similarity index 100% rename from hyperion/bin/train-linear-svmc.py rename to hyperion/bin_deprec2/train-linear-svmc.py diff --git a/hyperion/bin/train-logistic-regression.py b/hyperion/bin_deprec2/train-logistic-regression.py similarity index 100% rename from hyperion/bin/train-logistic-regression.py rename to hyperion/bin_deprec2/train-logistic-regression.py diff --git a/hyperion/bin/train-mvn.py b/hyperion/bin_deprec2/train-mvn.py similarity index 100% rename from hyperion/bin/train-mvn.py rename to hyperion/bin_deprec2/train-mvn.py diff --git a/hyperion/bin/train-nda.py b/hyperion/bin_deprec2/train-nda.py similarity index 100% rename from hyperion/bin/train-nda.py rename to hyperion/bin_deprec2/train-nda.py diff --git a/hyperion/bin/train-pca.py b/hyperion/bin_deprec2/train-pca.py similarity index 100% rename from hyperion/bin/train-pca.py rename to hyperion/bin_deprec2/train-pca.py diff --git a/hyperion/bin/train-plda.py b/hyperion/bin_deprec2/train-plda.py similarity index 100% rename from hyperion/bin/train-plda.py rename to hyperion/bin_deprec2/train-plda.py diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 7dfb8a35..620d4d36 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -6,6 +6,7 @@ import math from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import logging +import time import numpy as np import pandas as pd @@ -90,6 +91,12 @@ def __init__( self._set_num_chunks_per_seg_epoch(num_chunks_per_seg_epoch) self._compute_len() + # fast mapping from classes to segments + self.map_class_to_segs = self.seg_set.df[ + ["id", self.class_name, self.length_name] + ] + self.map_class_to_segs.set_index(self.class_name, drop=False, inplace=True) + self._gather_class_info() self._set_class_weights() @@ -160,8 +167,21 @@ def _gather_class_info(self): 
self.class_info["min_seg_duration"] = min_dur self.class_info["total_duration"] = total_dur - self.map_idx_to_ids = self.class_info[["class_idx", "id"]] - self.map_idx_to_ids.set_index("class_idx", inplace=True) + # we need the mapping from class index to id + self.map_class_idx_to_ids = self.class_info[["class_idx", "id"]] + self.map_class_idx_to_ids.set_index("class_idx", inplace=True) + + # we need the list of segments from each class + # to speed up segment sampling + # searching then in each batch, it is too slow + map_class_to_segs = self.seg_set.df[["id", self.class_name]].set_index( + self.class_name + ) + self.map_class_to_segs_idx = {} + for class_id in self.class_info["id"].values: + seg_ids = map_class_to_segs.loc[class_id, "id"] + seg_idx = self.seg_set.get_loc(seg_ids) + self.map_class_to_segs_idx[class_id] = seg_idx def _set_class_weights(self): if self.weight_mode == "uniform": @@ -249,20 +269,27 @@ def _sample_classes(self, num_classes, chunk_length): class_idx = self.class_info.loc[class_ids, "class_idx"] class_idx = self.get_hard_prototypes(class_idx) # map back to class ids - class_ids = self.map_idx_to_ids.loc[class_idx] + class_ids = self.map_class_idx_to_ids.loc[class_idx] return class_ids - def _sample_segs(self, class_ids, chunk_length): + def _sample_segs0(self, class_ids, chunk_length): seg_ids = [] for c in class_ids: # for each class we sample segments longer than chunk length # get segments belonging to c - seg_mask = (self.seg_set[self.class_name] == c) & ( - self.seg_set[self.length_name] >= chunk_length - ) - seg_ids_c = self.seg_set.loc[seg_mask, "id"].values + # t1 = time.time() + segs_c = self.map_class_to_segs.loc[c] + # seg_idx_c = self.map_class_to_segs.index.get_loc(c) + if self.class_info.loc[c, "min_seg_duration"] < chunk_length: + segs_c = segs_c[segs_c[self.length_name] >= chunk_length] + # seg_idx_c = seg_idx_c[self.seg_set.loc[seg_idx_c, self.length_name]>chunk_length] + + # t2 = time.time() + seg_ids_c = segs_c["id"].values + # seg_ids_c = self.seg_set.loc[seg_idx_c, "id"].values + # t3 = time.time() # sample num_segs_per_class random segments if len(seg_ids_c) == 0: print(chunk_length, c, self.class_info.loc[c], flush=True) @@ -273,8 +300,11 @@ def _sample_segs(self, class_ids, chunk_length): size=(self.num_segs_per_class,), generator=self.rng, ).numpy() + elif self.seg_weight_mode == "data-prior": - weights = self.seg_set.loc[seg_mask, self.length_name].values + # weights = self.seg_set.loc[seg_mask, self.length_name].values + weights = segs_c[self.length_name].values + # weights = self.seg_set.loc[seg_idx_c, self.length_name].values weights /= weights.sum() sel_seg_idx_c = torch.multinomial( torch.from_numpy(weights), @@ -282,10 +312,68 @@ def _sample_segs(self, class_ids, chunk_length): replacement=True, generator=self.rng, ).numpy() + # t4 = time.time() else: raise ValueError("unknown seg-weight-mode=%s", self.seg_weight_mode) sel_seg_ids_c = list(seg_ids_c[sel_seg_idx_c]) + # t5 = time.time() seg_ids.extend(sel_seg_ids_c) + # t6 = time.time() + # logging.info( + # "stime %f %f %f %f %f", t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5 + # ) + + return seg_ids + + def _sample_segs(self, class_ids, chunk_length): + + dur_col_idx = self.seg_set.get_col_idx(self.length_name) + id_col_idx = self.seg_set.get_col_idx("id") + + seg_ids = [] + for c in class_ids: + # for each class we sample segments longer than chunk length + # get segments belonging to c + t1 = time.time() + seg_idx_c = self.map_class_to_segs_idx[c] + t2 = time.time() + durs = 
self.seg_set.iloc[seg_idx_c, dur_col_idx].values + if self.class_info.loc[c, "min_seg_duration"] < chunk_length: + mask = durs >= chunk_length + seg_idx_c = seg_idx_c[mask] + durs = durs[mask] + + t3 = time.time() + # sample num_segs_per_class random segments + if len(seg_idx_c) == 0: + print(chunk_length, c, self.class_info.loc[c], flush=True) + if self.seg_weight_mode == "uniform": + sel_seg_idx_c = torch.randint( + low=0, + high=len(seg_idx_c), + size=(self.num_segs_per_class,), + generator=self.rng, + ).numpy() + + elif self.seg_weight_mode == "data-prior": + weights = durs / durs.sum() + sel_seg_idx_c = torch.multinomial( + torch.from_numpy(weights), + num_samples=self.num_segs_per_class, + replacement=True, + generator=self.rng, + ).numpy() + t4 = time.time() + else: + raise ValueError("unknown seg-weight-mode=%s", self.seg_weight_mode) + + sel_seg_ids_c = list(self.seg_set.iloc[sel_seg_idx_c, id_col_idx]) + t5 = time.time() + seg_ids.extend(sel_seg_ids_c) + t6 = time.time() + logging.info( + "stime %f %f %f %f %f", t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5 + ) return seg_ids diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 61033d16..b38bd3fe 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -404,6 +404,9 @@ def get_loc(self, keys): return np.nonzero(loc)[0] else: return list(range(loc.start, loc.stop, loc.step)) + + def get_col_idx(self, keys): + return self.df.columns.get_loc(keys) \ No newline at end of file From 7a6dca1645abef247a9c696c5295fa80db644976 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Mon, 17 Oct 2022 15:58:32 -0400 Subject: [PATCH 033/154] add data preparation for recognition --- egs/librispeech/v1/default_config.sh | 1 + .../v1/global_conf/config_transducer_v1.sh | 50 ++++++++ egs/librispeech/v1/run_011_train_asr.sh | 27 +++-- hyperion/bin/train_wav2vec2transducer.py | 14 ++- hyperion/torch/data/audio_dataset.py | 59 ++++++++- hyperion/utils/text_info.py | 114 ++++++++++++++++++ 6 files changed, 248 insertions(+), 17 deletions(-) create mode 120000 egs/librispeech/v1/default_config.sh create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v1.sh create mode 100644 hyperion/utils/text_info.py diff --git a/egs/librispeech/v1/default_config.sh b/egs/librispeech/v1/default_config.sh new file mode 120000 index 00000000..2b6239b6 --- /dev/null +++ b/egs/librispeech/v1/default_config.sh @@ -0,0 +1 @@ +global_conf/config_transducer_v1.sh \ No newline at end of file diff --git a/egs/librispeech/v1/global_conf/config_transducer_v1.sh b/egs/librispeech/v1/global_conf/config_transducer_v1.sh new file mode 100644 index 00000000..c0a07257 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v1.sh @@ -0,0 +1,50 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + 
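+# note: the second nnet_s3 assignment below overrides the first,
+# so the epoch-5 checkpoint is the one that gets used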
+nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage3_v1.0.yaml
+nnet_s3_args=""
+nnet_s3_name=${nnet_name}.s3
+nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name
+nnet_s3=$nnet_s3_dir/model_ep0002.pth
+nnet_s3=$nnet_s3_dir/model_ep0005.pth
+
+# back-end
+plda_aug_config=conf/reverb_noise_aug.yaml
+plda_num_augs=0
+if [ $plda_num_augs -eq 0 ]; then
+    plda_data=voxceleb2cat_train
+else
+    plda_data=voxceleb2cat_train_augx${plda_num_augs}
+fi
+plda_type=splda
+lda_dim=200
+plda_y_dim=150
+plda_z_dim=200
+
diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh
index dc4e1dee..cd68587e 100755
--- a/egs/librispeech/v1/run_011_train_asr.sh
+++ b/egs/librispeech/v1/run_011_train_asr.sh
@@ -8,7 +8,7 @@ set -e
 stage=1
-ngpu=4
+ngpu=1
 config_file=default_config.sh
 interactive=false
 num_workers=""
@@ -19,11 +19,13 @@ use_wandb=false
 . $config_file
 . datapath.sh
 
-list_dir=data/${nnet_data}_proc_audio_no_sil
+train_dir=data/${nnet_data}/
+val_dir=data/dev_clean/
 
 #add extra args from the command line arguments
 if [ -n "$num_workers" ];then
     extra_args="--data.train.data_loader.num-workers $num_workers"
+    extra_args="$extra_args --data.val.data_loader.num-workers $num_workers"
 fi
 if [ "$use_tb" == "true" ];then
     extra_args="$extra_args --trainer.use-tensorboard"
@@ -33,9 +35,9 @@ if [ "$interactive" == "true" ];then
     export cuda_cmd=run.pl
 fi
 
-if [ "$use_wandb" == "true" ];then
-    extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v2 --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)"
-fi
+# if [ "$use_wandb" == "true" ];then
+#     extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v2 --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)"
+# fi
 
 # Network Training
@@ -45,15 +47,14 @@ if [ $stage -le 1 ]; then
     $cuda_cmd \
         --gpu $ngpu $nnet_s1_dir/log/train.log \
         hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
-        train_wav2vec2xvector.py $nnet_type \
+        train_wav2vec2transducer.py $nnet_type \
         --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \
-        --data.train.dataset.audio-file $list_dir/wav.scp \
-        --data.train.dataset.time-durs-file $list_dir/utt2dur \
-        --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \
-        --data.train.dataset.class-file $list_dir/lists_xvec/class2int \
-        --data.val.dataset.audio-file $list_dir/wav.scp \
-        --data.val.dataset.time-durs-file $list_dir/utt2dur \
-        --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \
+        --data.train.dataset.audio-file $train_dir/wav.scp \
+        --data.train.dataset.time-durs-file $train_dir/utt2dur \
+        --data.train.dataset.text-file $train_dir/text \
+        --data.val.dataset.audio-file $val_dir/wav.scp \
+        --data.val.dataset.time-durs-file $val_dir/utt2dur \
+        --data.val.dataset.text-file $val_dir/text \
         --trainer.exp-path $nnet_s1_dir $args \
         --num-gpus $ngpu
 
diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py
index 7f6fffef..01db6960 100755
--- a/hyperion/bin/train_wav2vec2transducer.py
+++ b/hyperion/bin/train_wav2vec2transducer.py
@@ -25,6 +25,7 @@
 from hyperion.torch.utils import ddp
 from hyperion.torch.trainers import TransducerTrainer as Trainer
 from hyperion.torch.data import AudioDataset as AD
+# from hyperion.torch.data import LibriSpeechAsrDataModule as ASRD
 from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
 from hyperion.torch.metrics import CategoricalAccuracy
 from hyperion.torch.models import HFWav2Vec2Transducer
@@ -35,7 +36,6 @@ def init_data(partition, rank, num_gpus, **kwargs):
-
kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) sampler_args = Sampler.filter_args(**kwargs["sampler"]) @@ -139,10 +139,18 @@ def make_parser(model_class): data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.train.dataset.class_file", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.val.dataset.class_file", action=ActionParser(parser=data_parser)) + parser.add_argument("--data.train.dataset.text_file", action=ActionParser(parser=data_parser)) + parser.add_argument("--data.val.dataset.text_file", action=ActionParser(parser=data_parser)) parser.add_argument("--data.train.data_loader.num_workers", action=ActionParser(parser=data_parser)) parser.add_argument("--data.val.data_loader.num_workers", action=ActionParser(parser=data_parser)) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + # parser.link_arguments( # "data.train.dataset.class_file", "data.val.dataset.class_file" # ) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 8875676f..058b7902 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -18,10 +18,15 @@ from ...utils.utt2info import Utt2Info from ...np.augment import SpeechAugment + +import k2 +import sentencepiece as spm + from torch.utils.data import Dataset import torch.distributed as dist from hyperion.np import augment +from hyperion.utils.util import read_2column_text class AudioDataset1(Dataset): @@ -458,6 +463,8 @@ def __init__( segments_file, class_names=None, class_files=None, + bpe_model=None, + text_files=None, time_durs_file=None, aug_cfgs=None, num_augs=1, @@ -506,6 +513,15 @@ def __init__( logging.info("loading class-info files") self._load_class_infos(class_names, class_files, is_val) + + if bpe_model is not None: + logging.info("loading bpe models") + self._load_bpe_model(bpe_model, is_val) + + if text_files is not None: + logging.info("loading text files") + self._load_text(text_files, is_val) + self.return_segment_info = ( [] if return_segment_info is None else return_segment_info ) @@ -514,6 +530,23 @@ def __init__( self.num_augs = num_augs self._create_augmenters(aug_cfgs) + + def _load_bpe_model(self, bpe_model, is_val): + self.sp = spm.SentencePieceProcessor() + self.sp.load(params.bpe_model) + blank_id = self.sp.piece_to_id("") + vocab_size = self.sp.get_piece_size() + + def _load_text(self, text_file, is_val): + #TODO: load bpe and text into data structure + if text_file is None: + return + if self.rank == 0: + logging.info("loading text file %s" % text_file) + self.text_info = TextInfo.load(text_file, self.sp) + + + def _load_class_infos(self, class_names, class_files, is_val): self.class_info = {} if class_names is None: @@ -642,8 +675,26 @@ def _get_segment_info(self, seg_id): return r - def __getitem__(self, segment): + def _get_text_info(self, seg_id): + #TODO: bpe labels from data structure for getitem + r = [] + # converts the class_ids to integers + for info_name in self.return_segment_info: + seg_info = self.seg_set.loc[seg_id, info_name] + if info_name in self.class_info: + # if the type of information is a class-id + # we use the class information table to + # convert from id to integer + class_info = self.class_info[info_name] + idx = class_info.loc[seg_info, "class_idx"] + seg_info = idx + + 
r.append(seg_info) + + return r + + def __getitem__(self, segment): seg_id, start, duration = self._parse_segment_item(segment) x, fs = self._read_audio(seg_id, start, duration) if self.augmenters: @@ -665,6 +716,11 @@ def __getitem__(self, segment): seg_info = self._get_segment_info(seg_id) r.extend(seg_info) + # adds the text labels + text_info = self._get_text_info(seg_id) + r.extend(text_info) + + return (*r,) @staticmethod @@ -678,6 +734,7 @@ def filter_args(**kwargs): "num_augs", "class_names", "class_files", + "text_files", "return_segment_info", "return_orig", "time_durs_file", diff --git a/hyperion/utils/text_info.py b/hyperion/utils/text_info.py new file mode 100644 index 00000000..44a71d0a --- /dev/null +++ b/hyperion/utils/text_info.py @@ -0,0 +1,114 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from pathlib import Path + +import numpy as np +import pandas as pd + +from .info_table import InfoTable + + +def read_2column_text(path: Union[Path, str]) -> Dict[str, str]: + """Read a text file having 2 column as dict object. + + Examples: + wav.scp: + key1 /some/path/a.wav + key2 /some/path/b.wav + + >>> read_2column_text('wav.scp') + {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'} + + """ + assert check_argument_types() + + data = {} + with Path(path).open("r", encoding="utf-8") as f: + for linenum, line in enumerate(f, 1): + sps = line.rstrip().split(maxsplit=1) + if len(sps) == 1: + k, v = sps[0], "" + else: + k, v = sps + if k in data: + raise RuntimeError(f"{k} is duplicated ({path}:{linenum})") + data[k] = v + return data + + + +class TextInfo(InfoTable): + def __init__(self, df): + super().__init__(df) + if "class_idx" not in self.df: + self.add_class_idx() + + if "weights" not in self.df: + self.set_uniform_weights() + else: + self.df["weights"] /= self.df["weigths"].sum() + + + def add_class_idx(self): + self.df["class_idx"] = [i for i in range(len(self.df))] + + def set_uniform_weights(self): + self.df["weights"] = 1 / len(self.df) + + def set_weights(self, weights): + self.df["weights"] = weights / weights.sum() + + def exp_weights(self, x): + weights = self.df["weights"] ** x + self.set_weights(weights) + + def set_zero_weight(self, id): + self.df.loc[id, "weights"] = 0 + self.df["weights"] /= self.df["weights"].sum() + + @property + def weights(self, id): + return self.df.loc[id, "weights"] + + @property + def num_classes(self): + return self.df["class_idx"].values.max() + 1 + + @classmethod + def load(cls, file_path, sp): + #TODO: load text information + """Loads utt2info list from text file. + + Args: + file_path: File to read the list. + sp: SentencePieceProcessor for bpe. + Returns: + Utt2Info object + """ + # y: k2.RaggedTensor, + # A ragged tensor with 2 axes [utt][label]. It contains labels of each utterance. 
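+        # illustrative sketch (not wired into the pipeline yet), assuming a
+        # trained BPE model:
+        #   y = sp.encode(["HELLO WORLD", "HI"], out_type=int)
+        #   # y is a list of lists of token ids, rows of different length
+        #   y = k2.RaggedTensor(y)  # 2 axes: [utt][label]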
+
+        texts = read_2column_text(file_path)
+        # e.g. {'utt1': 'transcript one', 'utt2': 'transcript two'}
+
+        # encode the transcripts into BPE token ids as a k2 ragged tensor
+        y = sp.encode(list(texts.values()), out_type=int)
+        y = k2.RaggedTensor(y)
+
+        file_path = Path(file_path)
+        ext = file_path.suffix
+        if ext == "":
+            # if no extension we load as kaldi utt2spk file
+            df = pd.read_csv(
+                file_path,
+                sep=" ",
+                header=None,
+                names=["id"],
+                dtype={"id": str},
+            )
+            return cls(df)
+
+        return super().load(file_path)

From a10083941d9c91a5382c2a62d132985771217eaf Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Mon, 17 Oct 2022 22:10:20 -0400
Subject: [PATCH 034/154] updated finetune xvector script

---
 hyperion/bin/finetune_xvector_from_wav.py | 163 ++++++++++++++++++----
 1 file changed, 133 insertions(+), 30 deletions(-)

diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py
index b5a7f63b..a960ebeb 100755
--- a/hyperion/bin/finetune_xvector_from_wav.py
+++ b/hyperion/bin/finetune_xvector_from_wav.py
@@ -20,32 +20,52 @@
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch.utils import ddp
-from hyperion.torch.models import XVector as XVec
+
xvec_args["num_classes"] = num_classes - model = TML.load(in_model_path) - model.rebuild_output_layer(**xvec_args) + model = TML.load(in_model_file) + model.change_config(**xvec_args) if rank == 0: logging.info("x-vector-model={}".format(model)) return model @@ -99,9 +119,9 @@ def train_xvec(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) feat_extractor = init_feats(**kwargs) - model = init_xvector(train_loader.dataset.num_classes, **kwargs) + model = init_xvector(list(train_loader.dataset.num_classes.values())[0], **kwargs) - trn_args = Trainer.filter_args(**kwargs) + trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: logging.info("trainer args={}".format(trn_args)) metrics = {"acc": CategoricalAccuracy()} @@ -119,14 +139,15 @@ def train_xvec(gpu_id, args): ddp.ddp_cleanup() -if __name__ == "__main__": +def make_parser(xvec_class): + parser = ArgumentParser() - parser = ArgumentParser(description="Fine-tune x-vector model from audio files") parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") + AD.add_class_args(train_parser, prefix="dataset", skip={}) - Sampler.add_class_args(train_parser, prefix="sampler") + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", type=int, @@ -136,7 +157,7 @@ def train_xvec(gpu_id, args): val_parser = ArgumentParser(prog="") AD.add_class_args(val_parser, prefix="dataset", skip={}) - Sampler.add_class_args(val_parser, prefix="sampler") + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", type=int, @@ -148,41 +169,123 @@ def train_xvec(gpu_id, args): data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) parser.link_arguments( - "data.train.dataset.class_file", "data.val.dataset.class_file" + "data.train.dataset.class_files", "data.val.dataset.class_files" ) parser.link_arguments( "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" ) - parser.link_arguments( - "data.train.sampler.batch_size", "data.val.sampler.batch_size" - ) AF.add_class_args(parser, prefix="feats") - parser.add_argument("--in-model-path", required=True) - - XVec.add_finetune_args(parser, prefix="model") + xvec_class.add_finetune_args(parser, prefix="model") + parser.add_argument("--in-model-file", required=True) Trainer.add_class_args( - parser, prefix="trainer", train_modes=XVec.valid_train_modes() + parser, prefix="trainer", train_modes=xvec_class.valid_train_modes() ) ddp.add_ddp_args(parser) - parser.add_argument("--seed", type=int, default=1123581321, help="random seed") parser.add_argument( "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int ) - parser.add_argument("--local_rank", default=0, type=int) + + return parser + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Fine-tune x-vector model from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + for k, v in xvec_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) args = parser.parse_args() - gpu_id = args.local_rank - del args.local_rank + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + xvec_type = args.subcommand + args_sc = vars(args)[xvec_type] if gpu_id == 0: try: - config_file = 
Path(args.exp_path) / "config.yaml" + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" parser.save(args, str(config_file), format="yaml", overwrite=True) except: pass + args_sc.xvec_class = xvec_dict[xvec_type] # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") - train_xvec(gpu_id, args) + train_xvec(gpu_id, args_sc) + + +# if __name__ == "__main__": + +# parser = ArgumentParser(description="Fine-tune x-vector model from audio files") +# parser.add_argument("--cfg", action=ActionConfigFile) + +# train_parser = ArgumentParser(prog="") +# AD.add_class_args(train_parser, prefix="dataset", skip={}) +# Sampler.add_class_args(train_parser, prefix="sampler") +# train_parser.add_argument( +# "--data_loader.num-workers", +# type=int, +# default=5, +# help="num_workers of data loader", +# ) + +# val_parser = ArgumentParser(prog="") +# AD.add_class_args(val_parser, prefix="dataset", skip={}) +# Sampler.add_class_args(val_parser, prefix="sampler") +# val_parser.add_argument( +# "--data_loader.num-workers", +# type=int, +# default=5, +# help="num_workers of data loader", +# ) +# data_parser = ArgumentParser(prog="") +# data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) +# data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) +# parser.add_argument("--data", action=ActionParser(parser=data_parser)) +# parser.link_arguments( +# "data.train.dataset.class_file", "data.val.dataset.class_file" +# ) +# parser.link_arguments( +# "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" +# ) +# parser.link_arguments( +# "data.train.sampler.batch_size", "data.val.sampler.batch_size" +# ) + +# AF.add_class_args(parser, prefix="feats") +# parser.add_argument("--in-model-path", required=True) + +# XVec.add_finetune_args(parser, prefix="model") +# Trainer.add_class_args( +# parser, prefix="trainer", train_modes=XVec.valid_train_modes() +# ) +# ddp.add_ddp_args(parser) + +# parser.add_argument("--seed", type=int, default=1123581321, help="random seed") +# parser.add_argument( +# "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int +# ) +# parser.add_argument("--local_rank", default=0, type=int) + +# args = parser.parse_args() +# gpu_id = args.local_rank +# del args.local_rank + +# if gpu_id == 0: +# try: +# config_file = Path(args.exp_path) / "config.yaml" +# parser.save(args, str(config_file), format="yaml", overwrite=True) +# except: +# pass + +# # torch docs recommend using forkserver +# multiprocessing.set_start_method("forkserver") +# train_xvec(gpu_id, args) From 566341669c2f0f50b3159d2b5b9a72852a8855d3 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 20 Oct 2022 11:07:01 -0400 Subject: [PATCH 035/154] added labels to gbe and lsvmc --- hyperion/np/classifiers/linear_gbe.py | 84 +++++++++++++---------- hyperion/np/classifiers/linear_svmc.py | 93 ++++++++++++++------------ hyperion/np/metrics/acc.py | 4 +- hyperion/np/np_model.py | 8 ++- hyperion/np/transforms/cent_whiten.py | 23 ++++--- hyperion/np/transforms/pca.py | 33 ++++----- 6 files changed, 140 insertions(+), 105 deletions(-) diff --git a/hyperion/np/classifiers/linear_gbe.py b/hyperion/np/classifiers/linear_gbe.py index c786cb50..00a8b1bf 100644 --- a/hyperion/np/classifiers/linear_gbe.py +++ b/hyperion/np/classifiers/linear_gbe.py @@ -5,6 +5,7 @@ import logging import numpy as np +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo from scipy.special import gammaln from ...hyp_defs import float_cpu 
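Patch 035 below reworks the old `p1 = "--" + prefix + "."` argument registration into
jsonargparse's nested-parser idiom. A minimal, self-contained sketch of that idiom,
outside this patch (illustrative option names, assuming a recent jsonargparse release):

    from jsonargparse import ActionParser, ActionYesNo, ArgumentParser

    def add_class_args(parser, prefix=None):
        # build the options on an inner parser, then graft it under --<prefix>
        if prefix is not None:
            outer_parser = parser
            parser = ArgumentParser(prog="")
        # ActionYesNo registers paired boolean yes/no flags for the option
        parser.add_argument("--update-mu", default=True, action=ActionYesNo)
        if prefix is not None:
            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))

    parser = ArgumentParser()
    add_class_args(parser, prefix="lgbe")
    args = parser.parse_args([])
    print(args.lgbe.update_mu)  # True (the default)

The payoff of the pattern is that option prefixes become real nested namespaces
(args.lgbe.update_mu) instead of string-concatenated flag names.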
@@ -30,6 +31,7 @@ class LinearGBE(NPModel):
         prior_nu: if given, it overwrites nu in the prior object.
         post_beta: if given, it fixes the value of beta in the posterior, overwriting the beta computed by the fit function.
         post_nu: if given, it fixes the value of nu in the posterior, overwriting the nu computed by the fit function.
+        labels: list of class labels.
     """

     def __init__(
@@ -48,6 +50,7 @@ def __init__(
         prior_nu=None,
         post_beta=None,
         post_nu=None,
+        labels=None,
         **kwargs
     ):

@@ -73,8 +76,15 @@ def __init__(
         self.post_beta = post_beta
         self.post_nu = post_nu

+        self.set_labels(labels)
         self._compute_Ab()

+    def set_labels(self, labels):
+        if isinstance(labels, np.ndarray):
+            labels = list(labels)
+
+        self.labels = labels
+
     def get_config(self):
         """
         Returns:
@@ -90,6 +100,7 @@ def get_config(self):
             "prior_nu": self.prior_nu,
             "post_beta": self.post_beta,
             "post_nu": self.post_nu,
+            "labels": self.labels,
         }

         base_config = super().get_config()
@@ -259,7 +270,6 @@ def fit(self, x, class_ids=None, p_theta=None, sample_weight=None):
             p_theta = sample_weight[:, None] * p_theta

         N = np.sum(p_theta, axis=0)
-
         F = np.dot(p_theta.T, x)

         if self.update_mu:
@@ -337,8 +347,6 @@ def filter_class_args(**kwargs):
         valid_args = (
             "update_mu",
             "update_W",
-            "no_update_mu",
-            "no_update_W",
             "balance_class_weight",
             "prior",
             "prior_beta",
@@ -348,11 +356,6 @@ def filter_class_args(**kwargs):
             "name",
         )
         d = dict((k, kwargs[k]) for k in valid_args if k in kwargs)
-        if "no_update_mu" in d:
-            d["update_mu"] = not d["no_update_mu"]
-        if "no_update_W" in d:
-            d["update_W"] = not d["no_update_W"]
-
         return d

     filter_train_args = filter_class_args
@@ -364,61 +367,67 @@ def add_class_args(parser, prefix=None):
             parser: jsonargparse object
             prefix: argument prefix.
         """
-        if prefix is None:
-            p1 = "--"
-        else:
-            p1 = "--" + prefix + "."
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")

         parser.add_argument(
-            p1 + "no-update-mu",
-            default=False,
-            action="store_true",
-            help="do not update mu",
+            "--update-mu",
+            default=True,
+            action=ActionYesNo,
+            nargs="?",
+            help="whether to update mu",
         )
         parser.add_argument(
-            p1 + "no-update-W",
-            default=False,
-            action="store_true",
-            help="do not update W",
+            "--update-W",
+            default=True,
+            action=ActionYesNo,
+            nargs="?",
+            help="whether to update W",
         )
         parser.add_argument(
-            p1 + "balance-class-weight",
+            "--balance-class-weight",
             default=False,
-            action="store_true",
+            action=ActionYesNo,
+            nargs="?",
             help="Balances the weight of each class when computing W",
         )
         parser.add_argument(
-            p1 + "prior", default=None, help="prior file for MAP adaptation"
+            "--prior", default=None, help="prior file for MAP adaptation"
        )
         parser.add_argument(
-            p1 + "prior-beta",
+            "--prior-beta",
             default=16,
             type=float,
             help="relevance factor for the means",
         )
         parser.add_argument(
-            p1 + "prior-nu",
+            "--prior-nu",
             default=16,
             type=float,
             help="relevance factor for the variances",
         )
         parser.add_argument(
-            p1 + "post-beta",
+            "--post-beta",
             default=None,
             type=float,
             help="relevance factor for the means",
         )
         parser.add_argument(
-            p1 + "post-nu",
+            "--post-nu",
             default=None,
             type=float,
             help="relevance factor for the variances",
         )

-        parser.add_argument(p1 + "name", default="lgbe", help="model name")
+        parser.add_argument("--name", default="lgbe", help="model name")
+        if prefix is not None:
+            outer_parser.add_argument(
+                "--" + prefix, action=ActionParser(parser=parser),
+            )

     @staticmethod
-    def filter_eval_args(prefix, **kwargs):
+    def filter_eval_args(**kwargs):
         """Extracts the evaluation time hyperparams of the class from a dictionary.

         Returns:
@@ -434,20 +443,19 @@ def add_eval_args(parser, prefix=None):
             parser: jsonargparse object
             prefix: argument prefix.
         """
-        if prefix is None:
-            p1 = "--"
-        else:
-            p1 = "--" + prefix + "."
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")

-        parser.add_argument(p1 + "model-file", required=True, help=("model file"))
         parser.add_argument(
-            p1 + "normalize",
+            "--normalize",
             default=False,
-            action="store_true",
-            help=("normalizes the ouput probabilities to sum to one"),
+            action=ActionYesNo,
+            nargs="?",
+            help=("normalizes the output probabilities to sum to one"),
         )
         parser.add_argument(
-            p1 + "eval-method",
+            "--eval-method",
             default="linear",
             choices=["linear", "llk", "predictive"],
             help=(
@@ -455,6 +463,10 @@ def add_eval_args(parser, prefix=None):
                 "or predictive distribution"
             ),
         )
+        if prefix is not None:
+            outer_parser.add_argument(
+                "--" + prefix, action=ActionParser(parser=parser),
+            )

     add_argparse_args = add_class_args
     add_argparse_train_args = add_class_args
diff --git a/hyperion/np/classifiers/linear_svmc.py b/hyperion/np/classifiers/linear_svmc.py
index df14a16e..cb95e903 100644
--- a/hyperion/np/classifiers/linear_svmc.py
+++ b/hyperion/np/classifiers/linear_svmc.py
@@ -5,6 +5,7 @@
 import logging

 import numpy as np
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
 from sklearn.svm import LinearSVC as SVC


@@ -61,6 +62,7 @@ class LinearSVMC(NPModel):
         verbose: int, default: 0
         balance_class_weight: if True and class_weight is None, it makes class_weight="balanced".
-        lr_seed: seed form RandomState, used when random_state is None.
+        lr_seed: seed for RandomState, used when random_state is None.
+ labels: list of class labels """ def __init__( @@ -81,7 +83,8 @@ def __init__( verbose=0, balance_class_weight=True, lr_seed=1024, - **kwargs + labels=None, + **kwargs, ): super().__init__(**kwargs) @@ -95,7 +98,6 @@ def __init__( self.use_bias = use_bias self.bias_scaling = bias_scaling self.balance_class_weight = balance_class_weight - logging.debug(class_weight) self.svm = SVC( penalty=penalty, C=C, @@ -117,6 +119,8 @@ def __init__( if b is not None: self.svm.intercept_ = b + self.set_labels(labels) + @property def A(self): return self.svm.coef_.T @@ -125,6 +129,12 @@ def A(self): def b(self): return self.svm.intercept_ * self.bias_scaling + def set_labels(self, labels): + if isinstance(labels, np.ndarray): + labels = list(labels) + + self.labels = labels + def get_config(self): """Gets configuration hyperparams. Returns: @@ -134,8 +144,9 @@ def get_config(self): "use_bias": self.use_bias, "bias_scaling": self.bias_scaling, "balance_class_weight": self.balance_class_weight, + "labels": self.labels, } - base_config = super(LinearSVMC, self).get_config() + base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) def predict(self, x, eval_type="logit"): @@ -203,7 +214,7 @@ def load_params(cls, f, config): return cls(**kwargs) @staticmethod - def filter_class_args(prefix=None, **kwargs): + def filter_class_args(**kwargs): """Extracts the hyperparams of the class from a dictionary. Returns: @@ -236,42 +247,35 @@ def add_class_args(parser, prefix=None): parser: jsonargparse object prefix: argument prefix. """ - if prefix is None: - p1 = "--" - p2 = "" - else: - p1 = "--" + prefix + "." - p2 = prefix + "." + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") parser.add_argument( - p1 + "penalty", + "--penalty", default="l2", choices=["l2", "l1"], help="used to specify the norm used in the penalization", ) parser.add_argument( - p1 + "c", - dest=(p2 + "C"), + "--c", + dest="C", default=1.0, type=float, help="inverse of regularization strength", ) parser.add_argument( - p1 + "loss", + "--loss", default="squared_hinge", choices=["hinge", "squared_hinge"], help="type of loss", ) parser.add_argument( - p1 + "no-use-bias", - dest=(p2 + "use_bias"), - default=True, - action="store_false", - help="Not use bias", + "--use-bias", default=True, action=ActionYesNo, nargs="?", help="Use bias", ) parser.add_argument( - p1 + "bias-scaling", + "--bias-scaling", default=1.0, type=float, help=( @@ -280,19 +284,19 @@ def add_class_args(parser, prefix=None): ), ) parser.add_argument( - p1 + "lr-seed", default=1024, type=int, help="random number generator seed" + "--lr-seed", default=1024, type=int, help="random number generator seed" ) parser.add_argument( - p1 + "max-iter", + "--max-iter", default=100, type=int, help="only for the newton-cg, sag and lbfgs solvers", ) parser.add_argument( - p1 + "no-dual", - dest=(p2 + "dual"), + "--dual", default=True, - action="store_false", + action=ActionYesNo, + nargs="?", help=( "dual or primal formulation. 
" "Dual formulation is only implemented for " @@ -300,10 +304,10 @@ def add_class_args(parser, prefix=None): ), ) parser.add_argument( - p1 + "tol", default=1e-4, type=float, help="tolerance for stopping criteria" + "--tol", default=1e-4, type=float, help="tolerance for stopping criteria" ) parser.add_argument( - p1 + "multi-class", + "--multi-class", default="ovr", choices=["ovr", "crammer_singer"], help=( @@ -312,29 +316,33 @@ def add_class_args(parser, prefix=None): ), ) parser.add_argument( - p1 + "verbose", + "--verbose", default=0, type=int, help="For the liblinear and lbfgs solvers", ) parser.add_argument( - p1 + "balance-class-weight", + "--balance-class-weight", default=False, - action="store_true", + action=ActionYesNo, help="Balances the weight of each class when computing W", ) - parser.add_argument(p1 + "name", default="svc", help="model name") + parser.add_argument("--name", default="svc", help="model name") + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, action=ActionParser(parser=parser), + ) @staticmethod - def filter_eval_args(prefix, **kwargs): + def filter_eval_args(**kwargs): """Extracts the evaluation time hyperparams of the class from a dictionary. Returns: Hyperparameters to evaluate the class. """ - valid_args = ("model_file", "eval_type") + valid_args = "eval_type" return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod @@ -344,21 +352,22 @@ def add_eval_args(parser, prefix=None): parser: jsonargparse object prefix: argument prefix. """ - if prefix is None: - p1 = "--" - p2 = "" - else: - p1 = "--" + prefix + "." - p2 = prefix + "." - - parser.add_argument(p1 + "model-file", required=True, help=("model file")) + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + parser.add_argument( - p1 + "eval-type", + "--eval-type", default="logit", choices=["logit", "bin-logpost", "bin-post", "cat-logpost", "cat-post"], help=("type of evaluation"), ) + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, action=ActionParser(parser=parser), + ) + # for backward compatibility filter_train_args = filter_class_args add_argparse_args = add_class_args diff --git a/hyperion/np/metrics/acc.py b/hyperion/np/metrics/acc.py index daea183e..148981f5 100644 --- a/hyperion/np/metrics/acc.py +++ b/hyperion/np/metrics/acc.py @@ -22,4 +22,6 @@ def compute_accuracy(y_true, y_pred, normalize=True, sample_weight=None): Returns: Accuracy or number of correctly classified samples. 
""" - return accuracy_score(y_true, y_pred, normalize, sample_weight) + return accuracy_score( + y_true, y_pred, normalize=normalize, sample_weight=sample_weight + ) diff --git a/hyperion/np/np_model.py b/hyperion/np/np_model.py index a53135e6..db49f6d5 100644 --- a/hyperion/np/np_model.py +++ b/hyperion/np/np_model.py @@ -195,8 +195,14 @@ def get_config(self): def to_json(self, **kwargs): """Returns model config as json string.""" - # Piece of code borrowed from keras + def get_json_type(obj): + # if obj is a np list of strings + if isinstance(obj, np.ndarray) and obj.ndim == 1: + if isinstance(obj[0], str): + return list(obj) + + # Piece of code borrowed from keras # if obj is any numpy type if type(obj).__module__ == np.__name__: return obj.item() diff --git a/hyperion/np/transforms/cent_whiten.py b/hyperion/np/transforms/cent_whiten.py index e700dbe8..5f71c173 100644 --- a/hyperion/np/transforms/cent_whiten.py +++ b/hyperion/np/transforms/cent_whiten.py @@ -2,7 +2,7 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import numpy as np import h5py @@ -155,25 +155,28 @@ def filter_args(**kwargs): @staticmethod def add_class_args(parser, prefix=None): - if prefix is None: - p1 = "--" - else: - p1 = "--" + prefix + "." + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") parser.add_argument( - p1 + "update-mu", - default=True, + "--update-mu", + default=ActionYesNo, type=bool, help=("updates centering parameter"), ) parser.add_argument( - p1 + "update-T", + "--update-T", default=True, - type=bool, + type=ActionYesNo, help=("updates whitening parameter"), ) - parser.add_argument(p1 + "name", default="lnorm") + parser.add_argument("--name", default="lnorm") + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, action=ActionParser(parser=parser), + ) add_argparse_args = add_class_args diff --git a/hyperion/np/transforms/pca.py b/hyperion/np/transforms/pca.py index 6d6ff7b1..36f6012b 100644 --- a/hyperion/np/transforms/pca.py +++ b/hyperion/np/transforms/pca.py @@ -2,6 +2,7 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import numpy as np import h5py @@ -186,11 +187,7 @@ def load_params(cls, f, config): """ param_list = ["mu", "T"] params = cls._load_params_to_dict(f, config["name"], param_list) - return cls( - mu=params["mu"], - T=params["T"], - **config, - ) + return cls(mu=params["mu"], T=params["T"], **config,) @classmethod def load_mat(cls, file_path): @@ -211,35 +208,39 @@ def filter_args(**kwargs): @staticmethod def add_class_args(parser, prefix=None): - if prefix is None: - p1 = "--" - else: - p1 = "--" + prefix + "." 
+ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") parser.add_argument( - p1 + "update-mu", + "--update-mu", default=True, - type=bool, + action=ActionYesNo, help=("updates centering parameter"), ) parser.add_argument( - p1 + "update-T", + "--update-T", default=True, - type=bool, + action=ActionYesNo, help=("updates whitening parameter"), ) parser.add_argument( - p1 + "pca-dim", default=None, type=int, help=("output dimension of PCA") + "--pca-dim", default=None, type=int, help=("output dimension of PCA") ) parser.add_argument( - p1 + "pca-var-r", + "--pca-var-r", default=None, - type=int, + type=float, help=("proportion of variance to keep when choosing the PCA dimension"), ) parser.add_argument("--name", dest="name", default="pca") + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, action=ActionParser(parser=parser), + ) add_argparse_args = add_class_args From 2121d87860f3c8271259c48e1623a67c92c96fc0 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 21 Oct 2022 20:36:24 -0400 Subject: [PATCH 036/154] fixed class weighted sampler --- hyperion/bin/finetune_xvector_from_wav.py | 17 +++ hyperion/torch/data/audio_dataset.py | 6 +- .../data/class_weighted_seg_chunk_sampler.py | 121 +++++++----------- hyperion/torch/data/seg_sampler_factory.py | 4 +- hyperion/utils/info_table.py | 11 ++ 5 files changed, 82 insertions(+), 77 deletions(-) diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py index a960ebeb..0f23fb0a 100755 --- a/hyperion/bin/finetune_xvector_from_wav.py +++ b/hyperion/bin/finetune_xvector_from_wav.py @@ -102,6 +102,22 @@ def init_xvector(num_classes, in_model_file, rank, xvec_class, **kwargs): return model +def init_hard_prototype_mining(model, train_loader, val_loader, rank): + if not train_loader.batch_sampler.hard_prototype_mining: + return + + if rank == 0: + logging.info("setting hard prototypes") + + affinity_matrix = model.compute_prototype_affinity() + train_loader.batch_sampler.set_hard_prototypes(affinity_matrix) + + if not val_loader.batch_sampler.hard_prototype_mining: + return + + val_loader.batch_sampler.set_hard_prototypes(affinity_matrix) + + def train_xvec(gpu_id, args): config_logger(args.verbose) @@ -120,6 +136,7 @@ def train_xvec(gpu_id, args): val_loader = init_data(partition="val", **kwargs) feat_extractor = init_feats(**kwargs) model = init_xvector(list(train_loader.dataset.num_classes.values())[0], **kwargs) + init_hard_prototype_mining(model, train_loader, val_loader, rank) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 8875676f..8c69c3e1 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -590,7 +590,11 @@ def num_classes(self): def _parse_segment_item(self, segment): if isinstance(segment, (tuple, list)): seg_id, start, duration = segment - assert duration <= self.seg_set.loc[seg_id].duration + assert duration <= self.seg_set.loc[seg_id].duration, ( + f"{seg_id} with start={start} duration " + f"({self.seg_set.loc[seg_id].duration}) < " + f"chunk duration ({duration})" + ) else: seg_id, start, duration = segment, 0, 0 diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 620d4d36..27ad4d33 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ 
b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -91,11 +91,11 @@ def __init__( self._set_num_chunks_per_seg_epoch(num_chunks_per_seg_epoch) self._compute_len() - # fast mapping from classes to segments - self.map_class_to_segs = self.seg_set.df[ - ["id", self.class_name, self.length_name] - ] - self.map_class_to_segs.set_index(self.class_name, drop=False, inplace=True) + # # fast mapping from classes to segments + # self.map_class_to_segs = self.seg_set.df[ + # ["id", self.class_name, self.length_name] + # ] + # self.map_class_to_segs.set_index(self.class_name, drop=False, inplace=True) self._gather_class_info() self._set_class_weights() @@ -103,7 +103,10 @@ def __init__( self.set_hard_prototypes(affinity_matrix) logging.info( - "batches/epoch=%d min-batch-size=%d, max-batch-size=%d avg-batch-size/gpu=%.2f avg-classes/batch=%.2f samples/(seg*epoch)=%d", + ( + "sampler batches/epoch=%d min-batch-size=%d, max-batch-size=%d " + "avg-batch-size/gpu=%.2f avg-classes/batch=%.2f samples/(seg*epoch)=%d" + ), self._len, self.min_batch_size, self.max_batch_size, @@ -179,7 +182,7 @@ def _gather_class_info(self): ) self.map_class_to_segs_idx = {} for class_id in self.class_info["id"].values: - seg_ids = map_class_to_segs.loc[class_id, "id"] + seg_ids = map_class_to_segs.loc[class_id, "id"].values seg_idx = self.seg_set.get_loc(seg_ids) self.map_class_to_segs_idx[class_id] = seg_idx @@ -246,7 +249,7 @@ def _get_class_weights(self, chunk_length): # chunk length and put weight to 0 zero_idx = self.class_info["max_seg_duration"] < chunk_length if not np.any(zero_idx): - return self.class_info["weights"].values + return torch.as_tensor(self.class_info["weights"].values) class_weights = self.class_info["weights"].values.copy() class_weights[zero_idx] = 0.0 @@ -266,65 +269,13 @@ def _sample_classes(self, num_classes, chunk_length): class_ids = self.class_info.iloc[row_idx].id.values if self.hard_prototype_mining: # map class ids to class indexes - class_idx = self.class_info.loc[class_ids, "class_idx"] + class_idx = self.class_info.loc[class_ids, "class_idx"].values class_idx = self.get_hard_prototypes(class_idx) # map back to class ids - class_ids = self.map_class_idx_to_ids.loc[class_idx] + class_ids = self.map_class_idx_to_ids.loc[class_idx, "id"].values return class_ids - def _sample_segs0(self, class_ids, chunk_length): - - seg_ids = [] - for c in class_ids: - # for each class we sample segments longer than chunk length - # get segments belonging to c - # t1 = time.time() - segs_c = self.map_class_to_segs.loc[c] - # seg_idx_c = self.map_class_to_segs.index.get_loc(c) - if self.class_info.loc[c, "min_seg_duration"] < chunk_length: - segs_c = segs_c[segs_c[self.length_name] >= chunk_length] - # seg_idx_c = seg_idx_c[self.seg_set.loc[seg_idx_c, self.length_name]>chunk_length] - - # t2 = time.time() - seg_ids_c = segs_c["id"].values - # seg_ids_c = self.seg_set.loc[seg_idx_c, "id"].values - # t3 = time.time() - # sample num_segs_per_class random segments - if len(seg_ids_c) == 0: - print(chunk_length, c, self.class_info.loc[c], flush=True) - if self.seg_weight_mode == "uniform": - sel_seg_idx_c = torch.randint( - low=0, - high=len(seg_ids_c), - size=(self.num_segs_per_class,), - generator=self.rng, - ).numpy() - - elif self.seg_weight_mode == "data-prior": - # weights = self.seg_set.loc[seg_mask, self.length_name].values - weights = segs_c[self.length_name].values - # weights = self.seg_set.loc[seg_idx_c, self.length_name].values - weights /= weights.sum() - sel_seg_idx_c = torch.multinomial( - 
torch.from_numpy(weights), - num_samples=self.num_segs_per_class, - replacement=True, - generator=self.rng, - ).numpy() - # t4 = time.time() - else: - raise ValueError("unknown seg-weight-mode=%s", self.seg_weight_mode) - sel_seg_ids_c = list(seg_ids_c[sel_seg_idx_c]) - # t5 = time.time() - seg_ids.extend(sel_seg_ids_c) - # t6 = time.time() - # logging.info( - # "stime %f %f %f %f %f", t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5 - # ) - - return seg_ids - def _sample_segs(self, class_ids, chunk_length): dur_col_idx = self.seg_set.get_col_idx(self.length_name) @@ -334,21 +285,21 @@ def _sample_segs(self, class_ids, chunk_length): for c in class_ids: # for each class we sample segments longer than chunk length # get segments belonging to c - t1 = time.time() + # t1 = time.time() seg_idx_c = self.map_class_to_segs_idx[c] - t2 = time.time() + # t2 = time.time() durs = self.seg_set.iloc[seg_idx_c, dur_col_idx].values if self.class_info.loc[c, "min_seg_duration"] < chunk_length: mask = durs >= chunk_length seg_idx_c = seg_idx_c[mask] durs = durs[mask] - t3 = time.time() + # t3 = time.time() # sample num_segs_per_class random segments if len(seg_idx_c) == 0: - print(chunk_length, c, self.class_info.loc[c], flush=True) + logging.error("no segments found with class=%s dur=%d", c, chunk_length) if self.seg_weight_mode == "uniform": - sel_seg_idx_c = torch.randint( + sel_idx = torch.randint( low=0, high=len(seg_idx_c), size=(self.num_segs_per_class,), @@ -357,23 +308,24 @@ def _sample_segs(self, class_ids, chunk_length): elif self.seg_weight_mode == "data-prior": weights = durs / durs.sum() - sel_seg_idx_c = torch.multinomial( + sel_idx = torch.multinomial( torch.from_numpy(weights), num_samples=self.num_segs_per_class, replacement=True, generator=self.rng, ).numpy() - t4 = time.time() + # t4 = time.time() else: raise ValueError("unknown seg-weight-mode=%s", self.seg_weight_mode) + sel_seg_idx_c = seg_idx_c[sel_idx] sel_seg_ids_c = list(self.seg_set.iloc[sel_seg_idx_c, id_col_idx]) - t5 = time.time() + # t5 = time.time() seg_ids.extend(sel_seg_ids_c) - t6 = time.time() - logging.info( - "stime %f %f %f %f %f", t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5 - ) + # t6 = time.time() + # logging.info( + # "stime %f %f %f %f %f", t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5 + # ) return seg_ids @@ -395,12 +347,33 @@ def __next__(self): if self.batch == self._len: raise StopIteration + # t1 = time.time() chunk_length = self._sample_chunk_length() + # t2 = time.time() batch_size = self._compute_batch_size(chunk_length) + # t3 = time.time() num_classes = self._compute_num_classes_per_batch(batch_size) + # t4 = time.time() class_ids = self._sample_classes(num_classes, chunk_length) + # t5 = time.time() seg_ids = self._sample_segs(class_ids, chunk_length) + # t6 = time.time() chunks = self._sample_chunks(seg_ids, chunk_length) + # t7 = time.time() + # print( + # "next", + # t2 - t1, + # t3 - t2, + # t4 - t3, + # t5 - t4, + # t6 - t5, + # t7 - t6, + # batch_size, + # num_classes, + # self.min_batch_size, + # len(chunks), + # flush=True, + # ) if self.batch == 0: logging.info("batch 0 uttidx=%s", str(chunks[:10])) diff --git a/hyperion/torch/data/seg_sampler_factory.py b/hyperion/torch/data/seg_sampler_factory.py index 3093a532..251d937b 100644 --- a/hyperion/torch/data/seg_sampler_factory.py +++ b/hyperion/torch/data/seg_sampler_factory.py @@ -132,7 +132,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--min-batch-size", type=int, - default=1, + default=64, help=("minimum batch size per gpu"), ) 
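The sampler rework in this patch routes all weighted picks through torch.multinomial.
A toy, self-contained sketch of the data-prior segment draw (illustrative durations,
not the sampler's real fields):

    import numpy as np
    import torch

    durs = np.array([2.0, 6.0, 12.0])   # toy segment durations in seconds
    weights = durs / durs.sum()         # data-prior mode: longer segments drawn more often
    sel = torch.multinomial(
        torch.from_numpy(weights), num_samples=4, replacement=True
    )
    print(sel.tolist())                 # e.g. [2, 1, 2, 0]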
        parser.add_argument(
@@ -146,7 +146,7 @@

     parser.add_argument(
         "--batch-size",
-        default=128,
+        default=None,
         type=int,
         help=("deprecated, use min-batch-size instead"),
     )
diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py
index b38bd3fe..217f1f9a 100644
--- a/hyperion/utils/info_table.py
+++ b/hyperion/utils/info_table.py
@@ -39,6 +39,12 @@ def clone(self):
     def __len__(self):
         return self.df.__len__

+    def __str__(self):
+        return self.df.__str__()
+
+    def __repr__(self):
+        return self.df.__repr__()
+
     @property
     def iat(self):
         return self.df.iat
@@ -397,6 +403,9 @@ def reset_index(self):
         self.df.set_index("id", drop=False, inplace=True)

     def get_loc(self, keys):
+        if isinstance(keys, (list, np.ndarray)):
+            return self.df.index.get_indexer(keys)
+
         loc = self.df.index.get_loc(keys)
         if isinstance(loc, int):
             return loc
From 115d00ed9d8bb47ff0d997b6ad20fea639ec03bd Mon Sep 17 00:00:00 2001
From: neillu23
Date: Mon, 24 Oct 2022 00:21:01 -0400
Subject: [PATCH 037/154] Add Mix Musan noise, and update the transducer data
 preparation

---
 egs/librispeech/v1/conf/clsp.conf             |  11 +
 egs/librispeech/v1/conf/fbank80_16k.yaml      |   7 -
 egs/librispeech/v1/conf/reverb_noise_aug.yaml |  35 ++++
 ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml |  45 +++++
 .../conf/wav2vec2base960h_ecapatdnn512x2.yaml |  37 ----
 .../v1/global_conf/config_transducer_v1.sh    |   8 +-
 ...nn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh |  55 -----
 egs/librispeech/v1/local/make_musan.py        | 189 ++++++++++++++++++
 egs/librispeech/v1/local/make_musan.sh        |  48 +++++
 egs/librispeech/v1/local/make_rirs_data.sh    |  29 +++
 egs/librispeech/v1/run_003_compute_fbank.sh   |  67 -------
 .../v1/run_003_prepare_noises_rirs.sh         |  67 +++++++
 .../v1/run_010_prepare_asr_train_data.sh      |  42 ----
 egs/librispeech/v1/run_011_train_asr.sh       |   4 +-
 egs/librispeech/v1/steps_xvec                 |   1 +
 hyperion/bin/train_wav2vec2transducer.py      |  18 +-
 hyperion/bin/train_wav2vec2xvector.py         |   2 +-
 hyperion/torch/data/audio_dataset.py          |  68 ++++---
 hyperion/utils/info_table.py                  |   7 +-
 hyperion/utils/text_info.py                   |  56 +++---
 20 files changed, 511 insertions(+), 285 deletions(-)
 create mode 100644 egs/librispeech/v1/conf/clsp.conf
 delete mode 100644 egs/librispeech/v1/conf/fbank80_16k.yaml
 create mode 100644 egs/librispeech/v1/conf/reverb_noise_aug.yaml
 create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml
 delete mode 100644 egs/librispeech/v1/conf/wav2vec2base960h_ecapatdnn512x2.yaml
 delete mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh
 create mode 100755 egs/librispeech/v1/local/make_musan.py
 create mode 100755 egs/librispeech/v1/local/make_musan.sh
 create mode 100755 egs/librispeech/v1/local/make_rirs_data.sh
 delete mode 100755 egs/librispeech/v1/run_003_compute_fbank.sh
 create mode 100755 egs/librispeech/v1/run_003_prepare_noises_rirs.sh
 delete mode 100755 egs/librispeech/v1/run_010_prepare_asr_train_data.sh
 create mode 120000 egs/librispeech/v1/steps_xvec

diff --git a/egs/librispeech/v1/conf/clsp.conf b/egs/librispeech/v1/conf/clsp.conf
new file mode 100644
index 00000000..4ed38246
--- /dev/null
+++ b/egs/librispeech/v1/conf/clsp.conf
@@ -0,0 +1,11 @@
+
+# Default configuration
+command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V
+option mem=* -l mem_free=$0,ram_free=$0
+option mem=0 # Do not add anything to qsub_opts
+option num_threads=* -pe smp $0
+option num_threads=1 # Do not add anything to qsub_opts
+option
max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/librispeech/v1/conf/fbank80_16k.yaml b/egs/librispeech/v1/conf/fbank80_16k.yaml deleted file mode 100644 index 88bae69e..00000000 --- a/egs/librispeech/v1/conf/fbank80_16k.yaml +++ /dev/null @@ -1,7 +0,0 @@ -sample_frequency: 16000 -frame_length: 25 -low_freq: 20 -high_freq: 7600 -num_filters: 80 -snip_edges: false -use_energy: false diff --git a/egs/librispeech/v1/conf/reverb_noise_aug.yaml b/egs/librispeech/v1/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/librispeech/v1/conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml new file mode 100644 index 00000000..737f42cf --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -0,0 +1,45 @@ +data: + train: + dataset: + # max_chunk_length: 3.0 + # min_chunk_length: 3.0 + aug_cfgs: [conf/reverb_noise_aug.yaml] + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 + val: + dataset: + # max_chunk_length: 4.0 + # min_chunk_length: 4.0 + aug_cfgs: [conf/reverb_noise_aug.yaml] + wav_scale: 1 + sampler: + batch_size: 32 + iters_per_epoch: 6 + data_loader: + num_workers: 8 +model: +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-4 + warmup_steps: 1500 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/wav2vec2base960h_ecapatdnn512x2.yaml b/egs/librispeech/v1/conf/wav2vec2base960h_ecapatdnn512x2.yaml deleted file mode 100644 index 85964372..00000000 --- a/egs/librispeech/v1/conf/wav2vec2base960h_ecapatdnn512x2.yaml +++ /dev/null @@ -1,37 +0,0 @@ -hf_feats: - pretrained_model_path: facebook/wav2vec2-base-960h -xvector: - resnet_enc: - in_feats: 80 - in_conv_channels: 512 - in_kernel_size: 5 - in_stride: 1 - resb_type: seres2bn - resb_repeats: - - 1 - - 1 - resb_channels: - - 512 - resb_kernel_sizes: - - 3 - resb_dilations: - - 2 - - 3 - resb_strides: - - 1 - res2net_width_factor: 1 - res2net_scale: 8 - se_r: 4 - multilayer: true - multilayer_concat: true - endpoint_channels: 1536 - pool_net: - pool_type: ch-wise-att-mean+stddev - inner_feats: 128 - embed_dim: 256 - cos_scale: 30.0 - margin: 0.3 - margin_warmup_epochs: 20.0 - dropout_rate: 0.0 -feat_fusion_start: 2 -feat_fusion_method: weighted-avg diff --git 
a/egs/librispeech/v1/global_conf/config_transducer_v1.sh b/egs/librispeech/v1/global_conf/config_transducer_v1.sh index c0a07257..e6f7eac0 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v1.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v1.sh @@ -4,14 +4,14 @@ hf_model_name=wav2vec2xlsr300m #vad -vad_config=conf/vad_16k.yaml +# vad_config=conf/vad_16k.yaml # x-vector training nnet_data=train_clean_100 # x-vector cfg -nnet_type=hf_wav2vec2resnet1d +nnet_type=hf_wav2vec2transducer nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml nnet_s1_args="" @@ -22,13 +22,13 @@ nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name nnet_s1=$nnet_s1_dir/model_ep0060.pth -nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage2_v1.0.yaml +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml nnet_s2_args="" nnet_s2_name=${nnet_name}.s2 nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name nnet_s2=$nnet_s2_dir/model_ep0020.pth -nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage3_v1.0.yaml +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml nnet_s3_args="" nnet_s3_name=${nnet_name}.s3 nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh deleted file mode 100644 index 942fb336..00000000 --- a/egs/librispeech/v1/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh +++ /dev/null @@ -1,55 +0,0 @@ -# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 - -# hugging face model -hf_model_name=wav2vec2base - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_wav2vec2resnet1d - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -dropout=0 -embed_dim=256 -lr=0.05 -s=30 -margin_warmup=20 -margin=0.3 -nnet_num_epochs=70 - - -lr=0.001 -#lr=0.005 -xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 20000 --trainer.lrsched.hold-steps 20000 --trainer.lrsched.min-lr 1e-6 --trainer.epochs 75 --model conf/wav2vec2base_specaug5_ecapatdnn512x2.yaml --data.train.dataset.max-chunk-length 2 --data.train.dataset.min-chunk-length 2" - -nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v12 #v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0060.pth -nnet=$nnet_dir/swa_model_ep0076.pth -nnet=$nnet_dir/model_ep0060.pth -nnet=$nnet_dir/model_ep0030.pth -nnet=$nnet_dir/model_ep0040.pth -nnet=$nnet_dir/model_ep0020.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/librispeech/v1/local/make_musan.py b/egs/librispeech/v1/local/make_musan.py new file mode 100755 index 00000000..b0ae6846 --- /dev/null +++ b/egs/librispeech/v1/local/make_musan.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +# Copyright 2015 David Snyder +# Copyright 2019 Johns Hopkins University (Jesus Villalba) (added fs support) +# Apache 2.0. 
+# +# This file is meant to be invoked by make_musan.sh. + +import os, sys + + +def process_music_annotations(path): + utt2spk = {} + utt2vocals = {} + lines = open(path, "r").readlines() + for line in lines: + utt, genres, vocals, musician = line.rstrip().split()[:4] + # For this application, the musican ID isn't important + utt2spk[utt] = utt + utt2vocals[utt] = vocals == "Y" + return utt2spk, utt2vocals + + +def prepare_music(root_dir, fs, use_vocals): + utt2vocals = {} + utt2spk = {} + utt2wav = {} + num_good_files = 0 + num_bad_files = 0 + music_dir = os.path.join(root_dir, "music") + for root, dirs, files in os.walk(music_dir): + for file in files: + file_path = os.path.join(root, file) + if file.endswith(".wav"): + utt = str(file).replace(".wav", "") + utt2wav[utt] = file_path + elif str(file) == "ANNOTATIONS": + utt2spk_part, utt2vocals_part = process_music_annotations(file_path) + utt2spk.update(utt2spk_part) + utt2vocals.update(utt2vocals_part) + utt2spk_str = "" + utt2wav_str = "" + for utt in utt2vocals: + if utt in utt2wav: + if use_vocals or not utt2vocals[utt]: + utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" + if fs == 8: + utt2wav_str = ( + utt2wav_str + + utt + + " sox -t wav " + + utt2wav[utt] + + " -r 8k -t wav - |\n" + ) + else: + utt2wav_str = ( + utt2wav_str + + utt + + " sox -t wav " + + utt2wav[utt] + + " -r 16k -t wav - |\n" + ) + num_good_files += 1 + else: + print("Missing file", utt) + num_bad_files += 1 + print( + "In music directory, processed", + num_good_files, + "files;", + num_bad_files, + "had missing wav data", + ) + return utt2spk_str, utt2wav_str + + +def prepare_speech(root_dir, fs): + utt2spk = {} + utt2wav = {} + num_good_files = 0 + num_bad_files = 0 + speech_dir = os.path.join(root_dir, "speech") + for root, dirs, files in os.walk(speech_dir): + for file in files: + file_path = os.path.join(root, file) + if file.endswith(".wav"): + utt = str(file).replace(".wav", "") + utt2wav[utt] = file_path + utt2spk[utt] = utt + utt2spk_str = "" + utt2wav_str = "" + for utt in utt2spk: + if utt in utt2wav: + utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" + if fs == 8: + utt2wav_str = ( + utt2wav_str + + utt + + " sox -t wav " + + utt2wav[utt] + + " -r 8k -t wav - |\n" + ) + else: + utt2wav_str = ( + utt2wav_str + + utt + + " sox -t wav " + + utt2wav[utt] + + " -r 16k -t wav - |\n" + ) + num_good_files += 1 + else: + print("Missing file", utt) + num_bad_files += 1 + print( + "In speech directory, processed", + num_good_files, + "files;", + num_bad_files, + "had missing wav data", + ) + return utt2spk_str, utt2wav_str + + +def prepare_noise(root_dir, fs): + utt2spk = {} + utt2wav = {} + num_good_files = 0 + num_bad_files = 0 + noise_dir = os.path.join(root_dir, "noise") + for root, dirs, files in os.walk(noise_dir): + for file in files: + file_path = os.path.join(root, file) + if file.endswith(".wav"): + utt = str(file).replace(".wav", "") + utt2wav[utt] = file_path + utt2spk[utt] = utt + utt2spk_str = "" + utt2wav_str = "" + for utt in utt2spk: + if utt in utt2wav: + utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" + if fs == 8: + utt2wav_str = ( + utt2wav_str + + utt + + " sox -t wav " + + utt2wav[utt] + + " -r 8k -t wav - |\n" + ) + else: + utt2wav_str = ( + utt2wav_str + + utt + + " sox -t wav " + + utt2wav[utt] + + " -r 16k -t wav - |\n" + ) + num_good_files += 1 + else: + print("Missing file", utt) + num_bad_files += 1 + print( + "In noise directory, processed", + num_good_files, + "files;", + num_bad_files, + "had 
missing wav data", + ) + return utt2spk_str, utt2wav_str + + +def main(): + in_dir = sys.argv[1] + fs = int(sys.argv[2]) + out_dir = sys.argv[3] + use_vocals = sys.argv[4] == "Y" + utt2spk_music, utt2wav_music = prepare_music(in_dir, fs, use_vocals) + utt2spk_speech, utt2wav_speech = prepare_speech(in_dir, fs) + utt2spk_noise, utt2wav_noise = prepare_noise(in_dir, fs) + utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise + utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise + wav_fi = open(os.path.join(out_dir, "wav.scp"), "w") + wav_fi.write(utt2wav) + utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), "w") + utt2spk_fi.write(utt2spk) + + +if __name__ == "__main__": + main() diff --git a/egs/librispeech/v1/local/make_musan.sh b/egs/librispeech/v1/local/make_musan.sh new file mode 100755 index 00000000..4a6d30f9 --- /dev/null +++ b/egs/librispeech/v1/local/make_musan.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright 2015 David Snyder +# Copyright 2019 Johns Hopkins University (Jesus Villalba) (added fs support) +# Apache 2.0. +# +# This script, called by ../run.sh, creates the MUSAN +# data directory. The required dataset is freely available at +# http://www.openslr.org/17/ + +set -e +use_vocals='Y' + +. parse_options.sh || exit 1; + +if [ $# -ne 3 ];then + echo "Usage: $0 [options] "; + echo "e.g.: $0 /export/corpora/JHU/musan 8 data" + exit 1; +fi + +in_dir=$1 +fs=$2 +data_dir=$3 + +mkdir -p $data_dir/musan.tmp + +echo "Preparing ${data_dir}/musan..." +mkdir -p ${data_dir}/musan +local/make_musan.py ${in_dir} $fs ${data_dir}/musan ${use_vocals} + +utils/fix_data_dir.sh ${data_dir}/musan + +grep "music" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_music +grep "speech" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_speech +grep "noise" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_noise +utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_music \ + ${data_dir}/musan ${data_dir}/musan_music +utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_speech \ + ${data_dir}/musan ${data_dir}/musan_speech +utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_noise \ + ${data_dir}/musan ${data_dir}/musan_noise + +utils/fix_data_dir.sh ${data_dir}/musan_music +utils/fix_data_dir.sh ${data_dir}/musan_speech +utils/fix_data_dir.sh ${data_dir}/musan_noise + +rm -rf $data_dir/musan.tmp + diff --git a/egs/librispeech/v1/local/make_rirs_data.sh b/egs/librispeech/v1/local/make_rirs_data.sh new file mode 100755 index 00000000..c6652eda --- /dev/null +++ b/egs/librispeech/v1/local/make_rirs_data.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# +# Copyright 2020 Johns Hopkins University (Jesus Villalba) +# +# Apache 2.0. 
+set -e + +if [ $# != 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 RIRS_NOISES/simulated_rirs/smallroom 16 data/rirs_smallroom" +fi + +rir_dir=$1 +fs=$2 +data_dir=$3 + +mkdir -p $data_dir + +rir_list=$rir_dir/rir_list +if [ "$fs" -eq 16 ];then + awk '{ key=$5; sub(/.*\//,"",key); print key,$5 }' $rir_list > $data_dir/wav.scp +else + awk '{ +key=$5; sub(/.*\//,"",key); +print key,"sox "$5" -r 8000 -t wav -b 16 -e signed-integer - |" }' \ + $rir_list > $data_dir/wav.scp +fi +awk '{ key=$5; sub(/.*\//,"",key); print key,$4 }' $rir_list > $data_dir/rir2room + diff --git a/egs/librispeech/v1/run_003_compute_fbank.sh b/egs/librispeech/v1/run_003_compute_fbank.sh deleted file mode 100755 index 0f5966a8..00000000 --- a/egs/librispeech/v1/run_003_compute_fbank.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e -nodes=fs01 -storage_name=$(date +'%m_%d_%H_%M') -fbankdir=`pwd`/exp/fbank - -stage=1 -config_file=default_config.sh -feat_vers="numpy" - -. parse_options.sh || exit 1; - -if [ "$feat_vers" == "kaldi" ];then - make_fbank=steps/make_fbank.sh - fbank_cfg=conf/fbank80_16k.conf -else - fbank_cfg=conf/fbank80_16k.yaml - if [ "$feat_vers" == "numpy" ];then - make_fbank=steps_pyfe/make_fbank.sh - else - make_fbank=steps_pyfe/make_torch_fbank.sh - fi -fi - - -# Make filterbanks -if [ $stage -le 1 ]; then - # Prepare to distribute data over multiple machines - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $fbankdir/storage ]; then - dir_name=$USER/hyp-data/librispeech/v1/$storage_name/fbank/storage - if [ "$nodes" == "b0" ];then - utils/create_split_dir.pl \ - utils/create_split_dir.pl \ - /export/b{04,05,06,07}/$dir_name $fbankdir/storage - elif [ "$nodes" == "b1" ];then - utils/create_split_dir.pl \ - /export/b{14,15,16,17}/$dir_name $fbankdir/storage - elif [ "$nodes" == "c0" ];then - utils/create_split_dir.pl \ - /export/c{06,07,08,09}/$dir_name $fbankdir/storage - elif [ "$nodes" == "fs01" ];then - utils/create_split_dir.pl \ - /export/fs01/$dir_name $fbankdir/storage - else - echo "we don't distribute data between multiple machines" - fi - fi -fi - -if [ $stage -le 2 ];then - for name in dev_clean test_clean dev_other test_other train_clean_100 train_clean_360 train_other_500; - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 40 ? $num_spk:40)) - $make_fbank --write-utt2num-frames true --fbank-config $fbank_cfg --nj $nj --cmd "$train_cmd" \ - data/${name} exp/make_fbank/$name $fbankdir - utils/fix_data_dir.sh data/${name} - done - -fi - diff --git a/egs/librispeech/v1/run_003_prepare_noises_rirs.sh b/egs/librispeech/v1/run_003_prepare_noises_rirs.sh new file mode 100755 index 00000000..a448af9a --- /dev/null +++ b/egs/librispeech/v1/run_003_prepare_noises_rirs.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +# We prepare the noise files and RIR for online speech augmentation + +if [ $stage -le 1 ]; then + + # Prepare the MUSAN corpus, which consists of music, speech, and noise + # suitable for augmentation. 
+ local/make_musan.sh $musan_root 16 data + + for name in musan_noise musan_music + do + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ + --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_proc_audio exp/${name}_proc_audio + utils/fix_data_dir.sh data/${name}_proc_audio + done + +fi + +if [ $stage -le 2 ]; then + + # Create Babble noise from MUSAN speech files + for name in musan_speech + do + steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ + --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_babble exp/${name}_babble + # utils/fix_data_dir.sh data/${name}_babble + done +fi + +if [ $stage -le 3 ]; then + if [ ! -d "RIRS_NOISES" ]; then + if [ -d ../../sre19-cmn2/v1/RIRS_NOISES ];then + ln -s ../../sre19-cmn2/v1/RIRS_NOISES + else + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + fi + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/smallroom 16 data/rirs_smallroom + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/mediumroom 16 data/rirs_mediumroom + local/make_rirs_data.sh RIRS_NOISES/real_rirs_isotropic_noises 16 data/rirs_real + for rirs in rirs_smallroom rirs_mediumroom rirs_real + do + #pack all rirs in h5 files + steps_xvec/pack_rirs_for_nnet_train.sh data/$rirs data/$rirs exp/rirs/$rirs + done + +fi + + diff --git a/egs/librispeech/v1/run_010_prepare_asr_train_data.sh b/egs/librispeech/v1/run_010_prepare_asr_train_data.sh deleted file mode 100755 index 5936fbf4..00000000 --- a/egs/librispeech/v1/run_010_prepare_asr_train_data.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# Copyright -# 2020 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file - -if [ $stage -le 2 ]; then - # This script preprocess audio for x-vector training - steps_xvec/preprocess_audios_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ - --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') --use-bin-vad true \ - data/${nnet_data} data/${nnet_data}_proc_audio_no_sil exp/${nnet_data}_proc_audio_no_sil - hyp_utils/kaldi/utils/fix_data_dir.sh data/${nnet_data}_proc_audio_no_sil - -fi - -if [ $stage -le 3 ]; then - # Now, we remove files with less than 4s - hyp_utils/remove_short_audios.sh --min-len 4 data/${nnet_data}_proc_audio_no_sil - - # We also want several utterances per speaker. Now we'll throw out speakers - # with fewer than 4 utterances. 
- hyp_utils/remove_spk_few_utts.sh --min-num-utts 4 data/${nnet_data}_proc_audio_no_sil - -fi - -if [ $stage -le 4 ]; then - # Prepare train and validation lists for x-vectors - local/make_train_lists_sup_embed_with_augm.sh \ - data/${nnet_data}_proc_audio_no_sil \ - data/${nnet_data}_proc_audio_no_sil/lists_xvec -fi - -exit diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh index cd68587e..85d2e918 100755 --- a/egs/librispeech/v1/run_011_train_asr.sh +++ b/egs/librispeech/v1/run_011_train_asr.sh @@ -51,12 +51,12 @@ if [ $stage -le 1 ]; then --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.audio-file $train_dir/wav.scp \ --data.train.dataset.time-durs-file $train_dir/utt2dur \ - --data.train.dataset.text-file $train_dir/text \ --data.val.dataset.audio-file $val_dir/wav.scp \ --data.val.dataset.time-durs-file $val_dir/utt2dur \ - --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s1_dir $args \ --num-gpus $ngpu + # --data.train.dataset.text-file $train_dir/text \ + # --data.val.dataset.text-file $val_dir/text \ fi diff --git a/egs/librispeech/v1/steps_xvec b/egs/librispeech/v1/steps_xvec new file mode 120000 index 00000000..289276b7 --- /dev/null +++ b/egs/librispeech/v1/steps_xvec @@ -0,0 +1 @@ +hyp_utils/xvectors/ \ No newline at end of file diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 01db6960..07a6a31a 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -116,7 +116,7 @@ def make_parser(model_class): parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") - AD.add_class_args(train_parser, prefix="dataset", skip={}) + AD.add_class_args(train_parser, prefix="dataset", skip={"segments_file"}) Sampler.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", @@ -126,7 +126,7 @@ def make_parser(model_class): ) val_parser = ArgumentParser(prog="") - AD.add_class_args(val_parser, prefix="dataset", skip={}) + AD.add_class_args(val_parser, prefix="dataset", skip={"segments_file"}) Sampler.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", @@ -139,10 +139,16 @@ def make_parser(model_class): data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.train.dataset.text_file", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.val.dataset.text_file", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.train.data_loader.num_workers", action=ActionParser(parser=data_parser)) - parser.add_argument("--data.val.data_loader.num_workers", action=ActionParser(parser=data_parser)) + + parser.add_argument( + "--data.train.dataset.text_file", + type=str, + ) + parser.add_argument("--data.val.dataset.text_file", type=str) + parser.add_argument("--data.train.data_loader.num_workers", type=int, + default=5,) + parser.add_argument("--data.val.data_loader.num_workers", type=int, + default=5,) parser.add_argument( "--bpe-model", diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index 8c30faaf..d2ef9715 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -145,7 +145,7 @@ def make_parser(model_class): parser.add_argument("--data", 
action=ActionParser(parser=data_parser)) parser.link_arguments( - "data.train.dataset.class_file", "data.val.dataset.class_file" + "data.train.dataset.class_files", "data.val.dataset.class_files" ) parser.link_arguments( "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 058b7902..58905ef8 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -26,7 +26,6 @@ import torch.distributed as dist from hyperion.np import augment -from hyperion.utils.util import read_2column_text class AudioDataset1(Dataset): @@ -460,7 +459,7 @@ class AudioDataset(Dataset): def __init__( self, audio_file, - segments_file, + segments_file=None, class_names=None, class_files=None, bpe_model=None, @@ -491,24 +490,25 @@ def __init__( self.r = AR(audio_file, wav_scale=wav_scale) + if rank == 0: logging.info("loading segments file %s" % segments_file) - - self.seg_set = SegmentSet.load(segments_file) - if rank == 0: - logging.info("dataset contains %d seqs" % len(self.seg_set)) - - self.is_val = is_val - if time_durs_file is not None: + if segments_file is not None: + self.seg_set = SegmentSet.load(segments_file) if rank == 0: - logging.info("loading durations file %s" % time_durs_file) + logging.info("dataset contains %d seqs" % len(self.seg_set)) - time_durs = SegmentSet.load(time_durs_file) - self.seg_set["duration"] = time_durs.loc[ - self.seg_set["id"] - ].class_id.values.astype(np.float, copy=False) - else: - assert "duration" in self.seg_set + self.is_val = is_val + if time_durs_file is not None: + if rank == 0: + logging.info("loading durations file %s" % time_durs_file) + + time_durs = SegmentSet.load(time_durs_file) + self.seg_set["duration"] = time_durs.loc[ + self.seg_set["id"] + ].class_id.values.astype(np.float, copy=False) + else: + assert "duration" in self.seg_set logging.info("loading class-info files") self._load_class_infos(class_names, class_files, is_val) @@ -520,7 +520,7 @@ def __init__( if text_files is not None: logging.info("loading text files") - self._load_text(text_files, is_val) + self._load_text_infos(text_files, is_val) self.return_segment_info = ( [] if return_segment_info is None else return_segment_info @@ -537,7 +537,7 @@ def _load_bpe_model(self, bpe_model, is_val): blank_id = self.sp.piece_to_id("") vocab_size = self.sp.get_piece_size() - def _load_text(self, text_file, is_val): + def _load_text_infos(self, text_file, is_val): #TODO: load bpe and text into data structure if text_file is None: return @@ -682,19 +682,22 @@ def _get_text_info(self, seg_id): # converts the class_ids to integers for info_name in self.return_segment_info: seg_info = self.seg_set.loc[seg_id, info_name] - if info_name in self.class_info: - # if the type of information is a class-id - # we use the class information table to - # convert from id to integer - class_info = self.class_info[info_name] - idx = class_info.loc[seg_info, "class_idx"] + if info_name in self.text_info: + # if the type of information is a text + # we use the text information table to + # convert from id to text labels + text_info = self.text_info[info_name] + idx = text_info.loc[seg_info, "class_idx"] seg_info = idx + y = sp.encode(text, out_type=int) + y = k2.RaggedTensor(y).to(device) - r.append(seg_info) + r.append(y) return r def __getitem__(self, segment): + #TODO: check the start/end time for Recognition seg_id, start, duration = self._parse_segment_item(segment) x, fs = 
self._read_audio(seg_id, start, duration) if self.augmenters: @@ -712,9 +715,10 @@ def __getitem__(self, segment): else: r = [x] - # adds the segment labels - seg_info = self._get_segment_info(seg_id) - r.extend(seg_info) + #TODO: Add it back for both case + # # adds the segment labels + # seg_info = self._get_segment_info(seg_id) + # r.extend(seg_info) # adds the text labels text_info = self._get_text_info(seg_id) @@ -787,6 +791,14 @@ def add_class_args(parser, prefix=None, skip={}): ), ) + parser.add_argument( + "--text-file", + default=None, + help=( + "text file" + ), + ) + parser.add_argument( "--aug-cfgs", default=None, diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 25632941..4433327d 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -88,13 +88,14 @@ def save(self, file_path, sep=None): self.df.to_csv(file_path, sep=sep, index=False) @classmethod - def load(cls, file_path, sep=None): + def load(cls, file_path, sep=None, name="class_id"): """Loads utt2info list from text file. Args: file_path: File to read the list. sep: Separator between the key and file_path in the text file. dtype: Dictionary with the dtypes of each column. + name: name for the data to be loaded Returns: Utt2Info object """ @@ -106,8 +107,8 @@ def load(cls, file_path, sep=None): file_path, sep=" ", header=None, - names=["id", "class_id"], - dtype={"id": np.str, "class_id": np.str}, + names=["id", name], + dtype={"id": np.str, name: np.str}, ) else: if sep is None: diff --git a/hyperion/utils/text_info.py b/hyperion/utils/text_info.py index 44a71d0a..4d7e2c3a 100644 --- a/hyperion/utils/text_info.py +++ b/hyperion/utils/text_info.py @@ -42,18 +42,11 @@ def read_2column_text(path: Union[Path, str]) -> Dict[str, str]: class TextInfo(InfoTable): def __init__(self, df): super().__init__(df) - if "class_idx" not in self.df: - self.add_class_idx() - if "weights" not in self.df: self.set_uniform_weights() else: self.df["weights"] /= self.df["weigths"].sum() - - def add_class_idx(self): - self.df["class_idx"] = [i for i in range(len(self.df))] - def set_uniform_weights(self): self.df["weights"] = 1 / len(self.df) @@ -77,38 +70,35 @@ def num_classes(self): return self.df["class_idx"].values.max() + 1 @classmethod - def load(cls, file_path, sp): - #TODO: load text information + def load(cls, file_path, sp, sep=None): """Loads utt2info list from text file. Args: file_path: File to read the list. - sp: SentencePieceProcessor for bpe. + sp: SentencePieceProcessor from the BPE model + sep: Separator between the key and file_path in the text file. + dtype: Dictionary with the dtypes of each column. Returns: Utt2Info object """ - # y: k2.RaggedTensor, - # A ragged tensor with 2 axes [utt][label]. It contains labels of each utterance. - - texts = read_2column_text(file_path) - # {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'} - for utterance_id in texts: - texts[utterance_id] - - y = sp.encode(texts, out_type=int) - y = k2.RaggedTensor(y).to(device) + #TODO: load text information + """Loads utt2info list from text file. + Args: + file_path: File to read the list. + sp: SentencePieceProcessor for bpe. + Returns: + Utt2Info object + """ + # # y: k2.RaggedTensor, + # # A ragged tensor with 2 axes [utt][label]. It contains labels of each utterance. 
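For reference, the encoding sketched in the surrounding commented-out lines would look roughly like this once re-enabled (illustrative only: the BPE model path is taken from the recipe config, and `texts` stands in for the loaded transcripts; this is not part of the patch):

    import k2
    import sentencepiece as spm

    sp = spm.SentencePieceProcessor()
    sp.load("data/lang_bpe_1000/bpe.model")      # bpe_model path from the recipe config
    texts = ["HELLO WORLD", "GOOD MORNING"]      # one transcript per utterance
    token_ids = sp.encode(texts, out_type=int)   # list of lists of BPE token ids
    y = k2.RaggedTensor(token_ids)               # ragged tensor with axes [utt][label]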
+ # y = sp.encode(texts, out_type=int) + # y = k2.RaggedTensor(y).to(device) file_path = Path(file_path) - ext = file_path.suffix - if ext == "": - # if no extension we load as kaldi utt2spk file - df = pd.read_csv( - file_path, - sep=" ", - header=None, - names=["id"], - dtype={"id": np.str}, - ) - return cls(df) - - return super().load(file_path, sep) + text_df = super().load(file_path, sep, name="text_label") + # for i, text in enumerate(text_df["text_label"]): + # y = sp.encode(text, out_type=int) + # y = k2.RaggedTensor(y).to(device) + # text_df["text_label"][i] = y + + return text_df From dac0cc5e3c152b19e7fe23c75a7499b77eaa6bfb Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 25 Oct 2022 10:53:36 -0400 Subject: [PATCH 038/154] fix weighted sampler for absent classes, and change bucket creation --- hyperion/torch/data/bucketing_seg_sampler.py | 19 +++++++------------ .../data/class_weighted_seg_chunk_sampler.py | 16 ++++++++++++---- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 85b6772e..224660bb 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -36,30 +36,25 @@ def __init__( self._compute_len() self.depleted_buckets = torch.zeros((num_buckets,), dtype=torch.bool) - @staticmethod - def create_buckets(self, seg_ids, seg_lengths): - sort_idx = torch.argsort(seg_lengths) - sort_ids = seg_ids[sort_idx] - sort_lengths = seg_lengths[sort_ids] - cum_lengths = torch.cumsum(sort_lengths) + def create_buckets(self): + sort_idx = torch.argsort(self.seg_set[self.length_column].values) + sorted_seg_set = self.seg_set.iloc[sort_idx] + cum_lengths = torch.cumsum(sorted_seg_set[self.length_column].values) + bucket_length = cum_lengths[-1] / self.num_buckets buckets = [] for i in range(self.num_buckets): bucket_idx = (cum_lengths <= bucket_length) & (cum_lengths > 0) - bucket_i = sort_ids[bucket_idx] + bucket_i = sorted_seg_set.loc[bucket_idx] buckets.append(bucket_i) cum_lengths -= bucket_length return buckets def _create_bucket_samplers(self): - buckets = self.create_buckets( - self.dataset["ids"], self.dataset[self.length_column] - ) + buckets = self.create_buckets() bucket_samplers = [] for i in range(self.num_buckets): - dataset_i = self.dataset.create_bucket(buckets[i]) - sampler_i = self.base_sampler(dataset_i, self.seed, **self.base_kwargs) + sampler_i = self.base_sampler(buckets[i], self.seed, **self.base_kwargs) bucket_samplers.append(sampler_i) self.bucket_samplers = bucket_samplers diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 27ad4d33..05b222c7 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -213,6 +213,17 @@ def set_hard_prototypes(self, affinity_matrix): self.hard_prototypes = None return + # don't sample hard negs from classes with zero weight or absent + zero_w = self.class_info["weights"] == 0 + if np.any(zero_w): + zero_w_idx = self.class_info.loc[zero_w, "class_idx"].values + affinity_matrix[:, zero_w_idx] = -1000 + + for i in range(affinity_matrix.size(1)): + mask_i = self.class_info["class_idx"] == i + if np.all(mask_i == 0): + affinity_matrix[:, i] = -1000 + + # affinity_matrix[np.diag(affinity_matrix.shape[0])] = -1.0 # hard prototypes for a class are itself and k-1 closest to it.
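# The per-class loop above can also be done without a Python loop over all
# class indices; a vectorized sketch (illustrative only, not part of the patch):
#   present = torch.zeros(affinity_matrix.size(1), dtype=torch.bool)
#   present[torch.as_tensor(self.class_info["class_idx"].values)] = True
#   affinity_matrix[:, ~present] = -1000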
self.hard_prototypes = torch.topk( @@ -260,10 +271,7 @@ def _get_class_weights(self, chunk_length): def _sample_classes(self, num_classes, chunk_length): weights = self._get_class_weights(chunk_length) row_idx = torch.multinomial( - weights, - num_samples=num_classes, - replacement=True, - generator=self.rng, + weights, num_samples=num_classes, replacement=True, generator=self.rng, ).numpy() class_ids = self.class_info.iloc[row_idx].id.values From 1ff37201ef388859084b9c1cc7153803359eb489 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Tue, 25 Oct 2022 22:49:57 -0400 Subject: [PATCH 039/154] update loading text and batch into the transducer --- ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 2 + .../v1/global_conf/config_transducer_v1.sh | 1 + egs/librispeech/v1/local/data_prep.sh | 3 + egs/librispeech/v1/run_011_train_asr.sh | 28 +++++-- hyperion/bin/train_wav2vec2transducer.py | 83 +++++++++++++------ hyperion/torch/data/__init__.py | 1 + hyperion/torch/data/audio_dataset.py | 32 ++++--- hyperion/torch/data/bucketing_seg_sampler.py | 15 ++-- hyperion/utils/utils.py | 1 + 9 files changed, 118 insertions(+), 48 deletions(-) diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index 737f42cf..3c9385c7 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -6,6 +6,7 @@ data: aug_cfgs: [conf/reverb_noise_aug.yaml] wav_scale: 1 sampler: + sampler_type: 'bucketing_seg_sampler' batch_size: 32 iters_per_epoch: 6 data_loader: @@ -17,6 +18,7 @@ data: aug_cfgs: [conf/reverb_noise_aug.yaml] wav_scale: 1 sampler: + sampler_type: 'bucketing_seg_sampler' batch_size: 32 iters_per_epoch: 6 data_loader: diff --git a/egs/librispeech/v1/global_conf/config_transducer_v1.sh b/egs/librispeech/v1/global_conf/config_transducer_v1.sh index e6f7eac0..00b34870 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v1.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v1.sh @@ -9,6 +9,7 @@ hf_model_name=wav2vec2xlsr300m # x-vector training nnet_data=train_clean_100 +bpe_model=data/lang_bpe_1000/bpe.model # x-vector cfg nnet_type=hf_wav2vec2transducer diff --git a/egs/librispeech/v1/local/data_prep.sh b/egs/librispeech/v1/local/data_prep.sh index c903d45b..0923aeab 100755 --- a/egs/librispeech/v1/local/data_prep.sh +++ b/egs/librispeech/v1/local/data_prep.sh @@ -72,6 +72,9 @@ done spk2utt=$dst/spk2utt utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1 +utils/data/get_utt2dur.sh $dst +awk 'sub(/ *$/, "", $0)' $dst/utt2dur > $dst/utt2dur2 +mv $dst/utt2dur2 $dst/utt2dur ntrans=$(wc -l <$trans) nutt2spk=$(wc -l <$utt2spk) diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh index 85d2e918..61f00be4 100755 --- a/egs/librispeech/v1/run_011_train_asr.sh +++ b/egs/librispeech/v1/run_011_train_asr.sh @@ -35,9 +35,9 @@ if [ "$interactive" == "true" ];then export cuda_cmd=run.pl fi -# if [ "$use_wandb" == "true" ];then -# extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v2 --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" -# fi +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v2 --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" +fi # Network Training @@ -50,14 +50,26 @@ if [ $stage -le 1 ]; then train_wav2vec2transducer.py $nnet_type \ --cfg 
$nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.audio-file $train_dir/wav.scp \ - --data.train.dataset.time-durs-file $train_dir/utt2dur \ + --data.train.dataset.segments-file $train_dir/utt2spk \ + --data.train.dataset.bpe-model $bpe_model \ + --data.train.dataset.text-file $train_dir/text \ --data.val.dataset.audio-file $val_dir/wav.scp \ - --data.val.dataset.time-durs-file $val_dir/utt2dur \ + --data.val.dataset.segments-file $val_dir/utt2spk \ + --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s1_dir $args \ + --data.train.dataset.time-durs-file $train_dir/utt2dur \ + --data.val.dataset.time-durs-file $val_dir/utt2dur \ --num-gpus $ngpu - # --data.train.dataset.text-file $train_dir/text \ - # --data.val.dataset.text-file $val_dir/text \ - + +# --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ +# --data.train.dataset.audio-file $list_dir/wav.scp \ +# --data.train.dataset.time-durs-file $list_dir/utt2dur \ +# --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ +# --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ +# --data.val.dataset.audio-file $list_dir/wav.scp \ +# --data.val.dataset.time-durs-file $list_dir/utt2dur \ +# --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ +# --trainer.exp-path $nnet_dir $args \ fi if [ $stage -le 2 ]; then diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 07a6a31a..360c2c0d 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -26,7 +26,8 @@ from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.data import AudioDataset as AD # from hyperion.torch.data import LibriSpeechAsrDataModule as ASRD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +# from hyperion.torch.data import BucketingSegSampler as Sampler +from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.models import HFWav2Vec2Transducer @@ -38,19 +39,22 @@ def init_data(partition, rank, num_gpus, **kwargs): kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) - sampler_args = Sampler.filter_args(**kwargs["sampler"]) + sampler_args = kwargs["sampler"] if rank == 0: logging.info("{} audio dataset args={}".format(partition, ad_args)) logging.info("{} sampler args={}".format(partition, sampler_args)) logging.info("init %s dataset", partition) - ad_args["is_val"] = partition == "val" + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val + print("ad_args", ad_args) dataset = AD(**ad_args) if rank == 0: logging.info("init %s samplers", partition) - - sampler = Sampler(dataset, **sampler_args) + print("sampler_args", sampler_args) + sampler = SegSamplerFactory.create(dataset, **sampler_args) if rank == 0: logging.info("init %s dataloader", partition) @@ -69,7 +73,7 @@ def init_model(num_classes, rank, model_class, **kwargs): if rank == 0: logging.info("model network args={}".format(model_args)) # TODO: check model_args - model_args["transducer"]["num_classes"] = num_classes + model_args["num_classes"] = num_classes model = model_class(**model_args) if rank == 0: logging.info("model={}".format(model)) @@ -86,13 +90,20 @@ def train_model(gpu_id, args): torch.manual_seed(args.seed) set_float_cpu("float32") - ddp_args = ddp.filter_ddp_args(**kwargs) - device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) - kwargs["rank"] = 
rank + # ddp_args = ddp.filter_ddp_args(**kwargs) + # device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + # kwargs["rank"] = rank + # for Debug + rank = 0 + kwargs["rank"] = 0 + device = "cpu" + world_size=1 + train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - model = init_model(train_loader.dataset.num_classes, **kwargs) + model = init_model(list(train_loader.dataset.num_classes.values())[0], **kwargs) + # model = init_model(train_loader.dataset.num_classes, **kwargs) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: @@ -116,8 +127,9 @@ def make_parser(model_class): parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") - AD.add_class_args(train_parser, prefix="dataset", skip={"segments_file"}) - Sampler.add_class_args(train_parser, prefix="sampler") + AD.add_class_args(train_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") + # Sampler.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", type=int, @@ -126,8 +138,9 @@ def make_parser(model_class): ) val_parser = ArgumentParser(prog="") - AD.add_class_args(val_parser, prefix="dataset", skip={"segments_file"}) - Sampler.add_class_args(val_parser, prefix="sampler") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") + # Sampler.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", type=int, @@ -144,19 +157,41 @@ def make_parser(model_class): "--data.train.dataset.text_file", type=str, ) - parser.add_argument("--data.val.dataset.text_file", type=str) - parser.add_argument("--data.train.data_loader.num_workers", type=int, - default=5,) - parser.add_argument("--data.val.data_loader.num_workers", type=int, - default=5,) parser.add_argument( - "--bpe-model", - type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + "--data.train.dataset.bpe_model", + type=str, + ) + + parser.add_argument("--data.val.dataset.text_file", type=str) + + + # parser.add_argument( + # "--data.val.dataset.bpe_model", + # type=str, + # ) + + + # parser.add_argument("--data.train.data_loader.num_workers", type=int, + # default=5,) + # parser.add_argument("--data.val.data_loader.num_workers", type=int, + # default=5,) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + + parser.link_arguments( + "data.train.dataset.bpe_model", "data.val.dataset.bpe_model" ) + + # parser.add_argument( + # "--bpe-model", + # type=str, + # default="data/lang_bpe_500/bpe.model", + # help="Path to the BPE model", + # ) + # parser.link_arguments( # "data.train.dataset.class_file", "data.val.dataset.class_file" # ) @@ -209,5 +244,5 @@ def make_parser(model_class): args_sc.model_class = model_dict[model_type] # torch docs recommend using forkserver - multiprocessing.set_start_method("forkserver") + # multiprocessing.set_start_method("forkserver") train_model(gpu_id, args_sc) diff --git a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index aebcfe8a..e289acbf 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -10,5 +10,6 @@ from .audio_dataset import AudioDataset # samplers +from .bucketing_seg_sampler import BucketingSegSampler from .weighted_seq_sampler import ClassWeightedSeqSampler from .seg_sampler_factory import 
SegSamplerFactory diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 7ec30bc3..11e8cede 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -26,7 +26,7 @@ import torch.distributed as dist from hyperion.np import augment - +import pdb class AudioDataset1(Dataset): def __init__( @@ -453,7 +453,7 @@ def add_class_args(parser, prefix=None, skip={"audio_file", "key_file"}): from ...utils.class_info import ClassInfo from ...utils.segment_set import SegmentSet - +from ...utils.text import read_text class AudioDataset(Dataset): def __init__( @@ -463,7 +463,7 @@ def __init__( class_names=None, class_files=None, bpe_model=None, - text_files=None, + text_file=None, time_durs_file=None, aug_cfgs=None, num_augs=1, @@ -518,10 +518,9 @@ def __init__( logging.info("loading bpe models") self._load_bpe_model(bpe_model, is_val) - if text_files is not None: + if text_file is not None: logging.info("loading text files") - self._load_text_infos(text_files, is_val) - + self._load_text_infos(text_file, is_val) self.return_segment_info = ( [] if return_segment_info is None else return_segment_info ) @@ -532,8 +531,10 @@ def __init__( def _load_bpe_model(self, bpe_model, is_val): + if self.rank == 0: + logging.info("loading bpe file %s" % bpe_model) self.sp = spm.SentencePieceProcessor() - self.sp.load(params.bpe_model) + self.sp.load(bpe_model) blank_id = self.sp.piece_to_id("") vocab_size = self.sp.get_piece_size() @@ -543,7 +544,9 @@ def _load_text_infos(self, text_file, is_val): return if self.rank == 0: logging.info("loading text file %s" % text_file) - self.text_info = TextInfo.load(text_file, self.sp) + + text = read_text(text_file) + self.seg_set["text"] = text.loc[self.seg_set["id"]].text @@ -742,7 +745,8 @@ def filter_args(**kwargs): "num_augs", "class_names", "class_files", - "text_files", + "bpe_model", + "text_file", "return_segment_info", "return_orig", "time_durs_file", @@ -795,11 +799,19 @@ def add_class_args(parser, prefix=None, skip={}): ), ) + parser.add_argument( + "--bpe-model", + default=None, + help=( + "bpe model for the text label" + ), + ) + parser.add_argument( "--text-file", default=None, help=( - "text file" + "text file with words labels for each utterances" ), ) diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 224660bb..37794377 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -8,7 +8,6 @@ import logging import numpy as np - import torch from .hyp_sampler import HypSampler from .seg_sampler import SegSampler @@ -37,14 +36,18 @@ def __init__( self.depleted_buckets = torch.zeros((num_buckets,), dtype=torch.bool) def create_buckets(self): - sort_idx = torch.argsort(self.seg_set[self.length_column].values) + sort_idx = torch.argsort(torch.from_numpy(self.seg_set[self.length_column].values)) sorted_seg_set = self.seg_set.iloc[sort_idx] - cum_lengths = torch.cumsum(sorted_seg_set[self.length_column].values) + cum_lengths = torch.cumsum(torch.from_numpy(sorted_seg_set[self.length_column].values),dim=0) bucket_length = cum_lengths[-1] / self.num_buckets buckets = [] for i in range(self.num_buckets): - bucket_idx = (cum_lengths <= bucket_length) & (cum_lengths > 0) - bucket_i = sorted_seg_set.loc[bucket_idx] + bucket_bool = (cum_lengths <= bucket_length) & (cum_lengths > 0) + bucket_idx = [] + for i, bo in enumerate(bucket_bool): + if bo: + bucket_idx.append(i) + bucket_i = 
sorted_seg_set.iloc[bucket_idx] buckets.append(bucket_i) cum_lengths -= bucket_length @@ -54,7 +57,7 @@ def _create_bucket_samplers(self): buckets = self.create_buckets() bucket_samplers = [] for i in range(self.num_buckets): - sampler_i = self.base_sampler(buckets[i], self.seed, **self.base_kwargs) + sampler_i = self.base_sampler(buckets[i], **self.base_kwargs) bucket_samplers.append(sampler_i) self.bucket_samplers = bucket_samplers diff --git a/hyperion/utils/utils.py b/hyperion/utils/utils.py index 1663fb03..2da78581 100644 --- a/hyperion/utils/utils.py +++ b/hyperion/utils/utils.py @@ -352,6 +352,7 @@ def store_transcripts( print(f"{cut_id}:\thyp={hyp}", file=f) + def write_error_stats( f: TextIO, test_set_name: str, From a7f1e53063f76f03941cfbedd0ce62bec6b6cc91 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Tue, 25 Oct 2022 23:23:53 -0400 Subject: [PATCH 040/154] initial loading transducer model --- ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 2 +- .../v1/conf/wav2vec2xlsr300m_transducer.yaml | 40 + hyperion/bin/train_wav2vec2transducer.py | 178 +- hyperion/torch/models/transducer/__init__.py | 3 + hyperion/torch/models/transducer/conformer.py | 1450 +++++++++++++++++ hyperion/torch/models/transducer/decoder.py | 97 ++ hyperion/torch/models/transducer/joiner.py | 54 + .../wav2transducer/hf_wav2vec2_transducer.py | 48 +- hyperion/utils/text_info.py | 104 -- 9 files changed, 1814 insertions(+), 162 deletions(-) create mode 100644 egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml create mode 100644 hyperion/torch/models/transducer/conformer.py create mode 100644 hyperion/torch/models/transducer/decoder.py create mode 100644 hyperion/torch/models/transducer/joiner.py delete mode 100644 hyperion/utils/text_info.py diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index 3c9385c7..e7cfc8ef 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -23,7 +23,7 @@ data: iters_per_epoch: 6 data_loader: num_workers: 8 -model: +model: wav2vec2xlsr300m_transducer.yaml trainer: optim: opt_type: sgd diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml new file mode 100644 index 00000000..fe71a40c --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml @@ -0,0 +1,40 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus +transducer: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 5 + intertop_margin: 0.1 + dropout_rate: 0.0 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 360c2c0d..7e87c180 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -4,6 +4,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import sys +import pdb 
import os from pathlib import Path from jsonargparse import ( @@ -25,11 +26,14 @@ from hyperion.torch.utils import ddp from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.data import AudioDataset as AD -# from hyperion.torch.data import LibriSpeechAsrDataModule as ASRD -# from hyperion.torch.data import BucketingSegSampler as Sampler from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.models import HFWav2Vec2Transducer +from hyperion.torch.models.transducer import Conformer +from hyperion.torch.models.transducer import Decoder +from hyperion.torch.models.transducer import Joiner + + model_dict = { "hf_wav2vec2transducer": HFWav2Vec2Transducer, @@ -80,6 +84,133 @@ def init_model(num_classes, rank, model_class, **kwargs): return model + + +def get_params() -> AttributeDict: + """Return a dict containing training parameters. + + All training related parameters that are not passed from the commandline + are saved in the variable `params`. + + Commandline options are merged into `params` after they are parsed, so + you can also access them via `params`. + + Explanation of options saved in `params`: + + - best_train_loss: Best training loss so far. It is used to select + the model that has the lowest training loss. It is + updated during the training. + + - best_valid_loss: Best validation loss so far. It is used to select + the model that has the lowest validation loss. It is + updated during the training. + + - best_train_epoch: It is the epoch that has the best training loss. + + - best_valid_epoch: It is the epoch that has the best validation loss. + + - batch_idx_train: Used to writing statistics to tensorboard. It + contains number of batches trained so far across + epochs. + + - log_interval: Print training loss if batch_idx % log_interval` is 0 + + - reset_interval: Reset statistics if batch_idx % reset_interval is 0 + + - valid_interval: Run validation if batch_idx % valid_interval is 0 + + - feature_dim: The model input dim. It has to match the one used + in computing features. + + - subsampling_factor: The subsampling factor for the model. + + - attention_dim: Hidden dim for multi-head attention model. + + - num_decoder_layers: Number of decoder layer of transformer decoder. + + - warm_step: The warm_step for Noam optimizer. 
+ """ + params = AttributeDict( + { + "best_train_loss": float("inf"), + "best_valid_loss": float("inf"), + "best_train_epoch": -1, + "best_valid_epoch": -1, + "batch_idx_train": 0, + "log_interval": 50, + "reset_interval": 200, + "valid_interval": 3000, # For the 100h subset, use 800 + # parameters for conformer + "feature_dim": 80, + "encoder_out_dim": 512, + "subsampling_factor": 4, + "attention_dim": 512, + "nhead": 8, + "dim_feedforward": 2048, + "num_encoder_layers": 12, + "vgg_frontend": False, + # decoder params + "decoder_embedding_dim": 1024, + "num_decoder_layers": 2, + "decoder_hidden_dim": 512, + # parameters for Noam + "warm_step": 80000, # For the 100h subset, use 8k + "env_info": get_env_info(), + } + ) + + return params + + +def get_encoder_model(params: AttributeDict): + # TODO: We can add an option to switch between Conformer and Transformer + encoder = Conformer( + num_features=params.feature_dim, + output_dim=params.encoder_out_dim, + subsampling_factor=params.subsampling_factor, + d_model=params.attention_dim, + nhead=params.nhead, + dim_feedforward=params.dim_feedforward, + num_encoder_layers=params.num_encoder_layers, + vgg_frontend=params.vgg_frontend, + ) + return encoder + + +def get_decoder_model(params: AttributeDict): + decoder = Decoder( + vocab_size=params.vocab_size, + embedding_dim=params.decoder_embedding_dim, + blank_id=params.blank_id, + num_layers=params.num_decoder_layers, + hidden_dim=params.decoder_hidden_dim, + output_dim=params.encoder_out_dim, + ) + return decoder + + +def get_joiner_model(params: AttributeDict): + joiner = Joiner( + input_dim=params.encoder_out_dim, + output_dim=params.vocab_size, + ) + return joiner + + +def get_transducer_model(params: AttributeDict): + encoder = get_encoder_model(params) + decoder = get_decoder_model(params) + joiner = get_joiner_model(params) + + model = Transducer( + encoder=encoder, + decoder=decoder, + joiner=joiner, + ) + return model + + + def train_model(gpu_id, args): config_logger(args.verbose) @@ -90,20 +221,20 @@ def train_model(gpu_id, args): torch.manual_seed(args.seed) set_float_cpu("float32") - # ddp_args = ddp.filter_ddp_args(**kwargs) - # device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) - # kwargs["rank"] = rank + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank - # for Debug - rank = 0 - kwargs["rank"] = 0 - device = "cpu" - world_size=1 + # # for Debug + # rank = 0 + # kwargs["rank"] = 0 + # device = "cpu" + # world_size=1 train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - model = init_model(list(train_loader.dataset.num_classes.values())[0], **kwargs) - # model = init_model(train_loader.dataset.num_classes, **kwargs) + # model = init_model(train_loader.dataset.num_classes.values())[0], **kwargs) + model = init_model(train_loader.dataset.num_classes, **kwargs) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: @@ -163,19 +294,8 @@ def make_parser(model_class): type=str, ) - parser.add_argument("--data.val.dataset.text_file", type=str) - - - # parser.add_argument( - # "--data.val.dataset.bpe_model", - # type=str, - # ) - - - # parser.add_argument("--data.train.data_loader.num_workers", type=int, - # default=5,) - # parser.add_argument("--data.val.data_loader.num_workers", type=int, - # default=5,) + parser.add_argument("--data.val.dataset.text_file", type=str) + parser.link_arguments( "data.train.data_loader.num_workers", 
"data.val.data_loader.num_workers" ) @@ -184,14 +304,6 @@ def make_parser(model_class): "data.train.dataset.bpe_model", "data.val.dataset.bpe_model" ) - - # parser.add_argument( - # "--bpe-model", - # type=str, - # default="data/lang_bpe_500/bpe.model", - # help="Path to the BPE model", - # ) - # parser.link_arguments( # "data.train.dataset.class_file", "data.val.dataset.class_file" # ) diff --git a/hyperion/torch/models/transducer/__init__.py b/hyperion/torch/models/transducer/__init__.py index 20372911..ee711a8d 100644 --- a/hyperion/torch/models/transducer/__init__.py +++ b/hyperion/torch/models/transducer/__init__.py @@ -5,3 +5,6 @@ """ from .transducer import Transducer +from .conformer import Conformer +from .decoder import Decoder +from .joiner import Joiner \ No newline at end of file diff --git a/hyperion/torch/models/transducer/conformer.py b/hyperion/torch/models/transducer/conformer.py new file mode 100644 index 00000000..2bf63320 --- /dev/null +++ b/hyperion/torch/models/transducer/conformer.py @@ -0,0 +1,1450 @@ +#!/usr/bin/env python3 +# Copyright (c) 2021 University of Chinese Academy of Sciences (author: Han Zhu) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import math +import warnings +from typing import List, Optional, Tuple + +import torch +from torch import Tensor, nn +from transformer import Transformer + +from icefall.utils import make_pad_mask, subsequent_chunk_mask + + +class Conformer(Transformer): + """ + Args: + num_features (int): Number of input features + output_dim (int): Number of output dimension + subsampling_factor (int): subsampling factor of encoder (the convolution layers before transformers) + d_model (int): attention dimension + nhead (int): number of head + dim_feedforward (int): feedforward dimention + num_encoder_layers (int): number of encoder layers + dropout (float): dropout rate + cnn_module_kernel (int): Kernel size of convolution module + normalize_before (bool): whether to use layer_norm before the first block. + vgg_frontend (bool): whether to use vgg frontend. + dynamic_chunk_training (bool): whether to use dynamic chunk training, if + you want to train a streaming model, this is expected to be True. + When setting True, it will use a masking strategy to make the attention + see only limited left and right context. + short_chunk_threshold (float): a threshold to determinize the chunk size + to be used in masking training, if the randomly generated chunk size + is greater than ``max_len * short_chunk_threshold`` (max_len is the + max sequence length of current batch) then it will use + full context in training (i.e. with chunk size equals to max_len). + This will be used only when dynamic_chunk_training is True. + short_chunk_size (int): see docs above, if the randomly generated chunk + size equals to or less than ``max_len * short_chunk_threshold``, the + chunk size will be sampled uniformly from 1 to short_chunk_size. 
+ This also will be used only when dynamic_chunk_training is True. + num_left_chunks (int): the left context (in chunks) attention can see, the + chunk size is decided by short_chunk_threshold and short_chunk_size. + A minus value means seeing full left context. + This also will be used only when dynamic_chunk_training is True. + causal (bool): Whether to use causal convolution in conformer encoder + layer. This MUST be True when using dynamic_chunk_training. + """ + + def __init__( + self, + num_features: int, + output_dim: int, + subsampling_factor: int = 4, + d_model: int = 256, + nhead: int = 4, + dim_feedforward: int = 2048, + num_encoder_layers: int = 12, + dropout: float = 0.1, + cnn_module_kernel: int = 31, + normalize_before: bool = True, + vgg_frontend: bool = False, + dynamic_chunk_training: bool = False, + short_chunk_threshold: float = 0.75, + short_chunk_size: int = 25, + num_left_chunks: int = -1, + causal: bool = False, + ) -> None: + super(Conformer, self).__init__( + num_features=num_features, + output_dim=output_dim, + subsampling_factor=subsampling_factor, + d_model=d_model, + nhead=nhead, + dim_feedforward=dim_feedforward, + num_encoder_layers=num_encoder_layers, + dropout=dropout, + normalize_before=normalize_before, + vgg_frontend=vgg_frontend, + ) + + self.encoder_layers = num_encoder_layers + self.d_model = d_model + self.cnn_module_kernel = cnn_module_kernel + self.causal = causal + + self.dynamic_chunk_training = dynamic_chunk_training + self.short_chunk_threshold = short_chunk_threshold + self.short_chunk_size = short_chunk_size + self.num_left_chunks = num_left_chunks + + self.encoder_pos = RelPositionalEncoding(d_model, dropout) + + encoder_layer = ConformerEncoderLayer( + d_model, + nhead, + dim_feedforward, + dropout, + cnn_module_kernel, + normalize_before, + causal, + ) + self.encoder = ConformerEncoder(encoder_layer, num_encoder_layers) + self.normalize_before = normalize_before + if self.normalize_before: + self.after_norm = nn.LayerNorm(d_model) + else: + # Note: TorchScript detects that self.after_norm could be used inside forward() + # and throws an error without this change. + self.after_norm = identity + + self._init_state: List[torch.Tensor] = [torch.empty(0)] + + def forward( + self, x: torch.Tensor, x_lens: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + x: + The input tensor. Its shape is (batch_size, seq_len, feature_dim). + x_lens: + A tensor of shape (batch_size,) containing the number of frames in + `x` before padding. + Returns: + Return a tuple containing 2 tensors: + - logits, its shape is (batch_size, output_seq_len, output_dim) + - logit_lens, a tensor of shape (batch_size,) containing the number + of frames in `logits` before padding. + """ + x = self.encoder_embed(x) + x, pos_emb = self.encoder_pos(x) + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + + # Caution: We assume the subsampling factor is 4! + + # lengths = ((x_lens - 1) // 2 - 1) // 2 # issue an warning + # + # Note: rounding_mode in torch.div() is available only in torch >= 1.8.0 + lengths = (((x_lens - 1) >> 1) - 1) >> 1 + + assert x.size(0) == lengths.max().item() + + src_key_padding_mask = make_pad_mask(lengths) + + if self.dynamic_chunk_training: + assert ( + self.causal + ), "Causal convolution is required for streaming conformer." 
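# Dynamic chunk-size sampling, with illustrative numbers (sketch only): for
# max_len = 200, short_chunk_threshold = 0.75 and short_chunk_size = 25, a
# draw of chunk_size = 180 (> 150) keeps full context for this batch, while
# a draw of 60 becomes 60 % 25 + 1 = 11 frames of chunked context.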
+ max_len = x.size(0) + chunk_size = torch.randint(1, max_len, (1,)).item() + if chunk_size > (max_len * self.short_chunk_threshold): + chunk_size = max_len + else: + chunk_size = chunk_size % self.short_chunk_size + 1 + + mask = ~subsequent_chunk_mask( + size=x.size(0), + chunk_size=chunk_size, + num_left_chunks=self.num_left_chunks, + device=x.device, + ) + x = self.encoder( + x, pos_emb, mask=mask, src_key_padding_mask=src_key_padding_mask + ) # (T, N, C) + else: + x = self.encoder( + x, pos_emb, mask=None, src_key_padding_mask=src_key_padding_mask + ) # (T, N, C) + + if self.normalize_before: + x = self.after_norm(x) + + logits = self.encoder_output_layer(x) + logits = logits.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + return logits, lengths + + @torch.jit.export + def get_init_state( + self, left_context: int, device: torch.device + ) -> List[torch.Tensor]: + """Return the initial cache state of the model. + + Args: + left_context: The left context size (in frames after subsampling). + + Returns: + Return the initial state of the model, it is a list containing two + tensors, the first one is the cache for attentions which has a shape + of (num_encoder_layers, left_context, encoder_dim), the second one + is the cache of conv_modules which has a shape of + (num_encoder_layers, cnn_module_kernel - 1, encoder_dim). + + NOTE: the returned tensors are on the given device. + """ + if ( + len(self._init_state) == 2 + and self._init_state[0].size(1) == left_context + ): + # Note: It is OK to share the init state as it is + # not going to be modified by the model + return self._init_state + + init_states: List[torch.Tensor] = [ + torch.zeros( + ( + self.encoder_layers, + left_context, + self.d_model, + ), + device=device, + ), + torch.zeros( + ( + self.encoder_layers, + self.cnn_module_kernel - 1, + self.d_model, + ), + device=device, + ), + ] + + self._init_state = init_states + + return init_states + + @torch.jit.export + def streaming_forward( + self, + x: torch.Tensor, + x_lens: torch.Tensor, + states: Optional[List[torch.Tensor]] = None, + processed_lens: Optional[Tensor] = None, + left_context: int = 64, + right_context: int = 0, + chunk_size: int = 16, + simulate_streaming: bool = False, + ) -> Tuple[torch.Tensor, torch.Tensor, List[torch.Tensor]]: + """ + Args: + x: + The input tensor. Its shape is (batch_size, seq_len, feature_dim). + x_lens: + A tensor of shape (batch_size,) containing the number of frames in + `x` before padding. + states: + The decode states for previous frames which contains the cached data. + It has two elements, the first element is the attn_cache which has + a shape of (encoder_layers, left_context, batch, attention_dim), + the second element is the conv_cache which has a shape of + (encoder_layers, cnn_module_kernel-1, batch, conv_dim). + Note: states will be modified in this function. + processed_lens: + How many frames (after subsampling) have been processed for each sequence. + left_context: + How many previous frames the attention can see in current chunk. + Note: It's not that each individual frame has `left_context` frames + of left context, some have more. + right_context: + How many future frames the attention can see in current chunk. + Note: It's not that each individual frame has `right_context` frames + of right context, some have more. + chunk_size: + The chunk size for decoding, this will be used to simulate streaming + decoding using masking. 
+ simulate_streaming: + If setting True, it will use a masking strategy to simulate streaming + fashion (i.e. every chunk data only see limited left context and + right context). The whole sequence is supposed to be send at a time + When using simulate_streaming. + Returns: + Return a tuple containing 2 tensors: + - logits, its shape is (batch_size, output_seq_len, output_dim) + - logit_lens, a tensor of shape (batch_size,) containing the number + of frames in `logits` before padding. + - states, the updated states(i.e. caches) including the information + of current chunk. + """ + + # x: [N, T, C] + # Caution: We assume the subsampling factor is 4! + + # lengths = ((x_lens - 1) // 2 - 1) // 2 # issue an warning + # + # Note: rounding_mode in torch.div() is available only in torch >= 1.8.0 + lengths = (((x_lens - 1) >> 1) - 1) >> 1 + + if not simulate_streaming: + assert states is not None + assert processed_lens is not None + assert ( + len(states) == 2 + and states[0].shape + == (self.encoder_layers, left_context, x.size(0), self.d_model) + and states[1].shape + == ( + self.encoder_layers, + self.cnn_module_kernel - 1, + x.size(0), + self.d_model, + ) + ), f"""The length of states MUST be equal to 2, and the shape of + first element should be {(self.encoder_layers, left_context, x.size(0), self.d_model)}, + given {states[0].shape}. the shape of second element should be + {(self.encoder_layers, self.cnn_module_kernel - 1, x.size(0), self.d_model)}, + given {states[1].shape}.""" + + lengths -= 2 # we will cut off 1 frame on each side of encoder_embed output + src_key_padding_mask = make_pad_mask(lengths) + + processed_mask = torch.arange(left_context, device=x.device).expand( + x.size(0), left_context + ) + processed_lens = processed_lens.view(x.size(0), 1) + processed_mask = (processed_lens <= processed_mask).flip(1) + + src_key_padding_mask = torch.cat( + [processed_mask, src_key_padding_mask], dim=1 + ) + + embed = self.encoder_embed(x) + + # cut off 1 frame on each size of embed as they see the padding + # value which causes a training and decoding mismatch. + embed = embed[:, 1:-1, :] + + embed, pos_enc = self.encoder_pos(embed, left_context) + embed = embed.permute(1, 0, 2) # (B, T, F) -> (T, B, F) + + x, states = self.encoder.chunk_forward( + embed, + pos_enc, + src_key_padding_mask=src_key_padding_mask, + states=states, + left_context=left_context, + right_context=right_context, + ) # (T, B, F) + else: + assert states is None + states = [] # just to make torch.script.jit happy + src_key_padding_mask = make_pad_mask(lengths) + x = self.encoder_embed(x) + x, pos_emb = self.encoder_pos(x) + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + + assert x.size(0) == lengths.max().item() + + num_left_chunks = -1 + if left_context >= 0: + assert left_context % chunk_size == 0 + num_left_chunks = left_context // chunk_size + + mask = ~subsequent_chunk_mask( + size=x.size(0), + chunk_size=chunk_size, + num_left_chunks=num_left_chunks, + device=x.device, + ) + x = self.encoder( + x, + pos_emb, + mask=mask, + src_key_padding_mask=src_key_padding_mask, + ) # (T, N, C) + + if self.normalize_before: + x = self.after_norm(x) + + logits = self.encoder_output_layer(x) + logits = logits.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + return logits, lengths, states + + +class ConformerEncoderLayer(nn.Module): + """ + ConformerEncoderLayer is made up of self-attn, feedforward and convolution networks. 
+ See: "Conformer: Convolution-augmented Transformer for Speech Recognition" + + Args: + d_model: the number of expected features in the input (required). + nhead: the number of heads in the multiheadattention models (required). + dim_feedforward: the dimension of the feedforward network model (default=2048). + dropout: the dropout value (default=0.1). + cnn_module_kernel (int): Kernel size of convolution module. + normalize_before (bool): whether to use layer_norm before the first block. + causal (bool): Whether to use causal convolution in conformer encoder + layer. This MUST be True when using dynamic_chunk_training and streaming decoding. + + Examples:: + >>> encoder_layer = ConformerEncoderLayer(d_model=512, nhead=8) + >>> src = torch.rand(10, 32, 512) + >>> pos_emb = torch.rand(32, 19, 512) + >>> out = encoder_layer(src, pos_emb) + """ + + def __init__( + self, + d_model: int, + nhead: int, + dim_feedforward: int = 2048, + dropout: float = 0.1, + cnn_module_kernel: int = 31, + normalize_before: bool = True, + causal: bool = False, + ) -> None: + super(ConformerEncoderLayer, self).__init__() + self.self_attn = RelPositionMultiheadAttention( + d_model, nhead, dropout=0.0 + ) + + self.feed_forward = nn.Sequential( + nn.Linear(d_model, dim_feedforward), + Swish(), + nn.Dropout(dropout), + nn.Linear(dim_feedforward, d_model), + ) + + self.feed_forward_macaron = nn.Sequential( + nn.Linear(d_model, dim_feedforward), + Swish(), + nn.Dropout(dropout), + nn.Linear(dim_feedforward, d_model), + ) + + self.conv_module = ConvolutionModule( + d_model, cnn_module_kernel, causal=causal + ) + + self.norm_ff_macaron = nn.LayerNorm( + d_model + ) # for the macaron style FNN module + self.norm_ff = nn.LayerNorm(d_model) # for the FNN module + self.norm_mha = nn.LayerNorm(d_model) # for the MHA module + + self.ff_scale = 0.5 + + self.norm_conv = nn.LayerNorm(d_model) # for the CNN module + self.norm_final = nn.LayerNorm( + d_model + ) # for the final output of the block + + self.dropout = nn.Dropout(dropout) + + self.normalize_before = normalize_before + + def forward( + self, + src: Tensor, + pos_emb: Tensor, + src_mask: Optional[Tensor] = None, + src_key_padding_mask: Optional[Tensor] = None, + ) -> Tensor: + """ + Pass the input through the encoder layer. + + Args: + src: the sequence to the encoder layer (required). + pos_emb: Positional embedding tensor (required). + src_mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional). + Shape: + src: (S, N, E). + pos_emb: (N, 2*S-1, E). + src_mask: (S, S). + src_key_padding_mask: (N, S). 
+ S is the source sequence length, N is the batch size, E is the feature number + """ + # macaron style feed forward module + residual = src + if self.normalize_before: + src = self.norm_ff_macaron(src) + src = residual + self.ff_scale * self.dropout( + self.feed_forward_macaron(src) + ) + if not self.normalize_before: + src = self.norm_ff_macaron(src) + + # multi-headed self-attention module + residual = src + if self.normalize_before: + src = self.norm_mha(src) + + src_att = self.self_attn( + src, + src, + src, + pos_emb=pos_emb, + attn_mask=src_mask, + key_padding_mask=src_key_padding_mask, + )[0] + src = residual + self.dropout(src_att) + if not self.normalize_before: + src = self.norm_mha(src) + + # convolution module + residual = src + if self.normalize_before: + src = self.norm_conv(src) + + src, _ = self.conv_module(src) + src = residual + self.dropout(src) + + if not self.normalize_before: + src = self.norm_conv(src) + + # feed forward module + residual = src + if self.normalize_before: + src = self.norm_ff(src) + src = residual + self.ff_scale * self.dropout(self.feed_forward(src)) + if not self.normalize_before: + src = self.norm_ff(src) + + if self.normalize_before: + src = self.norm_final(src) + + return src + + @torch.jit.export + def chunk_forward( + self, + src: Tensor, + pos_emb: Tensor, + states: List[Tensor], + src_mask: Optional[Tensor] = None, + src_key_padding_mask: Optional[Tensor] = None, + left_context: int = 0, + right_context: int = 0, + ) -> Tuple[Tensor, List[Tensor]]: + """ + Pass the input through the encoder layer. + + Args: + src: the sequence to the encoder layer (required). + pos_emb: Positional embedding tensor (required). + states: + The decode states for previous frames which contains the cached data. + It has two elements, the first element is the attn_cache which has + a shape of (left_context, batch, attention_dim), + the second element is the conv_cache which has a shape of + (cnn_module_kernel-1, batch, conv_dim). + Note: states will be modified in this function. + src_mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional). + left_context: + How many previous frames the attention can see in current chunk. + Note: It's not that each individual frame has `left_context` frames + of left context, some have more. + right_context: + How many future frames the attention can see in current chunk. + Note: It's not that each individual frame has `right_context` frames + of right context, some have more. + Shape: + src: (S, N, E). + pos_emb: (N, 2*(S+left_context)-1, E). + src_mask: (S, S). + src_key_padding_mask: (N, S). + S is the source sequence length, N is the batch size, E is the feature number + """ + + # macaron style feed forward module + residual = src + if self.normalize_before: + src = self.norm_ff_macaron(src) + src = residual + self.ff_scale * self.dropout( + self.feed_forward_macaron(src) + ) + if not self.normalize_before: + src = self.norm_ff_macaron(src) + + # multi-headed self-attention module + residual = src + if self.normalize_before: + src = self.norm_mha(src) + + # We put the attention cache this level (i.e. before linear transformation) + # to save memory consumption, when decoding in streaming fashion, the + # batch size would be thousands (for 32GB machine), if we cache key & val + # separately, it needs extra several GB memory. + # TODO(WeiKang): Move cache to self_attn level (i.e. cache key & val + # separately) if needed. 
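# Cache shapes at this point, per the docstring above (sketch): states[0] is
# (left_context, batch, d_model) of past frames and src is (chunk_len, batch,
# d_model), so the concatenated `key` below is (left_context + chunk_len,
# batch, d_model); the slice that follows keeps the most recent left_context
# frames (skipping any right-context lookahead) as the new cache.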
+ key = torch.cat([states[0], src], dim=0) + val = key + if right_context > 0: + states[0] = key[ + -(left_context + right_context) : -right_context, ... # noqa + ] + else: + states[0] = key[-left_context:, ...] + + src_att = self.self_attn( + src, + key, + val, + pos_emb=pos_emb, + attn_mask=src_mask, + key_padding_mask=src_key_padding_mask, + left_context=left_context, + )[0] + src = residual + self.dropout(src_att) + if not self.normalize_before: + src = self.norm_mha(src) + + # convolution module + residual = src + if self.normalize_before: + src = self.norm_conv(src) + + src, conv_cache = self.conv_module( + src, states[1], right_context=right_context + ) + states[1] = conv_cache + src = residual + self.dropout(src) + + if not self.normalize_before: + src = self.norm_conv(src) + + # feed forward module + residual = src + if self.normalize_before: + src = self.norm_ff(src) + src = residual + self.ff_scale * self.dropout(self.feed_forward(src)) + if not self.normalize_before: + src = self.norm_ff(src) + + if self.normalize_before: + src = self.norm_final(src) + + return src, states + + +class ConformerEncoder(nn.Module): + r"""ConformerEncoder is a stack of N encoder layers + + Args: + encoder_layer: an instance of the ConformerEncoderLayer() class (required). + num_layers: the number of sub-encoder-layers in the encoder (required). + + Examples:: + >>> encoder_layer = ConformerEncoderLayer(d_model=512, nhead=8) + >>> conformer_encoder = ConformerEncoder(encoder_layer, num_layers=6) + >>> src = torch.rand(10, 32, 512) + >>> pos_emb = torch.rand(32, 19, 512) + >>> out = conformer_encoder(src, pos_emb) + """ + + def __init__(self, encoder_layer: nn.Module, num_layers: int) -> None: + super().__init__() + self.layers = nn.ModuleList( + [copy.deepcopy(encoder_layer) for i in range(num_layers)] + ) + self.num_layers = num_layers + + def forward( + self, + src: Tensor, + pos_emb: Tensor, + mask: Optional[Tensor] = None, + src_key_padding_mask: Optional[Tensor] = None, + ) -> Tensor: + r"""Pass the input through the encoder layers in turn. + + Args: + src: the sequence to the encoder (required). + pos_emb: Positional embedding tensor (required). + mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional). + Shape: + + Shape: + src: (S, N, E). + pos_emb: (N, 2*S-1, E). + mask: (S, S). + src_key_padding_mask: (N, S). + S is the source sequence length, T is the target sequence length, N is the batch size, E is the feature number + + """ + output = src + + for layer_index, mod in enumerate(self.layers): + output = mod( + output, + pos_emb, + src_mask=mask, + src_key_padding_mask=src_key_padding_mask, + ) + return output + + @torch.jit.export + def chunk_forward( + self, + src: Tensor, + pos_emb: Tensor, + states: List[Tensor], + mask: Optional[Tensor] = None, + src_key_padding_mask: Optional[Tensor] = None, + left_context: int = 0, + right_context: int = 0, + ) -> Tuple[Tensor, List[Tensor]]: + r"""Pass the input through the encoder layers in turn. + + Args: + src: the sequence to the encoder (required). + pos_emb: Positional embedding tensor (required). + states: + The decode states for previous frames which contains the cached data. + It has two elements, the first element is the attn_cache which has + a shape of (encoder_layers, left_context, batch, attention_dim), + the second element is the conv_cache which has a shape of + (encoder_layers, cnn_module_kernel-1, batch, conv_dim). + Note: states will be modified in this function. 
+ mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional). + left_context: + How many previous frames the attention can see in current chunk. + Note: It's not that each individual frame has `left_context` frames + of left context, some have more. + right_context: + How many future frames the attention can see in current chunk. + Note: It's not that each individual frame has `right_context` frames + of right context, some have more. + Shape: + src: (S, N, E). + pos_emb: (N, 2*(S+left_context)-1, E). + mask: (S, S). + src_key_padding_mask: (N, S). + S is the source sequence length, T is the target sequence length, N is the batch size, E is the feature number + + """ + assert not self.training + output = src + + for layer_index, mod in enumerate(self.layers): + cache = [states[0][layer_index], states[1][layer_index]] + output, cache = mod.chunk_forward( + output, + pos_emb, + states=cache, + src_mask=mask, + src_key_padding_mask=src_key_padding_mask, + left_context=left_context, + right_context=right_context, + ) + states[0][layer_index] = cache[0] + states[1][layer_index] = cache[1] + + return output, states + + +class RelPositionalEncoding(torch.nn.Module): + """Relative positional encoding module. + + See : Appendix B in "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" + Modified from https://github.com/espnet/espnet/blob/master/espnet/nets/pytorch_backend/transformer/embedding.py + + Args: + d_model: Embedding dimension. + dropout_rate: Dropout rate. + max_len: Maximum input length. + + """ + + def __init__( + self, d_model: int, dropout_rate: float, max_len: int = 5000 + ) -> None: + """Construct an PositionalEncoding object.""" + super(RelPositionalEncoding, self).__init__() + self.d_model = d_model + self.xscale = math.sqrt(self.d_model) + self.dropout = torch.nn.Dropout(p=dropout_rate) + self.pe = None + self.extend_pe(torch.tensor(0.0).expand(1, max_len)) + + def extend_pe(self, x: Tensor, left_context: int = 0) -> None: + """Reset the positional encodings.""" + x_size_1 = x.size(1) + left_context + if self.pe is not None: + # self.pe contains both positive and negative parts + # the length of self.pe is 2 * input_len - 1 + if self.pe.size(1) >= x_size_1 * 2 - 1: + # Note: TorchScript doesn't implement operator== for torch.Device + if self.pe.dtype != x.dtype or str(self.pe.device) != str( + x.device + ): + self.pe = self.pe.to(dtype=x.dtype, device=x.device) + return + # Suppose `i` means to the position of query vector and `j` means the + # position of key vector. We use position relative positions when keys + # are to the left (i>j) and negative relative positions otherwise (i Tuple[Tensor, Tensor]: + """Add positional encoding. + + Args: + x (torch.Tensor): Input tensor (batch, time, `*`). + left_context (int): left context (in frames) used during streaming decoding. + this is used only in real streaming decoding, in other circumstances, + it MUST be 0. + Returns: + torch.Tensor: Encoded tensor (batch, time, `*`). + torch.Tensor: Encoded tensor (batch, 2*time-1, `*`). 
+ + """ + self.extend_pe(x, left_context) + x = x * self.xscale + x_size_1 = x.size(1) + left_context + pos_emb = self.pe[ + :, + self.pe.size(1) // 2 + - x_size_1 + + 1 : self.pe.size(1) // 2 # noqa E203 + + x.size(1), + ] + return self.dropout(x), self.dropout(pos_emb) + + +class RelPositionMultiheadAttention(nn.Module): + r"""Multi-Head Attention layer with relative position encoding + + See reference: "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" + + Args: + embed_dim: total dimension of the model. + num_heads: parallel attention heads. + dropout: a Dropout layer on attn_output_weights. Default: 0.0. + + Examples:: + + >>> rel_pos_multihead_attn = RelPositionMultiheadAttention(embed_dim, num_heads) + >>> attn_output, attn_output_weights = multihead_attn(query, key, value, pos_emb) + """ + + def __init__( + self, + embed_dim: int, + num_heads: int, + dropout: float = 0.0, + ) -> None: + super(RelPositionMultiheadAttention, self).__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = embed_dim // num_heads + assert ( + self.head_dim * num_heads == self.embed_dim + ), "embed_dim must be divisible by num_heads" + + self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True) + self.out_proj = nn.Linear(embed_dim, embed_dim, bias=True) + + # linear transformation for positional encoding. + self.linear_pos = nn.Linear(embed_dim, embed_dim, bias=False) + # these two learnable bias are used in matrix c and matrix d + # as described in "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" Section 3.3 + self.pos_bias_u = nn.Parameter(torch.Tensor(num_heads, self.head_dim)) + self.pos_bias_v = nn.Parameter(torch.Tensor(num_heads, self.head_dim)) + + self._reset_parameters() + + def _reset_parameters(self) -> None: + nn.init.xavier_uniform_(self.in_proj.weight) + nn.init.constant_(self.in_proj.bias, 0.0) + nn.init.constant_(self.out_proj.bias, 0.0) + + nn.init.xavier_uniform_(self.pos_bias_u) + nn.init.xavier_uniform_(self.pos_bias_v) + + def forward( + self, + query: Tensor, + key: Tensor, + value: Tensor, + pos_emb: Tensor, + key_padding_mask: Optional[Tensor] = None, + need_weights: bool = True, + attn_mask: Optional[Tensor] = None, + left_context: int = 0, + ) -> Tuple[Tensor, Optional[Tensor]]: + r""" + Args: + query, key, value: map a query and a set of key-value pairs to an output. + pos_emb: Positional embedding tensor + key_padding_mask: if provided, specified padding elements in the key will + be ignored by the attention. When given a binary mask and a value is True, + the corresponding value on the attention layer will be ignored. When given + a byte mask and a value is non-zero, the corresponding value on the attention + layer will be ignored + need_weights: output attn_output_weights. + attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all + the batches while a 3D mask allows to specify a different mask for the entries of each batch. + left_context (int): left context (in frames) used during streaming decoding. + this is used only in real streaming decoding, in other circumstances, + it MUST be 0. + + Shape: + - Inputs: + - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is + the embedding dimension. + - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is + the embedding dimension. 
+ - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - pos_emb: :math:`(N, 2*L-1, E)` where L is the target sequence length, N is the batch size, E is + the embedding dimension. + - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length. + If a ByteTensor is provided, the non-zero positions will be ignored while the position + with the zero positions will be unchanged. If a BoolTensor is provided, the positions with the + value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged. + - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length. + 3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length, + S is the source sequence length. attn_mask ensure that position i is allowed to attend the unmasked + positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend + while the zero positions will be unchanged. If a BoolTensor is provided, positions with ``True`` + is not allowed to attend while ``False`` values will be unchanged. If a FloatTensor + is provided, it will be added to the attention weight. + + - Outputs: + - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, + E is the embedding dimension. + - attn_output_weights: :math:`(N, L, S)` where N is the batch size, + L is the target sequence length, S is the source sequence length. + """ + return self.multi_head_attention_forward( + query, + key, + value, + pos_emb, + self.embed_dim, + self.num_heads, + self.in_proj.weight, + self.in_proj.bias, + self.dropout, + self.out_proj.weight, + self.out_proj.bias, + training=self.training, + key_padding_mask=key_padding_mask, + need_weights=need_weights, + attn_mask=attn_mask, + left_context=left_context, + ) + + def rel_shift(self, x: Tensor, left_context: int = 0) -> Tensor: + """Compute relative positional encoding. + + Args: + x: Input tensor (batch, head, time1, 2*time1-1). + time1 means the length of query vector. + left_context (int): left context (in frames) used during streaming decoding. + this is used only in real streaming decoding, in other circumstances, + it MUST be 0. + + Returns: + Tensor: tensor of shape (batch, head, time1, time2) + (note: time2 has the same value as time1, but it is for + the key, while time1 is for the query). 
+ """ + (batch_size, num_heads, time1, n) = x.shape + time2 = time1 + left_context + + assert ( + n == left_context + 2 * time1 - 1 + ), f"{n} == {left_context} + 2 * {time1} - 1" + + # Note: TorchScript requires explicit arg for stride() + batch_stride = x.stride(0) + head_stride = x.stride(1) + time1_stride = x.stride(2) + n_stride = x.stride(3) + return x.as_strided( + (batch_size, num_heads, time1, time2), + (batch_stride, head_stride, time1_stride - n_stride, n_stride), + storage_offset=n_stride * (time1 - 1), + ) + + def multi_head_attention_forward( + self, + query: Tensor, + key: Tensor, + value: Tensor, + pos_emb: Tensor, + embed_dim_to_check: int, + num_heads: int, + in_proj_weight: Tensor, + in_proj_bias: Tensor, + dropout_p: float, + out_proj_weight: Tensor, + out_proj_bias: Tensor, + training: bool = True, + key_padding_mask: Optional[Tensor] = None, + need_weights: bool = True, + attn_mask: Optional[Tensor] = None, + left_context: int = 0, + ) -> Tuple[Tensor, Optional[Tensor]]: + r""" + Args: + query, key, value: map a query and a set of key-value pairs to an output. + pos_emb: Positional embedding tensor + embed_dim_to_check: total dimension of the model. + num_heads: parallel attention heads. + in_proj_weight, in_proj_bias: input projection weight and bias. + dropout_p: probability of an element to be zeroed. + out_proj_weight, out_proj_bias: the output projection weight and bias. + training: apply dropout if is ``True``. + key_padding_mask: if provided, specified padding elements in the key will + be ignored by the attention. This is an binary mask. When the value is True, + the corresponding value on the attention layer will be filled with -inf. + need_weights: output attn_output_weights. + attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all + the batches while a 3D mask allows to specify a different mask for the entries of each batch. + left_context (int): left context (in frames) used during streaming decoding. + this is used only in real streaming decoding, in other circumstances, + it MUST be 0. + + Shape: + Inputs: + - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is + the embedding dimension. + - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is + the embedding dimension. + - pos_emb: :math:`(N, 2*L-1, E)` or :math:`(1, 2*L-1, E)` where L is the target sequence + length, N is the batch size, E is the embedding dimension. + - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length. + If a ByteTensor is provided, the non-zero positions will be ignored while the zero positions + will be unchanged. If a BoolTensor is provided, the positions with the + value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged. + - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length. + 3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length, + S is the source sequence length. attn_mask ensures that position i is allowed to attend the unmasked + positions. If a ByteTensor is provided, the non-zero positions are not allowed to attend + while the zero positions will be unchanged. 
If a BoolTensor is provided, positions with ``True`` + are not allowed to attend while ``False`` values will be unchanged. If a FloatTensor + is provided, it will be added to the attention weight. + + Outputs: + - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, + E is the embedding dimension. + - attn_output_weights: :math:`(N, L, S)` where N is the batch size, + L is the target sequence length, S is the source sequence length. + """ + + tgt_len, bsz, embed_dim = query.size() + assert embed_dim == embed_dim_to_check + assert key.size(0) == value.size(0) and key.size(1) == value.size(1) + + head_dim = embed_dim // num_heads + assert ( + head_dim * num_heads == embed_dim + ), "embed_dim must be divisible by num_heads" + scaling = float(head_dim) ** -0.5 + + if torch.equal(query, key) and torch.equal(key, value): + # self-attention + q, k, v = nn.functional.linear( + query, in_proj_weight, in_proj_bias + ).chunk(3, dim=-1) + + elif torch.equal(key, value): + # encoder-decoder attention + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = nn.functional.linear(query, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + k, v = nn.functional.linear(key, _w, _b).chunk(2, dim=-1) + + else: + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = nn.functional.linear(query, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = embed_dim * 2 + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + k = nn.functional.linear(key, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim * 2 + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + v = nn.functional.linear(value, _w, _b) + + if attn_mask is not None: + assert ( + attn_mask.dtype == torch.float32 + or attn_mask.dtype == torch.float64 + or attn_mask.dtype == torch.float16 + or attn_mask.dtype == torch.uint8 + or attn_mask.dtype == torch.bool + ), "Only float, byte, and bool types are supported for attn_mask, not {}".format( + attn_mask.dtype + ) + if attn_mask.dtype == torch.uint8: + warnings.warn( + "Byte tensor for attn_mask is deprecated. Use bool tensor instead." + ) + attn_mask = attn_mask.to(torch.bool) + + if attn_mask.dim() == 2: + attn_mask = attn_mask.unsqueeze(0) + if list(attn_mask.size()) != [1, query.size(0), key.size(0)]: + raise RuntimeError( + "The size of the 2D attn_mask is not correct." + ) + elif attn_mask.dim() == 3: + if list(attn_mask.size()) != [ + bsz * num_heads, + query.size(0), + key.size(0), + ]: + raise RuntimeError( + "The size of the 3D attn_mask is not correct." + ) + else: + raise RuntimeError( + "attn_mask's dimension {} is not supported".format( + attn_mask.dim() + ) + ) + # attn_mask's dim is 3 now. 
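A quick, self-contained sketch of what the fused input projection above computes; module and tensor names here are illustrative, not part of this patch. In the self-attention fast path, one matmul is chunked into q, k, v, which gives the same result as the row-sliced branches used for cross-attention:

    import torch
    import torch.nn as nn

    embed_dim = 8  # toy size, for illustration only
    in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)  # fused W_q|W_k|W_v
    x = torch.randn(5, 2, embed_dim)  # (L, N, E); query == key == value
    q, k, v = in_proj(x).chunk(3, dim=-1)
    # Equivalent to projecting with in_proj.weight rows [0:E], [E:2E], [2E:3E],
    # which is what the cross-attention branches above do slice by slice.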
+ + # convert ByteTensor key_padding_mask to bool + if ( + key_padding_mask is not None + and key_padding_mask.dtype == torch.uint8 + ): + warnings.warn( + "Byte tensor for key_padding_mask is deprecated. Use bool tensor instead." + ) + key_padding_mask = key_padding_mask.to(torch.bool) + + q = q.contiguous().view(tgt_len, bsz, num_heads, head_dim) + k = k.contiguous().view(-1, bsz, num_heads, head_dim) + v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + + src_len = k.size(0) + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz, "{} == {}".format( + key_padding_mask.size(0), bsz + ) + assert key_padding_mask.size(1) == src_len, "{} == {}".format( + key_padding_mask.size(1), src_len + ) + + q = q.transpose(0, 1) # (batch, time1, head, d_k) + + pos_emb_bsz = pos_emb.size(0) + assert pos_emb_bsz in (1, bsz) # actually it is 1 + p = self.linear_pos(pos_emb).view(pos_emb_bsz, -1, num_heads, head_dim) + + # (batch, 2*time1, head, d_k) --> (batch, head, d_k, 2*time -1) + p = p.permute(0, 2, 3, 1) + + q_with_bias_u = (q + self.pos_bias_u).transpose( + 1, 2 + ) # (batch, head, time1, d_k) + + q_with_bias_v = (q + self.pos_bias_v).transpose( + 1, 2 + ) # (batch, head, time1, d_k) + + # compute attention score + # first compute matrix a and matrix c + # as described in "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" Section 3.3 + k = k.permute(1, 2, 3, 0) # (batch, head, d_k, time2) + matrix_ac = torch.matmul( + q_with_bias_u, k + ) # (batch, head, time1, time2) + + # compute matrix b and matrix d + matrix_bd = torch.matmul( + q_with_bias_v, p + ) # (batch, head, time1, 2*time1-1) + + matrix_bd = self.rel_shift(matrix_bd, left_context=left_context) + + attn_output_weights = ( + matrix_ac + matrix_bd + ) * scaling # (batch, head, time1, time2) + + attn_output_weights = attn_output_weights.view( + bsz * num_heads, tgt_len, -1 + ) + + assert list(attn_output_weights.size()) == [ + bsz * num_heads, + tgt_len, + src_len, + ] + + if attn_mask is not None: + if attn_mask.dtype == torch.bool: + attn_output_weights.masked_fill_(attn_mask, float("-inf")) + else: + attn_output_weights += attn_mask + + if key_padding_mask is not None: + attn_output_weights = attn_output_weights.view( + bsz, num_heads, tgt_len, src_len + ) + attn_output_weights = attn_output_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + float("-inf"), + ) + attn_output_weights = attn_output_weights.view( + bsz * num_heads, tgt_len, src_len + ) + + attn_output_weights = nn.functional.softmax(attn_output_weights, dim=-1) + + # If we are using dynamic_chunk_training and setting a limited + # num_left_chunks, the attention may only see the padding values which + # will also be masked out by `key_padding_mask`, at this circumstances, + # the whole column of `attn_output_weights` will be `-inf` + # (i.e. be `nan` after softmax), so, we fill `0.0` at the masking + # positions to avoid invalid loss value below. 
+        if (
+            attn_mask is not None
+            and attn_mask.dtype == torch.bool
+            and key_padding_mask is not None
+        ):
+            combined_mask = attn_mask.unsqueeze(0) | key_padding_mask.unsqueeze(
+                1
+            ).unsqueeze(2)
+            attn_output_weights = attn_output_weights.view(
+                bsz, num_heads, tgt_len, src_len
+            )
+            attn_output_weights = attn_output_weights.masked_fill(
+                combined_mask, 0.0
+            )
+            attn_output_weights = attn_output_weights.view(
+                bsz * num_heads, tgt_len, src_len
+            )
+
+        attn_output_weights = nn.functional.dropout(
+            attn_output_weights, p=dropout_p, training=training
+        )
+
+        attn_output = torch.bmm(attn_output_weights, v)
+        assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim]
+        attn_output = (
+            attn_output.transpose(0, 1)
+            .contiguous()
+            .view(tgt_len, bsz, embed_dim)
+        )
+        attn_output = nn.functional.linear(
+            attn_output, out_proj_weight, out_proj_bias
+        )
+
+        if need_weights:
+            # average attention weights over heads
+            attn_output_weights = attn_output_weights.view(
+                bsz, num_heads, tgt_len, src_len
+            )
+            return attn_output, attn_output_weights.sum(dim=1) / num_heads
+        else:
+            return attn_output, None
+
+
+class ConvolutionModule(nn.Module):
+    """ConvolutionModule in Conformer model.
+    Modified from https://github.com/espnet/espnet/blob/master/espnet/nets/pytorch_backend/conformer/convolution.py
+
+    Args:
+        channels (int): The number of channels of conv layers.
+        kernel_size (int): Kernel size of conv layers.
+        bias (bool): Whether to use bias in conv layers (default=True).
+        causal (bool): Whether to use causal convolution.
+    """
+
+    def __init__(
+        self,
+        channels: int,
+        kernel_size: int,
+        bias: bool = True,
+        causal: bool = False,
+    ) -> None:
+        """Construct a ConvolutionModule object."""
+        super(ConvolutionModule, self).__init__()
+        # kernel_size should be an odd number for 'SAME' padding
+        assert (kernel_size - 1) % 2 == 0
+        self.causal = causal
+
+        self.pointwise_conv1 = nn.Conv1d(
+            channels,
+            2 * channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias=bias,
+        )
+
+        self.lorder = kernel_size - 1
+        padding = (kernel_size - 1) // 2
+        if self.causal:
+            padding = 0
+
+        self.depthwise_conv = nn.Conv1d(
+            channels,
+            channels,
+            kernel_size,
+            stride=1,
+            padding=padding,
+            groups=channels,
+            bias=bias,
+        )
+        self.norm = nn.LayerNorm(channels)
+        self.pointwise_conv2 = nn.Conv1d(
+            channels,
+            channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias=bias,
+        )
+        self.activation = Swish()
+
+    def forward(
+        self,
+        x: Tensor,
+        cache: Optional[Tensor] = None,
+        right_context: int = 0,
+    ) -> Tuple[Tensor, Tensor]:
+        """Compute convolution module.
+
+        Args:
+            x: Input tensor (#time, batch, channels).
+
+        Returns:
+            Tensor: Output tensor (#time, batch, channels).
+
+        """
+        # exchange the temporal dimension and the feature dimension
+        x = x.permute(1, 2, 0)  # (#batch, channels, time).
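For reference, the GLU gating performed just below by pointwise_conv1 plus nn.functional.glu can be written out by hand; a minimal sketch with made-up shapes, not part of the module:

    import torch
    import torch.nn as nn

    x = torch.randn(2, 4, 10)            # (batch, channels, time), toy shapes
    pw = nn.Conv1d(4, 8, kernel_size=1)  # pointwise conv doubles the channels
    a, b = pw(x).chunk(2, dim=1)         # split back into two halves
    y = a * torch.sigmoid(b)             # equals nn.functional.glu(pw(x), dim=1)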
+
+        # GLU mechanism
+        x = self.pointwise_conv1(x)  # (batch, 2*channels, time)
+        x = nn.functional.glu(x, dim=1)  # (batch, channels, time)
+
+        # 1D Depthwise Conv
+        if self.causal and self.lorder > 0:
+            if cache is None:
+                # Make depthwise_conv causal by
+                # manually padding self.lorder zeros to the left
+                x = nn.functional.pad(x, (self.lorder, 0), "constant", 0.0)
+            else:
+                assert (
+                    not self.training
+                ), "Cache should be None at training time"
+                assert cache.size(0) == self.lorder
+                x = torch.cat([cache.permute(1, 2, 0), x], dim=2)
+                if right_context > 0:
+                    cache = x.permute(2, 0, 1)[
+                        -(self.lorder + right_context) : (  # noqa
+                            -right_context
+                        ),
+                        ...,
+                    ]
+                else:
+                    cache = x.permute(2, 0, 1)[-self.lorder :, ...]  # noqa
+
+        x = self.depthwise_conv(x)
+        # x is (batch, channels, time)
+        x = x.permute(0, 2, 1)
+        x = self.norm(x)
+        x = x.permute(0, 2, 1)
+
+        x = self.activation(x)
+
+        x = self.pointwise_conv2(x)  # (batch, channel, time)
+
+        if cache is None:
+            cache = torch.empty(0)
+
+        return x.permute(2, 0, 1), cache
+
+
+class Swish(torch.nn.Module):
+    """Construct a Swish object."""
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Return Swish activation function."""
+        return x * torch.sigmoid(x)
+
+
+def identity(x):
+    return x
diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py
new file mode 100644
index 00000000..333fff30
--- /dev/null
+++ b/hyperion/torch/models/transducer/decoder.py
@@ -0,0 +1,97 @@
+# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, Tuple
+
+import torch
+import torch.nn as nn
+
+
+# TODO(fangjun): Support switching between LSTM and GRU
+class Decoder(nn.Module):
+    def __init__(
+        self,
+        vocab_size: int,
+        embedding_dim: int,
+        blank_id: int,
+        num_layers: int,
+        hidden_dim: int,
+        output_dim: int,
+        embedding_dropout: float = 0.0,
+        rnn_dropout: float = 0.0,
+    ):
+        """
+        Args:
+          vocab_size:
+            Number of tokens of the modeling unit including blank.
+          embedding_dim:
+            Dimension of the input embedding.
+          blank_id:
+            The ID of the blank symbol.
+          num_layers:
+            Number of LSTM layers.
+          hidden_dim:
+            Hidden dimension of LSTM layers.
+          output_dim:
+            Output dimension of the decoder.
+          embedding_dropout:
+            Dropout rate for the embedding layer.
+          rnn_dropout:
+            Dropout for LSTM layers.
+ """ + super().__init__() + self.embedding = nn.Embedding( + num_embeddings=vocab_size, + embedding_dim=embedding_dim, + padding_idx=blank_id, + ) + self.embedding_dropout = nn.Dropout(embedding_dropout) + # TODO(fangjun): Use layer normalized LSTM + self.rnn = nn.LSTM( + input_size=embedding_dim, + hidden_size=hidden_dim, + num_layers=num_layers, + batch_first=True, + dropout=rnn_dropout, + ) + self.blank_id = blank_id + self.output_linear = nn.Linear(hidden_dim, output_dim) + + def forward( + self, + y: torch.Tensor, + states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + """ + Args: + y: + A 2-D tensor of shape (N, U) with BOS prepended. + states: + A tuple of two tensors containing the states information of + LSTM layers in this decoder. + Returns: + Return a tuple containing: + + - rnn_output, a tensor of shape (N, U, C) + - (h, c), containing the state information for LSTM layers. + Both are of shape (num_layers, N, C) + """ + embedding_out = self.embedding(y) + embedding_out = self.embedding_dropout(embedding_out) + rnn_out, (h, c) = self.rnn(embedding_out, states) + out = self.output_linear(rnn_out) + + return out, (h, c) diff --git a/hyperion/torch/models/transducer/joiner.py b/hyperion/torch/models/transducer/joiner.py new file mode 100644 index 00000000..2ef3f1de --- /dev/null +++ b/hyperion/torch/models/transducer/joiner.py @@ -0,0 +1,54 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn + + +class Joiner(nn.Module): + def __init__(self, input_dim: int, output_dim: int): + super().__init__() + + self.output_linear = nn.Linear(input_dim, output_dim) + + def forward( + self, encoder_out: torch.Tensor, decoder_out: torch.Tensor + ) -> torch.Tensor: + """ + Args: + encoder_out: + Output from the encoder. Its shape is (N, T, C). + decoder_out: + Output from the decoder. Its shape is (N, U, C). + Returns: + Return a tensor of shape (N, T, U, C). + """ + assert encoder_out.ndim == decoder_out.ndim == 3 + assert encoder_out.size(0) == decoder_out.size(0) + assert encoder_out.size(2) == decoder_out.size(2) + + encoder_out = encoder_out.unsqueeze(2) + # Now encoder_out is (N, T, 1, C) + + decoder_out = decoder_out.unsqueeze(1) + # Now decoder_out is (N, 1, U, C) + + logit = encoder_out + decoder_out + logit = torch.tanh(logit) + + output = self.output_linear(logit) + + return output diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py index e2c6e1be..3a55ac83 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py @@ -21,7 +21,7 @@ class HFWav2Vec2Transducer(HFWav2Transducer): Attributes: hf_feats: hugging face model wrapper object. - xvector: x-vector model object. 
+        transducer: transducer model object.
         feat_fusion_start: the input to x-vector model will fuse the wav2vec
             layers from "feat_fusion_start" to the wav2vec "num_layers".
         feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more
@@ -29,12 +29,12 @@ class HFWav2Vec2Transducer(HFWav2Transducer):
     """

     def __init__(
-        self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg"
+        self, hf_feats, transducer, feat_fusion_start=0, feat_fusion_method="weighted-avg"
     ):
         super().__init__()
         self.hf_feats = hf_feats
-        self.xvector = xvector
+        self.transducer = transducer
         self.feat_fusion_start = feat_fusion_start
         self.feat_fusion_method = feat_fusion_method
         self._hf_context = contextlib.nullcontext()
@@ -85,7 +85,7 @@ def _fuse_hid_feats(self, hid_feats):
         return feats

     def compute_prototype_affinity(self):
-        return self.xvector.compute_prototype_affinity()
+        return self.transducer.compute_prototype_affinity()

     def update_loss_margin(self, epoch):
         """Updates the value of the margin in AAM/AM-softmax losses
@@ -94,7 +94,7 @@ def update_loss_margin(self, epoch):
         Args:
             epoch: epoch which is about to start
         """
-        self.xvector.update_loss_margin(epoch)
+        self.transducer.update_loss_margin(epoch)

     def rebuild_output_layer(
         self,
@@ -107,7 +107,7 @@ def rebuild_output_layer(
         intertop_margin=0.0,
         num_subcenters=2,
     ):
-        self.xvector.rebuild_output_layer(
+        self.transducer.rebuild_output_layer(
             num_classes=num_classes,
             loss_type=loss_type,
             cos_scale=cos_scale,
@@ -190,7 +190,7 @@ def forward(
         feats, hid_feats, feat_lengths = self.forward_feats(
             x, x_lengths, return_feat_layers
         )
-        output = self.xvector(
+        output = self.transducer(
             feats,
             feat_lengths,
             y,
@@ -203,7 +203,7 @@
             return output

         if not isinstance(output, dict):
-            # if the xvector just returned the logits we put then into a dictionary
+            # if the transducer just returned the logits we put them into a dictionary
             # to append the hid feats later.
output["logits"] = output @@ -233,7 +233,7 @@ def extract_embed( * feats.size(-1) // x.size(-1) ) - return self.xvector.extract_embed( + return self.transducer.extract_embed( feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks ) @@ -266,8 +266,8 @@ def set_train_mode(self, mode): self.unfreeze() self.freeze_feat_fuser() self.freeze_hf_feats() - self.xvector.freeze_preembed_layers() - elif mode in ["ft-xvector", "ft-xvector-nograd"]: + self.transducer.freeze_preembed_layers() + elif mode in ["ft-transducer", "ft-transducer-nograd"]: self.unfreeze() self.freeze_hf_feats() self.freeze_feat_fuser() @@ -296,16 +296,16 @@ def _train(self, train_mode: str): super()._train(train_mode) elif train_mode == "ft-embed-affine": self.hf_feats.train() - self.xvector._train("ft-embed_affine") + self.transducer._train("ft-embed_affine") elif train_mode in [ - "ft-xvector", + "ft-transducer", "hf-feats-frozen", - "ft-xvector-nograd", + "ft-transducer-nograd", "hf-feats-frozen-nograd", "hf-feat-extractor-frozen", ]: self.hf_feats.train() - self.xvector._train("full") + self.transducer._train("full") else: raise ValueError(f"invalid train_mode={train_mode}") @@ -315,9 +315,9 @@ def valid_train_modes(): "full", "frozen", "ft-embed-affine", - "ft-xvector", + "ft-transducer", "hf-feats-frozen", - "ft-xvector-nograd", + "ft-transducer-nograd", "hf-feats-frozen-nograd", "hf-feat-extractor-frozen", ] @@ -326,7 +326,7 @@ def valid_train_modes(): def filter_args(**kwargs): valid_args = ( "hf_feats", - "xvector", + "transducer", "feat_fusion_start", "feat_fusion_method", ) @@ -336,12 +336,12 @@ def filter_args(**kwargs): def get_config(self): hf_cfg = self.hf_feats.get_config() - xvec_cfg = self.xvector.get_config() + xvec_cfg = self.transducer.get_config() del hf_cfg["class_name"] del xvec_cfg["class_name"] config = { "hf_feats": hf_cfg, - "xvector": xvec_cfg, + "transducer": xvec_cfg, "feat_fusion_start": self.feat_fusion_start, "feat_fusion_method": self.feat_fusion_method, } @@ -349,10 +349,10 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) - def change_config(self, hf_feats, xvector): - logging.info("changing hf wav2xvector config") + def change_config(self, hf_feats, transducer): + logging.info("changing hf wav2transducer config") self.hf_feats.change_config(**hf_feats) - self.xvector.change_config(**xvector) + self.transducer.change_config(**transducer) @staticmethod def add_class_args(parser, prefix=None, skip=set()): @@ -384,5 +384,5 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser.add_argument( "--" + prefix, action=ActionParser(parser=parser), - help="xvector options", + help="transducer options", ) diff --git a/hyperion/utils/text_info.py b/hyperion/utils/text_info.py deleted file mode 100644 index 4d7e2c3a..00000000 --- a/hyperion/utils/text_info.py +++ /dev/null @@ -1,104 +0,0 @@ -""" - Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" -from pathlib import Path - -import numpy as np -import pandas as pd - -from .info_table import InfoTable - - -def read_2column_text(path: Union[Path, str]) -> Dict[str, str]: - """Read a text file having 2 column as dict object. 
- - Examples: - wav.scp: - key1 /some/path/a.wav - key2 /some/path/b.wav - - >>> read_2column_text('wav.scp') - {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'} - - """ - assert check_argument_types() - - data = {} - with Path(path).open("r", encoding="utf-8") as f: - for linenum, line in enumerate(f, 1): - sps = line.rstrip().split(maxsplit=1) - if len(sps) == 1: - k, v = sps[0], "" - else: - k, v = sps - if k in data: - raise RuntimeError(f"{k} is duplicated ({path}:{linenum})") - data[k] = v - return data - - - -class TextInfo(InfoTable): - def __init__(self, df): - super().__init__(df) - if "weights" not in self.df: - self.set_uniform_weights() - else: - self.df["weights"] /= self.df["weigths"].sum() - - def set_uniform_weights(self): - self.df["weights"] = 1 / len(self.df) - - def set_weights(self, weights): - self.df["weights"] = weights / weights.sum() - - def exp_weights(self, x): - weights = self.df["weights"] ** x - self.set_weights(weights) - - def set_zero_weight(self, id): - self.df.loc[id, "weights"] = 0 - self.df["weights"] /= self.df["weights"].sum() - - @property - def weights(self, id): - return self.df.loc[id, "weights"] - - @property - def num_classes(self): - return self.df["class_idx"].values.max() + 1 - - @classmethod - def load(cls, file_path, sp, sep=None): - """Loads utt2info list from text file. - - Args: - file_path: File to read the list. - sp: SentencePieceProcessor from the BPE model - sep: Separator between the key and file_path in the text file. - dtype: Dictionary with the dtypes of each column. - Returns: - Utt2Info object - """ - #TODO: load text information - """Loads utt2info list from text file. - - Args: - file_path: File to read the list. - sp: SentencePieceProcessor for bpe. - Returns: - Utt2Info object - """ - # # y: k2.RaggedTensor, - # # A ragged tensor with 2 axes [utt][label]. It contains labels of each utterance. 
- # y = sp.encode(texts, out_type=int) - # y = k2.RaggedTensor(y).to(device) - file_path = Path(file_path) - text_df = super().load(file_path, sep, name="text_label") - # for i, text in enumerate(text_df["text_label"]): - # y = sp.encode(text, out_type=int) - # y = k2.RaggedTensor(y).to(device) - # text_df["text_label"][i] = y - - return text_df From 30420e8adad694e797c04aa936fecc7586f3a0c6 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Mon, 31 Oct 2022 16:33:35 -0400 Subject: [PATCH 041/154] Data preparation and implemented Wav2vec2Transducer Models --- ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 22 +- .../v1/conf/wav2vec2xlsr300m_transducer.yaml | 51 +-- hyperion/bin/train_wav2vec2transducer.py | 212 +++------ hyperion/torch/data/audio_dataset.py | 41 +- hyperion/torch/data/bucketing_seg_sampler.py | 6 +- hyperion/torch/models/transducer/conformer.py | 63 ++- hyperion/torch/models/transducer/decoder.py | 38 ++ hyperion/torch/models/transducer/joiner.py | 16 + .../torch/models/transducer/transducer.py | 155 ++++++- .../wav2transducer/hf_wav2transducer.py | 77 ++-- .../wav2transducer/hf_wav2vec2_transducer.py | 404 +++--------------- hyperion/torch/trainers/transducer_trainer.py | 13 +- 12 files changed, 456 insertions(+), 642 deletions(-) diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index e7cfc8ef..91adaa35 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -1,22 +1,32 @@ data: train: dataset: - # max_chunk_length: 3.0 - # min_chunk_length: 3.0 - aug_cfgs: [conf/reverb_noise_aug.yaml] wav_scale: 1 + # class_names: + # - text + # aug_cfgs: + # - conf/reverb_noise_aug.yaml + return_segment_info: + - text sampler: + # sampler_type: 'seg_sampler' sampler_type: 'bucketing_seg_sampler' - batch_size: 32 + batch_size: 4 iters_per_epoch: 6 data_loader: - num_workers: 8 + num_workers: 1 val: dataset: # max_chunk_length: 4.0 # min_chunk_length: 4.0 - aug_cfgs: [conf/reverb_noise_aug.yaml] + # aug_cfgs: [conf/reverb_noise_aug.yaml] wav_scale: 1 + # class_names: + # - text + # aug_cfgs: + # - conf/reverb_noise_aug.yaml + return_segment_info: + - text sampler: sampler_type: 'bucketing_seg_sampler' batch_size: 32 diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml index fe71a40c..3f97feb7 100644 --- a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml @@ -1,40 +1,21 @@ hf_feats: pretrained_model_path: microsoft/wavlm-base-plus + # test_param: xyz transducer: - resnet_enc: - in_feats: 765 - in_conv_channels: 512 - in_kernel_size: 5 - in_stride: 1 - resb_type: seres2bn - resb_repeats: - - 1 - - 1 - - 1 - resb_channels: - - 512 - resb_kernel_sizes: - - 3 - resb_dilations: - - 2 - - 3 - - 4 - resb_strides: - - 1 - res2net_width_factor: 1 - res2net_scale: 8 - se_r: 4 - multilayer: true - multilayer_concat: true - endpoint_channels: 1536 - pool_net: - pool_type: ch-wise-att-mean+stddev - inner_feats: 128 - embed_dim: 192 - cos_scale: 32.0 - margin: 0.2 - margin_warmup_epochs: 5 - intertop_margin: 0.1 - dropout_rate: 0.0 + encoder_out_dim: 768 + conformer_enc: + num_features: 80 + subsampling_factor: 4 + d_model: 512 + nhead: 8 + dim_feedforward: 2048 + num_encoder_layers: 12 + vgg_frontend: False + decoder: + # vocab_size: 
1000 + # blank_id: 0 + embedding_dim: 1024 + num_layers: 2 + hidden_dim: 512 feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 7e87c180..8c539cd1 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -4,7 +4,6 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import sys -import pdb import os from pathlib import Path from jsonargparse import ( @@ -13,6 +12,7 @@ ActionParser, namespace_to_dict, ) +import k2 import time import logging import multiprocessing @@ -29,10 +29,7 @@ from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.models import HFWav2Vec2Transducer -from hyperion.torch.models.transducer import Conformer -from hyperion.torch.models.transducer import Decoder -from hyperion.torch.models.transducer import Joiner - +from torch.nn.utils.rnn import pad_sequence model_dict = { @@ -40,10 +37,28 @@ } +def my_collate(batch): + audio = [] + audio_length = [] + target = [] + for i, record in enumerate(batch): + wav = torch.as_tensor(record[0]) + audio.append(wav) + audio_length.append(wav.shape[0]) + target.append(record[1]) + if i==4: + break + audio = pad_sequence(audio) + audio_length = torch.as_tensor(audio_length) + target = k2.RaggedTensor(target) + return torch.transpose(audio,0,1), audio_length, target + + + def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] - ad_args = AD.filter_args(**kwargs["dataset"]) - sampler_args = kwargs["sampler"] + data_kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**data_kwargs["dataset"]) + sampler_args = data_kwargs["sampler"] if rank == 0: logging.info("{} audio dataset args={}".format(partition, ad_args)) logging.info("{} sampler args={}".format(partition, sampler_args)) @@ -52,32 +67,31 @@ def init_data(partition, rank, num_gpus, **kwargs): is_val = partition == "val" ad_args["is_val"] = is_val sampler_args["shuffle"] = not is_val - print("ad_args", ad_args) dataset = AD(**ad_args) if rank == 0: logging.info("init %s samplers", partition) - print("sampler_args", sampler_args) sampler = SegSamplerFactory.create(dataset, **sampler_args) if rank == 0: logging.info("init %s dataloader", partition) - num_workers = kwargs["data_loader"]["num_workers"] + num_workers = data_kwargs["data_loader"]["num_workers"] num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) largs = ( {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} ) - data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs, collate_fn=my_collate) return data_loader -def init_model(num_classes, rank, model_class, **kwargs): +def init_model(blank_id, vocab_size, rank, model_class, **kwargs): model_args = model_class.filter_args(**kwargs["model"]) if rank == 0: logging.info("model network args={}".format(model_args)) # TODO: check model_args - model_args["num_classes"] = num_classes + model_args["transducer"]["blank_id"] = blank_id + model_args["transducer"]["vocab_size"] = vocab_size model = model_class(**model_args) if rank == 0: logging.info("model={}".format(model)) @@ -86,130 +100,6 @@ def init_model(num_classes, rank, model_class, **kwargs): -def get_params() -> AttributeDict: - """Return a dict containing training parameters. 
- - All training related parameters that are not passed from the commandline - are saved in the variable `params`. - - Commandline options are merged into `params` after they are parsed, so - you can also access them via `params`. - - Explanation of options saved in `params`: - - - best_train_loss: Best training loss so far. It is used to select - the model that has the lowest training loss. It is - updated during the training. - - - best_valid_loss: Best validation loss so far. It is used to select - the model that has the lowest validation loss. It is - updated during the training. - - - best_train_epoch: It is the epoch that has the best training loss. - - - best_valid_epoch: It is the epoch that has the best validation loss. - - - batch_idx_train: Used to writing statistics to tensorboard. It - contains number of batches trained so far across - epochs. - - - log_interval: Print training loss if batch_idx % log_interval` is 0 - - - reset_interval: Reset statistics if batch_idx % reset_interval is 0 - - - valid_interval: Run validation if batch_idx % valid_interval is 0 - - - feature_dim: The model input dim. It has to match the one used - in computing features. - - - subsampling_factor: The subsampling factor for the model. - - - attention_dim: Hidden dim for multi-head attention model. - - - num_decoder_layers: Number of decoder layer of transformer decoder. - - - warm_step: The warm_step for Noam optimizer. - """ - params = AttributeDict( - { - "best_train_loss": float("inf"), - "best_valid_loss": float("inf"), - "best_train_epoch": -1, - "best_valid_epoch": -1, - "batch_idx_train": 0, - "log_interval": 50, - "reset_interval": 200, - "valid_interval": 3000, # For the 100h subset, use 800 - # parameters for conformer - "feature_dim": 80, - "encoder_out_dim": 512, - "subsampling_factor": 4, - "attention_dim": 512, - "nhead": 8, - "dim_feedforward": 2048, - "num_encoder_layers": 12, - "vgg_frontend": False, - # decoder params - "decoder_embedding_dim": 1024, - "num_decoder_layers": 2, - "decoder_hidden_dim": 512, - # parameters for Noam - "warm_step": 80000, # For the 100h subset, use 8k - "env_info": get_env_info(), - } - ) - - return params - - -def get_encoder_model(params: AttributeDict): - # TODO: We can add an option to switch between Conformer and Transformer - encoder = Conformer( - num_features=params.feature_dim, - output_dim=params.encoder_out_dim, - subsampling_factor=params.subsampling_factor, - d_model=params.attention_dim, - nhead=params.nhead, - dim_feedforward=params.dim_feedforward, - num_encoder_layers=params.num_encoder_layers, - vgg_frontend=params.vgg_frontend, - ) - return encoder - - -def get_decoder_model(params: AttributeDict): - decoder = Decoder( - vocab_size=params.vocab_size, - embedding_dim=params.decoder_embedding_dim, - blank_id=params.blank_id, - num_layers=params.num_decoder_layers, - hidden_dim=params.decoder_hidden_dim, - output_dim=params.encoder_out_dim, - ) - return decoder - - -def get_joiner_model(params: AttributeDict): - joiner = Joiner( - input_dim=params.encoder_out_dim, - output_dim=params.vocab_size, - ) - return joiner - - -def get_transducer_model(params: AttributeDict): - encoder = get_encoder_model(params) - decoder = get_decoder_model(params) - joiner = get_joiner_model(params) - - model = Transducer( - encoder=encoder, - decoder=decoder, - joiner=joiner, - ) - return model - - def train_model(gpu_id, args): @@ -221,20 +111,20 @@ def train_model(gpu_id, args): torch.manual_seed(args.seed) set_float_cpu("float32") - ddp_args = 
ddp.filter_ddp_args(**kwargs) - device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) - kwargs["rank"] = rank + # ddp_args = ddp.filter_ddp_args(**kwargs) + # device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + # kwargs["rank"] = rank + + # for Debug + rank = 0 + kwargs["rank"] = 0 + device = "cpu" + world_size=1 - # # for Debug - # rank = 0 - # kwargs["rank"] = 0 - # device = "cpu" - # world_size=1 - train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - # model = init_model(train_loader.dataset.num_classes.values())[0], **kwargs) - model = init_model(train_loader.dataset.num_classes, **kwargs) + # model = init_model(train_loader.dataset.num_classes, **kwargs) + model = init_model(train_loader.dataset.sp.piece_to_id(""), train_loader.dataset.sp.get_piece_size(), **kwargs) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: @@ -255,12 +145,11 @@ def train_model(gpu_id, args): def make_parser(model_class): parser = ArgumentParser() - + parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") AD.add_class_args(train_parser, prefix="dataset", skip={}) SegSamplerFactory.add_class_args(train_parser, prefix="sampler") - # Sampler.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", type=int, @@ -271,7 +160,6 @@ def make_parser(model_class): val_parser = ArgumentParser(prog="") AD.add_class_args(val_parser, prefix="dataset", skip={}) SegSamplerFactory.add_class_args(val_parser, prefix="sampler") - # Sampler.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", type=int, @@ -288,13 +176,20 @@ def make_parser(model_class): "--data.train.dataset.text_file", type=str, ) + + parser.add_argument("--data.val.dataset.text_file", type=str) + + # parser.add_argument( + # "--data.train.dataset.class_files", + # type=str, + # ) parser.add_argument( "--data.train.dataset.bpe_model", type=str, ) - parser.add_argument("--data.val.dataset.text_file", type=str) + # parser.add_argument("--data.val.dataset.class_files", type=str) parser.link_arguments( "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" @@ -304,16 +199,6 @@ def make_parser(model_class): "data.train.dataset.bpe_model", "data.val.dataset.bpe_model" ) - # parser.link_arguments( - # "data.train.dataset.class_file", "data.val.dataset.class_file" - # ) - # parser.link_arguments( - # "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" - # ) - # parser.link_arguments( - # "data.train.sampler.batch_size", "data.val.sampler.batch_size" - # ) - model_class.add_class_args(parser, prefix="model") Trainer.add_class_args( parser, prefix="trainer", train_modes=model_class.valid_train_modes() @@ -328,7 +213,6 @@ def make_parser(model_class): if __name__ == "__main__": - parser = ArgumentParser(description="Train Wav2Vec2Transducer model from audio files") parser.add_argument("--cfg", action=ActionConfigFile) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 11e8cede..8929868f 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -21,12 +21,12 @@ import k2 import sentencepiece as spm +from torch.nn.utils.rnn import pad_sequence from torch.utils.data import Dataset import torch.distributed as dist from hyperion.np import augment -import pdb class AudioDataset1(Dataset): def __init__( @@ -539,7 +539,6 @@ def _load_bpe_model(self, 
bpe_model, is_val): vocab_size = self.sp.get_piece_size() def _load_text_infos(self, text_file, is_val): - #TODO: load bpe and text into data structure if text_file is None: return if self.rank == 0: @@ -547,6 +546,7 @@ def _load_text_infos(self, text_file, is_val): text = read_text(text_file) self.seg_set["text"] = text.loc[self.seg_set["id"]].text + self.text_info = ClassInfo(text) @@ -653,6 +653,7 @@ def _read_audio(self, seg_id, start, duration): def _apply_augs(self, x, num_samples, reverb_context_samples): x_augs = [] + # for each type of augmentation for i, augmenter in enumerate(self.augmenters): # we do n_augs per augmentation type @@ -677,34 +678,16 @@ def _get_segment_info(self, seg_id): class_info = self.class_info[info_name] idx = class_info.loc[seg_info, "class_idx"] seg_info = idx + if info_name == "text": + text = self.text_info.loc[seg_id, "text"] + seg_info = self.sp.encode(text, out_type=int) r.append(seg_info) return r - def _get_text_info(self, seg_id): - #TODO: bpe labels from data structure for getitem - r = [] - # converts the class_ids to integers - for info_name in self.return_segment_info: - seg_info = self.seg_set.loc[seg_id, info_name] - if info_name in self.text_info: - # if the type of information is a text - # we use the text information table to - # convert from id to text labels - text_info = self.text_info[info_name] - idx = text_info.loc[seg_info, "class_idx"] - seg_info = idx - y = sp.encode(text, out_type=int) - y = k2.RaggedTensor(y).to(device) - - r.append(y) - - return r - def __getitem__(self, segment): - #TODO: check the start/end time for Recognition seg_id, start, duration = self._parse_segment_item(segment) x, fs = self._read_audio(seg_id, start, duration) if self.augmenters: @@ -712,6 +695,7 @@ def __getitem__(self, segment): num_samples = int(duration * fs) reverb_context_samples = len(x) - num_samples x_augs = self._apply_augs(x, num_samples, reverb_context_samples) + r = x_augs # add original non augmented audio @@ -722,14 +706,9 @@ def __getitem__(self, segment): else: r = [x] - #TODO: Add it back for both case - # # adds the segment labels - # seg_info = self._get_segment_info(seg_id) - # r.extend(seg_info) - - # adds the text labels - text_info = self._get_text_info(seg_id) - r.extend(text_info) + # adds the segment labels + seg_info = self._get_segment_info(seg_id) + r.extend(seg_info) return (*r,) diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 37794377..8dbc4e45 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -12,6 +12,7 @@ from .hyp_sampler import HypSampler from .seg_sampler import SegSampler import torch.distributed as dist +from torch.nn.utils.rnn import pad_sequence class BucketingSegSampler(HypSampler): @@ -62,6 +63,9 @@ def _create_bucket_samplers(self): self.bucket_samplers = bucket_samplers + def __len__(self): + return self._len + def _compute_len(self): self._len = 0 for i in range(self.num_buckets): @@ -93,7 +97,7 @@ def __next__(self): if self.depleted_buckets[bucket_idx]: continue - bucket = self.buckets[bucket_idx] + bucket = self.bucket_samplers[bucket_idx] try: batch = next(bucket) break diff --git a/hyperion/torch/models/transducer/conformer.py b/hyperion/torch/models/transducer/conformer.py index 2bf63320..734c28ce 100644 --- a/hyperion/torch/models/transducer/conformer.py +++ b/hyperion/torch/models/transducer/conformer.py @@ -20,11 +20,12 @@ import warnings from typing import List, 
Optional, Tuple +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import torch from torch import Tensor, nn -from transformer import Transformer +from .transformer import Transformer -from icefall.utils import make_pad_mask, subsequent_chunk_mask +from hyperion.utils.utils import make_pad_mask, subsequent_chunk_mask class Conformer(Transformer): @@ -388,6 +389,64 @@ def streaming_forward( return logits, lengths, states + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "num_features", + "encoder_out_dim", + "subsampling_factor", + "d_model", + "nhead", + "dim_feedforward", + "num_encoder_layers", + "vgg_frontend", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + + parser.add_argument( + "--num-features", default=80, type=int, help=("") + ) + + parser.add_argument( + "--subsampling-factor", default=4, type=int, help=("") + ) + + parser.add_argument( + "--d-model", default=512, type=int, help=("") + ) + + parser.add_argument( + "--nhead", default=8, type=int, help=("") + ) + + parser.add_argument( + "--dim-feedforward", default=2048, type=int, help=("") + ) + + parser.add_argument( + "--num-encoder-layers", default=12, type=int, help=("") + ) + + parser.add_argument( + "--vgg-frontend", default=False, type=bool, help=("") + ) + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + + + + + class ConformerEncoderLayer(nn.Module): """ ConformerEncoderLayer is made up of self-attn, feedforward and convolution networks. diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py index 333fff30..0b47e80c 100644 --- a/hyperion/torch/models/transducer/decoder.py +++ b/hyperion/torch/models/transducer/decoder.py @@ -18,6 +18,7 @@ import torch import torch.nn as nn +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo # TODO(fangjun): Support switching between LSTM and GRU @@ -95,3 +96,40 @@ def forward( out = self.output_linear(rnn_out) return out, (h, c) + + + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "embedding_dim", + "num_layers", + "hidden_dim", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--embedding-dim", default=1024, type=int, help=("feature dimension") + ) + + parser.add_argument( + "--num-layers", default=2, type=int, help=("") + ) + + parser.add_argument( + "--hidden-dim", default=512, type=int, help=("") + ) + + + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + diff --git a/hyperion/torch/models/transducer/joiner.py b/hyperion/torch/models/transducer/joiner.py index 2ef3f1de..72376b3c 100644 --- a/hyperion/torch/models/transducer/joiner.py +++ b/hyperion/torch/models/transducer/joiner.py @@ -52,3 +52,19 @@ def forward( output = self.output_linear(logit) return output + + # @staticmethod + # def filter_args(**kwargs): + # valid_args = ( + # "encoder_out_dim", + # ) + # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + # return args + + # @staticmethod + # def add_class_args(parser, prefix=None, skip=set()): + + # parser.add_argument( + # 
"--encoder-out-dim", default=512, type=int, help=("") + # ) \ No newline at end of file diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index ff12ef18..b34ff4cc 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -18,6 +18,7 @@ Note we use `rnnt_loss` from torchaudio, which exists only in torchaudio >= v0.10.0. It also means you have to use torch >= v1.10.0 """ +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import k2 import torch import torch.nn as nn @@ -25,19 +26,25 @@ import torchaudio.functional from .encoder_interface import EncoderInterface +from ...torch_model import TorchModel from hyperion.utils.utils import add_sos +from .conformer import Conformer +from .decoder import Decoder +from .joiner import Joiner -class Transducer(nn.Module): +class Transducer(TorchModel): """It implements https://arxiv.org/pdf/1211.3711.pdf "Sequence Transduction with Recurrent Neural Networks" """ def __init__( - self, - encoder: EncoderInterface, - decoder: nn.Module, - joiner: nn.Module, + self, + vocab_size, + blank_id, + encoder_out_dim, + conformer_enc, + decoder, ): """ Args: @@ -56,12 +63,20 @@ def __init__( unnormalized probs, i.e., not processed by log-softmax. """ super().__init__() - assert isinstance(encoder, EncoderInterface) - assert hasattr(decoder, "blank_id") + # assert isinstance(encoder, EncoderInterface) + # assert hasattr(decoder, "blank_id") + conformer_enc["output_dim"] = encoder_out_dim + decoder["blank_id"] = blank_id + decoder["vocab_size"] = vocab_size + decoder["output_dim"] = encoder_out_dim + joiner = {"input_dim":encoder_out_dim, "output_dim":vocab_size} + + self.encoder = Conformer(**conformer_enc) + self.decoder = Decoder(**decoder) + self.joiner = Joiner(**joiner) + + - self.encoder = encoder - self.decoder = decoder - self.joiner = joiner def forward( self, @@ -85,12 +100,14 @@ def forward( assert x.ndim == 3, x.shape assert x_lens.ndim == 1, x_lens.shape assert y.num_axes == 2, y.num_axes - + assert x.size(0) == x_lens.size(0) == y.dim0 - encoder_out, x_lens = self.encoder(x, x_lens) + # wav2vec2 works as encoder + # encoder_out, x_lens = self.encoder(x, x_lens) assert torch.all(x_lens > 0) + encoder_out = x # Now for the decoder, i.e., the prediction network row_splits = y.shape.row_splits(1) y_lens = row_splits[1:] - row_splits[:-1] @@ -113,14 +130,124 @@ def forward( f"Current torchaudio version: {torchaudio.__version__}\n" "Please install a version >= 0.10.0" ) + + x_lens = x_lens.to(torch.int32) + loss = torchaudio.functional.rnnt_loss( logits=logits, - targets=y_padded, + targets=y_padded.to(torch.int32), logit_lengths=x_lens, target_lengths=y_lens, blank=blank_id, reduction="sum", ) - return loss + return logits, loss + + + def set_train_mode(self, mode): + if mode == self._train_mode: + return + + if mode == "full": + self.unfreeze() + elif mode == "frozen": + self.freeze() + elif mode == "ft-embed-affine": + self.unfreeze() + self.freeze_preembed_layers() + else: + raise ValueError(f"invalid train_mode={mode}") + + self._train_mode = mode + + @classmethod + def load(cls, file_path=None, cfg=None, state_dict=None): + cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict) + encoder_net = TorchNALoader.load_from_cfg(cfg=cfg["encoder_cfg"]) + for k in "encoder_cfg": + del cfg[k] + + model = cls(encoder_net, **cfg) + if state_dict is not None: + model.load_state_dict(state_dict) + + return model + + + 
def _train(self, train_mode: str): + if train_mode in ["full", "frozen"]: + super()._train(train_mode) + elif train_mode == "ft-embed-affine": + self.encoder_net.eval() + if self.proj is not None: + self.proj.eval() + + self.pool_net.eval() + self.classif_net.train() + layer_list = [l for l in range(self.embed_layer)] + self.classif_net.put_layers_in_eval_mode(layer_list) + else: + raise ValueError(f"invalid train_mode={train_mode}") + + @staticmethod + def valid_train_modes(): + return ["full", "frozen", "ft-embed-affine"] + + def get_config(self): + enc_cfg = self.encoder.get_config() + dec_cfg = self.decoder.get_config() + join_cfg = self.joiner.get_config() + + config = { + "encoder_out_dim" : self.encoder_out_dim, + "conformer_enc": enc_cfg, + "decoder": dec_cfg, + "joiner": join_cfg, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @staticmethod + def filter_args(**kwargs): + + # get arguments for pooling + encoder_args = Conformer.filter_args(**kwargs["conformer_enc"]) + decoder_args = Decoder.filter_args(**kwargs["decoder"]) + # joiner_args = Joiner.filter_args(**kwargs["joiner"]) + + valid_args = ( + "encoder_out_dim", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + args["conformer_enc"] = encoder_args + args["decoder"] = decoder_args + # args["joiner"] = joiner_args + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + + + Conformer.add_class_args( + parser, prefix="conformer_enc", skip=[] + ) + + Decoder.add_class_args( + parser, prefix="decoder", skip=[] + ) + + parser.add_argument( + "--encoder-out-dim", default=512, type=int, help=("") + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py index 1e038f17..7956c9ba 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py @@ -1,5 +1,5 @@ """ - Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging @@ -13,6 +13,8 @@ from ...torch_model import TorchModel from ...utils import remove_silence +# from ..wav2xvectors.hf_wav2xvector import HFWav2XVector + class HFWav2Transducer(TorchModel): @@ -20,7 +22,7 @@ class HFWav2Transducer(TorchModel): Attributes: hf_feats: hugging face model wrapper object. - xvector: x-vector model object. + transducer: transducer model object. feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to the wav2vec "num_layers". 
feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more @@ -28,12 +30,12 @@ class HFWav2Transducer(TorchModel): """ def __init__( - self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg" + self, hf_feats, transducer, feat_fusion_start=0, feat_fusion_method="weighted-avg" ): super().__init__() self.hf_feats = hf_feats - self.xvector = xvector + self.transducer = transducer self.feat_fusion_start = feat_fusion_start self.feat_fusion_method = feat_fusion_method self._hf_context = contextlib.nullcontext() @@ -84,16 +86,16 @@ def _fuse_hid_feats(self, hid_feats): return feats def compute_prototype_affinity(self): - return self.xvector.compute_prototype_affinity() + return self.transducer.compute_prototype_affinity() - def update_loss_margin(self, epoch): - """Updates the value of the margin in AAM/AM-softmax losses - given the epoch number + # def update_loss_margin(self, epoch): + # """Updates the value of the margin in AAM/AM-softmax losses + # given the epoch number - Args: - epoch: epoch which is about to start - """ - self.xvector.update_loss_margin(epoch) + # Args: + # epoch: epoch which is about to start + # """ + # self.transducer.update_loss_margin(epoch) def rebuild_output_layer( self, @@ -106,7 +108,7 @@ def rebuild_output_layer( intertop_margin=0.0, num_subcenters=2, ): - self.xvector.rebuild_output_layer( + self.transducer.rebuild_output_layer( num_classes=num_classes, loss_type=loss_type, cos_scale=cos_scale, @@ -189,25 +191,25 @@ def forward( feats, hid_feats, feat_lengths = self.forward_feats( x, x_lengths, return_feat_layers ) - output = self.xvector( + + feats = feats.permute(0, 2, 1) # (N, C, T) ->(N, T, C) + + output, loss = self.transducer( feats, feat_lengths, y, - return_enc_layers=return_enc_layers, - return_classif_layers=return_classif_layers, - return_logits=return_logits, ) if not return_feat_layers: - return output + return output, loss if not isinstance(output, dict): - # if the xvector just returned the logits we put then into a dictionary + # if the transducer just returned the logits we put then into a dictionary # to append the hid feats later. 
output["logits"] = output output["h_feats"] = hid_feats - return output + return output, loss def extract_embed( self, @@ -232,7 +234,7 @@ def extract_embed( * feats.size(-1) // x.size(-1) ) - return self.xvector.extract_embed( + return self.transducer.extract_embed( feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks ) @@ -265,8 +267,8 @@ def set_train_mode(self, mode): self.unfreeze() self.freeze_feat_fuser() self.freeze_hf_feats() - self.xvector.freeze_preembed_layers() - elif mode in ["ft-xvector", "ft-xvector-nograd"]: + self.transducer.freeze_preembed_layers() + elif mode in ["ft-transducer", "ft-transducer-nograd"]: self.unfreeze() self.freeze_hf_feats() self.freeze_feat_fuser() @@ -295,16 +297,16 @@ def _train(self, train_mode: str): super()._train(train_mode) elif train_mode == "ft-embed-affine": self.hf_feats.train() - self.xvector._train("ft-embed_affine") + self.transducer._train("ft-embed_affine") elif train_mode in [ - "ft-xvector", + "ft-transducer", "hf-feats-frozen", - "ft-xvector-nograd", + "ft-transducer-nograd", "hf-feats-frozen-nograd", "hf-feat-extractor-frozen", ]: self.hf_feats.train() - self.xvector._train("full") + self.transducer._train("full") else: raise ValueError(f"invalid train_mode={train_mode}") @@ -314,9 +316,9 @@ def valid_train_modes(): "full", "frozen", "ft-embed-affine", - "ft-xvector", + "ft-transducer", "hf-feats-frozen", - "ft-xvector-nograd", + "ft-transducer-nograd", "hf-feats-frozen-nograd", "hf-feat-extractor-frozen", ] @@ -325,7 +327,7 @@ def valid_train_modes(): def filter_args(**kwargs): valid_args = ( "hf_feats", - "xvector", + "transducer", "feat_fusion_start", "feat_fusion_method", ) @@ -333,14 +335,13 @@ def filter_args(**kwargs): return args def get_config(self): - hf_cfg = self.hf_feats.get_config() - xvec_cfg = self.xvector.get_config() + tran_cfg = self.transducer.get_config() del hf_cfg["class_name"] - del xvec_cfg["class_name"] + del tran_cfg["class_name"] config = { "hf_feats": hf_cfg, - "xvector": xvec_cfg, + "transducer": tran_cfg, "feat_fusion_start": self.feat_fusion_start, "feat_fusion_method": self.feat_fusion_method, } @@ -348,10 +349,10 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) - def change_config(self, hf_feats, xvector): - logging.info("changing hf wav2xvector config") + def change_config(self, hf_feats, transducer): + logging.info("changing hf wav2transducer config") self.hf_feats.change_config(**hf_feats) - self.xvector.change_config(**xvector) + self.transducer.change_config(**transducer) @staticmethod def add_class_args(parser, prefix=None, skip=set()): @@ -385,3 +386,5 @@ def add_class_args(parser, prefix=None, skip=set()): action=ActionParser(parser=parser), help="xvector options", ) + + diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py index 3a55ac83..79c4ca86 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py @@ -1,388 +1,98 @@ """ - Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -import contextlib from jsonargparse import ArgumentParser, ActionParser +from typing import Union, Dict, Optional import torch import torch.nn as nn -# import torch.nn.functional as nnf - -from ...torch_model 
import TorchModel -from ...utils import remove_silence +from ..transducer import Transducer +from ...tpm import HFWav2Vec2 from .hf_wav2transducer import HFWav2Transducer + class HFWav2Vec2Transducer(HFWav2Transducer): - """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. + """Class extracting Wav2Vec2 + ResNet1d x-vectors from waveform. Attributes: - hf_feats: hugging face model wrapper object. - transducer: transducer model object. - feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to - the wav2vec "num_layers". - feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more + Attributes: + hf_feats: HFWav2Vec configuration dictionary or object. + This is a warpper over Hugging Face Wav2Vec model. + transducer: Transducer configuration dictionary or object. + feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to + the wav2vec "num_layers". + feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more than one layer is used. """ def __init__( - self, hf_feats, transducer, feat_fusion_start=0, feat_fusion_method="weighted-avg" - ): - - super().__init__() - self.hf_feats = hf_feats - self.transducer = transducer - self.feat_fusion_start = feat_fusion_start - self.feat_fusion_method = feat_fusion_method - self._hf_context = contextlib.nullcontext() - self._make_fuser() - - def _make_fuser(self): - if self.feat_fusion_method == "last": - self.feat_fuser = None - return - - num_layers = self.hf_feats.num_encoder_layers + 1 - self.feat_fusion_start - layer_dim = self.hf_feats.hidden_size - if self.feat_fusion_method == "weighted-avg": - self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) - elif self.feat_fusion_method == "linear": - self.feat_fuser = nn.Linear(num_layers, 1, bias=False) - self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers - elif self.feat_fusion_method == "cat": - self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) - - def _fuse_hid_feats(self, hid_feats): - """Fuses the hidden features from the Wav2Vec model. - - Args: - hid_feats: list of hidden features Tensors from Wav2Vec model. 
- - Returns: - Tensor of fused features (batch, channels, time) - """ - if len(hid_feats) == 1: - # There is only one layer of features - return hid_feats[0] - - hid_feats = hid_feats[self.feat_fusion_start :] - if self.feat_fusion_method == "weighted-avg": - hid_feats = torch.stack(hid_feats, dim=-1) - norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) - feats = torch.sum(hid_feats * norm_weights, dim=-1) - elif self.feat_fusion_method == "linear": - hid_feats = torch.stack(hid_feats, dim=-1) - feats = self.feat_fuser(hid_feats).squeeze(dim=-1) - elif self.feat_fusion_method == "cat": - hid_feats = torch.cat(hid_feats, dim=-1) - feats = self.feat_fuser(hid_feats) - elif self.feat_fusion_method == "last": - feats = hid_feats[-1] - - return feats - - def compute_prototype_affinity(self): - return self.transducer.compute_prototype_affinity() - - def update_loss_margin(self, epoch): - """Updates the value of the margin in AAM/AM-softmax losses - given the epoch number - - Args: - epoch: epoch which is about to start - """ - self.transducer.update_loss_margin(epoch) - - def rebuild_output_layer( - self, - num_classes=None, - loss_type="arc-softmax", - cos_scale=64, - margin=0.3, - margin_warmup_epochs=10, - intertop_k=5, - intertop_margin=0.0, - num_subcenters=2, - ): - self.transducer.rebuild_output_layer( - num_classes=num_classes, - loss_type=loss_type, - cos_scale=cos_scale, - margin=margin, - margin_warmup_epochs=margin_warmup_epochs, - intertop_k=intertop_k, - intertop_margin=intertop_margin, - num_subcenters=num_subcenters, - ) - - def forward_feats( - self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False - ): - return_hid_states = ( - False - if return_feat_layers is None and self.feat_fusion_method == "last" - else True - ) - with self._hf_context: - hf_output = self.hf_feats( - x, - x_lengths, - return_hid_states=return_hid_states, - chunk_length=chunk_length, - detach_chunks=detach_chunks, - ) - feat_lengths = hf_output["hidden_states_lengths"] - if return_hid_states: - hid_feats = hf_output["hidden_states"] - feats = self._fuse_hid_feats(hid_feats) - else: - hid_feats = None - feats = hf_output["last_hidden_state"] - - feats = feats.transpose(1, 2) - if return_feat_layers is not None: - # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time) - # as the hidden features of the x-vector encoder. - hid_feats = [ - f.transpose(1, 2) - for i, f in enumerate(hid_feats) - if i in return_feat_layers - ] - else: - hid_feats = None - - return feats, hid_feats, feat_lengths - - def forward( - self, - x, - x_lengths=None, - y=None, - return_feat_layers=None, - return_enc_layers=None, - return_classif_layers=None, - return_logits=True, - ): - """Forward function. If returns the logits posteriors of the classes. - It can also returns the hidden representations in the wav2vec feature extractor, - the x-vector encoder and the - classification head. In this case the ouput variable is a dictionary. - - Args: - x: input features tensor with shape=(batch, in_feats, time) - x_lengths: time lengths of the features with shape=(batch,) - y: target classes torch.long tensor with shape=(batch,) - return_feat_layers: list of integers indicating, which wav2vec layers - we should return. If None, no wav2vec layers are returned. - return_enc_layers: list of integers indicating, which encoder layers - we should return. If None, no encoder layers are returned. 
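A self-contained sketch of the "weighted-avg" branch of _fuse_hid_feats above, assuming feat_fusion_start=0 and illustrative sizes (L wav2vec layers, batch B, time T, channels C):

    import torch
    import torch.nn as nn

    L, B, T, C = 13, 2, 100, 768
    hid_feats = [torch.randn(B, T, C) for _ in range(L)]
    feat_fuser = nn.Parameter(torch.zeros(L))        # one logit per layer, uniform at init

    stacked = torch.stack(hid_feats, dim=-1)         # (B, T, C, L)
    norm_weights = nn.functional.softmax(feat_fuser, dim=-1)
    fused = torch.sum(stacked * norm_weights, dim=-1)  # (B, T, C)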
- return_enc_layers: list of integers indicating, which classification head layers - we should return. If None, no head layers are returned. - return_logits: if True, it adds the logits to the output dictionary. - Returns: - Tensor with class logits with shape=(batch, num_classes) or - Dictionary with "logits", "h_enc" (list of hidden encoder layers), - "h_classif" (list hidden classification head layers), "h_feats" (wav2vec features) - """ - feats, hid_feats, feat_lengths = self.forward_feats( - x, x_lengths, return_feat_layers - ) - output = self.transducer( - feats, - feat_lengths, - y, - return_enc_layers=return_enc_layers, - return_classif_layers=return_classif_layers, - return_logits=return_logits, - ) - - if not return_feat_layers: - return output - - if not isinstance(output, dict): - # if the transducer just returned the logits we put then into a dictionary - # to append the hid feats later. - output["logits"] = output - - output["h_feats"] = hid_feats - return output - - def extract_embed( self, - x, - x_lengths=None, - vad_samples=None, - hf_chunk_length=0, - xvec_chunk_length=0, - embed_layer=None, - detach_chunks=False, + hf_feats: Union[Dict, HFWav2Vec2], + transducer: Union[Dict, Transducer], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", ): - if vad_samples is not None: - x, x_lengths = remove_silence(x, x_lengths) - - feats, _, feat_lengths = self.forward_feats( - x, x_lengths, chunk_length=hf_chunk_length, detach_chunks=detach_chunks - ) - xvec_chunk_length = int( - xvec_chunk_length - * self.hf_feats.sample_frequency - * feats.size(-1) - // x.size(-1) - ) - return self.transducer.extract_embed( - feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks - ) - - def freeze_feat_fuser(self): - if self.feat_fuser is None: - return - - if self.feat_fusion_method == "weighted-avg": - self.feat_fuser.requires_grad = False - return - - for param in self.feat_fuser.parameters(): - param.requires_grad = False - - def freeze_hf_feats(self): - self.hf_feats.freeze() - - def freeze_hf_feature_encoder(self): - self.hf_feats.freeze_feature_encoder() - - def set_train_mode(self, mode): - if mode == self._train_mode: - return - - if mode == "full": - self.unfreeze() - elif mode == "frozen": - self.freeze() - elif mode == "ft-embed-affine": - self.unfreeze() - self.freeze_feat_fuser() - self.freeze_hf_feats() - self.transducer.freeze_preembed_layers() - elif mode in ["ft-transducer", "ft-transducer-nograd"]: - self.unfreeze() - self.freeze_hf_feats() - self.freeze_feat_fuser() - elif mode in ["hf-feats-frozen", "hf-feats-frozen-nograd"]: - self.unfreeze() - self.freeze_hf_feats() - elif mode == "hf-feat-extractor-frozen": - self.unfreeze() - self.freeze_hf_feature_encoder() + if isinstance(hf_feats, dict): + if "class_name" in hf_feats: + del hf_feats["class_name"] + hf_feats = HFWav2Vec2(**hf_feats) else: - raise ValueError(f"invalid train_mode={mode}") + assert isinstance(hf_feats, HFWav2Vec2) - logging.info("train mode set to %s", mode) - - if "nograd" in mode: - logging.info("using torch.no_grad for hf_feats") - self._hf_context = torch.no_grad() + if isinstance(transducer, dict): + if "class_name" in transducer: + del transducer["class_name"] + transducer = Transducer(**transducer) else: - self._hf_context = contextlib.nullcontext() - - self._train_mode = mode + assert isinstance(transducer, Transducer) + # assert transducer.encoder_net.in_feats == hf_feats.hidden_size - def _train(self, train_mode: str): - - if train_mode in ["full", "frozen"]: - 
super()._train(train_mode) - elif train_mode == "ft-embed-affine": - self.hf_feats.train() - self.transducer._train("ft-embed_affine") - elif train_mode in [ - "ft-transducer", - "hf-feats-frozen", - "ft-transducer-nograd", - "hf-feats-frozen-nograd", - "hf-feat-extractor-frozen", - ]: - self.hf_feats.train() - self.transducer._train("full") - else: - raise ValueError(f"invalid train_mode={train_mode}") - - @staticmethod - def valid_train_modes(): - return [ - "full", - "frozen", - "ft-embed-affine", - "ft-transducer", - "hf-feats-frozen", - "ft-transducer-nograd", - "hf-feats-frozen-nograd", - "hf-feat-extractor-frozen", - ] + super().__init__(hf_feats, transducer, feat_fusion_start, feat_fusion_method) @staticmethod def filter_args(**kwargs): - valid_args = ( - "hf_feats", - "transducer", - "feat_fusion_start", - "feat_fusion_method", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - return args - - def get_config(self): + base_args = HFWav2Transducer.filter_args(**kwargs) + child_args = HFWav2Vec2.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = Transducer.filter_args(**kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args - hf_cfg = self.hf_feats.get_config() - xvec_cfg = self.transducer.get_config() - del hf_cfg["class_name"] - del xvec_cfg["class_name"] - config = { - "hf_feats": hf_cfg, - "transducer": xvec_cfg, - "feat_fusion_start": self.feat_fusion_start, - "feat_fusion_method": self.feat_fusion_method, - } + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + HFWav2Vec2.add_class_args(parser, prefix="hf_feats") + Transducer.add_class_args(parser, prefix="transducer") + HFWav2Transducer.add_class_args(parser) - def change_config(self, hf_feats, transducer): - logging.info("changing hf wav2transducer config") - self.hf_feats.change_config(**hf_feats) - self.transducer.change_config(**transducer) + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) @staticmethod - def add_class_args(parser, prefix=None, skip=set()): + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFWav2Vec2.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = Transducer.filter_finetune_args(**kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args + @staticmethod + def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") - parser.add_argument( - "--feat-fusion-start", - default=0, - type=int, - help=( - "the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" - "the wav2vec num_layers" - ), - ) - parser.add_argument( - "--feat-fusion-method", - default="weighted-avg", - choices=["weighted-avg", "linear", "cat", "last"], - help=( - "method to fuse the hidden layers from the wav2vec model " - "in [weighted-avg, cat]" - ), - ) + HFWav2Vec2.add_finetune_args(parser, prefix="hf_feats") + Transducer.add_finetune_args(parser, prefix="transducer") if prefix is not None: - outer_parser.add_argument( - "--" + prefix, - action=ActionParser(parser=parser), - help="transducer options", - ) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/transducer_trainer.py 
b/hyperion/torch/trainers/transducer_trainer.py index a67da181..82a4f2bd 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -8,6 +8,7 @@ import logging import torch +import torchaudio import torch.nn as nn from ..utils import MetricAcc @@ -117,23 +118,25 @@ def train_epoch(self, data_loader): data_loader: pytorch data loader returning features and class labels. """ - self.model.update_loss_margin(self.cur_epoch) + # self.model.update_loss_margin(self.cur_epoch) metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() - for batch, (data, target) in enumerate(data_loader): + self.sp = data_loader.dataset.sp + for batch, (data, audio_length, target) in enumerate(data_loader): self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() # TODO: Check and Modify data, target - data, target = data.to(self.device), target.to(self.device) + data, audio_length, target = data.to(self.device), audio_length.to(self.device), target.to(self.device) batch_size = data.shape[0] with self.amp_autocast(): - output = self.model(data, y=target) - loss = self.loss(output, target).mean() / self.grad_acc_steps + output, loss = self.model(data, x_lengths=audio_length, y=target) + loss = loss.mean() / self.grad_acc_steps + # loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() From bc85ec756c6715612dc988e446e0dc4f4bd9a766 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 3 Nov 2022 21:48:14 -0400 Subject: [PATCH 042/154] some bug fixes in subcenter loss and class-weighted sampler --- .../data/class_weighted_seg_chunk_sampler.py | 17 ++++- hyperion/torch/layers/margin_losses.py | 71 +++++++++++-------- hyperion/torch/models/xvectors/xvector.py | 18 ++++- hyperion/utils/class_info.py | 10 +-- hyperion/utils/info_table.py | 26 +++---- 5 files changed, 87 insertions(+), 55 deletions(-) diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 05b222c7..07a61b8f 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -182,8 +182,19 @@ def _gather_class_info(self): ) self.map_class_to_segs_idx = {} for class_id in self.class_info["id"].values: - seg_ids = map_class_to_segs.loc[class_id, "id"].values - seg_idx = self.seg_set.get_loc(seg_ids) + if class_id in map_class_to_segs.index: + seg_ids = map_class_to_segs.loc[class_id, "id"] + if isinstance(seg_ids, str): + seg_ids = [seg_ids] + else: + seg_ids = seg_ids.values + + seg_idx = self.seg_set.get_loc(seg_ids) + else: + seg_idx = [] + self.class_info.loc[class_id, "weights"] = 0.0 + self.class_info.renorm_weights() + self.map_class_to_segs_idx[class_id] = seg_idx def _set_class_weights(self): @@ -231,7 +242,7 @@ def set_hard_prototypes(self, affinity_matrix): ).indices def get_hard_prototypes(self, class_idx): - return self.hard_prototypes[class_idx].flatten() + return self.hard_prototypes[class_idx].flatten().numpy() def _sample_chunk_length(self): if self.var_batch_size: diff --git a/hyperion/torch/layers/margin_losses.py b/hyperion/torch/layers/margin_losses.py index 6443ea02..acb7a514 100644 --- a/hyperion/torch/layers/margin_losses.py +++ b/hyperion/torch/layers/margin_losses.py @@ -77,15 +77,18 @@ def __repr__(self): return self.__str__() def __str__(self): - s = "%s(in_feats=%d, num_classes=%d, cos_scale=%.2f, 
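The isinstance(seg_ids, str) branch added to the sampler above guards a pandas behavior worth spelling out: DataFrame.loc[key, col] returns a bare scalar when exactly one row matches and a Series when several do. An illustrative example (data invented):

    import pandas as pd

    map_class_to_segs = pd.DataFrame(
        {"class_id": ["spk1", "spk1", "spk2"], "id": ["seg1", "seg2", "seg3"]}
    ).set_index("class_id")

    map_class_to_segs.loc["spk2", "id"]           # 'seg3', a plain str
    map_class_to_segs.loc["spk1", "id"].values    # array(['seg1', 'seg2'], dtype=object)

Classes with no segments get weight 0.0 and the remaining weights are renormalized, so the sampler never draws a class it cannot serve.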
margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" % ( - self.__class__.__name__, - self.in_feats, - self.num_classes, - self.cos_scale, - self.margin, - self.margin_warmup_epochs, - self.intertop_k, - self.intertop_margin, + s = ( + "%s(in_feats=%d, num_classes=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" + % ( + self.__class__.__name__, + self.in_feats, + self.num_classes, + self.cos_scale, + self.margin, + self.margin_warmup_epochs, + self.intertop_k, + self.intertop_margin, + ) ) return s @@ -224,15 +227,18 @@ def __repr__(self): return self.__str__() def __str__(self): - s = "%s(in_feats=%d, num_classes=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" % ( - self.__class__.__name__, - self.in_feats, - self.num_classes, - self.cos_scale, - self.margin, - self.margin_warmup_epochs, - self.intertop_k, - self.intertop_margin, + s = ( + "%s(in_feats=%d, num_classes=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" + % ( + self.__class__.__name__, + self.in_feats, + self.num_classes, + self.cos_scale, + self.margin, + self.margin_warmup_epochs, + self.intertop_k, + self.intertop_margin, + ) ) return s @@ -361,20 +367,25 @@ def __init__( ) def __str__(self): - s = "%s(in_feats=%d, num_classes=%d, num_subcenters=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" % ( - self.__class__.__name__, - self.in_feats, - self.num_classes, - self.num_subcenters, - self.cos_scale, - self.margin, - self.margin_warmup_epochs, - self.intertop_k, - self.intertop_margin, + s = ( + "%s(in_feats=%d, num_classes=%d, num_subcenters=%d, cos_scale=%.2f, margin=%.2f, margin_warmup_epochs=%d, intertop_k=%d, intertop_margin=%f)" + % ( + self.__class__.__name__, + self.in_feats, + self.num_classes, + self.num_subcenters, + self.cos_scale, + self.margin, + self.margin_warmup_epochs, + self.intertop_k, + self.intertop_margin, + ) ) return s def _update_counts(self, y, proto_idx): + idx1 = torch.arange(y.size(0)) + proto_idx = proto_idx[idx1, y] self.subcenter_counts[y, proto_idx] += 1 # we make counts relative to avoid risk of overflowing the integers min_counts, _ = torch.min(self.subcenter_counts, dim=1, keepdim=True) @@ -445,7 +456,9 @@ def get_main_prototype_kernel(self): self.subcenter_counts, dim=-1 ) # get indices for the main prototype idx1 = torch.arange(self.num_classes) - kernel = kernel.view(-1, self.num_classes, self.num_subcenters)[:, idx1, idx2] + kernel = self.kernel.view(-1, self.num_classes, self.num_subcenters)[ + :, idx1, idx2 + ] return kernel def compute_prototype_affinity(self): diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 197ef5a9..15f0ce86 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -572,14 +572,26 @@ def rebuild_output_layer( intertop_margin=0.0, num_subcenters=2, ): - if (self.num_classes is not None and self.num_classes != num_classes) or ( - self.loss_type != loss_type + if ( + (self.num_classes is not None and self.num_classes != num_classes) + or (self.loss_type != loss_type) + or ( + loss_type == "subcenter-arc-softmax" + and self.classif_net.num_subcenters != num_subcenters + ) ): # if we change the number of classes or the loss-type # we need to reinitiate the last layer logging.info("rebuilding output layer") self.classif_net.rebuild_output_layer( - num_classes, 
loss_type, cos_scale, margin, margin_warmup_epochs + num_classes, + loss_type, + cos_scale, + margin, + margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, ) return diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py index f1eaf665..9e158d87 100644 --- a/hyperion/utils/class_info.py +++ b/hyperion/utils/class_info.py @@ -30,6 +30,10 @@ def set_uniform_weights(self): def set_weights(self, weights): self.df["weights"] = weights / weights.sum() + def renorm_weights(self): + weights = self.df["weights"] + self.df["weights"] = weights / weights.sum() + def exp_weights(self, x): weights = self.df["weights"] ** x self.set_weights(weights) @@ -62,11 +66,7 @@ def load(cls, file_path, sep=None): if ext == "": # if no extension we load as kaldi utt2spk file df = pd.read_csv( - file_path, - sep=" ", - header=None, - names=["id"], - dtype={"id": np.str}, + file_path, sep=" ", header=None, names=["id"], dtype={"id": np.str}, ) return cls(df) diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 217f1f9a..80199a33 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -45,7 +45,7 @@ def __str__(self): @property def __repr__(self): - return self.df.__repr__ + return self.df.__repr__ @property def iat(self): @@ -75,6 +75,10 @@ def __setitem__(self): def __contains__(self): return self.df.__contains__ + @property + def index(self): + return self.df.index + def save(self, file_path, sep=None): """Saves info table to file @@ -144,9 +148,7 @@ def split(self, idx, num_parts, group_by=None): if group_by is None: _, idx1 = split_list(self.df["id"], idx, num_parts) else: - _, idx1 = split_list_group_by_key( - self.df[group_by], idx, num_parts - ) + _, idx1 = split_list_group_by_key(self.df[group_by], idx, num_parts) df = self.df.iloc[idx1] return self.__class__(df) @@ -166,7 +168,9 @@ def merge(cls, tables): return cls(df) def filter(self, items=None, iindex=None, columns=None, by="id", keep=True): - assert items is None or iindex is None, "items and iindex cannot be not None at the same time" + assert ( + items is None or iindex is None + ), "items and iindex cannot be not None at the same time" df = self.df if not keep: @@ -195,9 +199,8 @@ def filter(self, items=None, iindex=None, columns=None, by="id", keep=True): if columns is not None: df = df[columns] - - return self.__class__(df) + return self.__class__(df) def __eq__(self, other): """Equal operator""" @@ -216,8 +219,6 @@ def __cmp__(self, other): return 0 return 1 - - # def __len__(self): # """Returns the number of elements in the list.""" # return len(self.df) @@ -316,8 +317,6 @@ def __cmp__(self, other): # utt_info = self.utt_info.iloc[idx1] # return Utt2Info(utt_info) - - # def filter(self, filter_key, keep=True): # """Removes elements from Utt2Info object by key @@ -411,13 +410,10 @@ def get_loc(self, keys): loc = self.df.index.get_loc(keys) if isinstance(loc, int): return loc - elif isinstance(loc, np.ndarray) and loc.dtype==np.bool: + elif isinstance(loc, np.ndarray) and loc.dtype == np.bool: return np.nonzero(loc)[0] else: return list(range(loc.start, loc.stop, loc.step)) def get_col_idx(self, keys): return self.df.columns.get_loc(keys) - - - \ No newline at end of file From 450f50d35c1421cf561841862d306cb8a1afcf05 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Sat, 5 Nov 2022 13:53:04 -0400 Subject: [PATCH 043/154] Wav2vec2 Transducer update data augmentation --- ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 15 +- 
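The three branches of InfoTable.get_loc above cover the possible return types of pandas Index.get_loc (keys are illustrative):

    import pandas as pd

    pd.Index(["u1", "u2", "u3"]).get_loc("u2")   # 1, a plain int (unique key)
    pd.Index(["u1", "u2", "u2"]).get_loc("u2")   # slice(1, 3, None) (contiguous duplicates)
    pd.Index(["u2", "u1", "u2"]).get_loc("u2")   # array([ True, False,  True]) boolean mask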
.../v1/conf/wav2vec2xlsr300m_transducer.yaml | 18 +- hyperion/bin/train_wav2vec2transducer.py | 32 +- hyperion/torch/data/audio_dataset.py | 5 +- .../torch/models/transducer/subsampling.py | 161 +++++++ .../torch/models/transducer/transducer.py | 24 +- .../torch/models/transducer/transformer.py | 418 ++++++++++++++++++ hyperion/torch/tpm/hf/hf_wav2vec_base.py | 4 +- hyperion/torch/trainers/transducer_trainer.py | 2 - hyperion/utils/text.py | 29 ++ 10 files changed, 651 insertions(+), 57 deletions(-) create mode 100644 hyperion/torch/models/transducer/subsampling.py create mode 100644 hyperion/torch/models/transducer/transformer.py create mode 100644 hyperion/utils/text.py diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index 91adaa35..2ee0a1aa 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -2,10 +2,8 @@ data: train: dataset: wav_scale: 1 - # class_names: - # - text - # aug_cfgs: - # - conf/reverb_noise_aug.yaml + aug_cfgs: + - conf/reverb_noise_aug.yaml return_segment_info: - text sampler: @@ -17,14 +15,9 @@ data: num_workers: 1 val: dataset: - # max_chunk_length: 4.0 - # min_chunk_length: 4.0 - # aug_cfgs: [conf/reverb_noise_aug.yaml] + aug_cfgs: + - conf/reverb_noise_aug.yaml wav_scale: 1 - # class_names: - # - text - # aug_cfgs: - # - conf/reverb_noise_aug.yaml return_segment_info: - text sampler: diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml index 3f97feb7..d736dc86 100644 --- a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml @@ -1,16 +1,16 @@ hf_feats: - pretrained_model_path: microsoft/wavlm-base-plus + pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus # test_param: xyz transducer: encoder_out_dim: 768 - conformer_enc: - num_features: 80 - subsampling_factor: 4 - d_model: 512 - nhead: 8 - dim_feedforward: 2048 - num_encoder_layers: 12 - vgg_frontend: False +# conformer_enc: + # num_features: 80 + # subsampling_factor: 4 + # d_model: 512 + # nhead: 8 + # dim_feedforward: 2048 + # num_encoder_layers: 12 + # vgg_frontend: False decoder: # vocab_size: 1000 # blank_id: 0 diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 8c539cd1..bfc6248a 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -37,7 +37,7 @@ } -def my_collate(batch): +def transducer_collate(batch): audio = [] audio_length = [] target = [] @@ -46,8 +46,6 @@ def my_collate(batch): audio.append(wav) audio_length.append(wav.shape[0]) target.append(record[1]) - if i==4: - break audio = pad_sequence(audio) audio_length = torch.as_tensor(audio_length) target = k2.RaggedTensor(target) @@ -81,7 +79,7 @@ def init_data(partition, rank, num_gpus, **kwargs): largs = ( {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} ) - data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs, collate_fn=my_collate) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate) return data_loader @@ -111,25 +109,24 @@ def train_model(gpu_id, args): torch.manual_seed(args.seed) 
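Illustrative use of the transducer_collate function defined above (waveform lengths and BPE token ids are invented; requires k2 for the ragged targets). Note that pad_sequence defaults to a time-first layout, so the first dimension of the padded audio is the padded length, not the batch size:

    import torch

    batch = [
        (torch.randn(16000), [5, 9, 2]),   # (waveform, BPE token ids)
        (torch.randn(12000), [7, 3]),
    ]
    audio, audio_length, target = transducer_collate(batch)
    audio.shape      # torch.Size([16000, 2]), i.e. (max_T, N)
    audio_length     # tensor([16000, 12000])
    target           # k2.RaggedTensor [ [ 5 9 2 ] [ 7 3 ] ]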
set_float_cpu("float32") - # ddp_args = ddp.filter_ddp_args(**kwargs) - # device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) - # kwargs["rank"] = rank + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank - # for Debug - rank = 0 - kwargs["rank"] = 0 - device = "cpu" - world_size=1 + # # for Debug + # rank = 0 + # kwargs["rank"] = 0 + # device = "cpu" + # world_size=1 train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - # model = init_model(train_loader.dataset.num_classes, **kwargs) model = init_model(train_loader.dataset.sp.piece_to_id(""), train_loader.dataset.sp.get_piece_size(), **kwargs) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: logging.info("trainer args={}".format(trn_args)) - metrics = {"acc": CategoricalAccuracy()} + metrics = {} #{"acc": CategoricalAccuracy()} trainer = Trainer( model, device=device, @@ -179,18 +176,11 @@ def make_parser(model_class): parser.add_argument("--data.val.dataset.text_file", type=str) - # parser.add_argument( - # "--data.train.dataset.class_files", - # type=str, - # ) - parser.add_argument( "--data.train.dataset.bpe_model", type=str, ) - # parser.add_argument("--data.val.dataset.class_files", type=str) - parser.link_arguments( "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" ) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 8929868f..35b7d85b 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -692,7 +692,10 @@ def __getitem__(self, segment): x, fs = self._read_audio(seg_id, start, duration) if self.augmenters: # augmentations - num_samples = int(duration * fs) + if duration == 0: + num_samples = len(x) + else: + num_samples = int(duration * fs) reverb_context_samples = len(x) - num_samples x_augs = self._apply_augs(x, num_samples, reverb_context_samples) diff --git a/hyperion/torch/models/transducer/subsampling.py b/hyperion/torch/models/transducer/subsampling.py new file mode 100644 index 00000000..542fb036 --- /dev/null +++ b/hyperion/torch/models/transducer/subsampling.py @@ -0,0 +1,161 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import torch +import torch.nn as nn + + +class Conv2dSubsampling(nn.Module): + """Convolutional 2D subsampling (to 1/4 length). + + Convert an input of shape (N, T, idim) to an output + with shape (N, T', odim), where + T' = ((T-1)//2 - 1)//2, which approximates T' == T//4 + + It is based on + https://github.com/espnet/espnet/blob/master/espnet/nets/pytorch_backend/transformer/subsampling.py # noqa + """ + + def __init__(self, idim: int, odim: int) -> None: + """ + Args: + idim: + Input dim. The input shape is (N, T, idim). + Caution: It requires: T >=7, idim >=7 + odim: + Output dim. 
The output shape is (N, ((T-1)//2 - 1)//2, odim) + """ + assert idim >= 7 + super().__init__() + self.conv = nn.Sequential( + nn.Conv2d( + in_channels=1, out_channels=odim, kernel_size=3, stride=2 + ), + nn.ReLU(), + nn.Conv2d( + in_channels=odim, out_channels=odim, kernel_size=3, stride=2 + ), + nn.ReLU(), + ) + self.out = nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Subsample x. + + Args: + x: + Its shape is (N, T, idim). + + Returns: + Return a tensor of shape (N, ((T-1)//2 - 1)//2, odim) + """ + # On entry, x is (N, T, idim) + x = x.unsqueeze(1) # (N, T, idim) -> (N, 1, T, idim) i.e., (N, C, H, W) + x = self.conv(x) + # Now x is of shape (N, odim, ((T-1)//2 - 1)//2, ((idim-1)//2 - 1)//2) + b, c, t, f = x.size() + x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) + # Now x is of shape (N, ((T-1)//2 - 1))//2, odim) + return x + + +class VggSubsampling(nn.Module): + """Trying to follow the setup described in the following paper: + https://arxiv.org/pdf/1910.09799.pdf + + This paper is not 100% explicit so I am guessing to some extent, + and trying to compare with other VGG implementations. + + Convert an input of shape (N, T, idim) to an output + with shape (N, T', odim), where + T' = ((T-1)//2 - 1)//2, which approximates T' = T//4 + """ + + def __init__(self, idim: int, odim: int) -> None: + """Construct a VggSubsampling object. + + This uses 2 VGG blocks with 2 Conv2d layers each, + subsampling its input by a factor of 4 in the time dimensions. + + Args: + idim: + Input dim. The input shape is (N, T, idim). + Caution: It requires: T >=7, idim >=7 + odim: + Output dim. The output shape is (N, ((T-1)//2 - 1)//2, odim) + """ + super().__init__() + + cur_channels = 1 + layers = [] + block_dims = [32, 64] + + # The decision to use padding=1 for the 1st convolution, then padding=0 + # for the 2nd and for the max-pooling, and ceil_mode=True, was driven by + # a back-compatibility concern so that the number of frames at the + # output would be equal to: + # (((T-1)//2)-1)//2. + # We can consider changing this by using padding=1 on the + # 2nd convolution, so the num-frames at the output would be T//4. + for block_dim in block_dims: + layers.append( + torch.nn.Conv2d( + in_channels=cur_channels, + out_channels=block_dim, + kernel_size=3, + padding=1, + stride=1, + ) + ) + layers.append(torch.nn.ReLU()) + layers.append( + torch.nn.Conv2d( + in_channels=block_dim, + out_channels=block_dim, + kernel_size=3, + padding=0, + stride=1, + ) + ) + layers.append( + torch.nn.MaxPool2d( + kernel_size=2, stride=2, padding=0, ceil_mode=True + ) + ) + cur_channels = block_dim + + self.layers = nn.Sequential(*layers) + + self.out = nn.Linear( + block_dims[-1] * (((idim - 1) // 2 - 1) // 2), odim + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Subsample x. + + Args: + x: + Its shape is (N, T, idim). 
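A quick shape check (illustrative) of the (N, T, idim) -> (N, ((T-1)//2 - 1)//2, odim) contract stated above, using the Conv2dSubsampling class defined in this file:

    import torch

    sub = Conv2dSubsampling(idim=80, odim=256)
    x = torch.randn(3, 100, 80)        # (N, T, idim)
    y = sub(x)
    assert y.shape == (3, 24, 256)     # ((100 - 1) // 2 - 1) // 2 == 24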
+ + Returns: + Return a tensor of shape (N, ((T-1)//2 - 1)//2, odim) + """ + x = x.unsqueeze(1) + x = self.layers(x) + b, c, t, f = x.size() + x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f)) + return x diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index b34ff4cc..6fcc965d 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -28,7 +28,7 @@ from ...torch_model import TorchModel from hyperion.utils.utils import add_sos -from .conformer import Conformer +# from .conformer import Conformer from .decoder import Decoder from .joiner import Joiner @@ -43,7 +43,7 @@ def __init__( vocab_size, blank_id, encoder_out_dim, - conformer_enc, + # conformer_enc, decoder, ): """ @@ -65,13 +65,13 @@ def __init__( super().__init__() # assert isinstance(encoder, EncoderInterface) # assert hasattr(decoder, "blank_id") - conformer_enc["output_dim"] = encoder_out_dim + # conformer_enc["output_dim"] = encoder_out_dim decoder["blank_id"] = blank_id decoder["vocab_size"] = vocab_size decoder["output_dim"] = encoder_out_dim joiner = {"input_dim":encoder_out_dim, "output_dim":vocab_size} - self.encoder = Conformer(**conformer_enc) + # self.encoder = Conformer(**conformer_enc) self.decoder = Decoder(**decoder) self.joiner = Joiner(**joiner) @@ -196,13 +196,13 @@ def valid_train_modes(): return ["full", "frozen", "ft-embed-affine"] def get_config(self): - enc_cfg = self.encoder.get_config() + # enc_cfg = self.encoder.get_config() dec_cfg = self.decoder.get_config() join_cfg = self.joiner.get_config() config = { - "encoder_out_dim" : self.encoder_out_dim, - "conformer_enc": enc_cfg, + # "encoder_out_dim" : self.encoder_out_dim, + # "conformer_enc": enc_cfg, "decoder": dec_cfg, "joiner": join_cfg, } @@ -214,7 +214,7 @@ def get_config(self): def filter_args(**kwargs): # get arguments for pooling - encoder_args = Conformer.filter_args(**kwargs["conformer_enc"]) + # encoder_args = Conformer.filter_args(**kwargs["conformer_enc"]) decoder_args = Decoder.filter_args(**kwargs["decoder"]) # joiner_args = Joiner.filter_args(**kwargs["joiner"]) @@ -223,7 +223,7 @@ def filter_args(**kwargs): ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - args["conformer_enc"] = encoder_args + # args["conformer_enc"] = encoder_args args["decoder"] = decoder_args # args["joiner"] = joiner_args return args @@ -237,9 +237,9 @@ def add_class_args(parser, prefix=None, skip=set()): - Conformer.add_class_args( - parser, prefix="conformer_enc", skip=[] - ) + # Conformer.add_class_args( + # parser, prefix="conformer_enc", skip=[] + # ) Decoder.add_class_args( parser, prefix="decoder", skip=[] diff --git a/hyperion/torch/models/transducer/transformer.py b/hyperion/torch/models/transducer/transformer.py new file mode 100644 index 00000000..38edbd62 --- /dev/null +++ b/hyperion/torch/models/transducer/transformer.py @@ -0,0 +1,418 @@ +# Copyright 2021 University of Chinese Academy of Sciences (author: Han Zhu) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import math +from typing import Optional, Tuple + +import torch +import torch.nn as nn +from .encoder_interface import EncoderInterface +from .subsampling import Conv2dSubsampling, VggSubsampling + +from hyperion.utils.utils import make_pad_mask + + +class Transformer(EncoderInterface): + def __init__( + self, + num_features: int, + output_dim: int, + subsampling_factor: int = 4, + d_model: int = 256, + nhead: int = 4, + dim_feedforward: int = 2048, + num_encoder_layers: int = 12, + dropout: float = 0.1, + normalize_before: bool = True, + vgg_frontend: bool = False, + ) -> None: + """ + Args: + num_features: + The input dimension of the model. + output_dim: + The output dimension of the model. + subsampling_factor: + Number of output frames is num_in_frames // subsampling_factor. + Currently, subsampling_factor MUST be 4. + d_model: + Attention dimension. + nhead: + Number of heads in multi-head attention. + Must satisfy d_model // nhead == 0. + dim_feedforward: + The output dimension of the feedforward layers in encoder. + num_encoder_layers: + Number of encoder layers. + dropout: + Dropout in encoder. + normalize_before: + If True, use pre-layer norm; False to use post-layer norm. + vgg_frontend: + True to use vgg style frontend for subsampling. + """ + super().__init__() + + self.num_features = num_features + self.output_dim = output_dim + self.subsampling_factor = subsampling_factor + if subsampling_factor != 4: + raise NotImplementedError("Support only 'subsampling_factor=4'.") + + # self.encoder_embed converts the input of shape (N, T, num_features) + # to the shape (N, T//subsampling_factor, d_model). + # That is, it does two things simultaneously: + # (1) subsampling: T -> T//subsampling_factor + # (2) embedding: num_features -> d_model + if vgg_frontend: + self.encoder_embed = VggSubsampling(num_features, d_model) + else: + self.encoder_embed = Conv2dSubsampling(num_features, d_model) + + self.encoder_pos = PositionalEncoding(d_model, dropout) + + encoder_layer = TransformerEncoderLayer( + d_model=d_model, + nhead=nhead, + dim_feedforward=dim_feedforward, + dropout=dropout, + normalize_before=normalize_before, + ) + + if normalize_before: + encoder_norm = nn.LayerNorm(d_model) + else: + encoder_norm = None + + self.encoder = nn.TransformerEncoder( + encoder_layer=encoder_layer, + num_layers=num_encoder_layers, + norm=encoder_norm, + ) + + # TODO(fangjun): remove dropout + self.encoder_output_layer = nn.Sequential( + nn.Dropout(p=dropout), nn.Linear(d_model, output_dim) + ) + + def forward( + self, x: torch.Tensor, x_lens: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + x: + The input tensor. Its shape is (batch_size, seq_len, feature_dim). + x_lens: + A tensor of shape (batch_size,) containing the number of frames in + `x` before padding. + Returns: + Return a tuple containing 2 tensors: + - logits, its shape is (batch_size, output_seq_len, output_dim) + - logit_lens, a tensor of shape (batch_size,) containing the number + of frames in `logits` before padding. 
+ """ + x = self.encoder_embed(x) + x = self.encoder_pos(x) + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + + # Caution: We assume the subsampling factor is 4! + lengths = ((x_lens - 1) // 2 - 1) // 2 + assert x.size(0) == lengths.max().item() + + mask = make_pad_mask(lengths) + x = self.encoder(x, src_key_padding_mask=mask) # (T, N, C) + + logits = self.encoder_output_layer(x) + logits = logits.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + return logits, lengths + + +class TransformerEncoderLayer(nn.Module): + """ + Modified from torch.nn.TransformerEncoderLayer. + Add support of normalize_before, + i.e., use layer_norm before the first block. + + Args: + d_model: + the number of expected features in the input (required). + nhead: + the number of heads in the multiheadattention models (required). + dim_feedforward: + the dimension of the feedforward network model (default=2048). + dropout: + the dropout value (default=0.1). + activation: + the activation function of intermediate layer, relu or + gelu (default=relu). + normalize_before: + whether to use layer_norm before the first block. + + Examples:: + >>> encoder_layer = TransformerEncoderLayer(d_model=512, nhead=8) + >>> src = torch.rand(10, 32, 512) + >>> out = encoder_layer(src) + """ + + def __init__( + self, + d_model: int, + nhead: int, + dim_feedforward: int = 2048, + dropout: float = 0.1, + activation: str = "relu", + normalize_before: bool = True, + ) -> None: + super(TransformerEncoderLayer, self).__init__() + self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=0.0) + # Implementation of Feedforward model + self.linear1 = nn.Linear(d_model, dim_feedforward) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.activation = _get_activation_fn(activation) + + self.normalize_before = normalize_before + + def __setstate__(self, state): + if "activation" not in state: + state["activation"] = nn.functional.relu + super(TransformerEncoderLayer, self).__setstate__(state) + + def forward( + self, + src: torch.Tensor, + src_mask: Optional[torch.Tensor] = None, + src_key_padding_mask: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Pass the input through the encoder layer. + + Args: + src: the sequence to the encoder layer (required). + src_mask: the mask for the src sequence (optional). + src_key_padding_mask: the mask for the src keys per batch (optional) + + Shape: + src: (S, N, E). + src_mask: (S, S). + src_key_padding_mask: (N, S). 
+ S is the source sequence length, T is the target sequence length, + N is the batch size, E is the feature number + """ + residual = src + if self.normalize_before: + src = self.norm1(src) + src2 = self.self_attn( + src, + src, + src, + attn_mask=src_mask, + key_padding_mask=src_key_padding_mask, + )[0] + src = residual + self.dropout1(src2) + if not self.normalize_before: + src = self.norm1(src) + + residual = src + if self.normalize_before: + src = self.norm2(src) + src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) + src = residual + self.dropout2(src2) + if not self.normalize_before: + src = self.norm2(src) + return src + + +def _get_activation_fn(activation: str): + if activation == "relu": + return nn.functional.relu + elif activation == "gelu": + return nn.functional.gelu + + raise RuntimeError( + "activation should be relu/gelu, not {}".format(activation) + ) + + +class PositionalEncoding(nn.Module): + """This class implements the positional encoding + proposed in the following paper: + + - Attention Is All You Need: https://arxiv.org/pdf/1706.03762.pdf + + PE(pos, 2i) = sin(pos / (10000^(2i/d_modle)) + PE(pos, 2i+1) = cos(pos / (10000^(2i/d_modle)) + + Note:: + + 1 / (10000^(2i/d_model)) = exp(-log(10000^(2i/d_model))) + = exp(-1* 2i / d_model * log(100000)) + = exp(2i * -(log(10000) / d_model)) + """ + + def __init__(self, d_model: int, dropout: float = 0.1) -> None: + """ + Args: + d_model: + Embedding dimension. + dropout: + Dropout probability to be applied to the output of this module. + """ + super().__init__() + self.d_model = d_model + self.xscale = math.sqrt(self.d_model) + self.dropout = nn.Dropout(p=dropout) + # not doing: self.pe = None because of errors thrown by torchscript + self.pe = torch.zeros(1, 0, self.d_model, dtype=torch.float32) + + def extend_pe(self, x: torch.Tensor) -> None: + """Extend the time t in the positional encoding if required. + + The shape of `self.pe` is (1, T1, d_model). The shape of the input x + is (N, T, d_model). If T > T1, then we change the shape of self.pe + to (N, T, d_model). Otherwise, nothing is done. + + Args: + x: + It is a tensor of shape (N, T, C). + Returns: + Return None. + """ + if self.pe is not None: + if self.pe.size(1) >= x.size(1): + self.pe = self.pe.to(dtype=x.dtype, device=x.device) + return + pe = torch.zeros(x.size(1), self.d_model, dtype=torch.float32) + position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1) + div_term = torch.exp( + torch.arange(0, self.d_model, 2, dtype=torch.float32) + * -(math.log(10000.0) / self.d_model) + ) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0) + # Now pe is of shape (1, T, d_model), where T is x.size(1) + self.pe = pe.to(device=x.device, dtype=x.dtype) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Add positional encoding. + + Args: + x: + Its shape is (N, T, C) + + Returns: + Return a tensor of shape (N, T, C) + """ + self.extend_pe(x) + x = x * self.xscale + self.pe[:, : x.size(1), :] + return self.dropout(x) + + +class Noam(object): + """ + Implements Noam optimizer. 
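A small numeric check (illustrative) of the sinusoidal encoding above, with dropout=0 so the output is deterministic. With a zero input, position t and even channel 2i hold sin(t / 10000**(2i / d_model)):

    import torch

    pe = PositionalEncoding(d_model=4, dropout=0.0)
    x = torch.zeros(1, 3, 4)    # (N, T, C)
    y = pe(x)                   # x * sqrt(d_model) + PE, so here just the PE table
    assert torch.allclose(y[0, 2, 0], torch.sin(torch.tensor(2.0)))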
+ + Proposed in + "Attention Is All You Need", https://arxiv.org/pdf/1706.03762.pdf + + Modified from + https://github.com/espnet/espnet/blob/master/espnet/nets/pytorch_backend/transformer/optimizer.py # noqa + + Args: + params: + iterable of parameters to optimize or dicts defining parameter groups + model_size: + attention dimension of the transformer model + factor: + learning rate factor + warm_step: + warmup steps + """ + + def __init__( + self, + params, + model_size: int = 256, + factor: float = 10.0, + warm_step: int = 25000, + weight_decay=0, + ) -> None: + """Construct an Noam object.""" + self.optimizer = torch.optim.Adam( + params, lr=0, betas=(0.9, 0.98), eps=1e-9, weight_decay=weight_decay + ) + self._step = 0 + self.warmup = warm_step + self.factor = factor + self.model_size = model_size + self._rate = 0 + + @property + def param_groups(self): + """Return param_groups.""" + return self.optimizer.param_groups + + def step(self): + """Update parameters and rate.""" + self._step += 1 + rate = self.rate() + for p in self.optimizer.param_groups: + p["lr"] = rate + self._rate = rate + self.optimizer.step() + + def rate(self, step=None): + """Implement `lrate` above.""" + if step is None: + step = self._step + return ( + self.factor + * self.model_size ** (-0.5) + * min(step ** (-0.5), step * self.warmup ** (-1.5)) + ) + + def zero_grad(self): + """Reset gradient.""" + self.optimizer.zero_grad() + + def state_dict(self): + """Return state_dict.""" + return { + "_step": self._step, + "warmup": self.warmup, + "factor": self.factor, + "model_size": self.model_size, + "_rate": self._rate, + "optimizer": self.optimizer.state_dict(), + } + + def load_state_dict(self, state_dict): + """Load state_dict.""" + for key, value in state_dict.items(): + if key == "optimizer": + self.optimizer.load_state_dict(state_dict["optimizer"]) + else: + setattr(self, key, value) diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index 1dceed1c..ed3fcbb3 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -273,7 +273,7 @@ def _preprocess(self, x, x_lengths=None): """Prepares input audio to be used as input to wav2vec style model.""" x_mask = seq_lengths_to_mask(x_lengths, x.size(-1), dtype=torch.long) if self.normalize_input: - x = self._normalize(x, x_lengths) + x = self._normalize(x, x_mask) if self.use_input_attention_mask: x_mask = None @@ -570,6 +570,8 @@ def add_class_args(parser, prefix=None, skip=set()): default=None, help=("file path or HuggingFace Hub path to pre-trained model"), ) + + parser.add_argument( "--normalize-input", default=True, diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 82a4f2bd..74363066 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -118,8 +118,6 @@ def train_epoch(self, data_loader): data_loader: pytorch data loader returning features and class labels. 
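The rate() method above implements lr = factor * model_size**-0.5 * min(step**-0.5, step * warm_step**-1.5): linear warmup up to a peak at step == warm_step, then inverse square-root decay. Plugging in the defaults (illustrative arithmetic):

    model_size, factor, warmup = 256, 10.0, 25000
    rate = lambda s: factor * model_size ** -0.5 * min(s ** -0.5, s * warmup ** -1.5)
    rate(100)      # ~1.6e-05, warmup region
    rate(25000)    # ~4.0e-03, the peak
    rate(100000)   # ~2.0e-03, decay region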
""" - # self.model.update_loss_margin(self.cur_epoch) - metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() diff --git a/hyperion/utils/text.py b/hyperion/utils/text.py new file mode 100644 index 00000000..5e06ad0c --- /dev/null +++ b/hyperion/utils/text.py @@ -0,0 +1,29 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from pathlib import Path + +import numpy as np +import pandas as pd + + + +def read_text(text_file: str): + # assert check_argument_types() + text_file = Path(text_file) + + data = {"id":[],"text":[]} + with Path(text_file).open("r", encoding="utf-8") as f: + for linenum, line in enumerate(f, 1): + sps = line.rstrip().split(maxsplit=1) + if len(sps) == 1: + k, v = sps[0], "" + else: + k, v = sps + # if k in data: + # raise RuntimeError(f"{k} is duplicated ({path}:{linenum})") + data["id"].append(k) + data["text"].append(v) + return pd.DataFrame(data=data, index=data["id"]) + From 5ac5496400f7dfd6939a186460524dc4aec2a33b Mon Sep 17 00:00:00 2001 From: neillu23 Date: Mon, 7 Nov 2022 10:55:26 -0500 Subject: [PATCH 044/154] gradient clipping and multi-gpu --- ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 12 +++-- .../v1/conf/wav2vec2xlsr300m_transducer.yaml | 1 - .../torch/models/transducer/transducer.py | 24 ++-------- hyperion/torch/trainers/transducer_trainer.py | 46 +++++++++++++++++++ hyperion/torch/utils/ddp.py | 6 ++- 5 files changed, 61 insertions(+), 28 deletions(-) diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index 2ee0a1aa..79eadb07 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -9,10 +9,11 @@ data: sampler: # sampler_type: 'seg_sampler' sampler_type: 'bucketing_seg_sampler' + min_batch_size: 4 batch_size: 4 iters_per_epoch: 6 data_loader: - num_workers: 1 + num_workers: 8 val: dataset: aug_cfgs: @@ -21,8 +22,10 @@ data: return_segment_info: - text sampler: + # sampler_type: 'seg_sampler' sampler_type: 'bucketing_seg_sampler' - batch_size: 32 + min_batch_size: 2 + batch_size: 2 iters_per_epoch: 6 data_loader: num_workers: 8 @@ -30,7 +33,7 @@ model: wav2vec2xlsr300m_transducer.yaml trainer: optim: opt_type: sgd - lr: 0.45 + lr: 0.003 momentum: 0.9 weight_decay: 4e-4 lrsched: @@ -38,9 +41,10 @@ trainer: decay_rate: 0.5 decay_steps: 4200 hold_steps: 1500 - min_lr: 4e-4 + min_lr: 4e-5 warmup_steps: 1500 update_lr_on_opt_step: true + grad_clip: 100 use_amp: true log_interval: 1000 epochs: 60 diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml index d736dc86..57f7272b 100644 --- a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml @@ -1,6 +1,5 @@ hf_feats: pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus - # test_param: xyz transducer: encoder_out_dim: 768 # conformer_enc: diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index 6fcc965d..4fa9fc0b 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -142,6 +142,9 @@ def forward( blank=blank_id, reduction="sum", ) + # 
print("loss",loss) + # print("logits",logits) + # print("y_padded",y_padded) return logits, loss @@ -162,32 +165,11 @@ def set_train_mode(self, mode): self._train_mode = mode - @classmethod - def load(cls, file_path=None, cfg=None, state_dict=None): - cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict) - encoder_net = TorchNALoader.load_from_cfg(cfg=cfg["encoder_cfg"]) - for k in "encoder_cfg": - del cfg[k] - - model = cls(encoder_net, **cfg) - if state_dict is not None: - model.load_state_dict(state_dict) - - return model def _train(self, train_mode: str): if train_mode in ["full", "frozen"]: super()._train(train_mode) - elif train_mode == "ft-embed-affine": - self.encoder_net.eval() - if self.proj is not None: - self.proj.eval() - - self.pool_net.eval() - self.classif_net.train() - layer_list = [l for l in range(self.embed_layer)] - self.classif_net.put_layers_in_eval_mode(layer_list) else: raise ValueError(f"invalid train_mode={train_mode}") diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 74363066..f3047c7e 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -122,7 +122,13 @@ def train_epoch(self, data_loader): batch_metrics = ODict() self.model.train() self.sp = data_loader.dataset.sp + # for batch, (data, audio_length, target) in enumerate(data_loader): + # print("batch",batch) + # print("data shape",data.shape) + for batch, (data, audio_length, target) in enumerate(data_loader): + # print("batch index", batch) + # print("batch size", data.shape) self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: @@ -159,3 +165,43 @@ def train_epoch(self, data_loader): logs = ODict(("train_" + k, v) for k, v in logs.items()) logs["lr"] = self._get_lr() return logs + + + def validation_epoch(self, data_loader, swa_update_bn=False): + """Validation epoch loop + + Args: + data_loader: PyTorch data loader return input/output pairs. + sw_update_bn: wheter or not, update batch-norm layers in SWA. 
+ """ + + metric_acc = MetricAcc(self.device) + batch_metrics = ODict() + with torch.no_grad(): + if swa_update_bn: + log_tag = "train_" + self.train() + else: + log_tag = "val_" + self.model.eval() + + for batch, (data, audio_length, target) in enumerate(data_loader): + data, audio_length, target = data.to(self.device), audio_length.to(self.device), target.to(self.device) + batch_size = data.shape[0] + # data, target = data.to(self.device), target.to(self.device) + # batch_size = data.shape[0] + + with self.amp_autocast(): + output, loss = self.model(data, x_lengths=audio_length, y=target) + # output = self.model(data) + # loss = self.loss(output, target) + + batch_metrics["loss"] = loss.mean().item() + for k, metric in self.metrics.items(): + batch_metrics[k] = metric(output, target) + + metric_acc.update(batch_metrics, batch_size) + + logs = metric_acc.metrics + logs = ODict((log_tag + k, v) for k, v in logs.items()) + return logs \ No newline at end of file diff --git a/hyperion/torch/utils/ddp.py b/hyperion/torch/utils/ddp.py index 7038cff3..038b3685 100644 --- a/hyperion/torch/utils/ddp.py +++ b/hyperion/torch/utils/ddp.py @@ -59,7 +59,7 @@ def ddp_init( os.environ["MASTER_PORT"] = master_port logging.info( - f"init ddp rank={rank} world_size={world_size} master={master_addr}:{master_port}" + f"init ddp rank={rank} world_size={world_size} master={master_addr}:{master_port} gpu_id={gpu_id}" ) dist.init_process_group( "nccl", @@ -67,7 +67,9 @@ def ddp_init( world_size=world_size, ) torch.tensor([0]).to(gpu_id) - return gpu_id, rank, world_size + device = torch.device('cuda', gpu_id) + return device, rank, world_size + # return gpu_id, rank, world_size def ddp_cleanup(): From 4be4d87a7b7979cd3c644763910f93e643f05e7d Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 9 Nov 2022 11:11:56 -0500 Subject: [PATCH 045/154] add resampling option to audio dataset --- .../xvectors/extract_xvectors_from_wav.sh | 30 +- hyperion/np/metrics/utils.py | 110 +++ hyperion/torch/data/audio_dataset.py | 897 +++++++++--------- 3 files changed, 594 insertions(+), 443 deletions(-) diff --git a/hyp_utils/xvectors/extract_xvectors_from_wav.sh b/hyp_utils/xvectors/extract_xvectors_from_wav.sh index ef06d94d..0b5227cc 100755 --- a/hyp_utils/xvectors/extract_xvectors_from_wav.sh +++ b/hyp_utils/xvectors/extract_xvectors_from_wav.sh @@ -137,21 +137,27 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then - if [ -n "$data_out_dir" ];then - echo "$0: creating data dir $data_out_dir for augmented x-vectors" - mkdir -p $data_out_dir - awk -F "," '$1 != "key_aug" { print $1,$2}' $output_dir/aug_info.csv \ - > $data_out_dir/augm2clean - awk -v u2s=$data_dir/utt2spk 'BEGIN{ + if [ -n "$data_out_dir" ];then + echo "$0: creating data dir $data_out_dir for augmented x-vectors" + mkdir -p $data_out_dir + awk -F "," '$1 != "key_aug" { print $1,$2}' $output_dir/aug_info.csv \ + > $data_out_dir/augm2clean + + for f in utt2spk utt2lang + do + if [ -f $data_dir/utt2spk ];then + awk -v u2s=$data_dir/$f 'BEGIN{ while(getline < u2s) { spk[$1]=$2 } } -{ print $1,spk[$2]}' $data_out_dir/augm2clean > $data_out_dir/utt2spk - utils/utt2spk_to_spk2utt.pl $data_out_dir/utt2spk > $data_out_dir/spk2utt - cp $output_dir/utt2num_frames $data_out_dir - else - cp $output_dir/utt2num_frames $data_dir - fi +{ print $1,spk[$2]}' $data_out_dir/augm2clean > $data_out_dir/$f + fi + done + utils/utt2spk_to_spk2utt.pl $data_out_dir/utt2spk > $data_out_dir/spk2utt + cp $output_dir/utt2num_frames $data_out_dir + else + cp 
$output_dir/utt2num_frames $data_dir + fi fi diff --git a/hyperion/np/metrics/utils.py b/hyperion/np/metrics/utils.py index 4f06bb18..c5871dfd 100644 --- a/hyperion/np/metrics/utils.py +++ b/hyperion/np/metrics/utils.py @@ -8,6 +8,7 @@ import numpy as np from ...hyp_defs import float_cpu +from ...utils.math import softmax, logsumexp def effective_prior(p_tar, c_miss, c_fa): @@ -27,6 +28,115 @@ def effective_prior(p_tar, c_miss, c_fa): return p_eff +def lre_priors(num_classes, p_tar, p_oos=0.0): + """Returns all prior distributions as needed for LRE language detection task. + + Args: + num_classes: number of target classes. + p_tar: target prior. + p_oos: prior of out-of-set hypothesis. + + Returns + Matrix of priors P with shape (num_classes, num_classes) or (num_classes, num_classes+1) if p_oos > 0, where P(i,:) are the priors for the case that class i is the target class. + """ + I = np.eye(num_classes) + ones = np.ones((num_classes, num_classes)) + priors = (1 - p_tar - p_oos) * (ones - I) / (num_classes - 1) + p_tar * I + if p_oos > 0: + priors_oos = p_oos * np.ones((num_classes, 1)) + priors = np.concatenate((priors, priors_oos), axis=-1) + + return priors + + +def loglk2llr(loglk, priors, target_idx=None): + """Converts log-likelihoods to detection log-likelihood ratios. + + Args: + loglk: log-likelihood matrix P(x_t | class_i) with shape = (num_samples, num_classes) + priors: vector of prior probabilities, positive, sum up to one. + target_idx: index of the target class, the other classes are assumed to be non-target classes, + it can be also a list of indexes to consider multiple target classes. + if None, it returns matrix with LLR w.r.t. all classes. + + Returns: + Matrix of log-likelihood ratios LLR = log P(x_t | class_i) / log P(x_t / non-class_i) with + shape (num_samples, num_target_classes), if None, num_target_classes=num_classes + + """ + + num_classes = loglk.shape[1] + assert num_classes == len(priors), "wrong prior length" + assert np.all(priors >= 0), "negative priors present" + assert np.abs(np.log(np.sum(priors))) > 0.001, "priors does not sum up to one" + assert target_idx is None or target_idx >= 0 and target_idx < num_classes + if target_idx is None: + target_idx = np.arange(num_classes) + elif isinstance(target_idx, int): + target_idx = [target_idx] + + num_target_classes = len(target_idx) + llr = np.zeros((loglk.shape[0], num_target_classes), dtype=loglk.dtype) + for i, target in enumerate(target_idx): + priors_i = np.copy(priors) + priors[target] = 0 + priors /= np.sum(priors) + priors[target] = 1 + llr = llr + np.log(priors) + non_idx = np.concatenate( + (np.arange(target_idx), np.arange(target_idx + 1, num_classes)) + ) + llr[:, i] = loglk[:, target] - logsumexp(llglk[:, non_idx], axis=-1) + + return llr + + +def loglk2posterior(loglk, priors): + """Converts log-likelihoods to posteriors + + Args: + loglk: log-likelihood matrix P(x_t | class_i) with shape = (num_samples, num_classes) + priors: vector of prior probabilities, positive, sum up to one. + + Returns: + Matrix of posteriors with shape = (num_samples, num_classes) + + """ + + num_classes = loglk.shape[1] + assert num_classes == len(priors), "wrong prior length" + assert np.all(priors >= 0), "negative priors present" + assert np.abs(np.log(np.sum(priors))) > 0.001, "priors does not sum up to one" + + log_post = loglk + np.log(priors) + return softmax(log_post, axis=-1) + + +def lre_loglk2llr(loglk, p_tar, p_oos=0): + """Converts log-likelihoods to detection log-likelihood ratios suitable for LRE. 
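To make the prior construction concrete, a small sketch of what lre_priors returns (values computed from the formula above; illustrative only, not part of the patch):

    import numpy as np

    # 3 target classes, p_tar = 0.5, no out-of-set prior:
    # 0.5 on the diagonal, (1 - 0.5) / (3 - 1) = 0.25 elsewhere.
    # Row i is the prior vector used when class i is the target.
    priors = lre_priors(num_classes=3, p_tar=0.5)
    print(priors)
    # [[0.5  0.25 0.25]
    #  [0.25 0.5  0.25]
    #  [0.25 0.25 0.5 ]]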
+
+
+def lre_loglk2llr(loglk, p_tar, p_oos=0):
+    """Converts log-likelihoods to detection log-likelihood ratios suitable for LRE.
+
+    Args:
+      loglk: log-likelihood matrix P(x_t | class_i) with shape = (num_samples, num_classes)
+      p_tar: prior prob that each language is the target language.
+      p_oos: prior prob that the test language is out-of-set.
+
+    Returns:
+      Matrix of log-likelihood ratios LLR_i = log P(x_t | class_i) - log P(x_t | non-class_i)
+      with shape (num_samples, num_tar_classes).
+
+    """
+
+    num_tar_classes = loglk.shape[-1]
+    if p_oos > 0:
+        # the last column of loglk corresponds to the out-of-set model
+        num_tar_classes -= 1
+    priors = lre_priors(num_tar_classes, p_tar, p_oos)
+    llr = np.zeros((loglk.shape[0], num_tar_classes), dtype=loglk.dtype)
+    for i in range(num_tar_classes):
+        llr[:, i] = loglk2llr(loglk, priors[i], target_idx=i)[:, 0]
+
+    return llr
+
+
 def pavx(y):
     """PAV: Pool Adjacent Violators algorithm.
     Non-parametric optimization subject to monotonicity.
diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py
index 8c69c3e1..f24ca8c5 100644
--- a/hyperion/torch/data/audio_dataset.py
+++ b/hyperion/torch/data/audio_dataset.py
@@ -12,6 +12,7 @@
 import pandas as pd
 
 import torch
+import torchaudio.transforms as tat
 
 from ..torch_defs import floatstr_torch
 from ...io import RandomAccessAudioReader as AR
@@ -24,427 +25,427 @@
 from hyperion.np import augment
 
 
-class AudioDataset1(Dataset):
-    def __init__(
-        self,
-        audio_file,
-        key_file,
-        class_file=None,
-        time_durs_file=None,
-        min_chunk_length=1,
-        max_chunk_length=None,
-        aug_cfg=None,
-        return_fullseqs=False,
-        return_class=True,
-        return_clean_aug_pair=False,
-        transpose_input=False,
-        wav_scale=2 ** 15 - 1,
-        is_val=False,
-    ):
-
-        try:
-            rank = dist.get_rank()
-            world_size = dist.get_world_size()
-        except:
-            rank = 0
-            world_size = 1
-
-        self.rank = rank
-        self.world_size = world_size
-
-        if rank == 0:
-            logging.info("opening dataset %s", audio_file)
-        self.r = AR(audio_file, wav_scale=wav_scale)
-        if rank == 0:
-            logging.info("loading utt2info file %s" % key_file)
-        self.u2c = Utt2Info.load(key_file, sep=" ")
-        if rank == 0:
-            logging.info("dataset contains %d seqs" % self.num_seqs)
-
-        self.is_val = is_val
-        self._read_time_durs_file(time_durs_file)
-
-        self._prune_short_seqs(min_chunk_length)
-
-        self.short_seq_exist = self._seq_shorter_than_max_length_exists(
-            max_chunk_length
-        )
-
-        self._prepare_class_info(class_file)
-
-        if max_chunk_length is None:
-            max_chunk_length = min_chunk_length
-        self._min_chunk_length = min_chunk_length
-        self._max_chunk_length = max_chunk_length
-
-        self.return_fullseqs = return_fullseqs
-        self.return_class = return_class
-        self.return_clean_aug_pair = return_clean_aug_pair
-
-        self.transpose_input = transpose_input
-
-        self.augmenter = None
-        self.reverb_context = 0
-        if aug_cfg is not None:
-            self.augmenter = SpeechAugment.create(
-                aug_cfg, random_seed=112358 + 1000 * rank
-            )
-            self.reverb_context = self.augmenter.max_reverb_context
-
-    def _read_time_durs_file(self, file_path):
-        if self.rank == 0:
-            logging.info("reading time_durs file %s" % file_path)
-        nf_df = pd.read_csv(file_path, header=None, sep=" ")
-        nf_df.index = nf_df[0]
-        self._seq_lengths = nf_df.loc[self.u2c.key, 1].values
-
-    @property
-    def wav_scale(self):
-        return self.r.wav_scale
-
-    @property
-    def num_seqs(self):
-        return len(self.u2c)
-
-    def __len__(self):
-        return self.num_seqs
-
-    @property
-    def seq_lengths(self):
-        return self._seq_lengths
-
-    @property
-    def total_length(self):
-        return np.sum(self.seq_lengths)
-
-    @property
-    def min_chunk_length(self):
-        if self.return_fullseqs:
-            self._min_chunk_length = np.min(self.seq_lengths)
-        return self._min_chunk_length
-
- @property - def max_chunk_length(self): - if self._max_chunk_length is None: - self._max_chunk_length = np.max(self.seq_lengths) - return self._max_chunk_length - - @property - def min_seq_length(self): - return np.min(self.seq_lengths) - - @property - def max_seq_length(self): - return np.max(self.seq_lengths) - - def _prune_short_seqs(self, min_length): - if self.rank == 0: - logging.info("pruning short seqs") - keep_idx = self.seq_lengths >= min_length - self.u2c = self.u2c.filter_index(keep_idx) - self._seq_lengths = self.seq_lengths[keep_idx] - if self.rank == 0: - logging.info( - "pruned seqs with min_length < %f," - "keep %d/%d seqs" % (min_length, self.num_seqs, len(keep_idx)) - ) - - def _prepare_class_info(self, class_file): - class_weights = None - if class_file is None: - classes, class_idx = np.unique(self.u2c.info, return_inverse=True) - class2idx = {k: i for i, k in enumerate(classes)} - else: - if self.rank == 0: - logging.info("reading class-file %s" % (class_file)) - class_info = pd.read_csv(class_file, header=None, sep=" ") - class2idx = {str(k): i for i, k in enumerate(class_info[0])} - class_idx = np.array([class2idx[k] for k in self.u2c.info], dtype=int) - if class_info.shape[1] == 2: - class_weights = np.array(class_info[1]).astype( - floatstr_torch(), copy=False - ) - - self.num_classes = len(class2idx) - - class2utt_idx = {} - class2num_utt = np.zeros((self.num_classes,), dtype=int) - - for k in range(self.num_classes): - idx = (class_idx == k).nonzero()[0] - class2utt_idx[k] = idx - class2num_utt[k] = len(idx) - if class2num_utt[k] == 0: - if not self.is_val: - logging.warning("class %d doesn't have any samples" % (k)) - if class_weights is None: - class_weights = np.ones((self.num_classes,), dtype=floatstr_torch()) - class_weights[k] = 0 - - count_empty = np.sum(class2num_utt == 0) - if count_empty > 0: - logging.warning("%d classes have 0 samples" % (count_empty)) - - self.utt_idx2class = class_idx - self.class2utt_idx = class2utt_idx - self.class2num_utt = class2num_utt - if class_weights is not None: - class_weights /= np.sum(class_weights) - class_weights = torch.Tensor(class_weights) - self.class_weights = class_weights - - if self.short_seq_exist: - # if there are seq shorter than max_chunk_lenght we need some extra variables - # we will need class_weights to put to 0 classes that have all utts shorter than the batch chunk length - if self.class_weights is None: - self.class_weights = torch.ones((self.num_classes,)) - - # we need the max length of the utterances of each class - class2max_length = torch.zeros((self.num_classes,), dtype=torch.float) - for c in range(self.num_classes): - if class2num_utt[c] > 0: - class2max_length[c] = np.max( - self.seq_lengths[self.class2utt_idx[c]] - ) - - self.class2max_length = class2max_length - - def _seq_shorter_than_max_length_exists(self, max_length): - return np.any(self.seq_lengths < max_length) - - @property - def var_chunk_length(self): - return self.min_chunk_length < self.max_chunk_length - - def get_random_chunk_length(self): - - if self.var_chunk_length: - return ( - torch.rand(size=(1,)).item() - * (self.max_chunk_length - self.min_chunk_length) - + self.min_chunk_length - ) - - return self.max_chunk_length - - def __getitem__(self, index): - # logging.info('{} {} {} get item {}'.format( - # self, os.getpid(), threading.get_ident(), index)) - if self.return_fullseqs: - return self._get_fullseq(index) - else: - return self._get_random_chunk(index) - - def _get_fullseq(self, index): - key = 
self.u2c.key[index] - x, fs = self.r.read([key]) - x = x[0].astype(floatstr_torch(), copy=False) - x_clean = x - if self.augmenter is not None: - x, aug_info = self.augmenter(x) - - if self.transpose_input: - x = x[None, :] - if self.return_clean_aug_pair: - x_clean = x_clean[None, :] - - if self.return_clean_aug_pair: - r = x, x_clean - - if not self.return_class: - return r - - class_idx = self.utt_idx2class[index] - r = *r, class_idx - return r - - def _get_random_chunk(self, index): - - if len(index) == 2: - index, chunk_length = index - else: - chunk_length = self.max_chunk_length - - key = self.u2c.key[index] - - full_seq_length = self.seq_lengths[index] - assert ( - chunk_length <= full_seq_length - ), "chunk_length(%d) <= full_seq_length(%d)" % (chunk_length, full_seq_length) - - time_offset = torch.rand(size=(1,)).item() * (full_seq_length - chunk_length) - reverb_context = min(self.reverb_context, time_offset) - time_offset -= reverb_context - read_chunk_length = chunk_length + reverb_context - - # logging.info('get-random-chunk {} {} {} {} {}'.format(index, key, time_offset, chunk_length, full_seq_length )) - x, fs = self.r.read([key], time_offset=time_offset, time_durs=read_chunk_length) - - # try: - # x, fs = self.r.read([key], time_offset=time_offset, - # time_durs=read_chunk_length) - # except: - # # some files produce error in the fseek after reading the data, - # # this seems an issue from pysoundfile or soundfile lib itself - # # reading from a sligthly different starting position seems to solve the problem in most cases - # try: - # logging.info('error-1 reading at key={} totol_dur={} offset={} read_chunk_length={}, retrying...'.format( - # key, full_seq_length, time_offset, read_chunk_length)) - # time_offset = math.floor(time_offset) - # x, fs = self.r.read([key], time_offset=time_offset, - # time_durs=read_chunk_length) - # except: - # try: - # # if changing the value of time-offset doesn't solve the issue, we try to read from - # # from time-offset to the end of the file, and remove the extra frames later - # logging.info('error-2 reading at key={} totol_dur={} offset={} retrying reading until end-of-file ...'.format( - # key, full_seq_length, time_offset)) - # x, fs = self.r.read([key], time_offset=time_offset) - # x = [x[0][:int(read_chunk_length * fs[0])]] - # except: - # # try to read the full file - # logging.info('error-3 reading at key={} totol_dur={} retrying reading full file ...'.format( - # key, full_seq_length)) - # x, fs = self.r.read([key]) - # x = [x[0][:int(read_chunk_length * fs[0])]] - - x = x[0] - fs = fs[0] - - x_clean = x - logging.info("hola1") - if self.augmenter is not None: - logging.info("hola2") - chunk_length_samples = int(chunk_length * fs) - end_idx = len(x) - reverb_context_samples = end_idx - chunk_length_samples - assert reverb_context_samples >= 0, ( - "key={} time-offset={}, read-chunk={} " - "read-x-samples={}, chunk_samples={}, reverb_context_samples={}" - ).format( - key, - time_offset, - read_chunk_length, - end_idx, - chunk_length_samples, - reverb_context_samples, - ) - # end_idx = reverb_context_samples + chunk_length_samples - x, aug_info = self.augmenter(x) - x = x[reverb_context_samples:end_idx] - if self.return_clean_aug_pair: - x_clean = x_clean[reverb_context_samples:end_idx] - x_clean = x_clean.astype(floatstr_torch(), copy=False) - # x_clean = x_clean[reverb_context_samples:] - # logging.info('augmentation x-clean={}, x={}, aug_info={}'.format( - # x_clean.shape, x.shape, aug_info)) - # if len(x) != 64000: - # 
logging.info('x!=4s, {} {} {} {} {} {} {} {}'.format(len(x),reverb_context, reverb_context_samples, chunk_length, chunk_length_samples, end_idx, fs, read_chunk_length)) - - # if len(x) != 64000: - # logging.info('x!=4s-2, {} {} {} {}'.format(len(x), chunk_length, fs, read_chunk_length)) - - if self.transpose_input: - x = x[None, :] - if self.return_clean_aug_pair: - x_clean = x_clean[None, :] - - x = x.astype(floatstr_torch(), copy=False) - if self.return_clean_aug_pair: - r = x, x_clean - else: - r = (x,) - - if not self.return_class: - return r - - class_idx = self.utt_idx2class[index] - r = *r, class_idx - return r - - @staticmethod - def filter_args(**kwargs): - - ar_args = AR.filter_args(**kwargs) - valid_args = ( - "audio_file", - "key_file", - "aug_cfg", - "path_prefix", - "class_file", - "time_durs_file", - "min_chunk_length", - "max_chunk_length", - "return_fullseqs", - "part_idx", - "num_parts", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - args.update(ar_args) - return args - - @staticmethod - def add_class_args(parser, prefix=None, skip={"audio_file", "key_file"}): - if prefix is not None: - outer_parser = parser - parser = ArgumentParser(prog="") - - if "audio_file" not in skip: - parser.add_argument( - "--audio-file", - required=True, - help=("audio manifest file"), - ) - - if "key_file" not in skip: - parser.add_argument( - "--key-file", - required=True, - help=("key manifest file"), - ) - - parser.add_argument( - "--class-file", - default=None, - help=("ordered list of classes keys, it can contain class weights"), - ) - - parser.add_argument( - "--time-durs-file", default=None, help=("utt to duration in secs file") - ) - - parser.add_argument( - "--aug-cfg", - default=None, - help=("augmentation configuration file."), - ) - - parser.add_argument( - "--min-chunk-length", - type=float, - default=None, - help=("minimum length of sequence chunks"), - ) - parser.add_argument( - "--max-chunk-length", - type=float, - default=None, - help=("maximum length of sequence chunks"), - ) - - parser.add_argument( - "--return-fullseqs", - default=False, - action="store_true", - help=("returns full sequences instead of chunks"), - ) - - AR.add_class_args(parser) - if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='audio dataset options') - - add_argparse_args = add_class_args +# class AudioDataset1(Dataset): +# def __init__( +# self, +# audio_file, +# key_file, +# class_file=None, +# time_durs_file=None, +# min_chunk_length=1, +# max_chunk_length=None, +# aug_cfg=None, +# return_fullseqs=False, +# return_class=True, +# return_clean_aug_pair=False, +# transpose_input=False, +# wav_scale=2 ** 15 - 1, +# is_val=False, +# ): + +# try: +# rank = dist.get_rank() +# world_size = dist.get_world_size() +# except: +# rank = 0 +# world_size = 1 + +# self.rank = rank +# self.world_size = world_size + +# if rank == 0: +# logging.info("opening dataset %s", audio_file) +# self.r = AR(audio_file, wav_scale=wav_scale) +# if rank == 0: +# logging.info("loading utt2info file %s" % key_file) +# self.u2c = Utt2Info.load(key_file, sep=" ") +# if rank == 0: +# logging.info("dataset contains %d seqs" % self.num_seqs) + +# self.is_val = is_val +# self._read_time_durs_file(time_durs_file) + +# self._prune_short_seqs(min_chunk_length) + +# self.short_seq_exist = self._seq_shorter_than_max_length_exists( +# max_chunk_length +# ) + +# self._prepare_class_info(class_file) + +# if max_chunk_length is None: +# max_chunk_length = 
min_chunk_length +# self._min_chunk_length = min_chunk_length +# self._max_chunk_length = max_chunk_length + +# self.return_fullseqs = return_fullseqs +# self.return_class = return_class +# self.return_clean_aug_pair = return_clean_aug_pair + +# self.transpose_input = transpose_input + +# self.augmenter = None +# self.reverb_context = 0 +# if aug_cfg is not None: +# self.augmenter = SpeechAugment.create( +# aug_cfg, random_seed=112358 + 1000 * rank +# ) +# self.reverb_context = self.augmenter.max_reverb_context + +# def _read_time_durs_file(self, file_path): +# if self.rank == 0: +# logging.info("reading time_durs file %s" % file_path) +# nf_df = pd.read_csv(file_path, header=None, sep=" ") +# nf_df.index = nf_df[0] +# self._seq_lengths = nf_df.loc[self.u2c.key, 1].values + +# @property +# def wav_scale(self): +# return self.r.wav_scale + +# @property +# def num_seqs(self): +# return len(self.u2c) + +# def __len__(self): +# return self.num_seqs + +# @property +# def seq_lengths(self): +# return self._seq_lengths + +# @property +# def total_length(self): +# return np.sum(self.seq_lengths) + +# @property +# def min_chunk_length(self): +# if self.return_fullseqs: +# self._min_chunk_length = np.min(self.seq_lengths) +# return self._min_chunk_length + +# @property +# def max_chunk_length(self): +# if self._max_chunk_length is None: +# self._max_chunk_length = np.max(self.seq_lengths) +# return self._max_chunk_length + +# @property +# def min_seq_length(self): +# return np.min(self.seq_lengths) + +# @property +# def max_seq_length(self): +# return np.max(self.seq_lengths) + +# def _prune_short_seqs(self, min_length): +# if self.rank == 0: +# logging.info("pruning short seqs") +# keep_idx = self.seq_lengths >= min_length +# self.u2c = self.u2c.filter_index(keep_idx) +# self._seq_lengths = self.seq_lengths[keep_idx] +# if self.rank == 0: +# logging.info( +# "pruned seqs with min_length < %f," +# "keep %d/%d seqs" % (min_length, self.num_seqs, len(keep_idx)) +# ) + +# def _prepare_class_info(self, class_file): +# class_weights = None +# if class_file is None: +# classes, class_idx = np.unique(self.u2c.info, return_inverse=True) +# class2idx = {k: i for i, k in enumerate(classes)} +# else: +# if self.rank == 0: +# logging.info("reading class-file %s" % (class_file)) +# class_info = pd.read_csv(class_file, header=None, sep=" ") +# class2idx = {str(k): i for i, k in enumerate(class_info[0])} +# class_idx = np.array([class2idx[k] for k in self.u2c.info], dtype=int) +# if class_info.shape[1] == 2: +# class_weights = np.array(class_info[1]).astype( +# floatstr_torch(), copy=False +# ) + +# self.num_classes = len(class2idx) + +# class2utt_idx = {} +# class2num_utt = np.zeros((self.num_classes,), dtype=int) + +# for k in range(self.num_classes): +# idx = (class_idx == k).nonzero()[0] +# class2utt_idx[k] = idx +# class2num_utt[k] = len(idx) +# if class2num_utt[k] == 0: +# if not self.is_val: +# logging.warning("class %d doesn't have any samples" % (k)) +# if class_weights is None: +# class_weights = np.ones((self.num_classes,), dtype=floatstr_torch()) +# class_weights[k] = 0 + +# count_empty = np.sum(class2num_utt == 0) +# if count_empty > 0: +# logging.warning("%d classes have 0 samples" % (count_empty)) + +# self.utt_idx2class = class_idx +# self.class2utt_idx = class2utt_idx +# self.class2num_utt = class2num_utt +# if class_weights is not None: +# class_weights /= np.sum(class_weights) +# class_weights = torch.Tensor(class_weights) +# self.class_weights = class_weights + +# if self.short_seq_exist: +# # 
if there are seq shorter than max_chunk_lenght we need some extra variables +# # we will need class_weights to put to 0 classes that have all utts shorter than the batch chunk length +# if self.class_weights is None: +# self.class_weights = torch.ones((self.num_classes,)) + +# # we need the max length of the utterances of each class +# class2max_length = torch.zeros((self.num_classes,), dtype=torch.float) +# for c in range(self.num_classes): +# if class2num_utt[c] > 0: +# class2max_length[c] = np.max( +# self.seq_lengths[self.class2utt_idx[c]] +# ) + +# self.class2max_length = class2max_length + +# def _seq_shorter_than_max_length_exists(self, max_length): +# return np.any(self.seq_lengths < max_length) + +# @property +# def var_chunk_length(self): +# return self.min_chunk_length < self.max_chunk_length + +# def get_random_chunk_length(self): + +# if self.var_chunk_length: +# return ( +# torch.rand(size=(1,)).item() +# * (self.max_chunk_length - self.min_chunk_length) +# + self.min_chunk_length +# ) + +# return self.max_chunk_length + +# def __getitem__(self, index): +# # logging.info('{} {} {} get item {}'.format( +# # self, os.getpid(), threading.get_ident(), index)) +# if self.return_fullseqs: +# return self._get_fullseq(index) +# else: +# return self._get_random_chunk(index) + +# def _get_fullseq(self, index): +# key = self.u2c.key[index] +# x, fs = self.r.read([key]) +# x = x[0].astype(floatstr_torch(), copy=False) +# x_clean = x +# if self.augmenter is not None: +# x, aug_info = self.augmenter(x) + +# if self.transpose_input: +# x = x[None, :] +# if self.return_clean_aug_pair: +# x_clean = x_clean[None, :] + +# if self.return_clean_aug_pair: +# r = x, x_clean + +# if not self.return_class: +# return r + +# class_idx = self.utt_idx2class[index] +# r = *r, class_idx +# return r + +# def _get_random_chunk(self, index): + +# if len(index) == 2: +# index, chunk_length = index +# else: +# chunk_length = self.max_chunk_length + +# key = self.u2c.key[index] + +# full_seq_length = self.seq_lengths[index] +# assert ( +# chunk_length <= full_seq_length +# ), "chunk_length(%d) <= full_seq_length(%d)" % (chunk_length, full_seq_length) + +# time_offset = torch.rand(size=(1,)).item() * (full_seq_length - chunk_length) +# reverb_context = min(self.reverb_context, time_offset) +# time_offset -= reverb_context +# read_chunk_length = chunk_length + reverb_context + +# # logging.info('get-random-chunk {} {} {} {} {}'.format(index, key, time_offset, chunk_length, full_seq_length )) +# x, fs = self.r.read([key], time_offset=time_offset, time_durs=read_chunk_length) + +# # try: +# # x, fs = self.r.read([key], time_offset=time_offset, +# # time_durs=read_chunk_length) +# # except: +# # # some files produce error in the fseek after reading the data, +# # # this seems an issue from pysoundfile or soundfile lib itself +# # # reading from a sligthly different starting position seems to solve the problem in most cases +# # try: +# # logging.info('error-1 reading at key={} totol_dur={} offset={} read_chunk_length={}, retrying...'.format( +# # key, full_seq_length, time_offset, read_chunk_length)) +# # time_offset = math.floor(time_offset) +# # x, fs = self.r.read([key], time_offset=time_offset, +# # time_durs=read_chunk_length) +# # except: +# # try: +# # # if changing the value of time-offset doesn't solve the issue, we try to read from +# # # from time-offset to the end of the file, and remove the extra frames later +# # logging.info('error-2 reading at key={} totol_dur={} offset={} retrying reading until 
end-of-file ...'.format( +# # key, full_seq_length, time_offset)) +# # x, fs = self.r.read([key], time_offset=time_offset) +# # x = [x[0][:int(read_chunk_length * fs[0])]] +# # except: +# # # try to read the full file +# # logging.info('error-3 reading at key={} totol_dur={} retrying reading full file ...'.format( +# # key, full_seq_length)) +# # x, fs = self.r.read([key]) +# # x = [x[0][:int(read_chunk_length * fs[0])]] + +# x = x[0] +# fs = fs[0] + +# x_clean = x +# logging.info("hola1") +# if self.augmenter is not None: +# logging.info("hola2") +# chunk_length_samples = int(chunk_length * fs) +# end_idx = len(x) +# reverb_context_samples = end_idx - chunk_length_samples +# assert reverb_context_samples >= 0, ( +# "key={} time-offset={}, read-chunk={} " +# "read-x-samples={}, chunk_samples={}, reverb_context_samples={}" +# ).format( +# key, +# time_offset, +# read_chunk_length, +# end_idx, +# chunk_length_samples, +# reverb_context_samples, +# ) +# # end_idx = reverb_context_samples + chunk_length_samples +# x, aug_info = self.augmenter(x) +# x = x[reverb_context_samples:end_idx] +# if self.return_clean_aug_pair: +# x_clean = x_clean[reverb_context_samples:end_idx] +# x_clean = x_clean.astype(floatstr_torch(), copy=False) +# # x_clean = x_clean[reverb_context_samples:] +# # logging.info('augmentation x-clean={}, x={}, aug_info={}'.format( +# # x_clean.shape, x.shape, aug_info)) +# # if len(x) != 64000: +# # logging.info('x!=4s, {} {} {} {} {} {} {} {}'.format(len(x),reverb_context, reverb_context_samples, chunk_length, chunk_length_samples, end_idx, fs, read_chunk_length)) + +# # if len(x) != 64000: +# # logging.info('x!=4s-2, {} {} {} {}'.format(len(x), chunk_length, fs, read_chunk_length)) + +# if self.transpose_input: +# x = x[None, :] +# if self.return_clean_aug_pair: +# x_clean = x_clean[None, :] + +# x = x.astype(floatstr_torch(), copy=False) +# if self.return_clean_aug_pair: +# r = x, x_clean +# else: +# r = (x,) + +# if not self.return_class: +# return r + +# class_idx = self.utt_idx2class[index] +# r = *r, class_idx +# return r + +# @staticmethod +# def filter_args(**kwargs): + +# ar_args = AR.filter_args(**kwargs) +# valid_args = ( +# "audio_file", +# "key_file", +# "aug_cfg", +# "path_prefix", +# "class_file", +# "time_durs_file", +# "min_chunk_length", +# "max_chunk_length", +# "return_fullseqs", +# "part_idx", +# "num_parts", +# ) +# args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) +# args.update(ar_args) +# return args + +# @staticmethod +# def add_class_args(parser, prefix=None, skip={"audio_file", "key_file"}): +# if prefix is not None: +# outer_parser = parser +# parser = ArgumentParser(prog="") + +# if "audio_file" not in skip: +# parser.add_argument( +# "--audio-file", +# required=True, +# help=("audio manifest file"), +# ) + +# if "key_file" not in skip: +# parser.add_argument( +# "--key-file", +# required=True, +# help=("key manifest file"), +# ) + +# parser.add_argument( +# "--class-file", +# default=None, +# help=("ordered list of classes keys, it can contain class weights"), +# ) + +# parser.add_argument( +# "--time-durs-file", default=None, help=("utt to duration in secs file") +# ) + +# parser.add_argument( +# "--aug-cfg", +# default=None, +# help=("augmentation configuration file."), +# ) + +# parser.add_argument( +# "--min-chunk-length", +# type=float, +# default=None, +# help=("minimum length of sequence chunks"), +# ) +# parser.add_argument( +# "--max-chunk-length", +# type=float, +# default=None, +# help=("maximum length of sequence chunks"), 
+#         )
+
+#         parser.add_argument(
+#             "--return-fullseqs",
+#             default=False,
+#             action="store_true",
+#             help=("returns full sequences instead of chunks"),
+#         )
+
+#         AR.add_class_args(parser)
+#         if prefix is not None:
+#             outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
+#             # help='audio dataset options')
+
+#     add_argparse_args = add_class_args
 
 
 from ...utils.class_info import ClassInfo
@@ -463,6 +464,7 @@ def __init__(
         num_augs=1,
         return_segment_info=None,
         return_orig=False,
+        target_sample_freq=None,
         wav_scale=2 ** 15 - 1,
         is_val=False,
     ):
@@ -514,6 +516,9 @@ def __init__(
         self.num_augs = num_augs
         self._create_augmenters(aug_cfgs)
 
+        self.target_sample_freq = target_sample_freq
+        self.resamplers = {}
+
     def _load_class_infos(self, class_names, class_files, is_val):
         self.class_info = {}
         if class_names is None:
@@ -646,10 +651,37 @@ def _get_segment_info(self, seg_id):
 
         return r
 
+    def _get_resampler(self, fs):
+        if fs in self.resamplers:
+            return self.resamplers[fs]
+
+        resampler = tat.Resample(
+            int(fs),
+            int(self.target_sample_freq),
+            lowpass_filter_width=64,
+            rolloff=0.9475937167399596,
+            resampling_method="kaiser_window",
+            beta=14.769656459379492,
+        )
+        self.resamplers[fs] = resampler
+        return resampler
+
+    def _resample(self, x, fs):
+        try:
+            if self.target_sample_freq is None or fs == self.target_sample_freq:
+                return x, fs
+
+            resampler = self._get_resampler(fs)
+            return resampler(x), self.target_sample_freq
+        except:
+            return x, fs
+
     def __getitem__(self, segment):
         seg_id, start, duration = self._parse_segment_item(segment)
         x, fs = self._read_audio(seg_id, start, duration)
+        x, fs = self._resample(x, fs)
+
         if self.augmenters:
             # augmentations
             num_samples = int(duration * fs)
@@ -685,6 +717,7 @@ def filter_args(**kwargs):
             "return_segment_info",
             "return_orig",
             "time_durs_file",
+            "target_sample_freq",
         )
         args = dict((k, kwargs[k]) for k in valid_args if k in kwargs)
         args.update(ar_args)
@@ -698,16 +731,12 @@ def add_class_args(parser, prefix=None, skip={}):
 
         if "audio_file" not in skip:
             parser.add_argument(
-                "--audio-file",
-                required=True,
-                help=("audio manifest file"),
+                "--audio-file", required=True, help=("audio manifest file"),
             )
 
         if "segments_file" not in skip:
             parser.add_argument(
-                "--segments-file",
-                required=True,
-                help=("segments manifest file"),
+                "--segments-file", required=True, help=("segments manifest file"),
            )
 
         parser.add_argument(
@@ -720,10 +749,7 @@
         )
 
         parser.add_argument(
-            "--class-files",
-            default=None,
-            nargs="+",
-            help=("list of class info files"),
+            "--class-files", default=None, nargs="+", help=("list of class info files"),
         )
 
         parser.add_argument(
@@ -763,6 +789,15 @@
             ),
         )
 
+        parser.add_argument(
+            "--target-sample-freq",
+            default=None,
+            type=int,
+            help=(
+                "target sampling frequency, if not None all audios are converted to this sample freq"
+            ),
+        )
+
         AR.add_class_args(parser)
         if prefix is not None:
             outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
From 83b7d9c5ef539dea177049e700faf4d8c14b41d2 Mon Sep 17 00:00:00 2001
From: neillu23
Date: Wed, 9 Nov 2022 21:19:19 -0500
Subject: [PATCH 046/154] fix issue in multi-gpus training and update model
 parameters

---
 ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml |  2 +
 .../v1/conf/wav2vec2xlsr300m_transducer.yaml  |  5 +-
 hyperion/bin/train_wav2vec2transducer.py      |  2 +-
 hyperion/torch/data/bucketing_seg_sampler.py  |  6 +-
 hyperion/torch/models/transducer/decoder.py   | 46
+++++++++++--- hyperion/torch/models/transducer/joiner.py | 60 ++++++++++++++++--- .../torch/models/transducer/transducer.py | 36 +++-------- .../wav2transducer/hf_wav2vec2_transducer.py | 5 +- hyperion/torch/trainers/transducer_trainer.py | 5 -- 9 files changed, 109 insertions(+), 58 deletions(-) diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index 79eadb07..6ac61b76 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -12,6 +12,7 @@ data: min_batch_size: 4 batch_size: 4 iters_per_epoch: 6 + drop_last: true data_loader: num_workers: 8 val: @@ -27,6 +28,7 @@ data: min_batch_size: 2 batch_size: 2 iters_per_epoch: 6 + drop_last: true data_loader: num_workers: 8 model: wav2vec2xlsr300m_transducer.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml index 57f7272b..3d9d768a 100644 --- a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml @@ -1,7 +1,6 @@ hf_feats: pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus transducer: - encoder_out_dim: 768 # conformer_enc: # num_features: 80 # subsampling_factor: 4 @@ -11,10 +10,10 @@ transducer: # num_encoder_layers: 12 # vgg_frontend: False decoder: - # vocab_size: 1000 - # blank_id: 0 embedding_dim: 1024 num_layers: 2 hidden_dim: 512 + joiner: + num_layers: 1 feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index bfc6248a..3e4ccb84 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -41,7 +41,7 @@ def transducer_collate(batch): audio = [] audio_length = [] target = [] - for i, record in enumerate(batch): + for record in batch: wav = torch.as_tensor(record[0]) audio.append(wav) audio_length.append(wav.shape[0]) diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 8dbc4e45..83e6425c 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -12,7 +12,6 @@ from .hyp_sampler import HypSampler from .seg_sampler import SegSampler import torch.distributed as dist -from torch.nn.utils.rnn import pad_sequence class BucketingSegSampler(HypSampler): @@ -44,10 +43,7 @@ def create_buckets(self): buckets = [] for i in range(self.num_buckets): bucket_bool = (cum_lengths <= bucket_length) & (cum_lengths > 0) - bucket_idx = [] - for i, bo in enumerate(bucket_bool): - if bo: - bucket_idx.append(i) + bucket_idx = np.arange(len(bucket_bool))[bucket_bool] bucket_i = sorted_seg_set.iloc[bucket_idx] buckets.append(bucket_i) cum_lengths -= bucket_length diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py index 0b47e80c..56caaef6 100644 --- a/hyperion/torch/models/transducer/decoder.py +++ b/hyperion/torch/models/transducer/decoder.py @@ -14,11 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo from typing import Optional, Tuple import torch import torch.nn as nn -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo # TODO(fangjun): Support switching between LSTM and GRU @@ -30,7 +30,7 @@ def __init__( blank_id: int, num_layers: int, hidden_dim: int, - output_dim: int, + in_feats: int, embedding_dropout: float = 0.0, rnn_dropout: float = 0.0, ): @@ -68,8 +68,14 @@ def __init__( batch_first=True, dropout=rnn_dropout, ) + + self.in_feats = in_feats self.blank_id = blank_id - self.output_linear = nn.Linear(hidden_dim, output_dim) + self.vocab_size = vocab_size + self.embedding_dim = embedding_dim + self.num_layers = num_layers + self.hidden_dim = hidden_dim + self.output_linear = nn.Linear(hidden_dim, in_feats) def forward( self, @@ -97,10 +103,26 @@ def forward( return out, (h, c) + def get_config(self): + config = { + "in_feats" : self.in_feats, + "blank_id" : self.blank_id, + "vocab_size" : self.vocab_size, + "embedding_dim" :self.embedding_dim, + "num_layers" : self.num_layers, + "hidden_dim" : self.hidden_dim, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + @staticmethod def filter_args(**kwargs): valid_args = ( + "in_feats", + "blank_id", + "vocab_size", "embedding_dim", "num_layers", "hidden_dim", @@ -110,12 +132,24 @@ def filter_args(**kwargs): return args @staticmethod - def add_class_args(parser, prefix=None, skip=set()): + def add_class_args(parser, prefix=None, skip=set(["in_feats", "blank_id", "vocab_size" ])): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") - + + if "in_feats" not in skip: + parser.add_argument( + "--in-feats", type=int, required=True, help=("input feature dimension") + ) + if "blank_id" not in skip: + parser.add_argument( + "--blank-id", type=int, required=True, help=("blank id from sp model") + ) + if "vocab_size" not in skip: + parser.add_argument( + "--vocab-size", type=int, required=True, help=("output prediction dimension") + ) parser.add_argument( "--embedding-dim", default=1024, type=int, help=("feature dimension") ) @@ -128,8 +162,6 @@ def add_class_args(parser, prefix=None, skip=set()): "--hidden-dim", default=512, type=int, help=("") ) - - if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/transducer/joiner.py b/hyperion/torch/models/transducer/joiner.py index 72376b3c..e05e0f50 100644 --- a/hyperion/torch/models/transducer/joiner.py +++ b/hyperion/torch/models/transducer/joiner.py @@ -14,15 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import torch import torch.nn as nn class Joiner(nn.Module): - def __init__(self, input_dim: int, output_dim: int): + def __init__(self, in_feats: int, out_dims: int, num_layers: int): super().__init__() + self.in_feats = in_feats + self.out_dims = out_dims + self.num_layers = num_layers - self.output_linear = nn.Linear(input_dim, output_dim) + self.output_linear = nn.Linear(in_feats, out_dims) def forward( self, encoder_out: torch.Tensor, decoder_out: torch.Tensor @@ -53,14 +57,52 @@ def forward( return output - # @staticmethod - # def filter_args(**kwargs): - # valid_args = ( - # "encoder_out_dim", - # ) - # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - # return args + def get_config(self): + config = { + "in_feats" : self.in_feats, + "out_dims": self.out_dims, + "num_layers": self.num_layers, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "in_feats", + "out_dims", + "num_layers", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + return args + + + @staticmethod + def add_class_args(parser, prefix=None, skip=set(["in_feats", "out_dims"])): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + if "in_feats" not in skip: + parser.add_argument( + "--in-feats", type=int, required=True, help=("input feature dimension") + ) + + if "out_dims" not in skip: + parser.add_argument( + "--out-dims", type=int, required=True, help=("output feature dimension (vocab size)") + ) + parser.add_argument( + "--num-layers", default=1, type=int, help=("layers of the joiner") + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + # @staticmethod # def add_class_args(parser, prefix=None, skip=set()): diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index 4fa9fc0b..5daace99 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -42,9 +42,9 @@ def __init__( self, vocab_size, blank_id, - encoder_out_dim, # conformer_enc, decoder, + joiner, ): """ Args: @@ -65,13 +65,11 @@ def __init__( super().__init__() # assert isinstance(encoder, EncoderInterface) # assert hasattr(decoder, "blank_id") - # conformer_enc["output_dim"] = encoder_out_dim + decoder["blank_id"] = blank_id decoder["vocab_size"] = vocab_size - decoder["output_dim"] = encoder_out_dim - joiner = {"input_dim":encoder_out_dim, "output_dim":vocab_size} + joiner["out_dims"] = vocab_size - # self.encoder = Conformer(**conformer_enc) self.decoder = Decoder(**decoder) self.joiner = Joiner(**joiner) @@ -142,9 +140,6 @@ def forward( blank=blank_id, reduction="sum", ) - # print("loss",loss) - # print("logits",logits) - # print("y_padded",y_padded) return logits, loss @@ -178,13 +173,10 @@ def valid_train_modes(): return ["full", "frozen", "ft-embed-affine"] def get_config(self): - # enc_cfg = self.encoder.get_config() dec_cfg = self.decoder.get_config() join_cfg = self.joiner.get_config() config = { - # "encoder_out_dim" : self.encoder_out_dim, - # "conformer_enc": enc_cfg, "decoder": dec_cfg, "joiner": join_cfg, } @@ -196,18 +188,14 @@ def get_config(self): def filter_args(**kwargs): # get arguments for pooling - # encoder_args = Conformer.filter_args(**kwargs["conformer_enc"]) decoder_args = 
Decoder.filter_args(**kwargs["decoder"]) - # joiner_args = Joiner.filter_args(**kwargs["joiner"]) + joiner_args = Joiner.filter_args(**kwargs["joiner"]) - valid_args = ( - "encoder_out_dim", - ) + valid_args = () args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - # args["conformer_enc"] = encoder_args args["decoder"] = decoder_args - # args["joiner"] = joiner_args + args["joiner"] = joiner_args return args @staticmethod @@ -217,18 +205,12 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - - - # Conformer.add_class_args( - # parser, prefix="conformer_enc", skip=[] - # ) - Decoder.add_class_args( - parser, prefix="decoder", skip=[] + parser, prefix="decoder" ) - parser.add_argument( - "--encoder-out-dim", default=512, type=int, help=("") + Joiner.add_class_args( + parser, prefix="joiner" ) if prefix is not None: diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py index 79c4ca86..242a5ca1 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py @@ -45,12 +45,15 @@ def __init__( assert isinstance(hf_feats, HFWav2Vec2) if isinstance(transducer, dict): + transducer["decoder"]["in_feats"] = hf_feats.hidden_size + transducer["joiner"]["in_feats"] = hf_feats.hidden_size if "class_name" in transducer: del transducer["class_name"] transducer = Transducer(**transducer) else: assert isinstance(transducer, Transducer) - # assert transducer.encoder_net.in_feats == hf_feats.hidden_size + assert transducer.decoder.in_feats == hf_feats.hidden_size + assert transducer.joiner.in_feats == hf_feats.hidden_size super().__init__(hf_feats, transducer, feat_fusion_start, feat_fusion_method) diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index f3047c7e..bbe847d0 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -122,13 +122,8 @@ def train_epoch(self, data_loader): batch_metrics = ODict() self.model.train() self.sp = data_loader.dataset.sp - # for batch, (data, audio_length, target) in enumerate(data_loader): - # print("batch",batch) - # print("data shape",data.shape) for batch, (data, audio_length, target) in enumerate(data_loader): - # print("batch index", batch) - # print("batch size", data.shape) self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: From ff172b1f90bd953e4c6d82c74bf35224b77bf9e0 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Sat, 12 Nov 2022 19:27:06 -0500 Subject: [PATCH 047/154] update save and load for transducer model --- ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 8 +-- .../v1/global_conf/config_transducer_v1.sh | 2 + egs/librispeech/v1/run_011_train_asr.sh | 13 +---- hyperion/torch/models/transducer/decoder.py | 4 +- hyperion/torch/models/transducer/joiner.py | 4 +- .../torch/models/transducer/transducer.py | 58 ++++++++++++++++++- .../wav2transducer/hf_wav2transducer.py | 2 +- 7 files changed, 69 insertions(+), 22 deletions(-) diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index 6ac61b76..50750cd8 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -7,8 +7,8 @@ data: 
return_segment_info: - text sampler: - # sampler_type: 'seg_sampler' - sampler_type: 'bucketing_seg_sampler' + sampler_type: 'seg_sampler' + # sampler_type: 'bucketing_seg_sampler' min_batch_size: 4 batch_size: 4 iters_per_epoch: 6 @@ -23,8 +23,8 @@ data: return_segment_info: - text sampler: - # sampler_type: 'seg_sampler' - sampler_type: 'bucketing_seg_sampler' + sampler_type: 'seg_sampler' + # sampler_type: 'bucketing_seg_sampler' min_batch_size: 2 batch_size: 2 iters_per_epoch: 6 diff --git a/egs/librispeech/v1/global_conf/config_transducer_v1.sh b/egs/librispeech/v1/global_conf/config_transducer_v1.sh index 00b34870..9f28f551 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v1.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v1.sh @@ -8,6 +8,8 @@ hf_model_name=wav2vec2xlsr300m # x-vector training nnet_data=train_clean_100 +dev_data=dev_clean +# nnet_data=train_clean_small bpe_model=data/lang_bpe_1000/bpe.model # x-vector cfg diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh index 61f00be4..7c2c0f70 100755 --- a/egs/librispeech/v1/run_011_train_asr.sh +++ b/egs/librispeech/v1/run_011_train_asr.sh @@ -8,7 +8,7 @@ set -e stage=1 -ngpu=1 +ngpu=2 config_file=default_config.sh interactive=false num_workers="" @@ -20,7 +20,7 @@ use_wandb=false . datapath.sh train_dir=data/${nnet_data}/ -val_dir=data/dev_clean/ +val_dir=data/${dev_data}/ #add extra args from the command line arguments if [ -n "$num_workers" ];then @@ -61,15 +61,6 @@ if [ $stage -le 1 ]; then --data.val.dataset.time-durs-file $val_dir/utt2dur \ --num-gpus $ngpu -# --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ -# --data.train.dataset.audio-file $list_dir/wav.scp \ -# --data.train.dataset.time-durs-file $list_dir/utt2dur \ -# --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ -# --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ -# --data.val.dataset.audio-file $list_dir/wav.scp \ -# --data.val.dataset.time-durs-file $list_dir/utt2dur \ -# --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ -# --trainer.exp-path $nnet_dir $args \ fi if [ $stage -le 2 ]; then diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py index 56caaef6..bd7bd202 100644 --- a/hyperion/torch/models/transducer/decoder.py +++ b/hyperion/torch/models/transducer/decoder.py @@ -113,8 +113,8 @@ def get_config(self): "hidden_dim" : self.hidden_dim, } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + # base_config = super().get_config() + return dict(list(config.items())) @staticmethod diff --git a/hyperion/torch/models/transducer/joiner.py b/hyperion/torch/models/transducer/joiner.py index e05e0f50..0fc1fe51 100644 --- a/hyperion/torch/models/transducer/joiner.py +++ b/hyperion/torch/models/transducer/joiner.py @@ -65,8 +65,8 @@ def get_config(self): "num_layers": self.num_layers, } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + # base_config = super().get_config() + return dict(list(config.items())) @staticmethod diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index 5daace99..80bf9891 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -70,6 +70,8 @@ def __init__( decoder["vocab_size"] = vocab_size joiner["out_dims"] = vocab_size + self.vocab_size = vocab_size + 
self.blank_id = blank_id self.decoder = Decoder(**decoder) self.joiner = Joiner(**joiner) @@ -177,12 +179,14 @@ def get_config(self): join_cfg = self.joiner.get_config() config = { + "blank_id" : self.blank_id, + "vocab_size" : self.vocab_size, "decoder": dec_cfg, "joiner": join_cfg, } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + # base_config = super().get_config() + return dict(list(config.items())) @staticmethod def filter_args(**kwargs): @@ -215,3 +219,53 @@ def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + + # def change_config( + # self, + # override_dropouts=False, + # dropout_rate=0, + # num_classes=None, + # loss_type="arc-softmax", + # cos_scale=64, + # margin=0.3, + # margin_warmup_epochs=10, + # intertop_k=5, + # intertop_margin=0.0, + # num_subcenters=2, + # ): + # logging.info("changing x-vector config") + # self.rebuild_output_layer( + # num_classes=num_classes, + # loss_type=loss_type, + # cos_scale=cos_scale, + # margin=margin, + # margin_warmup_epochs=margin_warmup_epochs, + # intertop_k=intertop_k, + # intertop_margin=intertop_margin, + # num_subcenters=num_subcenters, + # ) + + # if override_dropouts: + # logging.info("overriding x-vector dropouts") + # self.encoder_net.change_dropouts(dropout_rate) + # self.classif_net.change_dropouts(dropout_rate) + + # @staticmethod + # def filter_finetune_args(**kwargs): + # valid_args = ( + # ) + # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + # return args + + # @staticmethod + # def add_finetune_args(parser, prefix=None): + # if prefix is not None: + # outer_parser = parser + # parser = ArgumentParser(prog="") + + # if prefix is not None: + # outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + # add_argparse_args = add_class_args + # add_argparse_finetune_args = add_finetune_args diff --git a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py index 7956c9ba..b5bd220f 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py @@ -338,7 +338,7 @@ def get_config(self): hf_cfg = self.hf_feats.get_config() tran_cfg = self.transducer.get_config() del hf_cfg["class_name"] - del tran_cfg["class_name"] + # del tran_cfg["class_name"] config = { "hf_feats": hf_cfg, "transducer": tran_cfg, From d1a2419c5bb94b182f3ea9649aa8ea2d0bb0a792 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 16 Nov 2022 13:37:18 -0500 Subject: [PATCH 048/154] fix nan in plot tsne --- hyperion/bin/plot_embedding_tsne_per_class.py | 1 + hyperion/np/classifiers/__init__.py | 1 + hyperion/np/classifiers/svmc.py | 356 ++++++++++++++++++ 3 files changed, 358 insertions(+) create mode 100644 hyperion/np/classifiers/svmc.py diff --git a/hyperion/bin/plot_embedding_tsne_per_class.py b/hyperion/bin/plot_embedding_tsne_per_class.py index 5730cc06..5e832bff 100755 --- a/hyperion/bin/plot_embedding_tsne_per_class.py +++ b/hyperion/bin/plot_embedding_tsne_per_class.py @@ -97,6 +97,7 @@ def plot_embedding_tsne( # in the low dim space, we cannot use cosine scoring x2 = np.sum(x_tsne ** 2, axis=1)[:, None] d2 = x2 - 2 * np.dot(x_tsne, x_tsne.T) + x2.T + d2 = np.clip(d2, a_min=0, a_max=None) scores = -np.sqrt(d2) else: scores = cosine_scoring(x_c, x_c) diff --git a/hyperion/np/classifiers/__init__.py 
b/hyperion/np/classifiers/__init__.py
index 07da0af8..92a9305d 100644
--- a/hyperion/np/classifiers/__init__.py
+++ b/hyperion/np/classifiers/__init__.py
@@ -10,3 +10,4 @@
 from .greedy_fusion import GreedyFusionBinaryLR
 from .linear_svmc import LinearSVMC
 from .q_scoring_homo_gbe import QScoringHomoGBE
+from .svmc import GaussianSVMC
diff --git a/hyperion/np/classifiers/svmc.py b/hyperion/np/classifiers/svmc.py
new file mode 100644
index 00000000..764c6101
--- /dev/null
+++ b/hyperion/np/classifiers/svmc.py
@@ -0,0 +1,356 @@
+"""
+ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import os
+import logging
+import pickle
+import numpy as np
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+
+from sklearn.svm import SVC
+
+from ...hyp_defs import float_cpu
+from ..np_model import NPModel
+from ...utils.math import softmax
+
+
+class GaussianSVMC(NPModel):
+    """Gaussian Support Vector Machine for Classification."""
+
+    def __init__(
+        self,
+        C=1.0,
+        gamma="scale",
+        shrinking=True,
+        probability=True,
+        tol=0.0001,
+        cache_size=600,
+        multi_class="ovr",
+        break_ties=True,
+        class_weight=None,
+        random_state=None,
+        max_iter=100,
+        model=None,
+        verbose=0,
+        balance_class_weight=True,
+        lr_seed=1024,
+        labels=None,
+        **kwargs,
+    ):
+
+        super().__init__(**kwargs)
+
+        if class_weight is None and balance_class_weight:
+            class_weight = "balanced"
+
+        if random_state is None:
+            random_state = np.random.RandomState(seed=lr_seed)
+
+        self.balance_class_weight = balance_class_weight
+        if model is None:
+            self.svm = SVC(
+                C=C,
+                kernel="rbf",
+                gamma=gamma,
+                shrinking=shrinking,
+                probability=probability,
+                tol=tol,
+                cache_size=cache_size,
+                class_weight=class_weight,
+                verbose=verbose,
+                max_iter=max_iter,
+                decision_function_shape=multi_class,
+                break_ties=break_ties,
+                random_state=random_state,
+            )
+        else:
+            self.svm = model
+        self.set_labels(labels)
+
+    @property
+    def model_params(self):
+        return self.svm.get_params()
+
+    def set_labels(self, labels):
+        if isinstance(labels, np.ndarray):
+            labels = list(labels)
+        self.labels = labels
+
+    def get_config(self):
+        """Gets configuration hyperparams.
+        Returns:
+          Dictionary with config hyperparams.
+        """
+        config = {
+            "balance_class_weight": self.balance_class_weight,
+            "labels": self.labels,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def predict(self, x, eval_type="cat-post"):
+        """Evaluates the SVM
+
+        Args:
+          x: input features (num_samples, feat_dim),
+             it can be (num_samples,) if feat_dim=1.
+          eval_type: evaluation method: logit (log-likelihood ratio),
+                     bin-log-post (binary log-posteriors),
+                     bin-post (binary posteriors)
+                     cat-log-post (categorical log-posteriors),
+                     cat-post (categorical posteriors)
+        Returns:
+          Output scores (num_samples, num_classes)
+        """
+        if eval_type == "cat-post":
+            return self.svm.predict_proba(x)
+        if eval_type == "cat-log-post":
+            return self.svm.predict_log_proba(x)
+
+        return self.svm.predict_proba(x)
+
+    def __call__(self, x, eval_type="logit"):
+        """Evaluates the SVM
+
+        Args:
+          x: input features (num_samples, feat_dim),
+             it can be (num_samples,) if feat_dim=1.
+          eval_type: evaluation method: logit (log-likelihood ratio),
+                     bin-log-post (binary log-posteriors),
+                     bin-post (binary posteriors)
+                     cat-log-post (categorical log-posteriors),
+                     cat-post (categorical posteriors)
+        Returns:
+          Output scores (num_samples, num_classes)
+        """
+        return self.predict(x, eval_type)
+
+    def fit(self, x, class_ids, sample_weight=None):
+        """Estimates the parameters of the model.
+
+        Args:
+          x: input features (num_samples, feat_dim), it can be (num_samples,) if feat_dim=1.
+          class_ids: class integer [0, num_classes-1] identifier (num_samples,)
+          sample_weight: weight of each sample in the estimation (num_samples,)
+        """
+        print("--------------", type(x[3, 2]), type(class_ids[20]), "--------------")
+        self.svm.fit(x, class_ids)
+        if self.svm.fit_status_:
+            print("SVM did not converge")
+
+    def save(self, file_path):
+        """Saves the model to file.
+
+        Args:
+          file_path: filename path.
+        """
+        file_dir = os.path.dirname(file_path)
+        if not (os.path.isdir(file_dir)):
+            os.makedirs(file_dir, exist_ok=True)
+        split_path = os.path.splitext(file_path)
+        if not split_path[-1] == ".sav":
+            file_path = split_path[0] + ".sav"
+        with open(file_path, "wb") as f:
+            # with h5py.File(file_path, "w") as f:
+            # config = self.to_json()
+            # f.create_dataset("config", data=np.array(config, dtype="S"))
+            self.save_params(f)
+
+    @classmethod
+    def load(cls, file_path):
+        """Loads the model from file.
+
+        Args:
+          file_path: path to the file where the model is stored.
+
+        Returns:
+          Model object.
+        """
+        split_path = os.path.splitext(file_path)
+        if not split_path[-1] == ".sav":
+            file_path = split_path[0] + ".sav"
+
+        # with h5py.File(file_path, "r") as f:
+        with open(file_path, "rb") as f:
+            # json_str = str(np.asarray(f["config"]).astype("U"))
+            # config = cls.load_config_from_json(json_str)
+            config = None
+            return cls.load_params(f, config)
+
+    def save_params(self, f):
+        # params = {"A": self.A, "b": self.b}
+        # self._save_params_from_dict(f, params)
+        pickle.dump(self, f)
+
+    @classmethod
+    def load_params(cls, f, config):
+        # param_list = ["A", "b"]
+        # params = cls._load_params_to_dict(f, config["name"], param_list)
+        # kwargs = dict(list(config.items()) + list(params.items()))
+        # return cls(**kwargs)
+        svmc = pickle.load(f)
+        return svmc
+
+    @staticmethod
+    def filter_class_args(**kwargs):
+        """Extracts the hyperparams of the class from a dictionary.
+
+        Returns:
+          Hyperparameter dictionary to initialize the class.
+        """
+        valid_args = (
+            "C",
+            "gamma",
+            "shrinking",
+            "probability",
+            "tol",
+            "cache_size",
+            "multi_class",
+            "break_ties",
+            "class_weight",
+            "random_state",
+            "max_iter",
+            "verbose",
+            "balance_class_weight",
+            "lr_seed",
+            "model",
+            "labels",
+        )
+        return dict((k, kwargs[k]) for k in valid_args if k in kwargs)
+
+    filter_train_args = filter_class_args
+
+    @staticmethod
+    def add_class_args(parser, prefix=None):
+        """It adds the arguments corresponding to the class to jsonargparse.
+        Args:
+          parser: jsonargparse object
+          prefix: argument prefix.
+        """
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        parser.add_argument(
+            "--c",
+            dest="C",
+            default=1.0,
+            type=float,
+            help="inverse of regularization strength",
+        )
+        # parser.add_argument(
+        #     "--class_weight",
+        #     default=None,
+        #     help="Class weights",
+        # )
+        parser.add_argument(
+            "--gamma",
+            default="scale",
+            choices=["scale", "auto"],
+            help="Kernel coefficient for ‘rbf’",
+        )
+        parser.add_argument(
+            "--shrinking",
+            default=True,
+            type=bool,
+            help="Whether to use the shrinking heuristic",
+        )
+        parser.add_argument(
+            "--probability",
+            default=True,
+            type=bool,
+            help="Whether to enable probability estimates",
+        )
+        parser.add_argument(
+            "--break_ties",
+            default=True,
+            type=bool,
+            help="If true, predict will break ties according to the confidence values of decision_function; otherwise \
+                the first class among the tied classes is returned",
+        )
+        parser.add_argument(
+            "--lr-seed", default=1024, type=int, help="random number generator seed"
+        )
+        parser.add_argument(
+            "--max-iter",
+            dest="max_iter",
+            default=100,
+            type=int,
+            help="hard limit on the number of solver iterations (-1 for no limit)",
+        )
+        parser.add_argument(
+            "--tol", default=1e-4, type=float, help="tolerance for stopping criteria"
+        )
+        parser.add_argument(
+            "--multi-class",
+            default="ovr",
+            choices=["ovr", "ovo"],
+            help=(
+                "ovr fits a binary problem for each class else "
+                "it minimizes the multinomial loss."
+            ),
+        )
+        parser.add_argument(
+            "--cache_size",
+            default=600,
+            type=int,
+            help="Specify the size of the kernel cache (in MB)",
+        )
+        parser.add_argument(
+            "--verbose",
+            default=0,
+            type=int,
+            help="verbosity level passed to libsvm",
+        )
+        parser.add_argument(
+            "--balance-class-weight",
+            default=False,
+            action=ActionYesNo,
+            help="Balances the weight of each class when computing W",
+        )
+        parser.add_argument("--name", default="svc", help="model name")
+        if prefix is not None:
+            outer_parser.add_argument(
+                "--" + prefix, action=ActionParser(parser=parser),
+            )
+
+    @staticmethod
+    def filter_eval_args(**kwargs):
+        """Extracts the evaluation time hyperparams of the class from a dictionary.
+
+        Returns:
+          Hyperparameters to evaluate the class.
+        """
+        valid_args = ("eval_type",)
+        return dict((k, kwargs[k]) for k in valid_args if k in kwargs)
+
+    @staticmethod
+    def add_eval_args(parser, prefix=None):
+        """It adds the arguments needed to evaluate the class to jsonargparse.
+        Args:
+          parser: jsonargparse object
+          prefix: argument prefix.
+ """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--eval-type", + default="cat-post", + choices=["cat-logpost", "cat-post"], + help=("type of evaluation"), + ) + + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, action=ActionParser(parser=parser), + ) + + # for backward compatibility + filter_train_args = filter_class_args + add_argparse_args = add_class_args + add_argparse_train_args = add_class_args + add_argparse_eval_args = add_eval_args From 4182374bc6fa2e8e7ee908896256274c8e7dcc7f Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 21 Nov 2022 10:08:17 -0500 Subject: [PATCH 049/154] fix train w2v --- hyperion/bin/train_wav2vec2xvector.py | 62 ++++++++++++++++------ hyperion/np/classifiers/linear_svmc.py | 2 +- hyperion/np/classifiers/svmc.py | 12 ++--- hyperion/torch/data/audio_dataset.py | 14 ++--- hyperion/torch/trainers/xvector_trainer.py | 1 - 5 files changed, 59 insertions(+), 32 deletions(-) diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index e92b9a1a..f1281904 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -3,7 +3,7 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +# import sys import os from pathlib import Path from jsonargparse import ( @@ -25,7 +25,9 @@ from hyperion.torch.utils import ddp from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.data import AudioDataset as AD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import SegSamplerFactory + +# from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.models import ( HFWav2Vec2ResNet1dXVector, @@ -44,19 +46,21 @@ def init_data(partition, rank, num_gpus, **kwargs): kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) - sampler_args = Sampler.filter_args(**kwargs["sampler"]) + sampler_args = kwargs["sampler"] if rank == 0: logging.info("{} audio dataset args={}".format(partition, ad_args)) logging.info("{} sampler args={}".format(partition, sampler_args)) logging.info("init %s dataset", partition) - ad_args["is_val"] = partition == "val" + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val dataset = AD(**ad_args) if rank == 0: logging.info("init %s samplers", partition) - sampler = Sampler(dataset, **sampler_args) + sampler = SegSamplerFactory.create(dataset, **sampler_args) if rank == 0: logging.info("init %s dataloader", partition) @@ -70,6 +74,36 @@ def init_data(partition, rank, num_gpus, **kwargs): return data_loader +# def init_data(partition, rank, num_gpus, **kwargs): + +# kwargs = kwargs["data"][partition] +# ad_args = AD.filter_args(**kwargs["dataset"]) +# sampler_args = Sampler.filter_args(**kwargs["sampler"]) +# if rank == 0: +# logging.info("{} audio dataset args={}".format(partition, ad_args)) +# logging.info("{} sampler args={}".format(partition, sampler_args)) +# logging.info("init %s dataset", partition) + +# ad_args["is_val"] = partition == "val" +# dataset = AD(**ad_args) + +# if rank == 0: +# logging.info("init %s samplers", partition) + +# sampler = Sampler(dataset, **sampler_args) + +# if rank == 0: +# logging.info("init %s dataloader", partition) + +# num_workers = 
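As a usage illustration for the GaussianSVMC class added above: a toy end-to-end run on synthetic data. Only names defined by the patch are used, but this is a sketch, not a tested recipe, and the output path is made up:

import numpy as np
from hyperion.np.classifiers import GaussianSVMC

# synthetic 2-class problem
x = np.random.randn(200, 10).astype(np.float32)
y = (x[:, 0] > 0).astype(int)

svm = GaussianSVMC(C=1.0, gamma="scale", labels=["neg", "pos"])
svm.fit(x, y)                            # reports if libsvm did not converge
post = svm(x, eval_type="cat-post")      # (200, 2) class posteriors
svm.save("exp/svm/model.sav")            # extension is forced to .sav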
kwargs["data_loader"]["num_workers"] +# num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) +# largs = ( +# {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} +# ) +# data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) +# return data_loader + + def init_model(num_classes, rank, model_class, **kwargs): model_args = model_class.filter_args(**kwargs["model"]) if rank == 0: @@ -97,18 +131,14 @@ def train_model(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - model = init_model(train_loader.dataset.num_classes, **kwargs) + model = init_model(list(train_loader.dataset.num_classes.values())[0], **kwargs) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: logging.info("trainer args={}".format(trn_args)) metrics = {"acc": CategoricalAccuracy()} trainer = Trainer( - model, - device=device, - metrics=metrics, - ddp=world_size > 1, - **trn_args, + model, device=device, metrics=metrics, ddp=world_size > 1, **trn_args, ) trainer.load_last_checkpoint() trainer.fit(train_loader, val_loader) @@ -120,9 +150,10 @@ def make_parser(model_class): parser = ArgumentParser() parser.add_argument("--cfg", action=ActionConfigFile) + train_parser = ArgumentParser(prog="") AD.add_class_args(train_parser, prefix="dataset", skip={}) - Sampler.add_class_args(train_parser, prefix="sampler") + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", type=int, @@ -132,7 +163,7 @@ def make_parser(model_class): val_parser = ArgumentParser(prog="") AD.add_class_args(val_parser, prefix="dataset", skip={}) - Sampler.add_class_args(val_parser, prefix="sampler") + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", type=int, @@ -144,14 +175,11 @@ def make_parser(model_class): data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) parser.link_arguments( - "data.train.dataset.class_file", "data.val.dataset.class_file" + "data.train.dataset.class_files", "data.val.dataset.class_files" ) parser.link_arguments( "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" ) - parser.link_arguments( - "data.train.sampler.batch_size", "data.val.sampler.batch_size" - ) model_class.add_class_args(parser, prefix="model") Trainer.add_class_args( diff --git a/hyperion/np/classifiers/linear_svmc.py b/hyperion/np/classifiers/linear_svmc.py index cb95e903..607d83de 100644 --- a/hyperion/np/classifiers/linear_svmc.py +++ b/hyperion/np/classifiers/linear_svmc.py @@ -359,7 +359,7 @@ def add_eval_args(parser, prefix=None): parser.add_argument( "--eval-type", default="logit", - choices=["logit", "bin-logpost", "bin-post", "cat-logpost", "cat-post"], + choices=["logit", "bin-log-post", "bin-post", "cat-log-post", "cat-post"], help=("type of evaluation"), ) diff --git a/hyperion/np/classifiers/svmc.py b/hyperion/np/classifiers/svmc.py index 764c6101..77a05ff9 100644 --- a/hyperion/np/classifiers/svmc.py +++ b/hyperion/np/classifiers/svmc.py @@ -90,7 +90,7 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) - def predict(self, x, eval_type="cat-post"): + def predict(self, x, eval_type="decision-func"): """Evaluates the SVM Args: @@ -109,9 +109,9 @@ def predict(self, x, eval_type="cat-post"): if 
eval_type == "cat-log-post": return self.svm.predict_log_proba(x) - return self.svm.predict_proba(x) + return self.svm.decision_function(x) - def __call__(self, x, eval_type="logit"): + def __call__(self, x, eval_type="decision-func"): """Evaluates the SVM Args: @@ -138,7 +138,7 @@ def fit(self, x, class_ids, sample_weight=None): print("--------------", type(x[3, 2]), type(class_ids[20]), "--------------") self.svm.fit(x, class_ids) if self.svm.fit_status_: - print("SVM did not converge") + logging.warning("SVM did not converge") def save(self, file_path): """Saves the model to file. @@ -339,8 +339,8 @@ def add_eval_args(parser, prefix=None): parser.add_argument( "--eval-type", - default="cat-post", - choices=["cat-logpost", "cat-post"], + default="decision-func", + choices=["cat-log-post", "cat-post", "decision-func"], help=("type of evaluation"), ) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index f24ca8c5..439c00ba 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -16,7 +16,8 @@ from ..torch_defs import floatstr_torch from ...io import RandomAccessAudioReader as AR -from ...utils.utt2info import Utt2Info + +# from ...utils.utt2info import Utt2Info from ...np.augment import SpeechAugment from torch.utils.data import Dataset @@ -618,7 +619,7 @@ def _read_audio(self, seg_id, start, duration): # read audio recording_id = self.seg_set.recording_ids(seg_id) x, fs = self.r.read([recording_id], time_offset=start, time_durs=read_duration) - return x[0], fs[0] + return x[0].astype(floatstr_torch(), copy=False), fs[0] def _apply_augs(self, x, num_samples, reverb_context_samples): x_augs = [] @@ -630,7 +631,7 @@ def _apply_augs(self, x, num_samples, reverb_context_samples): x_aug, aug_info = augmenter(x) # remove the extra left context used to compute the reverberation. 
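A side note on the eval_type="decision-func" default adopted above: for an sklearn SVC, decision_function returns raw margins and is always available, while predict_proba requires probability=True and an extra Platt-scaling fit. A small sketch with plain sklearn (GaussianSVMC wraps an SVC, as shown earlier) to make the difference concrete:

import numpy as np
from sklearn.svm import SVC

x = np.random.randn(100, 5)
y = (x[:, 0] > 0).astype(int)

svm = SVC(kernel="rbf", probability=True).fit(x, y)
margins = svm.decision_function(x)  # (100,) for binary, (100, n_classes) for ovr
post = svm.predict_proba(x)         # Platt-scaled posteriors in [0, 1]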
x_aug = x_aug[reverb_context_samples : len(x)] - x_augs.append(x_aug) + x_augs.append(x_aug.astype(floatstr_torch(), copy=False)) return x_augs @@ -663,14 +664,14 @@ def _get_resampler(self, fs): resampling_method="kaiser_window", beta=14.769656459379492, ) - self.resampler[fs] = resampler - return resampler + resampler_f = lambda x: resampler(torch.from_numpy(x)).numpy() + self.resamplers[fs] = resampler_f + return resampler_f def _resample(self, x, fs): try: if self.target_sample_freq is None or fs == self.target_sample_freq: return x, fs - resampler = self._get_resampler(fs) return resampler(x), self.target_sample_freq except: @@ -681,7 +682,6 @@ def __getitem__(self, segment): seg_id, start, duration = self._parse_segment_item(segment) x, fs = self._read_audio(seg_id, start, duration) x, fs = self._resample(x, fs) - if self.augmenters: # augmentations num_samples = int(duration * fs) diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 4cc4bc8c..9b04fdd0 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -129,7 +129,6 @@ def train_epoch(self, data_loader): data, target = data.to(self.device), target.to(self.device) batch_size = data.shape[0] - with self.amp_autocast(): output = self.model(data, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps From 2f6547acdef9779ba1aef90fb49f974ef7e18a39 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Mon, 21 Nov 2022 22:40:22 -0500 Subject: [PATCH 050/154] Address comments for transducer --- .../v1/conf/wav2vec2xlsr300m_transducer.yaml | 8 - .../v1/global_conf/config_transducer_v1.sh | 14 - hyperion/torch/data/__init__.py | 2 +- hyperion/torch/data/audio_dataset.py | 34 +- hyperion/torch/data/bucketing_seg_sampler.py | 5 +- hyperion/torch/models/transducer/conformer.py | 2 +- .../torch/models/transducer/transducer.py | 2 +- .../torch/models/transducer/transformer.py | 2 +- hyperion/utils/text.py | 143 +++ hyperion/utils/utils.py | 979 ------------------ 10 files changed, 164 insertions(+), 1027 deletions(-) delete mode 100644 hyperion/utils/utils.py diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml index 3d9d768a..b8a1cdbb 100644 --- a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml @@ -1,14 +1,6 @@ hf_feats: pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus transducer: -# conformer_enc: - # num_features: 80 - # subsampling_factor: 4 - # d_model: 512 - # nhead: 8 - # dim_feedforward: 2048 - # num_encoder_layers: 12 - # vgg_frontend: False decoder: embedding_dim: 1024 num_layers: 2 diff --git a/egs/librispeech/v1/global_conf/config_transducer_v1.sh b/egs/librispeech/v1/global_conf/config_transducer_v1.sh index 9f28f551..ca1ca29c 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v1.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v1.sh @@ -37,17 +37,3 @@ nnet_s3_name=${nnet_name}.s3 nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name nnet_s3=$nnet_s3_dir/model_ep0002.pth nnet_s3=$nnet_s3_dir/model_ep0005.pth - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=0 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git 
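The _get_resampler change above caches one callable per source sample rate and hides the numpy-to-torch round trip from callers. The idea in isolation, assuming torchaudio is available (names and rates here are illustrative, not the patch's exact code):

import torch
import torchaudio.transforms as T

_resamplers = {}

def get_resampler(fs, target_fs=16000):
    # build (and cache) a resampler the first time each source rate is seen
    if fs not in _resamplers:
        r = T.Resample(int(fs), int(target_fs))
        # wrap it so it accepts and returns numpy arrays
        _resamplers[fs] = lambda x: r(torch.from_numpy(x)).numpy()
    return _resamplers[fs]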
a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index e289acbf..16162da8 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -11,5 +11,5 @@ # samplers from .bucketing_seg_sampler import BucketingSegSampler -from .weighted_seq_sampler import ClassWeightedSeqSampler +# from .weighted_seq_sampler import ClassWeightedSeqSampler from .seg_sampler_factory import SegSamplerFactory diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 35b7d85b..403e0d1d 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -459,7 +459,7 @@ class AudioDataset(Dataset): def __init__( self, audio_file, - segments_file=None, + segments_file, class_names=None, class_files=None, bpe_model=None, @@ -493,22 +493,21 @@ def __init__( if rank == 0: logging.info("loading segments file %s" % segments_file) - if segments_file is not None: - self.seg_set = SegmentSet.load(segments_file) - if rank == 0: - logging.info("dataset contains %d seqs" % len(self.seg_set)) + self.seg_set = SegmentSet.load(segments_file) + if rank == 0: + logging.info("dataset contains %d seqs" % len(self.seg_set)) - self.is_val = is_val - if time_durs_file is not None: - if rank == 0: - logging.info("loading durations file %s" % time_durs_file) + self.is_val = is_val + if time_durs_file is not None: + if rank == 0: + logging.info("loading durations file %s" % time_durs_file) - time_durs = SegmentSet.load(time_durs_file) - self.seg_set["duration"] = time_durs.loc[ - self.seg_set["id"] - ].class_id.values.astype(np.float, copy=False) - else: - assert "duration" in self.seg_set + time_durs = SegmentSet.load(time_durs_file) + self.seg_set["duration"] = time_durs.loc[ + self.seg_set["id"] + ].class_id.values.astype(np.float, copy=False) + else: + assert "duration" in self.seg_set logging.info("loading class-info files") self._load_class_infos(class_names, class_files, is_val) @@ -546,7 +545,6 @@ def _load_text_infos(self, text_file, is_val): text = read_text(text_file) self.seg_set["text"] = text.loc[self.seg_set["id"]].text - self.text_info = ClassInfo(text) @@ -679,8 +677,7 @@ def _get_segment_info(self, seg_id): idx = class_info.loc[seg_info, "class_idx"] seg_info = idx if info_name == "text": - text = self.text_info.loc[seg_id, "text"] - seg_info = self.sp.encode(text, out_type=int) + seg_info = self.sp.encode(seg_info, out_type=int) r.append(seg_info) @@ -713,7 +710,6 @@ def __getitem__(self, segment): seg_info = self._get_segment_info(seg_id) r.extend(seg_info) - return (*r,) @staticmethod diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 83e6425c..8b0e855a 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -42,9 +42,8 @@ def create_buckets(self): bucket_length = cum_lengths[-1] / self.num_buckets buckets = [] for i in range(self.num_buckets): - bucket_bool = (cum_lengths <= bucket_length) & (cum_lengths > 0) - bucket_idx = np.arange(len(bucket_bool))[bucket_bool] - bucket_i = sorted_seg_set.iloc[bucket_idx] + bucket_idx = (cum_lengths <= bucket_length) & (cum_lengths > 0) + bucket_i = sorted_seg_set.loc[bucket_idx] buckets.append(bucket_i) cum_lengths -= bucket_length diff --git a/hyperion/torch/models/transducer/conformer.py b/hyperion/torch/models/transducer/conformer.py index 734c28ce..a350d579 100644 --- a/hyperion/torch/models/transducer/conformer.py +++ 
b/hyperion/torch/models/transducer/conformer.py
@@ -25,7 +25,7 @@
 from torch import Tensor, nn

 from .transformer import Transformer
-from hyperion.utils.utils import make_pad_mask, subsequent_chunk_mask
+from hyperion.utils.text import make_pad_mask, subsequent_chunk_mask


 class Conformer(Transformer):
diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py
index 80bf9891..52547954 100644
--- a/hyperion/torch/models/transducer/transducer.py
+++ b/hyperion/torch/models/transducer/transducer.py
@@ -27,7 +27,7 @@
 from .encoder_interface import EncoderInterface

 from ...torch_model import TorchModel
-from hyperion.utils.utils import add_sos
+from hyperion.utils.text import add_sos

 # from .conformer import Conformer
 from .decoder import Decoder
 from .joiner import Joiner
diff --git a/hyperion/torch/models/transducer/transformer.py b/hyperion/torch/models/transducer/transformer.py
index 38edbd62..d9d5d6fb 100644
--- a/hyperion/torch/models/transducer/transformer.py
+++ b/hyperion/torch/models/transducer/transformer.py
@@ -23,7 +23,7 @@
 from .encoder_interface import EncoderInterface
 from .subsampling import Conv2dSubsampling, VggSubsampling
-from hyperion.utils.utils import make_pad_mask
+from hyperion.utils.text import make_pad_mask


 class Transformer(EncoderInterface):
diff --git a/hyperion/utils/text.py b/hyperion/utils/text.py
index 5e06ad0c..48be92e3 100644
--- a/hyperion/utils/text.py
+++ b/hyperion/utils/text.py
@@ -4,11 +4,154 @@
 """
 from pathlib import Path

+import k2
+import k2.version
 import numpy as np
 import pandas as pd
+import torch


+# Copied and modified from https://github.com/wenet-e2e/wenet/blob/main/wenet/utils/mask.py
+def subsequent_chunk_mask(
+    size: int,
+    chunk_size: int,
+    num_left_chunks: int = -1,
+    device: torch.device = torch.device("cpu"),
+) -> torch.Tensor:
+    """Create mask for subsequent steps (size, size) with chunk size,
+       this is for streaming encoder
+    Args:
+        size (int): size of mask
+        chunk_size (int): size of chunk
+        num_left_chunks (int): number of left chunks
+            <0: use full chunk
+            >=0: use num_left_chunks
+        device (torch.device): "cpu" or "cuda" or torch.Tensor.device
+    Returns:
+        torch.Tensor: mask
+    Examples:
+        >>> subsequent_chunk_mask(4, 2)
+        [[1, 1, 0, 0],
+         [1, 1, 0, 0],
+         [1, 1, 1, 1],
+         [1, 1, 1, 1]]
+    """
+    ret = torch.zeros(size, size, device=device, dtype=torch.bool)
+    for i in range(size):
+        if num_left_chunks < 0:
+            start = 0
+        else:
+            start = max((i // chunk_size - num_left_chunks) * chunk_size, 0)
+        ending = min((i // chunk_size + 1) * chunk_size, size)
+        ret[i, start:ending] = True
+    return ret
+
+
+def make_pad_mask(lengths: torch.Tensor) -> torch.Tensor:
+    """
+    Args:
+      lengths:
+        A 1-D tensor containing sentence lengths.
+    Returns:
+      Return a 2-D bool tensor, where masked positions
+      are filled with `True` and non-masked positions are
+      filled with `False`.
+
+    >>> lengths = torch.tensor([1, 3, 2, 5])
+    >>> make_pad_mask(lengths)
+    tensor([[False, True, True, True, True],
+            [False, False, False, True, True],
+            [False, False, True, True, True],
+            [False, False, False, False, False]])
+    """
+    assert lengths.ndim == 1, lengths.ndim
+
+    max_len = lengths.max()
+    n = lengths.size(0)
+
+    expanded_lengths = torch.arange(max_len).expand(n, max_len).to(lengths)
+
+    return expanded_lengths >= lengths.unsqueeze(1)
+
+
+def concat(
+    ragged: k2.RaggedTensor, value: int, direction: str
+) -> k2.RaggedTensor:
+    """Prepend a value to the beginning of each sublist or append a value
+ to the end of each sublist. + + Args: + ragged: + A ragged tensor with two axes. + value: + The value to prepend or append. + direction: + It can be either "left" or "right". If it is "left", we + prepend the value to the beginning of each sublist; + if it is "right", we append the value to the end of each + sublist. + + Returns: + Return a new ragged tensor, whose sublists either start with + or end with the given value. + + >>> a = k2.RaggedTensor([[1, 3], [5]]) + >>> a + [ [ 1 3 ] [ 5 ] ] + >>> concat(a, value=0, direction="left") + [ [ 0 1 3 ] [ 0 5 ] ] + >>> concat(a, value=0, direction="right") + [ [ 1 3 0 ] [ 5 0 ] ] + + """ + dtype = ragged.dtype + device = ragged.device + + assert ragged.num_axes == 2, f"num_axes: {ragged.num_axes}" + pad_values = torch.full( + size=(ragged.tot_size(0), 1), + fill_value=value, + device=device, + dtype=dtype, + ) + pad = k2.RaggedTensor(pad_values) + + if direction == "left": + ans = k2.ragged.cat([pad, ragged], axis=1) + elif direction == "right": + ans = k2.ragged.cat([ragged, pad], axis=1) + else: + raise ValueError( + f'Unsupported direction: {direction}. " \ + "Expect either "left" or "right"' + ) + return ans + + +def add_sos(ragged: k2.RaggedTensor, sos_id: int) -> k2.RaggedTensor: + """Add SOS to each sublist. + + Args: + ragged: + A ragged tensor with two axes. + sos_id: + The ID of the SOS symbol. + + Returns: + Return a new ragged tensor, where each sublist starts with SOS. + + >>> a = k2.RaggedTensor([[1, 3], [5]]) + >>> a + [ [ 1 3 ] [ 5 ] ] + >>> add_sos(a, sos_id=0) + [ [ 0 1 3 ] [ 0 5 ] ] + + """ + return concat(ragged, sos_id, direction="left") + + def read_text(text_file: str): # assert check_argument_types() text_file = Path(text_file) diff --git a/hyperion/utils/utils.py b/hyperion/utils/utils.py deleted file mode 100644 index 2da78581..00000000 --- a/hyperion/utils/utils.py +++ /dev/null @@ -1,979 +0,0 @@ -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang -# Mingshuang Luo) -# -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import argparse -import collections -import logging -import os -import re -import subprocess -from collections import defaultdict -from contextlib import contextmanager -from datetime import datetime -from pathlib import Path -from typing import Dict, Iterable, List, TextIO, Tuple, Union - -import k2 -import k2.version -import kaldialign -import sentencepiece as spm -import torch -import torch.distributed as dist -import torch.nn as nn -from torch.utils.tensorboard import SummaryWriter - -from hyperion.utils.checkpoint import average_checkpoints - -Pathlike = Union[str, Path] - - -# Pytorch issue: https://github.com/pytorch/pytorch/issues/47379 -# Fixed: https://github.com/pytorch/pytorch/pull/49853 -# The fix was included in v1.9.0 -# https://github.com/pytorch/pytorch/releases/tag/v1.9.0 -def is_jit_tracing(): - if torch.jit.is_scripting(): - return False - elif torch.jit.is_tracing(): - return True - return False - - -@contextmanager -def get_executor(): - # We'll either return a process pool or a distributed worker pool. - # Note that this has to be a context manager because we might use multiple - # context manager ("with" clauses) inside, and this way everything will - # free up the resources at the right time. - try: - # If this is executed on the CLSP grid, we will try to use the - # Grid Engine to distribute the tasks. - # Other clusters can also benefit from that, provided a - # cluster-specific wrapper. - # (see https://github.com/pzelasko/plz for reference) - # - # The following must be installed: - # $ pip install dask distributed - # $ pip install git+https://github.com/pzelasko/plz - name = subprocess.check_output("hostname -f", shell=True, text=True) - if name.strip().endswith(".clsp.jhu.edu"): - import plz - from distributed import Client - - with plz.setup_cluster() as cluster: - cluster.scale(80) - yield Client(cluster) - return - except Exception: - pass - # No need to return anything - compute_and_store_features - # will just instantiate the pool itself. - yield None - - -def str2bool(v): - """Used in argparse.ArgumentParser.add_argument to indicate - that a type is a bool type and user can enter - - - yes, true, t, y, 1, to represent True - - no, false, f, n, 0, to represent False - - See https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse # noqa - """ - if isinstance(v, bool): - return v - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - elif v.lower() in ("no", "false", "f", "n", "0"): - return False - else: - raise argparse.ArgumentTypeError("Boolean value expected.") - - -def setup_logger( - log_filename: Pathlike, - log_level: str = "info", - use_console: bool = True, -) -> None: - """Setup log level. - - Args: - log_filename: - The filename to save the log. - log_level: - The log level to use, e.g., "debug", "info", "warning", "error", - "critical" - use_console: - True to also print logs to console. 
- """ - now = datetime.now() - date_time = now.strftime("%Y-%m-%d-%H-%M-%S") - if dist.is_available() and dist.is_initialized(): - world_size = dist.get_world_size() - rank = dist.get_rank() - formatter = f"%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] ({rank}/{world_size}) %(message)s" # noqa - log_filename = f"{log_filename}-{date_time}-{rank}" - else: - formatter = ( - "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" - ) - log_filename = f"{log_filename}-{date_time}" - - os.makedirs(os.path.dirname(log_filename), exist_ok=True) - - level = logging.ERROR - if log_level == "debug": - level = logging.DEBUG - elif log_level == "info": - level = logging.INFO - elif log_level == "warning": - level = logging.WARNING - elif log_level == "critical": - level = logging.CRITICAL - - logging.basicConfig( - filename=log_filename, - format=formatter, - level=level, - filemode="w", - ) - if use_console: - console = logging.StreamHandler() - console.setLevel(level) - console.setFormatter(logging.Formatter(formatter)) - logging.getLogger("").addHandler(console) - - -class AttributeDict(dict): - def __getattr__(self, key): - if key in self: - return self[key] - raise AttributeError(f"No such attribute '{key}'") - - def __setattr__(self, key, value): - self[key] = value - - def __delattr__(self, key): - if key in self: - del self[key] - return - raise AttributeError(f"No such attribute '{key}'") - - -def encode_supervisions( - supervisions: dict, subsampling_factor: int -) -> Tuple[torch.Tensor, List[str]]: - """ - Encodes Lhotse's ``batch["supervisions"]`` dict into - a pair of torch Tensor, and a list of transcription strings. - - The supervision tensor has shape ``(batch_size, 3)``. - Its second dimension contains information about sequence index [0], - start frames [1] and num frames [2]. - - The batch items might become re-ordered during this operation -- the - returned tensor and list of strings are guaranteed to be consistent with - each other. - """ - supervision_segments = torch.stack( - ( - supervisions["sequence_idx"], - supervisions["start_frame"] // subsampling_factor, - supervisions["num_frames"] // subsampling_factor, - ), - 1, - ).to(torch.int32) - - indices = torch.argsort(supervision_segments[:, 2], descending=True) - supervision_segments = supervision_segments[indices] - texts = supervisions["text"] - texts = [texts[idx] for idx in indices] - - return supervision_segments, texts - - -def get_texts( - best_paths: k2.Fsa, return_ragged: bool = False -) -> Union[List[List[int]], k2.RaggedTensor]: - """Extract the texts (as word IDs) from the best-path FSAs. - Args: - best_paths: - A k2.Fsa with best_paths.arcs.num_axes() == 3, i.e. - containing multiple FSAs, which is expected to be the result - of k2.shortest_path (otherwise the returned values won't - be meaningful). - return_ragged: - True to return a ragged tensor with two axes [utt][word_id]. - False to return a list-of-list word IDs. - Returns: - Returns a list of lists of int, containing the label sequences we - decoded. - """ - if isinstance(best_paths.aux_labels, k2.RaggedTensor): - # remove 0's and -1's. - aux_labels = best_paths.aux_labels.remove_values_leq(0) - # TODO: change arcs.shape() to arcs.shape - aux_shape = best_paths.arcs.shape().compose(aux_labels.shape) - - # remove the states and arcs axes. - aux_shape = aux_shape.remove_axis(1) - aux_shape = aux_shape.remove_axis(1) - aux_labels = k2.RaggedTensor(aux_shape, aux_labels.values) - else: - # remove axis corresponding to states. 
- aux_shape = best_paths.arcs.shape().remove_axis(1) - aux_labels = k2.RaggedTensor(aux_shape, best_paths.aux_labels) - # remove 0's and -1's. - aux_labels = aux_labels.remove_values_leq(0) - - assert aux_labels.num_axes == 2 - if return_ragged: - return aux_labels - else: - return aux_labels.tolist() - - -def get_alignments(best_paths: k2.Fsa, kind: str) -> List[List[int]]: - """Extract labels or aux_labels from the best-path FSAs. - - Args: - best_paths: - A k2.Fsa with best_paths.arcs.num_axes() == 3, i.e. - containing multiple FSAs, which is expected to be the result - of k2.shortest_path (otherwise the returned values won't - be meaningful). - kind: - Possible values are: "labels" and "aux_labels". Caution: When it is - "labels", the resulting alignments contain repeats. - Returns: - Returns a list of lists of int, containing the token sequences we - decoded. For `ans[i]`, its length equals to the number of frames - after subsampling of the i-th utterance in the batch. - - Example: - When `kind` is `labels`, one possible alignment example is (with - repeats):: - - c c c blk a a blk blk t t t blk blk - - If `kind` is `aux_labels`, the above example changes to:: - - c blk blk blk a blk blk blk t blk blk blk blk - - """ - assert kind in ("labels", "aux_labels") - # arc.shape() has axes [fsa][state][arc], we remove "state"-axis here - token_shape = best_paths.arcs.shape().remove_axis(1) - # token_shape has axes [fsa][arc] - tokens = k2.RaggedTensor( - token_shape, getattr(best_paths, kind).contiguous() - ) - tokens = tokens.remove_values_eq(-1) - return tokens.tolist() - - -def save_alignments( - alignments: Dict[str, List[int]], - subsampling_factor: int, - filename: str, -) -> None: - """Save alignments to a file. - - Args: - alignments: - A dict containing alignments. Keys of the dict are utterances and - values are the corresponding framewise alignments after subsampling. - subsampling_factor: - The subsampling factor of the model. - filename: - Path to save the alignments. - Returns: - Return None. - """ - ali_dict = { - "subsampling_factor": subsampling_factor, - "alignments": alignments, - } - torch.save(ali_dict, filename) - - -def load_alignments(filename: str) -> Tuple[int, Dict[str, List[int]]]: - """Load alignments from a file. - - Args: - filename: - Path to the file containing alignment information. - The file should be saved by :func:`save_alignments`. - Returns: - Return a tuple containing: - - subsampling_factor: The subsampling_factor used to compute - the alignments. - - alignments: A dict containing utterances and their corresponding - framewise alignment, after subsampling. - """ - ali_dict = torch.load(filename) - subsampling_factor = ali_dict["subsampling_factor"] - alignments = ali_dict["alignments"] - return subsampling_factor, alignments - - -def store_transcripts( - filename: Pathlike, texts: Iterable[Tuple[str, str, str]] -) -> None: - """Save predicted results and reference transcripts to a file. - - Args: - filename: - File to save the results to. - texts: - An iterable of tuples. The first element is the cur_id, the second is - the reference transcript and the third element is the predicted result. - Returns: - Return None. 
- """ - with open(filename, "w") as f: - for cut_id, ref, hyp in texts: - print(f"{cut_id}:\tref={ref}", file=f) - print(f"{cut_id}:\thyp={hyp}", file=f) - - - -def write_error_stats( - f: TextIO, - test_set_name: str, - results: List[Tuple[str, str]], - enable_log: bool = True, -) -> float: - """Write statistics based on predicted results and reference transcripts. - - It will write the following to the given file: - - - WER - - number of insertions, deletions, substitutions, corrects and total - reference words. For example:: - - Errors: 23 insertions, 57 deletions, 212 substitutions, over 2606 - reference words (2337 correct) - - - The difference between the reference transcript and predicted result. - An instance is given below:: - - THE ASSOCIATION OF (EDISON->ADDISON) ILLUMINATING COMPANIES - - The above example shows that the reference word is `EDISON`, - but it is predicted to `ADDISON` (a substitution error). - - Another example is:: - - FOR THE FIRST DAY (SIR->*) I THINK - - The reference word `SIR` is missing in the predicted - results (a deletion error). - results: - An iterable of tuples. The first element is the cur_id, the second is - the reference transcript and the third element is the predicted result. - enable_log: - If True, also print detailed WER to the console. - Otherwise, it is written only to the given file. - Returns: - Return None. - """ - subs: Dict[Tuple[str, str], int] = defaultdict(int) - ins: Dict[str, int] = defaultdict(int) - dels: Dict[str, int] = defaultdict(int) - - # `words` stores counts per word, as follows: - # corr, ref_sub, hyp_sub, ins, dels - words: Dict[str, List[int]] = defaultdict(lambda: [0, 0, 0, 0, 0]) - num_corr = 0 - ERR = "*" - for cut_id, ref, hyp in results: - ali = kaldialign.align(ref, hyp, ERR) - for ref_word, hyp_word in ali: - if ref_word == ERR: - ins[hyp_word] += 1 - words[hyp_word][3] += 1 - elif hyp_word == ERR: - dels[ref_word] += 1 - words[ref_word][4] += 1 - elif hyp_word != ref_word: - subs[(ref_word, hyp_word)] += 1 - words[ref_word][1] += 1 - words[hyp_word][2] += 1 - else: - words[ref_word][0] += 1 - num_corr += 1 - ref_len = sum([len(r) for _, r, _ in results]) - sub_errs = sum(subs.values()) - ins_errs = sum(ins.values()) - del_errs = sum(dels.values()) - tot_errs = sub_errs + ins_errs + del_errs - tot_err_rate = "%.2f" % (100.0 * tot_errs / ref_len) - - if enable_log: - logging.info( - f"[{test_set_name}] %WER {tot_errs / ref_len:.2%} " - f"[{tot_errs} / {ref_len}, {ins_errs} ins, " - f"{del_errs} del, {sub_errs} sub ]" - ) - - print(f"%WER = {tot_err_rate}", file=f) - print( - f"Errors: {ins_errs} insertions, {del_errs} deletions, " - f"{sub_errs} substitutions, over {ref_len} reference " - f"words ({num_corr} correct)", - file=f, - ) - print( - "Search below for sections starting with PER-UTT DETAILS:, " - "SUBSTITUTIONS:, DELETIONS:, INSERTIONS:, PER-WORD STATS:", - file=f, - ) - - print("", file=f) - print("PER-UTT DETAILS: corr or (ref->hyp) ", file=f) - for cut_id, ref, hyp in results: - ali = kaldialign.align(ref, hyp, ERR) - combine_successive_errors = True - if combine_successive_errors: - ali = [[[x], [y]] for x, y in ali] - for i in range(len(ali) - 1): - if ali[i][0] != ali[i][1] and ali[i + 1][0] != ali[i + 1][1]: - ali[i + 1][0] = ali[i][0] + ali[i + 1][0] - ali[i + 1][1] = ali[i][1] + ali[i + 1][1] - ali[i] = [[], []] - ali = [ - [ - list(filter(lambda a: a != ERR, x)), - list(filter(lambda a: a != ERR, y)), - ] - for x, y in ali - ] - ali = list(filter(lambda x: x != [[], []], ali)) - ali = [ - [ - 
ERR if x == [] else " ".join(x), - ERR if y == [] else " ".join(y), - ] - for x, y in ali - ] - - print( - f"{cut_id}:\t" - + " ".join( - ( - ref_word - if ref_word == hyp_word - else f"({ref_word}->{hyp_word})" - for ref_word, hyp_word in ali - ) - ), - file=f, - ) - - print("", file=f) - print("SUBSTITUTIONS: count ref -> hyp", file=f) - - for count, (ref, hyp) in sorted( - [(v, k) for k, v in subs.items()], reverse=True - ): - print(f"{count} {ref} -> {hyp}", file=f) - - print("", file=f) - print("DELETIONS: count ref", file=f) - for count, ref in sorted([(v, k) for k, v in dels.items()], reverse=True): - print(f"{count} {ref}", file=f) - - print("", file=f) - print("INSERTIONS: count hyp", file=f) - for count, hyp in sorted([(v, k) for k, v in ins.items()], reverse=True): - print(f"{count} {hyp}", file=f) - - print("", file=f) - print( - "PER-WORD STATS: word corr tot_errs count_in_ref count_in_hyp", file=f - ) - for _, word, counts in sorted( - [(sum(v[1:]), k, v) for k, v in words.items()], reverse=True - ): - (corr, ref_sub, hyp_sub, ins, dels) = counts - tot_errs = ref_sub + hyp_sub + ins + dels - ref_count = corr + ref_sub + dels - hyp_count = corr + hyp_sub + ins - - print(f"{word} {corr} {tot_errs} {ref_count} {hyp_count}", file=f) - return float(tot_err_rate) - - -class MetricsTracker(collections.defaultdict): - def __init__(self): - # Passing the type 'int' to the base-class constructor - # makes undefined items default to int() which is zero. - # This class will play a role as metrics tracker. - # It can record many metrics, including but not limited to loss. - super(MetricsTracker, self).__init__(int) - - def __add__(self, other: "MetricsTracker") -> "MetricsTracker": - ans = MetricsTracker() - for k, v in self.items(): - ans[k] = v - for k, v in other.items(): - ans[k] = ans[k] + v - return ans - - def __mul__(self, alpha: float) -> "MetricsTracker": - ans = MetricsTracker() - for k, v in self.items(): - ans[k] = v * alpha - return ans - - def __str__(self) -> str: - ans_frames = "" - ans_utterances = "" - for k, v in self.norm_items(): - norm_value = "%.4g" % v - if "utt_" not in k: - ans_frames += str(k) + "=" + str(norm_value) + ", " - else: - ans_utterances += str(k) + "=" + str(norm_value) - if k == "utt_duration": - ans_utterances += " frames, " - elif k == "utt_pad_proportion": - ans_utterances += ", " - else: - raise ValueError(f"Unexpected key: {k}") - frames = "%.2f" % self["frames"] - ans_frames += "over " + str(frames) + " frames. " - if ans_utterances != "": - utterances = "%.2f" % self["utterances"] - ans_utterances += "over " + str(utterances) + " utterances." - - return ans_frames + ans_utterances - - def norm_items(self) -> List[Tuple[str, float]]: - """ - Returns a list of pairs, like: - [('ctc_loss', 0.1), ('att_loss', 0.07)] - """ - num_frames = self["frames"] if "frames" in self else 1 - num_utterances = self["utterances"] if "utterances" in self else 1 - ans = [] - for k, v in self.items(): - if k == "frames" or k == "utterances": - continue - norm_value = ( - float(v) / num_frames - if "utt_" not in k - else float(v) / num_utterances - ) - ans.append((k, norm_value)) - return ans - - def reduce(self, device): - """ - Reduce using torch.distributed, which I believe ensures that - all processes get the total. 
- """ - keys = sorted(self.keys()) - s = torch.tensor([float(self[k]) for k in keys], device=device) - dist.all_reduce(s, op=dist.ReduceOp.SUM) - for k, v in zip(keys, s.cpu().tolist()): - self[k] = v - - def write_summary( - self, - tb_writer: SummaryWriter, - prefix: str, - batch_idx: int, - ) -> None: - """Add logging information to a TensorBoard writer. - - Args: - tb_writer: a TensorBoard writer - prefix: a prefix for the name of the loss, e.g. "train/valid_", - or "train/current_" - batch_idx: The current batch index, used as the x-axis of the plot. - """ - for k, v in self.norm_items(): - tb_writer.add_scalar(prefix + k, v, batch_idx) - - -def concat( - ragged: k2.RaggedTensor, value: int, direction: str -) -> k2.RaggedTensor: - """Prepend a value to the beginning of each sublist or append a value. - to the end of each sublist. - - Args: - ragged: - A ragged tensor with two axes. - value: - The value to prepend or append. - direction: - It can be either "left" or "right". If it is "left", we - prepend the value to the beginning of each sublist; - if it is "right", we append the value to the end of each - sublist. - - Returns: - Return a new ragged tensor, whose sublists either start with - or end with the given value. - - >>> a = k2.RaggedTensor([[1, 3], [5]]) - >>> a - [ [ 1 3 ] [ 5 ] ] - >>> concat(a, value=0, direction="left") - [ [ 0 1 3 ] [ 0 5 ] ] - >>> concat(a, value=0, direction="right") - [ [ 1 3 0 ] [ 5 0 ] ] - - """ - dtype = ragged.dtype - device = ragged.device - - assert ragged.num_axes == 2, f"num_axes: {ragged.num_axes}" - pad_values = torch.full( - size=(ragged.tot_size(0), 1), - fill_value=value, - device=device, - dtype=dtype, - ) - pad = k2.RaggedTensor(pad_values) - - if direction == "left": - ans = k2.ragged.cat([pad, ragged], axis=1) - elif direction == "right": - ans = k2.ragged.cat([ragged, pad], axis=1) - else: - raise ValueError( - f'Unsupported direction: {direction}. " \ - "Expect either "left" or "right"' - ) - return ans - - -def add_sos(ragged: k2.RaggedTensor, sos_id: int) -> k2.RaggedTensor: - """Add SOS to each sublist. - - Args: - ragged: - A ragged tensor with two axes. - sos_id: - The ID of the SOS symbol. - - Returns: - Return a new ragged tensor, where each sublist starts with SOS. - - >>> a = k2.RaggedTensor([[1, 3], [5]]) - >>> a - [ [ 1 3 ] [ 5 ] ] - >>> add_sos(a, sos_id=0) - [ [ 0 1 3 ] [ 0 5 ] ] - - """ - return concat(ragged, sos_id, direction="left") - - -def add_eos(ragged: k2.RaggedTensor, eos_id: int) -> k2.RaggedTensor: - """Add EOS to each sublist. - - Args: - ragged: - A ragged tensor with two axes. - eos_id: - The ID of the EOS symbol. - - Returns: - Return a new ragged tensor, where each sublist ends with EOS. - - >>> a = k2.RaggedTensor([[1, 3], [5]]) - >>> a - [ [ 1 3 ] [ 5 ] ] - >>> add_eos(a, eos_id=0) - [ [ 1 3 0 ] [ 5 0 ] ] - - """ - return concat(ragged, eos_id, direction="right") - - -def make_pad_mask(lengths: torch.Tensor) -> torch.Tensor: - """ - Args: - lengths: - A 1-D tensor containing sentence lengths. - Returns: - Return a 2-D bool tensor, where masked positions - are filled with `True` and non-masked positions are - filled with `False`. 
- - >>> lengths = torch.tensor([1, 3, 2, 5]) - >>> make_pad_mask(lengths) - tensor([[False, True, True, True, True], - [False, False, False, True, True], - [False, False, True, True, True], - [False, False, False, False, False]]) - """ - assert lengths.ndim == 1, lengths.ndim - - max_len = lengths.max() - n = lengths.size(0) - - expaned_lengths = torch.arange(max_len).expand(n, max_len).to(lengths) - - return expaned_lengths >= lengths.unsqueeze(1) - - -# Copied and modified from https://github.com/wenet-e2e/wenet/blob/main/wenet/utils/mask.py -def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), -) -> torch.Tensor: - """Create mask for subsequent steps (size, size) with chunk size, - this is for streaming encoder - Args: - size (int): size of mask - chunk_size (int): size of chunk - num_left_chunks (int): number of left chunks - <0: use full chunk - >=0: use num_left_chunks - device (torch.device): "cpu" or "cuda" or torch.Tensor.device - Returns: - torch.Tensor: mask - Examples: - >>> subsequent_chunk_mask(4, 2) - [[1, 1, 0, 0], - [1, 1, 0, 0], - [1, 1, 1, 1], - [1, 1, 1, 1]] - """ - ret = torch.zeros(size, size, device=device, dtype=torch.bool) - for i in range(size): - if num_left_chunks < 0: - start = 0 - else: - start = max((i // chunk_size - num_left_chunks) * chunk_size, 0) - ending = min((i // chunk_size + 1) * chunk_size, size) - ret[i, start:ending] = True - return ret - - -def l1_norm(x): - return torch.sum(torch.abs(x)) - - -def l2_norm(x): - return torch.sum(torch.pow(x, 2)) - - -def linf_norm(x): - return torch.max(torch.abs(x)) - - -def measure_weight_norms( - model: nn.Module, norm: str = "l2" -) -> Dict[str, float]: - """ - Compute the norms of the model's parameters. - - :param model: a torch.nn.Module instance - :param norm: how to compute the norm. Available values: 'l1', 'l2', 'linf' - :return: a dict mapping from parameter's name to its norm. - """ - with torch.no_grad(): - norms = {} - for name, param in model.named_parameters(): - if norm == "l1": - val = l1_norm(param) - elif norm == "l2": - val = l2_norm(param) - elif norm == "linf": - val = linf_norm(param) - else: - raise ValueError(f"Unknown norm type: {norm}") - norms[name] = val.item() - return norms - - -def measure_gradient_norms( - model: nn.Module, norm: str = "l1" -) -> Dict[str, float]: - """ - Compute the norms of the gradients for each of model's parameters. - - :param model: a torch.nn.Module instance - :param norm: how to compute the norm. Available values: 'l1', 'l2', 'linf' - :return: a dict mapping from parameter's name to its gradient's norm. - """ - with torch.no_grad(): - norms = {} - for name, param in model.named_parameters(): - if norm == "l1": - val = l1_norm(param.grad) - elif norm == "l2": - val = l2_norm(param.grad) - elif norm == "linf": - val = linf_norm(param.grad) - else: - raise ValueError(f"Unknown norm type: {norm}") - norms[name] = val.item() - return norms - - -def optim_step_and_measure_param_change( - model: nn.Module, - old_parameters: Dict[str, nn.parameter.Parameter], -) -> Dict[str, float]: - """ - Measure the "relative change in parameters per minibatch." - It is understood as a ratio between the L2 norm of the difference between original and updates parameters, - and the L2 norm of the original parameter. It is given by the formula: - - .. 
math:: - \begin{aligned} - \delta = \frac{\Vert\theta - \theta_{new}\Vert^2}{\Vert\theta\Vert^2} - \end{aligned} - - This function is supposed to be used as follows: - - .. code-block:: python - - old_parameters = { - n: p.detach().clone() for n, p in model.named_parameters() - } - - optimizer.step() - - deltas = optim_step_and_measure_param_change(old_parameters) - - Args: - model: A torch.nn.Module instance. - old_parameters: - A Dict of named_parameters before optimizer.step(). - - Return: - A Dict containing the relative change for each parameter. - """ - relative_change = {} - with torch.no_grad(): - for n, p_new in model.named_parameters(): - p_orig = old_parameters[n] - delta = l2_norm(p_orig - p_new) / l2_norm(p_orig) - relative_change[n] = delta.item() - return relative_change - - -def load_averaged_model( - model_dir: str, - model: torch.nn.Module, - epoch: int, - avg: int, - device: torch.device, -): - """ - Load a model which is the average of all checkpoints - - :param model_dir: a str of the experiment directory - :param model: a torch.nn.Module instance - - :param epoch: the last epoch to load from - :param avg: how many models to average from - :param device: move model to this device - - :return: A model averaged - """ - - # start cannot be negative - start = max(epoch - avg + 1, 0) - filenames = [f"{model_dir}/epoch-{i}.pt" for i in range(start, epoch + 1)] - - logging.info(f"averaging {filenames}") - model.to(device) - model.load_state_dict(average_checkpoints(filenames, device=device)) - - return model - - -def tokenize_by_bpe_model( - sp: spm.SentencePieceProcessor, - txt: str, -) -> str: - """ - Tokenize text with bpe model. This function is from - https://github1s.com/wenet-e2e/wenet/blob/main/wenet/dataset/processor.py#L322-L342. - Args: - sp: spm.SentencePieceProcessor. - txt: str - - Return: - A new string which includes chars and bpes. - """ - tokens = [] - # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - pattern = re.compile(r"([\u4e00-\u9fff])") - # Example: - # txt = "你好 ITS'S OKAY 的" - # chars = ["你", "好", " ITS'S OKAY ", "的"] - chars = pattern.split(txt.upper()) - mix_chars = [w for w in chars if len(w.strip()) > 0] - for ch_or_w in mix_chars: - # ch_or_w is a single CJK charater(i.e., "你"), do nothing. - if pattern.fullmatch(ch_or_w) is not None: - tokens.append(ch_or_w) - # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "), - # encode ch_or_w using bpe_model. - else: - for p in sp.encode_as_pieces(ch_or_w): - tokens.append(p) - txt_with_bpe = "/".join(tokens) - - return txt_with_bpe - - -def display_and_save_batch( - batch: dict, - params: AttributeDict, - sp: spm.SentencePieceProcessor, -) -> None: - """Display the batch statistics and save the batch into disk. - - Args: - batch: - A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` - for the content in it. - params: - Parameters for training. See :func:`get_params`. - sp: - The BPE model. 
- """ - from lhotse.utils import uuid4 - - filename = f"{params.exp_dir}/batch-{uuid4()}.pt" - logging.info(f"Saving batch to {filename}") - torch.save(batch, filename) - - supervisions = batch["supervisions"] - features = batch["inputs"] - - logging.info(f"features shape: {features.shape}") - - y = sp.encode(supervisions["text"], out_type=int) - num_tokens = sum(len(i) for i in y) - logging.info(f"num tokens: {num_tokens}") From 2cbefda1d3a541ff68fc4bc926621fd08d784ff4 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 29 Nov 2022 20:31:14 -0500 Subject: [PATCH 051/154] audio to duration python script --- egs/librispeech/v1/local/data_prep.sh | 6 +- egs/librispeech/v1/run_001_prepare_data.sh | 5 +- .../v1/run_003_prepare_noises_rirs.sh | 6 +- hyp_utils/xvectors/audio_to_duration.sh | 45 ++++++ .../preprocess_audios_for_nnet_train.sh | 6 +- hyperion/bin/audio_to_duration.py | 66 +++++++++ hyperion/io/audio_reader.py | 122 ++++++++-------- .../torch/models/transducer/transducer.py | 42 +++--- hyperion/torch/utils/dummy_k2.py | 10 ++ hyperion/utils/info_table.py | 130 +++++++++++------- hyperion/utils/text.py | 32 ++--- 11 files changed, 307 insertions(+), 163 deletions(-) create mode 100755 hyp_utils/xvectors/audio_to_duration.sh create mode 100755 hyperion/bin/audio_to_duration.py create mode 100644 hyperion/torch/utils/dummy_k2.py diff --git a/egs/librispeech/v1/local/data_prep.sh b/egs/librispeech/v1/local/data_prep.sh index 0923aeab..d1ec7f52 100755 --- a/egs/librispeech/v1/local/data_prep.sh +++ b/egs/librispeech/v1/local/data_prep.sh @@ -72,9 +72,9 @@ done spk2utt=$dst/spk2utt utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1 -utils/data/get_utt2dur.sh $dst -awk 'sub(/ *$/, "", $0)' $dst/utt2dur > $dst/utt2dur2 -mv $dst/utt2dur2 $dst/utt2dur +# utils/data/get_utt2dur.sh $dst +# awk 'sub(/ *$/, "", $0)' $dst/utt2dur > $dst/utt2dur2 +# mv $dst/utt2dur2 $dst/utt2dur ntrans=$(wc -l <$trans) nutt2spk=$(wc -l <$utt2spk) diff --git a/egs/librispeech/v1/run_001_prepare_data.sh b/egs/librispeech/v1/run_001_prepare_data.sh index c6c15692..ba24f5d6 100755 --- a/egs/librispeech/v1/run_001_prepare_data.sh +++ b/egs/librispeech/v1/run_001_prepare_data.sh @@ -24,8 +24,9 @@ if [ ${stage} -le 1 ]; then ### But you can utilize Kaldi recipes in most cases echo "stage 0: Data preparation" for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do - # use underscore-separated names in data directories. - local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_} + # use underscore-separated names in data directories. + #local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_} + steps_xvec/audio_to_duration.sh data/${part//-/_} done fi diff --git a/egs/librispeech/v1/run_003_prepare_noises_rirs.sh b/egs/librispeech/v1/run_003_prepare_noises_rirs.sh index a448af9a..6bdcb4f2 100755 --- a/egs/librispeech/v1/run_003_prepare_noises_rirs.sh +++ b/egs/librispeech/v1/run_003_prepare_noises_rirs.sh @@ -20,11 +20,11 @@ if [ $stage -le 1 ]; then # Prepare the MUSAN corpus, which consists of music, speech, and noise # suitable for augmentation. 
local/make_musan.sh $musan_root 16 data
-    
+
   for name in musan_noise musan_music
   do
     steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \
-      --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \
+      --storage_name librispeech-v1-$(date +'%m_%d_%H_%M') \
       data/${name} data/${name}_proc_audio exp/${name}_proc_audio
     utils/fix_data_dir.sh data/${name}_proc_audio
   done
@@ -37,7 +37,7 @@ if [ $stage -le 2 ]; then
   for name in musan_speech
   do
     steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \
-      --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \
+      --storage_name librispeech-v1-$(date +'%m_%d_%H_%M') \
       data/${name} data/${name}_babble exp/${name}_babble
     # utils/fix_data_dir.sh data/${name}_babble
   done
diff --git a/hyp_utils/xvectors/audio_to_duration.sh b/hyp_utils/xvectors/audio_to_duration.sh
new file mode 100755
index 00000000..56a8ffe2
--- /dev/null
+++ b/hyp_utils/xvectors/audio_to_duration.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# 2022 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0.
+set -e
+nj=40
+cmd="run.pl"
+stage=0
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 1 ]; then
+  echo "Usage: $0 <data-dir>"
+  echo "e.g.: $0 data/train"
+  echo "Options: "
+  echo "  --nj <nj>  # number of parallel jobs"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  exit 1;
+fi
+
+data_in=$1
+output_dir=$data_in/durations
+
+name=`basename $data_in`
+
+for f in $data_in/wav.scp ; do
+  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
+done
+
+mkdir -p $output_dir/log
+
+$cmd JOB=1:$nj $output_dir/log/audio_to_duration.JOB.log \
+    hyp_utils/conda_env.sh \
+    audio_to_duration.py \
+    --audio-file $data_in/wav.scp \
+    --output-file $output_dir/utt2dur.JOB
+
+for n in $(seq $nj); do
+  cat $output_dir/utt2dur.$n || exit 1;
+done > ${data_in}/utt2dur || exit 1
+
+echo "$0: Succeeded computing durations for $name"
diff --git a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh
index 7c35b234..8321169f 100755
--- a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh
+++ b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh
@@ -56,9 +56,12 @@ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $output_dir/storage ]; then
   elif [ "$nodes" == "s01" ];then
     utils/create_split_dir.pl \
       /export/s01/$dir_name $output_dir/storage
-  else
+  elif [ "$nodes" == "c0" ];then
     utils/create_split_dir.pl \
       /export/c{01,06,07,08,09}/$dir_name $output_dir/storage
+  elif [ "$nodes" == "fs05" ];then
+    utils/create_split_dir.pl \
+      /export/fs05/$dir_name $output_dir/storage
   fi
 
   for f in $(awk '{ print $1}' $data_in/wav.scp); do
diff --git a/hyperion/bin/audio_to_duration.py b/hyperion/bin/audio_to_duration.py
new file mode 100755
index 00000000..04acb76c
--- /dev/null
+++ b/hyperion/bin/audio_to_duration.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+"""
+ Copyright 2022 Jesus Villalba (Johns Hopkins University)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import sys
+import os
+from jsonargparse import (
+    ArgumentParser,
+    ActionConfigFile,
+    ActionParser,
+    namespace_to_dict,
+)
+import time
+import logging
+
+import numpy as np
+
+from hyperion.hyp_defs import config_logger
+from hyperion.utils import SegmentSet
+from hyperion.io import SequentialAudioReader as AR
+
+
+def audio_to_duration(audio_file, output_file, **kwargs):
+
+    input_args = AR.filter_args(**kwargs)
+    logging.info(f"input_args={input_args}")
+
+    keys = []
+    durations = []
+    with AR(audio_file, **input_args) as reader:
+        for data in reader:
+            key, x, fs = data
+            duration = x.shape[0] / fs
+            keys.append(key)
+            durations.append(duration)
+            logging.info("read audio %s duration=%.3f", key, duration)
+
+    logging.info("read %d audios, saving durations table", len(keys))
+    seg_set = SegmentSet.from_lists(keys, ["duration"], [durations])
+    seg_set.save(output_file)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Writes audio file durations to table")
+
+    parser.add_argument("--cfg", action=ActionConfigFile)
+    parser.add_argument("--audio-file", required=True)
+    parser.add_argument("--output-file", required=True)
+    AR.add_class_args(parser)
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        dest="verbose",
+        default=1,
+        choices=[0, 1, 2, 3],
+        type=int,
+        help="Verbose level",
+    )
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    audio_to_duration(**namespace_to_dict(args))
diff --git a/hyperion/io/audio_reader.py b/hyperion/io/audio_reader.py
index 043ae778..e1bdaca8 100644
--- a/hyperion/io/audio_reader.py
+++ b/hyperion/io/audio_reader.py
@@ -5,6 +5,7 @@
 
 import os
 import logging
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
 import io
 import math
 import subprocess
@@ -47,8 +48,7 @@ class AudioReader(object):
       segments_path: segments file with format: segment_id file_id tbeg tend
       wav_scale: multiplies signal by scale factor
     """
-
-    def __init__(self, file_path, segments_path=None, wav_scale=2 ** 15 - 1):
+    def __init__(self, file_path, segments_path=None, wav_scale=2**15 - 1):
         self.file_path = file_path
         if isinstance(file_path, SCPList):
             self.scp = file_path
@@ -64,9 +64,9 @@ def __init__(self, file_path, segments_path=None, wav_scale=2 ** 15 - 1):
         if isinstance(file_path, SegmentList):
             self.segments = segments_path
         else:
-            self.segments = SegmentList.load(
-                segments_path, sep=" ", index_by_file=False
-            )
+            self.segments = SegmentList.load(segments_path,
+                                             sep=" ",
+                                             index_by_file=False)
 
         self.wav_scale = wav_scale
@@ -93,7 +93,10 @@ def __exit__(self, exc_type, exc_value, traceback):
         pass
 
     @staticmethod
-    def read_wavspecifier(wavspecifier, scale=2 ** 15, time_offset=0, time_dur=0):
+    def read_wavspecifier(wavspecifier,
+ 
scale=2**15, + time_offset=0, + time_dur=0): """Reads an audiospecifier (audio_file/pipe) It reads from pipe or from all the files that can be read by `libsndfile ` @@ -143,20 +146,20 @@ def read_wavspecifier(wavspecifier, scale=2 ** 15, time_offset=0, time_dur=0): raise Exception("Unknown format for %s" % (wavspecifier)) @staticmethod - def read_pipe(wavspecifier, scale=2 ** 15): + def read_pipe(wavspecifier, scale=2**15): """Reads wave file from a pipe Args: wavspecifier: Shell command with pipe output scale: Multiplies signal by scale factor """ # proc = subprocess.Popen(wavspecifier, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - proc = subprocess.Popen(wavspecifier, shell=True, stdout=subprocess.PIPE) + proc = subprocess.Popen(wavspecifier, + shell=True, + stdout=subprocess.PIPE) pipe = proc.communicate()[0] if proc.returncode != 0: - raise Exception( - "Wave read pipe command %s returned code %d" - % (wavspecifier, proc.returncode) - ) + raise Exception("Wave read pipe command %s returned code %d" % + (wavspecifier, proc.returncode)) x, fs = sf.read(io.BytesIO(pipe), dtype=float_cpu()) x *= scale return x, fs @@ -184,8 +187,7 @@ def _read_segment(self, segment, time_offset=0, time_dur=0): if s_beg >= num_samples_i: raise Exception( "segment %s tbeg=%.2f (num_sample=%d) longer that wav file %s (num_samples=%d)" - % (file_id, t_beg, s_beg, file_id, num_samples_i) - ) + % (file_id, t_beg, s_beg, file_id, num_samples_i)) s_end = int(t_end * fs_i) if s_end > num_samples_i or t_end < 0: @@ -203,7 +205,7 @@ def __init__( self, file_path, segments_path=None, - wav_scale=2 ** 15 - 1, + wav_scale=2**15 - 1, part_idx=1, num_parts=1, ): @@ -213,11 +215,12 @@ def __init__( self.num_parts = num_parts if self.num_parts > 1: if self.with_segments: - self.segments = self.segments.split(self.part_idx, self.num_parts) + self.segments = self.segments.split(self.part_idx, + self.num_parts) else: - self.scp = self.scp.split( - self.part_idx, self.num_parts, group_by_key=False - ) + self.scp = self.scp.split(self.part_idx, + self.num_parts, + group_by_key=False) def __iter__(self): """Needed to build an iterator, e.g.: @@ -297,9 +300,8 @@ def read(self, num_records=0, time_offset=0, time_durs=0): x_i, fs_i = self._read_segment(segment, offset_i, dur_i) else: key, file_path, _, _ = self.scp[self.cur_item] - x_i, fs_i = self.read_wavspecifier( - file_path, self.wav_scale, offset_i, dur_i - ) + x_i, fs_i = self.read_wavspecifier(file_path, self.wav_scale, + offset_i, dur_i) keys.append(key) data.append(x_i) @@ -315,42 +317,45 @@ def filter_args(**kwargs): @staticmethod def add_class_args(parser, prefix=None): - if prefix is None: - p1 = "--" - else: - p1 = "--" + prefix + "." 
+ if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") parser.add_argument( - p1 + "wav-scale", - default=2 ** 15 - 1, + "--wav-scale", + default=2**15 - 1, type=float, help=("multiplicative factor for waveform"), ) try: parser.add_argument( - p1 + "part-idx", + "--part-idx", type=int, default=1, - help=( - "splits the list of files into num-parts and " "processes part-idx" - ), + help=("splits the list of files into num-parts and " + "processes part-idx"), ) parser.add_argument( - p1 + "num-parts", + "--num-parts", type=int, default=1, - help=( - "splits the list of files into num-parts and " "processes part-idx" - ), + help=("splits the list of files into num-parts and " + "processes part-idx"), ) except: pass + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + ) + add_argparse_args = add_class_args class RandomAccessAudioReader(AudioReader): - def __init__(self, file_path, segments_path=None, wav_scale=2 ** 15 - 1): + def __init__(self, file_path, segments_path=None, wav_scale=2**15 - 1): super().__init__(file_path, segments_path, wav_scale) def _read(self, keys, time_offset=0, time_durs=0): @@ -386,9 +391,8 @@ def _read(self, keys, time_offset=0, time_durs=0): raise Exception("Key %s not found" % key) file_path, _, _ = self.scp[key] - x_i, fs_i = self.read_wavspecifier( - file_path, self.wav_scale, offset_i, dur_i - ) + x_i, fs_i = self.read_wavspecifier(file_path, self.wav_scale, + offset_i, dur_i) data.append(x_i) fs.append(fs_i) @@ -406,7 +410,9 @@ def read(self, keys, time_offset=0, time_durs=0): fs: List of sampling freq. """ try: - x, fs = self._read(keys, time_offset=time_offset, time_durs=time_durs) + x, fs = self._read(keys, + time_offset=time_offset, + time_durs=time_durs) except: if isinstance(keys, str): keys = [keys] @@ -422,23 +428,17 @@ def read(self, keys, time_offset=0, time_durs=0): # we try to read from # time-offset to the end of the file, and remove the extra frames later, # this solves the problem in most cases - logging.info( - ( - "error-1 reading at keys={} offset={} " - "retrying reading until end-of-file ..." - ).format(keys, time_offset) - ) + logging.info(("error-1 reading at keys={} offset={} " + "retrying reading until end-of-file ...").format( + keys, time_offset)) x, fs = self._read(keys, time_offset=time_offset) for i in range(len(x)): end_sample = int(time_durs[i] * fs[i]) x[i] = x[i][:end_sample] except: # try to read the full file - logging.info( - ( - "error-2 reading at key={}, " "retrying reading full file ..." - ).format(keys) - ) + logging.info(("error-2 reading at key={}, " + "retrying reading full file ...").format(keys)) x, fs = self._read(keys) for i in range(len(x)): start_sample = int(time_offset[i] * fs[i]) @@ -449,21 +449,25 @@ def read(self, keys, time_offset=0, time_durs=0): @staticmethod def filter_args(**kwargs): - valid_args = ("wav_scale",) + valid_args = ("wav_scale", ) return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod def add_class_args(parser, prefix=None): - if prefix is None: - p1 = "--" - else: - p1 = "--" + prefix + "." 
+ if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") parser.add_argument( - p1 + "wav-scale", - default=2 ** 15 - 1, + "--wav-scale", + default=2**15 - 1, type=float, help=("multiplicative factor for waveform"), ) + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + ) add_argparse_args = add_class_args diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index 52547954..b2a90f4b 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -13,13 +13,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Note we use `rnnt_loss` from torchaudio, which exists only in torchaudio >= v0.10.0. It also means you have to use torch >= v1.10.0 """ from jsonargparse import ArgumentParser, ActionParser, ActionYesNo -import k2 +try: + import k2 +except ModuleNotFoundError: + from ...utils import dummy_k2 as k2 + import torch import torch.nn as nn import torchaudio @@ -37,9 +40,8 @@ class Transducer(TorchModel): """It implements https://arxiv.org/pdf/1211.3711.pdf "Sequence Transduction with Recurrent Neural Networks" """ - def __init__( - self, + self, vocab_size, blank_id, # conformer_enc, @@ -75,9 +77,6 @@ def __init__( self.decoder = Decoder(**decoder) self.joiner = Joiner(**joiner) - - - def forward( self, x: torch.Tensor, @@ -100,7 +99,7 @@ def forward( assert x.ndim == 3, x.shape assert x_lens.ndim == 1, x_lens.shape assert y.num_axes == 2, y.num_axes - + assert x.size(0) == x_lens.size(0) == y.dim0 # wav2vec2 works as encoder @@ -128,11 +127,9 @@ def forward( assert hasattr(torchaudio.functional, "rnnt_loss"), ( f"Current torchaudio version: {torchaudio.__version__}\n" - "Please install a version >= 0.10.0" - ) - - x_lens = x_lens.to(torch.int32) + "Please install a version >= 0.10.0") + x_lens = x_lens.to(torch.int32) loss = torchaudio.functional.rnnt_loss( logits=logits, @@ -145,7 +142,6 @@ def forward( return logits, loss - def set_train_mode(self, mode): if mode == self._train_mode: return @@ -162,8 +158,6 @@ def set_train_mode(self, mode): self._train_mode = mode - - def _train(self, train_mode: str): if train_mode in ["full", "frozen"]: super()._train(train_mode) @@ -179,8 +173,8 @@ def get_config(self): join_cfg = self.joiner.get_config() config = { - "blank_id" : self.blank_id, - "vocab_size" : self.vocab_size, + "blank_id": self.blank_id, + "vocab_size": self.vocab_size, "decoder": dec_cfg, "joiner": join_cfg, } @@ -199,7 +193,7 @@ def filter_args(**kwargs): args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) args["decoder"] = decoder_args - args["joiner"] = joiner_args + args["joiner"] = joiner_args return args @staticmethod @@ -209,17 +203,13 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - Decoder.add_class_args( - parser, prefix="decoder" - ) + Decoder.add_class_args(parser, prefix="decoder") - Joiner.add_class_args( - parser, prefix="joiner" - ) + Joiner.add_class_args(parser, prefix="joiner") if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) # def change_config( # self, diff --git a/hyperion/torch/utils/dummy_k2.py 
b/hyperion/torch/utils/dummy_k2.py new file mode 100644 index 00000000..27d387de --- /dev/null +++ b/hyperion/torch/utils/dummy_k2.py @@ -0,0 +1,10 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +version = 0.0 + + +class RaggedTensor: + pass diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index ad676e72..d6ff7602 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -7,6 +7,7 @@ import logging from collections import OrderedDict from copy import deepcopy +import re import numpy as np import pandas as pd @@ -21,7 +22,6 @@ class InfoTable(object): Attributes: df: pandas dataframe. """ - def __init__(self, df): self.df = df assert "id" in df, f"info_table={df}" @@ -89,8 +89,9 @@ def save(self, file_path, sep=None): file_path = Path(file_path) file_path.parent.mkdir(parents=True, exist_ok=True) ext = file_path.suffix - if ext in ["", ".scp"]: + if ext in ["", ".scp"] or re.match(r"\.[0-9]+$", ext): # if no extension we save as kaldi utt2spk file + assert len(self.df.columns) == 2 self.df.to_csv(file_path, sep=" ", header=False, index=False) return @@ -99,6 +100,22 @@ def save(self, file_path, sep=None): self.df.to_csv(file_path, sep=sep, index=False) + @classmethod + def from_lists(cls, ids, column_names, column_data): + df_dict = {"id": ids} + assert len(column_names) == len(column_data) + for name, data in zip(column_names, column_data): + assert len(ids) == len(data) + df_dict[name] = data + df = pd.DataFrame(df_dict) + return cls(df) + + @classmethod + def from_dict(cls, df_dict): + assert "id" in df_dict + df = pd.DataFrame(df_dict) + return cls(df) + @classmethod def load(cls, file_path, sep=None, name="class_id"): """Loads utt2info list from text file. @@ -120,7 +137,10 @@ def load(cls, file_path, sep=None, name="class_id"): sep=" ", header=None, names=["id", name], - dtype={"id": np.str, name: np.str}, + dtype={ + "id": np.str, + name: np.str + }, ) else: if sep is None: @@ -149,7 +169,8 @@ def split(self, idx, num_parts, group_by=None): if group_by is None: _, idx1 = split_list(self.df["id"], idx, num_parts) else: - _, idx1 = split_list_group_by_key(self.df[group_by], idx, num_parts) + _, idx1 = split_list_group_by_key(self.df[group_by], idx, + num_parts) df = self.df.iloc[idx1] return self.__class__(df) @@ -168,10 +189,14 @@ def merge(cls, tables): df = pd.concat(df_list) return cls(df) - def filter(self, items=None, iindex=None, columns=None, by="id", keep=True): - assert ( - items is None or iindex is None - ), "items and iindex cannot be not None at the same time" + def filter(self, + items=None, + iindex=None, + columns=None, + by="id", + keep=True): + assert (items is None or iindex is None + ), "items and iindex cannot be not None at the same time" df = self.df if not keep: @@ -220,7 +245,51 @@ def __cmp__(self, other): return 0 return 1 - # def __len__(self): + def shuffle(self, seed=1024, rng=None): + """Shuffles the elements of the list. + + Args: + seed: Seed for random number generator. + rng: numpy random number generator object. + + Returns: + Index used to shuffle the list. 
+ """ + if rng is None: + rng = np.random.RandomState(seed=seed) + index = np.arange(len(self.df)) + rng.shuffle(index) + self.df = self.df.iloc[index] + return index + + def set_index(self, keys, inplace=True): + if inplace: + self.df.set_index(keys, drop=False, inplace=True) + return + + df = self.df.set_index(keys, drop=False, inplace=False) + return type(self)(df) + + def reset_index(self): + self.df.set_index("id", drop=False, inplace=True) + + def get_loc(self, keys): + if isinstance(keys, (list, np.ndarray)): + return self.df.index.get_indexer(keys) + + loc = self.df.index.get_loc(keys) + if isinstance(loc, int): + return loc + elif isinstance(loc, np.ndarray) and loc.dtype == np.bool: + return np.nonzero(loc)[0] + else: + return list(range(loc.start, loc.stop, loc.step)) + + def get_col_idx(self, keys): + return self.df.columns.get_loc(keys) + + # def __len__(self): + # """Returns the number of elements in the list.""" # return len(self.df) @@ -375,46 +444,3 @@ def __cmp__(self, other): # utt_info = self.utt_info.iloc[index] # return Utt2Info(utt_info) - - def shuffle(self, seed=1024, rng=None): - """Shuffles the elements of the list. - - Args: - seed: Seed for random number generator. - rng: numpy random number generator object. - - Returns: - Index used to shuffle the list. - """ - if rng is None: - rng = np.random.RandomState(seed=seed) - index = np.arange(len(self.df)) - rng.shuffle(index) - self.df = self.df.iloc[index] - return index - - def set_index(self, keys, inplace=True): - if inplace: - self.df.set_index(keys, drop=False, inplace=True) - return - - df = self.df.set_index(keys, drop=False, inplace=False) - return type(self)(df) - - def reset_index(self): - self.df.set_index("id", drop=False, inplace=True) - - def get_loc(self, keys): - if isinstance(keys, (list, np.ndarray)): - return self.df.index.get_indexer(keys) - - loc = self.df.index.get_loc(keys) - if isinstance(loc, int): - return loc - elif isinstance(loc, np.ndarray) and loc.dtype == np.bool: - return np.nonzero(loc)[0] - else: - return list(range(loc.start, loc.stop, loc.step)) - - def get_col_idx(self, keys): - return self.df.columns.get_loc(keys) diff --git a/hyperion/utils/text.py b/hyperion/utils/text.py index 48be92e3..be70f638 100644 --- a/hyperion/utils/text.py +++ b/hyperion/utils/text.py @@ -4,20 +4,23 @@ """ from pathlib import Path -import k2 -import k2.version +try: + import k2 + import k2.version +except ModuleNotFoundError: + from ..torch.utils import dummy_k2 as k2 + import numpy as np import pandas as pd import torch - # Copied and modified from https://github.com/wenet-e2e/wenet/blob/main/wenet/utils/mask.py def subsequent_chunk_mask( - size: int, - chunk_size: int, - num_left_chunks: int = -1, - device: torch.device = torch.device("cpu"), + size: int, + chunk_size: int, + num_left_chunks: int = -1, + device: torch.device = torch.device("cpu"), ) -> torch.Tensor: """Create mask for subsequent steps (size, size) with chunk size, this is for streaming encoder @@ -48,7 +51,6 @@ def subsequent_chunk_mask( return ret - def make_pad_mask(lengths: torch.Tensor) -> torch.Tensor: """ Args: @@ -76,9 +78,8 @@ def make_pad_mask(lengths: torch.Tensor) -> torch.Tensor: return expaned_lengths >= lengths.unsqueeze(1) -def concat( - ragged: k2.RaggedTensor, value: int, direction: str -) -> k2.RaggedTensor: +def concat(ragged: k2.RaggedTensor, value: int, + direction: str) -> k2.RaggedTensor: """Prepend a value to the beginning of each sublist or append a value. to the end of each sublist. 
@@ -123,10 +124,8 @@ def concat( elif direction == "right": ans = k2.ragged.cat([ragged, pad], axis=1) else: - raise ValueError( - f'Unsupported direction: {direction}. " \ - "Expect either "left" or "right"' - ) + raise ValueError(f'Unsupported direction: {direction}. " \ + "Expect either "left" or "right"') return ans @@ -156,7 +155,7 @@ def read_text(text_file: str): # assert check_argument_types() text_file = Path(text_file) - data = {"id":[],"text":[]} + data = {"id": [], "text": []} with Path(text_file).open("r", encoding="utf-8") as f: for linenum, line in enumerate(f, 1): sps = line.rstrip().split(maxsplit=1) @@ -169,4 +168,3 @@ def read_text(text_file: str): data["id"].append(k) data["text"].append(v) return pd.DataFrame(data=data, index=data["id"]) - From 898d4e1b553455a4f13f148c68b80bd4a015cfce Mon Sep 17 00:00:00 2001 From: neillu23 Date: Wed, 30 Nov 2022 14:38:13 -0500 Subject: [PATCH 052/154] upload missing file: download_lm.py --- egs/librispeech/v1/local/download_lm.py | 99 +++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100755 egs/librispeech/v1/local/download_lm.py diff --git a/egs/librispeech/v1/local/download_lm.py b/egs/librispeech/v1/local/download_lm.py new file mode 100755 index 00000000..030122aa --- /dev/null +++ b/egs/librispeech/v1/local/download_lm.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file downloads the following LibriSpeech LM files: + + - 3-gram.pruned.1e-7.arpa.gz + - 4-gram.arpa.gz + - librispeech-vocab.txt + - librispeech-lexicon.txt + - librispeech-lm-norm.txt.gz + +from http://www.openslr.org/resources/11 +and save them in the user provided directory. + +Files are not re-downloaded if they already exist. 
+ +Usage: + ./local/download_lm.py --out-dir ./download/lm +""" + +import argparse +import gzip +import logging +import os +import shutil +from pathlib import Path + +from lhotse.utils import urlretrieve_progress +from tqdm.auto import tqdm + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--out-dir", type=str, help="Output directory.") + + args = parser.parse_args() + return args + + +def main(out_dir: str): + url = "http://www.openslr.org/resources/11" + out_dir = Path(out_dir) + + files_to_download = ( + "3-gram.pruned.1e-7.arpa.gz", + "4-gram.arpa.gz", + "librispeech-vocab.txt", + "librispeech-lexicon.txt", + "librispeech-lm-norm.txt.gz", + ) + + for f in tqdm(files_to_download, desc="Downloading LibriSpeech LM files"): + filename = out_dir / f + if filename.is_file() is False: + urlretrieve_progress( + f"{url}/{f}", + filename=filename, + desc=f"Downloading {filename}", + ) + else: + logging.info(f"{filename} already exists - skipping") + + if ".gz" in str(filename): + unzipped = Path(os.path.splitext(filename)[0]) + if unzipped.is_file() is False: + with gzip.open(filename, "rb") as f_in: + with open(unzipped, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + else: + logging.info(f"{unzipped} already exist - skipping") + + +if __name__ == "__main__": + formatter = ( + "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + ) + + logging.basicConfig(format=formatter, level=logging.INFO) + + args = get_args() + logging.info(f"out_dir: {args.out_dir}") + + main(out_dir=args.out_dir) From 7f9ee742da9e4544185c827994531b0da2aec79c Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 30 Nov 2022 16:33:10 -0500 Subject: [PATCH 053/154] added script to install k2 --- egs/librispeech/v1/run_004_compute_bpe.sh | 9 +-- tools/install_k2_from_src.sh | 68 +++++++++++++++++++++++ 2 files changed, 70 insertions(+), 7 deletions(-) create mode 100755 tools/install_k2_from_src.sh diff --git a/egs/librispeech/v1/run_004_compute_bpe.sh b/egs/librispeech/v1/run_004_compute_bpe.sh index f1fa36b1..0bfeacb9 100755 --- a/egs/librispeech/v1/run_004_compute_bpe.sh +++ b/egs/librispeech/v1/run_004_compute_bpe.sh @@ -5,12 +5,7 @@ # . ./cmd.sh . ./path.sh - - set -e -nodes=fs01 -storage_name=$(date +'%m_%d_%H_%M') -. ./datapath.sh vocab_sizes=( # 5000 @@ -19,13 +14,13 @@ vocab_sizes=( 500 ) - dl_dir=$PWD/download -stage=2 +stage=1 config_file=default_config.sh . parse_options.sh || exit 1; +. ./datapath.sh . 
$config_file

diff --git a/tools/install_k2_from_src.sh b/tools/install_k2_from_src.sh
new file mode 100755
index 00000000..0bd2e972
--- /dev/null
+++ b/tools/install_k2_from_src.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+if [ $# -ne 2 ];then
+    echo "Usage: $0 <env-name> <cuda-root>"
+    echo " e.g.: $0 hyperion /usr/local/cuda"
+fi
+
+env_name=$1
+CUDA_ROOT=$2
+
+eval "$(conda shell.bash hook)"
+conda activate $env_name
+
+#module load cuda10.2/toolkit
+#module load gcc
+
+#conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch
+
+CUDA_VERSION=$(echo "import torch; print(torch.version.cuda)" | python)
+CUDNN_VERSION=$(echo "import torch; print(torch.__config__.show())" | python | awk '/CuDNN/ { print $NF}')
+
+# Install cmake
+echo "Installing CMAKE"
+conda install -c anaconda cmake
+echo "Installing NVIDIA CUDA=$CUDA_VERSION CUDNN=$CUDNN_VERSION"
+conda install -c nvidia cudnn=$CUDNN_VERSION cudatoolkit=$CUDA_VERSION
+
+#conda install -c k2-fsa -c conda-forge kaldilm
+
+echo "Download k2"
+git clone https://github.com/k2-fsa/k2.git
+cd k2
+
+ENV_PATH=$(which python | sed 's@/bin/python$@@')
+NVCC=$CUDA_ROOT/bin/nvcc
+CUDNN_LIBRARY_PATH=${ENV_PATH}/lib
+CUDNN_INCLUDE_PATH=${ENV_PATH}/include
+CUDA_TOOLKIT_DIR=$ENV_PATH
+export PATH=$CUDA_ROOT/bin:$PATH
+
+export K2_CMAKE_ARGS="\
+-DCMAKE_BUILD_TYPE=Release \
+-DCMAKE_CUDA_COMPILER=$NVCC \
+-DPYTHON_EXECUTABLE=$(which python) \
+-DCUDNN_LIBRARY_PATH=$CUDNN_LIBRARY_PATH/libcudnn.so \
+-DCUDNN_INCLUDE_PATH=$CUDNN_INCLUDE_PATH \
+-DCUDA_TOOLKIT_ROOT_DIR=$CUDA_ROOT"
+
+export K2_MAKE_ARGS="-j6"
+
+echo "Compile k2 with CMAKE_ARGS=$K2_CMAKE_ARGS"
+python setup.py install
+cd -
+
+
+# pip install lhotse
+
+# export OT_CMAKE_ARGS=$K2_CMAKE_ARGS
+# git clone https://github.com/csukuangfj/optimized_transducer
+# cd optimized_transducer
+# python setup.py install
+# cd -
+
+
+# git clone https://github.com/k2-fsa/icefall
+# cd icefall
+# pip install -r requirements.txt
+# export PYTHONPATH=./icefall:$PYTHONPATH

From 72b8b3af323d83a4d727eece8f43866b23aa7a2d Mon Sep 17 00:00:00 2001
From: Jesus Villalba 
Date: Mon, 5 Dec 2022 15:44:01 -0500
Subject: [PATCH 054/154] fixed duration extraction in libri recipe

---
 egs/librispeech/v1/cmd.sh                    |  4 ++--
 egs/librispeech/v1/local/data_prep.sh        |  2 +-
 egs/librispeech/v1/local/prepare_lang.py     | 13 +++++--------
 egs/librispeech/v1/local/prepare_lang_bpe.py | 18 ++++++++----------
 egs/librispeech/v1/run_001_prepare_data.sh   |  7 ++++---
 hyp_utils/conda_env.sh                       |  3 ++-
 hyp_utils/xvectors/audio_to_duration.sh      |  4 +++-
 7 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/egs/librispeech/v1/cmd.sh b/egs/librispeech/v1/cmd.sh
index 71f3bae0..89dbb7d8 100755
--- a/egs/librispeech/v1/cmd.sh
+++ b/egs/librispeech/v1/cmd.sh
@@ -19,8 +19,8 @@ if [ "$(hostname -d)" == "cm.gemini" ];then
     #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 10G"
     #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G"
 else
-    export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V"
-    export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V"
+    export train_cmd="queue.pl --config conf/clsp.conf --mem 4G "
+    export cuda_cmd="queue.pl --config conf/clsp.conf --mem 20G"
     export cuda_eval_cmd="$train_cmd"
 fi
 
diff --git a/egs/librispeech/v1/local/data_prep.sh b/egs/librispeech/v1/local/data_prep.sh
index d1ec7f52..cb446a12 100755
--- a/egs/librispeech/v1/local/data_prep.sh
+++ b/egs/librispeech/v1/local/data_prep.sh
@@ -53,7 +53,7 @@ for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do
     fi
find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ - awk -v "dir=$chapter_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1 + awk -v "dir=$chapter_dir" '{printf "%s %s/%s.flac\n", $0, dir, $0}' >>$wav_scp|| exit 1 chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt [ ! -f $chapter_trans ] && echo "$0: expected file $chapter_trans to exist" && exit 1 diff --git a/egs/librispeech/v1/local/prepare_lang.py b/egs/librispeech/v1/local/prepare_lang.py index 74e09629..39d76146 100755 --- a/egs/librispeech/v1/local/prepare_lang.py +++ b/egs/librispeech/v1/local/prepare_lang.py @@ -14,8 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - """ This script takes as input a lexicon file "data/lang_phone/lexicon.txt" consisting of words and tokens (i.e., phones) and does the following: @@ -43,7 +41,6 @@ import torch from hyperion.utils.lexicon import read_lexicon, write_lexicon -from hyperion.utils.utils import str2bool Lexicon = List[Tuple[str, List[str]]] @@ -61,8 +58,8 @@ def get_args(): parser.add_argument( "--debug", - type=str2bool, default=False, + action="store_true", help="""True for debugging, which will generate a visualization of the lexicon FST. @@ -205,9 +202,8 @@ def generate_id_map(symbols: List[str]) -> Dict[str, int]: return {sym: i for i, sym in enumerate(symbols)} -def add_self_loops( - arcs: List[List[Any]], disambig_token: int, disambig_word: int -) -> List[List[Any]]: +def add_self_loops(arcs: List[List[Any]], disambig_token: int, + disambig_word: int) -> List[List[Any]]: """Adds self-loops to states of an FST to propagate disambiguation symbols through it. They are added on each state with non-epsilon output symbols on at least one arc out of the state. @@ -406,7 +402,8 @@ def main(): L_disambig.labels_sym = labels_sym L_disambig.aux_labels_sym = aux_labels_sym - L_disambig.draw(f"{lang_dir / 'L_disambig.svg'}", title="L_disambig.pt") + L_disambig.draw(f"{lang_dir / 'L_disambig.svg'}", + title="L_disambig.pt") if __name__ == "__main__": diff --git a/egs/librispeech/v1/local/prepare_lang_bpe.py b/egs/librispeech/v1/local/prepare_lang_bpe.py index d8cee8ed..7838b6a0 100755 --- a/egs/librispeech/v1/local/prepare_lang_bpe.py +++ b/egs/librispeech/v1/local/prepare_lang_bpe.py @@ -15,9 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - # Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) - """ This script takes as input `lang_dir`, which should contain:: @@ -49,8 +47,6 @@ write_mapping, ) -from hyperion.utils.utils import str2bool - def lexicon_to_fst_no_sil( lexicon: Lexicon, @@ -126,9 +122,8 @@ def lexicon_to_fst_no_sil( return fsa -def generate_lexicon( - model_file: str, words: List[str] -) -> Tuple[Lexicon, Dict[str, int]]: +def generate_lexicon(model_file: str, + words: List[str]) -> Tuple[Lexicon, Dict[str, int]]: """Generate a lexicon from a BPE model. Args: @@ -180,8 +175,8 @@ def get_args(): parser.add_argument( "--debug", - type=str2bool, default=False, + action="store_true", help="""True for debugging, which will generate a visualization of the lexicon FST. 
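The --debug option above moves from a str2bool type to argparse's built-in store_true action, so it becomes a bare flag rather than an option that parses an explicit true/false value. A short standalone sketch of the resulting behavior, using plain argparse and independent of these scripts:

import argparse

parser = argparse.ArgumentParser()
# With action="store_true" the option takes no argument: passing --debug
# yields True, omitting it yields the default False.
parser.add_argument("--debug", default=False, action="store_true")

assert parser.parse_args([]).debug is False
assert parser.parse_args(["--debug"]).debug is True

This is also what lets the diff drop the str2bool import from hyperion.utils.utils.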
@@ -204,7 +199,9 @@ def main():
 
     words = word_sym_table.symbols
 
-    excluded = ["<eps>", "!SIL", "<SPOKEN_NOISE>", "<UNK>", "#0", "<s>", "</s>"]
+    excluded = [
+        "<eps>", "!SIL", "<SPOKEN_NOISE>", "<UNK>", "#0", "<s>", "</s>"
+    ]
     for w in excluded:
         if w in words:
             words.remove(w)
@@ -254,7 +251,8 @@ def main():
 
     L_disambig.labels_sym = labels_sym
     L_disambig.aux_labels_sym = aux_labels_sym
-    L_disambig.draw(f"{lang_dir / 'L_disambig.svg'}", title="L_disambig.pt")
+    L_disambig.draw(f"{lang_dir / 'L_disambig.svg'}",
+                    title="L_disambig.pt")
 
 if __name__ == "__main__":
diff --git a/egs/librispeech/v1/run_001_prepare_data.sh b/egs/librispeech/v1/run_001_prepare_data.sh
index ba24f5d6..8502b724 100755
--- a/egs/librispeech/v1/run_001_prepare_data.sh
+++ b/egs/librispeech/v1/run_001_prepare_data.sh
@@ -23,10 +23,11 @@ if [ ${stage} -le 1 ]; then
   ### Task dependent. You have to make data the following preparation part by yourself.
   ### But you can utilize Kaldi recipes in most cases
   echo "stage 0: Data preparation"
-  for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do
+  for part in train-clean-360 train-other-500 #dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500
+  do
     # use underscore-separated names in data directories.
-    #local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_}
-    steps_xvec/audio_to_duration.sh data/${part//-/_}
+    local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_}
+    steps_xvec/audio_to_duration.sh --cmd "$train_cmd" data/${part//-/_}
   done
 fi
 
diff --git a/hyp_utils/conda_env.sh b/hyp_utils/conda_env.sh
index 0a8f7a41..905567ee 100755
--- a/hyp_utils/conda_env.sh
+++ b/hyp_utils/conda_env.sh
@@ -66,7 +66,8 @@ if [ $num_gpus -gt 0 ];then
       fi
     fi
     echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
-    export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters
+    #export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters
+    #export $LD_LIBRARY_PATH=/opt/NVIDIA/cuda-10/targets/x86_64-linux/lib/stubs:$LD_LIBRARY_PATH
     if [ $num_gpus -gt 1 ];then
diff --git a/hyp_utils/xvectors/audio_to_duration.sh b/hyp_utils/xvectors/audio_to_duration.sh
index 56a8ffe2..f4187919 100755
--- a/hyp_utils/xvectors/audio_to_duration.sh
+++ b/hyp_utils/xvectors/audio_to_duration.sh
@@ -36,7 +36,9 @@ $cmd JOB=1:$nj $output_dir/log/audio_to_duration.JOB.log \
     hyp_utils/conda_env.sh \
     audio_to_duration.py \
     --audio-file $data_in/wav.scp \
-    --output-file $output_dir/utt2dur.JOB
+    --output-file $output_dir/utt2dur.JOB \
+    --part-idx JOB --num-parts $nj
+
 
 for n in $(seq $nj); do
   cat $output_dir/utt2dur.$n || exit 1;

From 9460cc340b0d1919f8578c8bda64629dd293e3fb Mon Sep 17 00:00:00 2001
From: Jesus Villalba 
Date: Mon, 5 Dec 2022 17:34:19 -0500
Subject: [PATCH 055/154] fix dataprep

---
 egs/librispeech/v1/run_001_prepare_data.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/librispeech/v1/run_001_prepare_data.sh b/egs/librispeech/v1/run_001_prepare_data.sh
index 8502b724..0708e667 100755
--- a/egs/librispeech/v1/run_001_prepare_data.sh
+++ b/egs/librispeech/v1/run_001_prepare_data.sh
@@ -23,7 +23,7 @@ if [ ${stage} -le 1 ]; then
   ### Task dependent. You have to make data the following preparation part by yourself.
### But you can utilize Kaldi recipes in most cases echo "stage 0: Data preparation" - for part in train-clean-360 train-other-500 #dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500 + for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500 do # use underscore-separated names in data directories. local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_} From 118d1d4f0de5b4d1b8412353a7174fa7066aa692 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 7 Dec 2022 08:59:37 -0500 Subject: [PATCH 056/154] fixed bucketing sampler --- ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 18 +- .../v1/conf/wav2vec2xlsr300m_transducer.yaml | 3 + hyp_utils/conda_env.sh | 3 +- hyperion/bin/train_wav2vec2transducer.py | 78 +++++---- hyperion/torch/data/bucketing_seg_sampler.py | 43 +++-- hyperion/torch/data/seg_sampler.py | 76 ++++---- hyperion/torch/data/seg_sampler_factory.py | 33 ++-- hyperion/torch/models/transducer/decoder.py | 58 ++++--- .../torch/models/transducer/transducer.py | 1 - .../wav2transducer/hf_wav2transducer.py | 90 +++++----- hyperion/torch/trainers/torch_trainer.py | 162 ++++++++++-------- hyperion/torch/trainers/transducer_trainer.py | 24 ++- 12 files changed, 320 insertions(+), 269 deletions(-) diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index 50750cd8..f89108ea 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -7,11 +7,10 @@ data: return_segment_info: - text sampler: - sampler_type: 'seg_sampler' - # sampler_type: 'bucketing_seg_sampler' - min_batch_size: 4 - batch_size: 4 - iters_per_epoch: 6 + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 85. 
+ min_batch_size: 1 drop_last: true data_loader: num_workers: 8 @@ -23,11 +22,10 @@ data: return_segment_info: - text sampler: - sampler_type: 'seg_sampler' - # sampler_type: 'bucketing_seg_sampler' - min_batch_size: 2 - batch_size: 2 - iters_per_epoch: 6 + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 30 + min_batch_size: 1 drop_last: true data_loader: num_workers: 8 diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml index b8a1cdbb..a7071b8c 100644 --- a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml @@ -5,6 +5,9 @@ transducer: embedding_dim: 1024 num_layers: 2 hidden_dim: 512 + #embedding_dim: 128 + #num_layers: 1 + #hidden_dim: 64 joiner: num_layers: 1 feat_fusion_method: weighted-avg diff --git a/hyp_utils/conda_env.sh b/hyp_utils/conda_env.sh index 905567ee..35c14680 100755 --- a/hyp_utils/conda_env.sh +++ b/hyp_utils/conda_env.sh @@ -67,9 +67,8 @@ if [ $num_gpus -gt 0 ];then fi echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" #export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters - #export $LD_LIBRARY_PATH=/opt/NVIDIA/cuda-10/targets/x86_64-linux/lib/stubs:$LD_LIBRARY_PATH if [ $num_gpus -gt 1 ];then - # export CUDA_LAUNCH_BLOCKING=1 + #export CUDA_LAUNCH_BLOCKING=1 [[ $(type -P "$torchrun") ]] && command="torchrun" \ || command="python -m torch.distributed.run" command="$command --nproc_per_node=$num_gpus --standalone --nnodes=1" diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 3e4ccb84..8156f9b1 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -31,7 +31,6 @@ from hyperion.torch.models import HFWav2Vec2Transducer from torch.nn.utils.rnn import pad_sequence - model_dict = { "hf_wav2vec2transducer": HFWav2Vec2Transducer, } @@ -49,8 +48,7 @@ def transducer_collate(batch): audio = pad_sequence(audio) audio_length = torch.as_tensor(audio_length) target = k2.RaggedTensor(target) - return torch.transpose(audio,0,1), audio_length, target - + return torch.transpose(audio, 0, 1), audio_length, target def init_data(partition, rank, num_gpus, **kwargs): @@ -76,10 +74,14 @@ def init_data(partition, rank, num_gpus, **kwargs): num_workers = data_kwargs["data_loader"]["num_workers"] num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) - largs = ( - {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} - ) - data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate) + largs = ({ + "num_workers": num_workers_per_gpu, + "pin_memory": True + } if num_gpus > 0 else {}) + data_loader = torch.utils.data.DataLoader(dataset, + batch_sampler=sampler, + **largs, + collate_fn=transducer_collate) return data_loader @@ -87,7 +89,7 @@ def init_model(blank_id, vocab_size, rank, model_class, **kwargs): model_args = model_class.filter_args(**kwargs["model"]) if rank == 0: logging.info("model network args={}".format(model_args)) - # TODO: check model_args + # TODO: check model_args model_args["transducer"]["blank_id"] = blank_id model_args["transducer"]["vocab_size"] = vocab_size model = model_class(**model_args) @@ -96,9 +98,6 @@ def init_model(blank_id, vocab_size, rank, model_class, **kwargs): return model - - - def train_model(gpu_id, args): config_logger(args.verbose) @@ -108,6 +107,9 @@ def 
train_model(gpu_id, args): kwargs = namespace_to_dict(args) torch.manual_seed(args.seed) set_float_cpu("float32") + #torch.backends.cudnn.deterministic = True + #torch.backends.cudnn.benchmark = False + torch.backends.cudnn.enabled = False ddp_args = ddp.filter_ddp_args(**kwargs) device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) @@ -121,12 +123,13 @@ def train_model(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - model = init_model(train_loader.dataset.sp.piece_to_id(""), train_loader.dataset.sp.get_piece_size(), **kwargs) + model = init_model(train_loader.dataset.sp.piece_to_id(""), + train_loader.dataset.sp.get_piece_size(), **kwargs) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: logging.info("trainer args={}".format(trn_args)) - metrics = {} #{"acc": CategoricalAccuracy()} + metrics = {} #{"acc": CategoricalAccuracy()} trainer = Trainer( model, device=device, @@ -142,7 +145,7 @@ def train_model(gpu_id, args): def make_parser(model_class): parser = ArgumentParser() - + parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") AD.add_class_args(train_parser, prefix="dataset", skip={}) @@ -164,46 +167,51 @@ def make_parser(model_class): help="num_workers of data loader", ) data_parser = ArgumentParser(prog="") - data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--train", + action=ActionParser(parser=train_parser)) data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) - parser.add_argument( "--data.train.dataset.text_file", - type=str, + type=str, ) - - parser.add_argument("--data.val.dataset.text_file", type=str) - + + parser.add_argument("--data.val.dataset.text_file", type=str) + parser.add_argument( "--data.train.dataset.bpe_model", - type=str, + type=str, ) - parser.link_arguments( - "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" - ) + parser.link_arguments("data.train.data_loader.num_workers", + "data.val.data_loader.num_workers") - parser.link_arguments( - "data.train.dataset.bpe_model", "data.val.dataset.bpe_model" - ) + parser.link_arguments("data.train.dataset.bpe_model", + "data.val.dataset.bpe_model") model_class.add_class_args(parser, prefix="model") - Trainer.add_class_args( - parser, prefix="trainer", train_modes=model_class.valid_train_modes() - ) + Trainer.add_class_args(parser, + prefix="trainer", + train_modes=model_class.valid_train_modes()) ddp.add_ddp_args(parser) - parser.add_argument("--seed", type=int, default=1123581321, help="random seed") - parser.add_argument( - "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int - ) + parser.add_argument("--seed", + type=int, + default=1123581321, + help="random seed") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) return parser if __name__ == "__main__": - parser = ArgumentParser(description="Train Wav2Vec2Transducer model from audio files") + parser = ArgumentParser( + description="Train Wav2Vec2Transducer model from audio files") parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 8b0e855a..9c8384bf 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ 
b/hyperion/torch/data/bucketing_seg_sampler.py @@ -9,21 +9,20 @@ import numpy as np import torch +import torch.distributed as dist + from .hyp_sampler import HypSampler from .seg_sampler import SegSampler -import torch.distributed as dist class BucketingSegSampler(HypSampler): - def __init__( - self, - seg_set, - base_sampler=SegSampler, - num_buckets=10, - length_column="duration", - seed=1234, - **base_kwargs - ): + def __init__(self, + seg_set, + base_sampler=SegSampler, + num_buckets=10, + length_column="duration", + seed=1234, + **base_kwargs): super().__init__(shuffle=False, seed=seed) self.seg_set = seg_set self.base_sampler = base_sampler @@ -33,12 +32,13 @@ def __init__( self.length_column = length_column self._create_bucket_samplers() self._compute_len() - self.depleted_buckets = torch.zeros((num_buckets,), dtype=torch.bool) + self.depleted_buckets = torch.zeros((num_buckets, ), dtype=torch.bool) def create_buckets(self): - sort_idx = torch.argsort(torch.from_numpy(self.seg_set[self.length_column].values)) + sort_idx = np.argsort(self.seg_set[self.length_column].values) sorted_seg_set = self.seg_set.iloc[sort_idx] - cum_lengths = torch.cumsum(torch.from_numpy(sorted_seg_set[self.length_column].values),dim=0) + cum_lengths = np.cumsum(sorted_seg_set[self.length_column].values, + axis=0) bucket_length = cum_lengths[-1] / self.num_buckets buckets = [] for i in range(self.num_buckets): @@ -72,6 +72,7 @@ def set_epoch(self, epoch): def __iter__(self): super().__iter__() + self.depleted_buckets[:] = False for i in range(self.num_buckets): self.bucket_samplers[i].__iter__() @@ -86,9 +87,10 @@ def __next__(self): raise StopIteration while True: - bucket_idx = torch.randint( - low=0, high=self.num_buckets, size=(1,), generator=self.rng - ).item() + bucket_idx = torch.randint(low=0, + high=self.num_buckets, + size=(1, ), + generator=self.rng).item() if self.depleted_buckets[bucket_idx]: continue @@ -107,6 +109,15 @@ def __next__(self): self.batch += 1 return batch + @property + def avg_batch_size(self): + avg_batch_size = 0 + for sampler in self.bucket_samplers: + avg_batch_size += sampler.avg_batch_size + + avg_batch_size /= self.num_buckets + return avg_batch_size + @staticmethod def filter_args(**kwargs): diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 73319dca..8c5ad306 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -33,19 +33,20 @@ def __init__( self.var_batch_size = max_batch_length is not None self.length_name = length_name if self.var_batch_size: - avg_batch_size = max_batch_length / torch.mean( - self.seg_set[self.length_name] - ) + avg_batch_size = max_batch_length / np.mean( + self.seg_set[self.length_name]) else: avg_batch_size = min_batch_size self.avg_batch_size = avg_batch_size - num_batches = len(self.seg_set) / avg_batch_size / self.world_size if drop_last: - self._len = int(num_batches) + self._len = int( + len(self.seg_set) / (avg_batch_size * self.world_size)) else: - self._len = int(math.ceil(num_batches)) + self._len = int( + math.ceil( + (len(self.seg_set) // self.world_size) / avg_batch_size)) self._permutation = None @@ -53,9 +54,8 @@ def __len__(self): return self._len def _shuffle_segs(self): - self._permutation = torch.randperm( - len(self.seg_set), generator=self.rng - ).numpy() + self._permutation = torch.randperm(len(self.seg_set), + generator=self.rng).numpy() def __iter__(self): super().__iter__() @@ -71,50 +71,47 @@ def __next__(self): raise StopIteration if 
self.var_batch_size: + column_idx = self.seg_set.columns.get_loc(self.length_name) idxs = [] max_length = 0 batch_size = 0 while True: - if self._shuffle: + if self.shuffle: idx = self._permutation[self.start] else: idx = self.start - max_length = max(max_length, self.seg_set.iloc[idx].duration.values) + max_length = max(max_length, self.seg_set.iloc[idx, + column_idx]) if max_length * (batch_size + 1) > self.max_batch_length: break idxs.append(idx) self.start = (self.start + self.world_size) % len(self.seg_set) batch_size += 1 - if ( - self.max_batch_size is not None - and batch_size >= self.max_batch_size - ): + if (self.max_batch_size is not None + and batch_size >= self.max_batch_size): break - assert len(idxs) > self.min_batch_size + assert len( + idxs + ) >= 1, f"increase max_batch_length {self.max_batch_length} >= {max_length}" else: - stop = min( - self.start + self.world_size * self.min_batch_size, len(self.seg_set) - ) + stop = min(self.start + self.world_size * self.min_batch_size, + len(self.seg_set)) if self.shuffle: - idx = self._permutation[self.start : stop : self.world_size] + idxs = self._permutation[self.start:stop:self.world_size] else: - idx = slice(self.start, stop, self.world_size) + idxs = slice(self.start, stop, self.world_size) self.start += self.world_size * self.min_batch_size if "chunk_start" in self.seg_set: - chunks = self.seg_set.iloc[idx] - seg_ids = [ - (id, s, d) - for id, s, d in zip( - chunks.seg_id, chunks.chunk_start, chunks[self.length_name] - ) - ] + chunks = self.seg_set.iloc[idxs] + seg_ids = [(id, s, d) for id, s, d in zip( + chunks.seg_id, chunks.chunk_start, chunks[self.length_name])] else: - seg_ids = self.seg_set.iloc[idx].id + seg_ids = self.seg_set.iloc[idxs].id if self.batch == 0: logging.info("batch 0 chunks=%s", str(seg_ids[:10])) @@ -153,18 +150,18 @@ def add_class_args(parser, prefix=None): "--max-batch-size", type=int, default=None, - help=( - "maximum batch size per gpu, if None, estimated from max_batch_length" - ), + help= + ("maximum batch size per gpu, if None, estimated from max_batch_length" + ), ) parser.add_argument( "--max-batch-duration", type=float, default=None, - help=( - "maximum accumlated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths" - ), + help= + ("maximum accumlated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths" + ), ) parser.add_argument( @@ -176,7 +173,8 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--shuffle", action=ActionYesNo, - help="shuffles the segments or chunks at the beginning of the epoch", + help= + "shuffles the segments or chunks at the beginning of the epoch", ) parser.add_argument( @@ -189,7 +187,9 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--length-name", default="duration", - help="which column in the segment table indicates the duration of the file", + help= + "which column in the segment table indicates the duration of the file", ) if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/data/seg_sampler_factory.py b/hyperion/torch/data/seg_sampler_factory.py index 251d937b..512f2f64 100644 --- a/hyperion/torch/data/seg_sampler_factory.py +++ b/hyperion/torch/data/seg_sampler_factory.py @@ -15,7 +15,8 @@ from .bucketing_seg_sampler import BucketingSegSampler sampler_dict = { - 
"class_weighted_random_seg_chunk_sampler": ClassWeightedRandomSegChunkSampler, + "class_weighted_random_seg_chunk_sampler": + ClassWeightedRandomSegChunkSampler, "seg_sampler": SegSampler, "seg_chunk_sampler": SegChunkSampler, "bucketing_seg_sampler": BucketingSegSampler, @@ -26,7 +27,6 @@ class SegSamplerFactory(object): """Factory class to create different types of samplers for sequencial data like audio or acoustic features. """ - @staticmethod def create( dataset: Union[AudioDataset, FeatSeqDataset], @@ -112,7 +112,8 @@ def add_class_args(parser, prefix=None): "--base-sampler-type", choices=["seg_sampler", "bucketing_seg_sampler"], default="seg_sampler", - help="base sampler used for seg_chunk_sampler or bucketing_seg_sampler", + help= + "base sampler used for seg_chunk_sampler or bucketing_seg_sampler", ) parser.add_argument( @@ -139,9 +140,9 @@ def add_class_args(parser, prefix=None): "--max-batch-size", type=int, default=None, - help=( - "maximum batch size per gpu, if None, estimated from max_batch_length" - ), + help= + ("maximum batch size per gpu, if None, estimated from max_batch_length" + ), ) parser.add_argument( @@ -152,12 +153,12 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - "--max-batch-duration", + "--max-batch-length", type=float, default=None, - help=( - "maximum accumlated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths" - ), + help= + ("maximum accumlated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths" + ), ) parser.add_argument( @@ -223,7 +224,8 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--shuffle", action=ActionYesNo, - help="shuffles the segments or chunks at the beginning of the epoch", + help= + "shuffles the segments or chunks at the beginning of the epoch", ) parser.add_argument( "--seed", @@ -235,13 +237,16 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--length-name", default="duration", - help="which column in the segment table indicates the duration of the segment", + help= + "which column in the segment table indicates the duration of the segment", ) parser.add_argument( "--class-name", default="class_id", - help="which column in the segment table indicates the class of the segment", + help= + "which column in the segment table indicates the class of the segment", ) if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py index bd7bd202..3effea9e 100644 --- a/hyperion/torch/models/transducer/decoder.py +++ b/hyperion/torch/models/transducer/decoder.py @@ -98,6 +98,7 @@ def forward( """ embedding_out = self.embedding(y) embedding_out = self.embedding_dropout(embedding_out) + #print("yy", y.shape, embedding_out.shape, y) rnn_out, (h, c) = self.rnn(embedding_out, states) out = self.output_linear(rnn_out) @@ -105,18 +106,17 @@ def forward( def get_config(self): config = { - "in_feats" : self.in_feats, - "blank_id" : self.blank_id, - "vocab_size" : self.vocab_size, - "embedding_dim" :self.embedding_dim, - "num_layers" : self.num_layers, - "hidden_dim" : self.hidden_dim, + "in_feats": self.in_feats, + "blank_id": self.blank_id, + "vocab_size": self.vocab_size, + "embedding_dim": self.embedding_dim, + "num_layers": self.num_layers, + "hidden_dim": self.hidden_dim, } # base_config = 
super().get_config() return dict(list(config.items())) - @staticmethod def filter_args(**kwargs): valid_args = ( @@ -132,36 +132,38 @@ def filter_args(**kwargs): return args @staticmethod - def add_class_args(parser, prefix=None, skip=set(["in_feats", "blank_id", "vocab_size" ])): + def add_class_args(parser, + prefix=None, + skip=set(["in_feats", "blank_id", "vocab_size"])): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") if "in_feats" not in skip: - parser.add_argument( - "--in-feats", type=int, required=True, help=("input feature dimension") - ) + parser.add_argument("--in-feats", + type=int, + required=True, + help=("input feature dimension")) if "blank_id" not in skip: - parser.add_argument( - "--blank-id", type=int, required=True, help=("blank id from sp model") - ) + parser.add_argument("--blank-id", + type=int, + required=True, + help=("blank id from sp model")) if "vocab_size" not in skip: - parser.add_argument( - "--vocab-size", type=int, required=True, help=("output prediction dimension") - ) - parser.add_argument( - "--embedding-dim", default=1024, type=int, help=("feature dimension") - ) + parser.add_argument("--vocab-size", + type=int, + required=True, + help=("output prediction dimension")) + parser.add_argument("--embedding-dim", + default=1024, + type=int, + help=("feature dimension")) - parser.add_argument( - "--num-layers", default=2, type=int, help=("") - ) + parser.add_argument("--num-layers", default=2, type=int, help=("")) - parser.add_argument( - "--hidden-dim", default=512, type=int, help=("") - ) + parser.add_argument("--hidden-dim", default=512, type=int, help=("")) if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index b2a90f4b..2d523b7c 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -204,7 +204,6 @@ def add_class_args(parser, prefix=None, skip=set()): parser = ArgumentParser(prog="") Decoder.add_class_args(parser, prefix="decoder") - Joiner.add_class_args(parser, prefix="joiner") if prefix is not None: diff --git a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py index b5bd220f..d21bb777 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py @@ -16,7 +16,6 @@ # from ..wav2xvectors.hf_wav2xvector import HFWav2XVector - class HFWav2Transducer(TorchModel): """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. @@ -28,10 +27,11 @@ class HFWav2Transducer(TorchModel): feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more than one layer is used. 
""" - - def __init__( - self, hf_feats, transducer, feat_fusion_start=0, feat_fusion_method="weighted-avg" - ): + def __init__(self, + hf_feats, + transducer, + feat_fusion_start=0, + feat_fusion_method="weighted-avg"): super().__init__() self.hf_feats = hf_feats @@ -52,9 +52,12 @@ def _make_fuser(self): self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) elif self.feat_fusion_method == "linear": self.feat_fuser = nn.Linear(num_layers, 1, bias=False) - self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers + self.feat_fuser.weight.data = torch.ones(1, + num_layers) / num_layers elif self.feat_fusion_method == "cat": - self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) + self.feat_fuser = nn.Linear(num_layers * layer_dim, + layer_dim, + bias=False) def _fuse_hid_feats(self, hid_feats): """Fuses the hidden features from the Wav2Vec model. @@ -69,7 +72,7 @@ def _fuse_hid_feats(self, hid_feats): # There is only one layer of features return hid_feats[0] - hid_feats = hid_feats[self.feat_fusion_start :] + hid_feats = hid_feats[self.feat_fusion_start:] if self.feat_fusion_method == "weighted-avg": hid_feats = torch.stack(hid_feats, dim=-1) norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) @@ -119,14 +122,14 @@ def rebuild_output_layer( num_subcenters=num_subcenters, ) - def forward_feats( - self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False - ): - return_hid_states = ( - False - if return_feat_layers is None and self.feat_fusion_method == "last" - else True - ) + def forward_feats(self, + x, + x_lengths, + return_feat_layers=None, + chunk_length=0, + detach_chunks=False): + return_hid_states = (False if return_feat_layers is None + and self.feat_fusion_method == "last" else True) with self._hf_context: hf_output = self.hf_feats( x, @@ -148,8 +151,7 @@ def forward_feats( # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time) # as the hidden features of the x-vector encoder. 
hid_feats = [ - f.transpose(1, 2) - for i, f in enumerate(hid_feats) + f.transpose(1, 2) for i, f in enumerate(hid_feats) if i in return_feat_layers ] else: @@ -189,11 +191,10 @@ def forward( "h_classif" (list hidden classification head layers), "h_feats" (wav2vec features) """ feats, hid_feats, feat_lengths = self.forward_feats( - x, x_lengths, return_feat_layers - ) + x, x_lengths, return_feat_layers) feats = feats.permute(0, 2, 1) # (N, C, T) ->(N, T, C) - + output, loss = self.transducer( feats, feat_lengths, @@ -226,17 +227,16 @@ def extract_embed( x, x_lengths = remove_silence(x, x_lengths) feats, _, feat_lengths = self.forward_feats( - x, x_lengths, chunk_length=hf_chunk_length, detach_chunks=detach_chunks - ) - xvec_chunk_length = int( - xvec_chunk_length - * self.hf_feats.sample_frequency - * feats.size(-1) - // x.size(-1) - ) - return self.transducer.extract_embed( - feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks - ) + x, + x_lengths, + chunk_length=hf_chunk_length, + detach_chunks=detach_chunks) + xvec_chunk_length = int(xvec_chunk_length * + self.hf_feats.sample_frequency * + feats.size(-1) // x.size(-1)) + return self.transducer.extract_embed(feats, feat_lengths, + xvec_chunk_length, embed_layer, + detach_chunks) def freeze_feat_fuser(self): if self.feat_fuser is None: @@ -299,11 +299,11 @@ def _train(self, train_mode: str): self.hf_feats.train() self.transducer._train("ft-embed_affine") elif train_mode in [ - "ft-transducer", - "hf-feats-frozen", - "ft-transducer-nograd", - "hf-feats-frozen-nograd", - "hf-feat-extractor-frozen", + "ft-transducer", + "hf-feats-frozen", + "ft-transducer-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", ]: self.hf_feats.train() self.transducer._train("full") @@ -365,26 +365,20 @@ def add_class_args(parser, prefix=None, skip=set()): "--feat-fusion-start", default=0, type=int, - help=( - "the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" - "the wav2vec num_layers" - ), + help= + ("the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" + "the wav2vec num_layers"), ) parser.add_argument( "--feat-fusion-method", default="weighted-avg", choices=["weighted-avg", "linear", "cat", "last"], - help=( - "method to fuse the hidden layers from the wav2vec model " - "in [weighted-avg, cat]" - ), + help=("method to fuse the hidden layers from the wav2vec model " + "in [weighted-avg, cat]"), ) if prefix is not None: outer_parser.add_argument( "--" + prefix, action=ActionParser(parser=parser), - help="xvector options", ) - - diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 5f573904..8d396719 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -67,7 +67,6 @@ class TorchTrainer(object): swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp """ - def __init__( self, model, @@ -107,9 +106,8 @@ def __init__( self.exp_path = Path(exp_path) if loggers is None: - self.loggers = self._default_loggers( - log_interval, use_tensorboard, use_wandb, wandb - ) + self.loggers = self._default_loggers(log_interval, use_tensorboard, + use_wandb, wandb) elif isinstance(loggers, list): self.loggers = LoggerList(loggers) else: @@ -142,25 +140,31 @@ def __init__( self.rank = dist.get_rank() self.world_size = dist.get_world_size() if ddp_type == DDPType.DDP or ddp_type == DDPType.OSS_DDP: - self.model = 
nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + self.model = nn.SyncBatchNorm.convert_sync_batchnorm( + self.model) if self.rank == 0: logging.info( "training in multiple gpus with distributed-data-parallel" ) oss = False if ddp_type == DDPType.DDP else True - self.optimizer = self._make_optimizer(optim, self.model, oss=oss) + self.optimizer = self._make_optimizer(optim, + self.model, + oss=oss) self.model = TorchDDP( self.model, device_ids=[device], output_device=device, ) elif ddp_type == DDPType.OSS_SHARDED_DDP: - self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + self.model = nn.SyncBatchNorm.convert_sync_batchnorm( + self.model) if self.rank == 0: logging.info( "training in multiple gpus with fair sharded-distributed-data-parallel" ) - self.optimizer = self._make_optimizer(optim, self.model, oss=True) + self.optimizer = self._make_optimizer(optim, + self.model, + oss=True) self.model = FairShardedDDP(self.model, self.optimizer) else: if self.rank == 0: @@ -173,7 +177,9 @@ def __init__( mixed_precision=self.use_amp, move_params_to_cpu=cpu_offload, ) - self.optimizer = self._make_optimizer(optim, self.model, oss=False) + self.optimizer = self._make_optimizer(optim, + self.model, + oss=False) else: self.optimizer = self._make_optimizer(optim, self.model) @@ -203,9 +209,9 @@ def __init__( if self.rank == 0: logging.info("init SWA model") self.swa_model = AveragedModel(self.model) - self.swa_scheduler = SWALR( - self.optimizer, swa_lr=self.swa_lr, anneal_epochs=self.swa_anneal_epochs - ) + self.swa_scheduler = SWALR(self.optimizer, + swa_lr=self.swa_lr, + anneal_epochs=self.swa_anneal_epochs) def set_epoch(self, data_loader): try: @@ -239,7 +245,8 @@ def fit(self, train_data, val_data=None): if self.lr_scheduler is not None: # this is needed by cosine scheduler epoch_updates = int(len(train_data) / self.grad_acc_steps) - self.lr_scheduler.on_epoch_begin(epoch, epoch_updates=epoch_updates) + self.lr_scheduler.on_epoch_begin(epoch, + epoch_updates=epoch_updates) logs = self.train_epoch(train_data) if val_data is not None: @@ -261,7 +268,8 @@ def fit(self, train_data, val_data=None): self.save_checkpoint(logs) if self.in_swa: - self.loggers.on_epoch_begin(self.cur_epoch, batches=len(train_data)) + self.loggers.on_epoch_begin(self.cur_epoch, + batches=len(train_data)) self.model = self.swa_model.module logs = self.bn_update_epoch(train_data) @@ -366,9 +374,9 @@ def bn_update_epoch(self, data_loader): def _clip_grad_norm(self, model, optim, grad_clip, grad_clip_norm): if self.ddp: if self.ddp_type == DDPType.DDP: - nn.utils.clip_grad_norm_( - model.parameters(), grad_clip, norm_type=grad_clip_norm - ) + nn.utils.clip_grad_norm_(model.parameters(), + grad_clip, + norm_type=grad_clip_norm) return if self.ddp_type == DDPType.FULLY_SHARDED_DDP: # we have to use the member function in FullyShardedDDP class @@ -380,26 +388,24 @@ def _clip_grad_norm(self, model, optim, grad_clip, grad_clip_norm): optim.clip_grad_norm(grad_clip, norm_type=grad_clip_norm) # if no DDP clip normally - nn.utils.clip_grad_norm_( - model.parameters(), grad_clip, norm_type=grad_clip_norm - ) + nn.utils.clip_grad_norm_(model.parameters(), + grad_clip, + norm_type=grad_clip_norm) def update_model(self): """Updates the model and does gradding clipping.""" if self.use_amp: if self.grad_clip > 0: self.grad_scaler.unscale_(self.optimizer) - self._clip_grad_norm( - self.model, self.optimizer, self.grad_clip, self.grad_clip_norm - ) + self._clip_grad_norm(self.model, self.optimizer, + self.grad_clip, 
self.grad_clip_norm) self.grad_scaler.step(self.optimizer) self.grad_scaler.update() else: if self.grad_clip > 0: - self._clip_grad_norm( - self.model, self.optimizer, self.grad_clip, self.grad_clip_norm - ) + self._clip_grad_norm(self.model, self.optimizer, + self.grad_clip, self.grad_clip_norm) self.optimizer.step() @@ -428,21 +434,20 @@ def _make_lr_sched(self, lr_sched, optim): lr_sched = LRSF.create(optim, **args) return lr_sched - def _default_loggers(self, log_interval, use_tensorboard, use_wandb, wandb): + def _default_loggers(self, log_interval, use_tensorboard, use_wandb, + wandb): """Creates the default data loaders""" prog_log = ProgLogger(interval=log_interval) csv_log = CSVLogger(self.exp_path / "train.log", append=True) loggers = [prog_log, csv_log] if use_tensorboard: loggers.append( - TensorBoardLogger(self.exp_path / "tb", interval=log_interval) - ) + TensorBoardLogger(self.exp_path / "tb", interval=log_interval)) if use_wandb: loggers.append( - WAndBLogger( - **wandb, path=self.exp_path / "wandb", interval=log_interval - ) - ) + WAndBLogger(**wandb, + path=self.exp_path / "wandb", + interval=log_interval)) return LoggerList(loggers) def _get_lr(self): @@ -458,7 +463,7 @@ def _compute_grad_acc_steps(self, data_loader): try: batch_size = data_loader.batch_sampler.avg_batch_size except: - logging.warn( + logging.warning( "batch sampler doesn't have avg_batch_size property, " "we cannot estimate grad_acc_steps, using grad_acc_steps=%d", self.grad_acc_steps, @@ -466,8 +471,7 @@ def _compute_grad_acc_steps(self, data_loader): return self.grad_acc_steps = int( - math.ceil(self.eff_batch_size / batch_size / self.world_size) - ) + math.ceil(self.eff_batch_size / batch_size / self.world_size)) logging.info( "Setting grad_acc_steps=%d for " "eff_batch_size=%d, avg_batch_size=%d, world_size=%d", @@ -478,7 +482,7 @@ def _compute_grad_acc_steps(self, data_loader): ) return - logging.warn( + logging.warning( "We cannot determine the batch_size, " "we cannot estimate grad_acc_steps, using grad_acc_steps=%d", self.grad_acc_steps, @@ -491,24 +495,30 @@ def checkpoint(self, logs=None): logs: logs containing the current value of the metrics. """ checkpoint = { - "epoch": self.cur_epoch, - "rng_state": torch.get_rng_state(), - "model_cfg": self.model.get_config(), - "model_state_dict": self.model.state_dict(), - "optimizer_state_dict": self.optimizer.state_dict(), - "loss_state_dict": self.loss.state_dict() - if self.loss is not None - else None, + "epoch": + self.cur_epoch, + "rng_state": + torch.get_rng_state(), + "model_cfg": + self.model.get_config(), + "model_state_dict": + self.model.state_dict(), + "optimizer_state_dict": + self.optimizer.state_dict(), + "loss_state_dict": + self.loss.state_dict() if self.loss is not None else None, } if self.lr_scheduler is not None: - checkpoint["lr_scheduler_state_dict"] = self.lr_scheduler.state_dict() + checkpoint[ + "lr_scheduler_state_dict"] = self.lr_scheduler.state_dict() if logs is not None: checkpoint["logs"] = logs if self.in_swa: checkpoint["swa_model_state_dict"] = self.swa_model.state_dict() - checkpoint["swa_scheduler_state_dict"] = self.swa_scheduler.state_dict() + checkpoint[ + "swa_scheduler_state_dict"] = self.swa_scheduler.state_dict() return checkpoint @@ -518,9 +528,8 @@ def save_checkpoint(self, logs=None): Args: logs: logs containing the current value of the metrics. 
""" - if self.ddp and ( - self.ddp_type == DDPType.OSS_DDP or self.ddp_type == DDPType.OSS_SHARDED_DDP - ): + if self.ddp and (self.ddp_type == DDPType.OSS_DDP + or self.ddp_type == DDPType.OSS_SHARDED_DDP): # Not sure what this does, just copying from the example in # https://github.com/facebookresearch/fairscale/blob/master/benchmarks/oss.py # Check the checkpointing in the case of the OSS optimizer @@ -575,16 +584,17 @@ def load_checkpoint(self, file_path): if self.loss is not None: self.loss.load_state_dict(checkpoint["loss_state_dict"]) if self.lr_scheduler is not None: - self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"]) + self.lr_scheduler.load_state_dict( + checkpoint["lr_scheduler_state_dict"]) # if self.use_amp: # amp.load_state_dict(checkpoint['amp']) if self.do_swa: if "swa_model_state_dict" in checkpoint: - self.swa_model.load_state_dict(checkpoint["swa_model_state_dict"]) + self.swa_model.load_state_dict( + checkpoint["swa_model_state_dict"]) self.swa_scheduler.load_state_dict( - checkpoint["swa_scheduler_state_dict"] - ) + checkpoint["swa_scheduler_state_dict"]) else: self.swa_scheduler = SWALR( self.optimizer, @@ -662,9 +672,13 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=[]): "--eff-batch-size", type=int, default=None, - help="effective total batch size, if given, it overrides grad_acc_steps", + help= + "effective total batch size, if given, it overrides grad_acc_steps", ) - parser.add_argument("--epochs", type=int, default=200, help="number of epochs") + parser.add_argument("--epochs", + type=int, + default=200, + help="number of epochs") if train_modes is not None: parser.add_argument( "--train-mode", @@ -684,12 +698,19 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=[]): default=False, help="use tensorboard logger", ) - parser.add_argument( - "--use-wandb", action="store_true", default=False, help="use wandb logger" - ) - parser.add_argument("--wandb.project", default=None, help="wandb project name") - parser.add_argument("--wandb.group", default=None, help="wandb group name") - parser.add_argument("--wandb.name", default=None, help="wandb display name") + parser.add_argument("--use-wandb", + action="store_true", + default=False, + help="use wandb logger") + parser.add_argument("--wandb.project", + default=None, + help="wandb project name") + parser.add_argument("--wandb.group", + default=None, + help="wandb group name") + parser.add_argument("--wandb.name", + default=None, + help="wandb display name") # parser.add_argument( # '--wandb.path', default=None, # help='wandb directory') @@ -718,9 +739,10 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=[]): default=False, help="CPU offload of gradients when using fully_sharded_ddp", ) - parser.add_argument( - "--grad-clip", type=float, default=0, help="gradient clipping norm value" - ) + parser.add_argument("--grad-clip", + type=float, + default=0, + help="gradient clipping norm value") parser.add_argument( "--grad-clip-norm", default=2, @@ -733,9 +755,10 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=[]): default=0, help="start epoch for SWA, if 0 it does not use SWA", ) - parser.add_argument( - "--swa-lr", type=float, default=1e-3, help="learning rate for SWA phase" - ) + parser.add_argument("--swa-lr", + type=float, + default=1e-3, + help="learning rate for SWA phase") parser.add_argument( "--swa-anneal-epochs", type=int, @@ -746,6 +769,7 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=[]): 
parser.add_argument("--exp-path", help="experiment path") if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) add_argparse_args = add_class_args diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index bbe847d0..932c3ed4 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -47,7 +47,6 @@ class TransducerTrainer(TorchTrainer): swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp """ - def __init__( self, model, @@ -129,13 +128,19 @@ def train_epoch(self, data_loader): if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() # TODO: Check and Modify data, target - data, audio_length, target = data.to(self.device), audio_length.to(self.device), target.to(self.device) + data, audio_length, target = data.to(self.device), audio_length.to( + self.device), target.to(self.device) batch_size = data.shape[0] with self.amp_autocast(): - output, loss = self.model(data, x_lengths=audio_length, y=target) + # print("xx", data.shape, data.shape[0] * data.shape[1] / 16000, + # torch.sum(audio_length).item() / 16000, + # torch.min(audio_length).item() / 16000, + # torch.max(audio_length).item() / 16000) + output, loss = self.model(data, + x_lengths=audio_length, + y=target) loss = loss.mean() / self.grad_acc_steps - # loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -161,7 +166,6 @@ def train_epoch(self, data_loader): logs["lr"] = self._get_lr() return logs - def validation_epoch(self, data_loader, swa_update_bn=False): """Validation epoch loop @@ -181,13 +185,17 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.eval() for batch, (data, audio_length, target) in enumerate(data_loader): - data, audio_length, target = data.to(self.device), audio_length.to(self.device), target.to(self.device) + data, audio_length, target = data.to( + self.device), audio_length.to(self.device), target.to( + self.device) batch_size = data.shape[0] # data, target = data.to(self.device), target.to(self.device) # batch_size = data.shape[0] with self.amp_autocast(): - output, loss = self.model(data, x_lengths=audio_length, y=target) + output, loss = self.model(data, + x_lengths=audio_length, + y=target) # output = self.model(data) # loss = self.loss(output, target) @@ -199,4 +207,4 @@ def validation_epoch(self, data_loader, swa_update_bn=False): logs = metric_acc.metrics logs = ODict((log_tag + k, v) for k, v in logs.items()) - return logs \ No newline at end of file + return logs From 49b829b0a3fb8c3a8f28dd61775f877619aff00d Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 8 Dec 2022 09:41:27 -0500 Subject: [PATCH 057/154] transducer training, working in clsp grid, cudnn=False --- ...in_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml | 9 +++++---- egs/librispeech/v1/run_011_train_asr.sh | 2 +- hyp_utils/conda_env.sh | 14 ++++++++++++-- hyperion/torch/data/seg_sampler.py | 4 ++-- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index f89108ea..9349efa7 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ 
b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -11,9 +11,9 @@ data: sampler_type: 'bucketing_seg_sampler' max_batch_length: 85. min_batch_size: 1 - drop_last: true + drop_last: false data_loader: - num_workers: 8 + num_workers: 4 val: dataset: aug_cfgs: @@ -28,7 +28,7 @@ data: min_batch_size: 1 drop_last: true data_loader: - num_workers: 8 + num_workers: 4 model: wav2vec2xlsr300m_transducer.yaml trainer: optim: @@ -48,7 +48,8 @@ trainer: use_amp: true log_interval: 1000 epochs: 60 - eff_batch_size: 1024 + # eff_batch_size: 1024 + eff_batch_size: 128 train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh index 7c2c0f70..868cf4d1 100755 --- a/egs/librispeech/v1/run_011_train_asr.sh +++ b/egs/librispeech/v1/run_011_train_asr.sh @@ -46,7 +46,7 @@ if [ $stage -le 1 ]; then mkdir -p $nnet_s1_dir/log $cuda_cmd \ --gpu $ngpu $nnet_s1_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu --max-split-size-mb 512 \ train_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.audio-file $train_dir/wav.scp \ diff --git a/hyp_utils/conda_env.sh b/hyp_utils/conda_env.sh index 35c14680..ceee4e93 100755 --- a/hyp_utils/conda_env.sh +++ b/hyp_utils/conda_env.sh @@ -14,6 +14,7 @@ if [ -n "$HYP_ENV" ];then else conda_env=base fi +max_split_size_mb="" while true do @@ -25,6 +26,10 @@ do shift; conda_env=$1 shift; + elif [ "$1" == "--max-split-size-mb" ];then + shift; + max_split_size_mb=$1 + shift; else break fi @@ -49,7 +54,7 @@ fi conda activate $conda_env command="python" if [ $num_gpus -gt 0 ];then - # set CUDA_VISIBLE_DEVICES + # set CUDA_VISIBLE_DEVICES if [ ! 
-z "$SGE_HGR_gpu" ]; then echo "SGE_HGR_gpu=$SGE_HGR_gpu" export CUDA_VISIBLE_DEVICES=$(echo $SGE_HGR_gpu | sed 's@ @,@g') @@ -66,9 +71,14 @@ if [ $num_gpus -gt 0 ];then fi fi echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" + if [ -n "$max_split_size_mb" ];then + export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:${max_split_size_mb}" + echo "PYTORCH_CUDA_ALLOC_CONF="${PYTORCH_CUDA_ALLOC_CONF} + fi + #export CUDA_LAUNCH_BLOCKING=1 #export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters if [ $num_gpus -gt 1 ];then - #export CUDA_LAUNCH_BLOCKING=1 + [[ $(type -P "$torchrun") ]] && command="torchrun" \ || command="python -m torch.distributed.run" command="$command --nproc_per_node=$num_gpus --standalone --nnodes=1" diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 8c5ad306..cd976d11 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -111,10 +111,10 @@ def __next__(self): seg_ids = [(id, s, d) for id, s, d in zip( chunks.seg_id, chunks.chunk_start, chunks[self.length_name])] else: - seg_ids = self.seg_set.iloc[idxs].id + seg_ids = self.seg_set.iloc[idxs].id.values if self.batch == 0: - logging.info("batch 0 chunks=%s", str(seg_ids[:10])) + logging.info("batch 0 seg_ids=%s", str(seg_ids[:10])) self.batch += 1 return seg_ids From 829aa7de1aff8e8ae654923be41b06b97392ce88 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 13 Dec 2022 12:42:29 -0500 Subject: [PATCH 058/154] w2v2 transducer with do --- ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 2 +- ...v2vec2xlsr300m_transducer_stage1_v3.0.yaml | 55 +++++++++++++++++++ ...v2vec2xlsr300m_transducer_stage1_v3.1.yaml | 55 +++++++++++++++++++ .../conf/wav2vec2xlsr300m_transducer_do.yaml | 13 +++++ .../wav2vec2xlsr300m_transducer_do0.2.yaml | 13 +++++ .../v1/global_conf/config_transducer_v3.1.sh | 39 +++++++++++++ .../v1/global_conf/config_transducer_v3.sh | 39 +++++++++++++ hyperion/torch/lr_schedulers/noam_lr.py | 3 +- hyperion/torch/models/transducer/decoder.py | 22 ++++++-- 9 files changed, 234 insertions(+), 7 deletions(-) create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml create mode 100644 egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do.yaml create mode 100644 egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.2.yaml create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v3.1.sh create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v3.sh diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml index 9349efa7..edc0af5e 100644 --- a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml @@ -47,7 +47,7 @@ trainer: grad_clip: 100 use_amp: true log_interval: 1000 - epochs: 60 + epochs: 120 # eff_batch_size: 1024 eff_batch_size: 128 train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml new file mode 100644 index 00000000..49077fd6 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml @@ -0,0 +1,55 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - 
conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 85. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 30 + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2xlsr300m_transducer_do.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml new file mode 100644 index 00000000..9f070bbe --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml @@ -0,0 +1,55 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 85. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 30 + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2xlsr300m_transducer_do0.2.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do.yaml new file mode 100644 index 00000000..c7fc2df7 --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do.yaml @@ -0,0 +1,13 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus +transducer: + decoder: + embedding_dim: 1024 + num_layers: 2 + hidden_dim: 512 + embedding_dropout_rate: 0.1 + rnn_dropout_rate: 0.1 + joiner: + num_layers: 1 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.2.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.2.yaml new file mode 100644 index 00000000..1ee4ec72 --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.2.yaml @@ -0,0 +1,13 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus +transducer: + decoder: + embedding_dim: 1024 + num_layers: 2 + hidden_dim: 512 + embedding_dropout_rate: 0.2 + rnn_dropout_rate: 0.2 + joiner: 
+ num_layers: 1 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.1.sh b/egs/librispeech/v1/global_conf/config_transducer_v3.1.sh new file mode 100644 index 00000000..0aa4d949 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v3.1.sh @@ -0,0 +1,39 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean +# nnet_data=train_clean_small + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2transducer + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v3.1 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.sh b/egs/librispeech/v1/global_conf/config_transducer_v3.sh new file mode 100644 index 00000000..3871ee55 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v3.sh @@ -0,0 +1,39 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean +# nnet_data=train_clean_small + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2transducer + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v3.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth diff --git a/hyperion/torch/lr_schedulers/noam_lr.py b/hyperion/torch/lr_schedulers/noam_lr.py index 4acdc3b9..8e4f2f1c 100644 --- a/hyperion/torch/lr_schedulers/noam_lr.py +++ b/hyperion/torch/lr_schedulers/noam_lr.py @@ -28,7 +28,6 @@ class NoamLR(InvPowLR): step: initial training step, this is needed to restart the model training. 
""" - def __init__( self, optimizer, @@ -47,7 +46,7 @@ def __init__( # different modules of the model max_lr = 0 for group in optimizer.param_groups: - max_lr = max(lr, max_lr) + max_lr = max(group["lr"], max_lr) for group in optimizer.param_groups: group["lr"] = lr * group["lr"] / max_lr super().__init__( diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py index 3effea9e..833394d0 100644 --- a/hyperion/torch/models/transducer/decoder.py +++ b/hyperion/torch/models/transducer/decoder.py @@ -31,8 +31,8 @@ def __init__( num_layers: int, hidden_dim: int, in_feats: int, - embedding_dropout: float = 0.0, - rnn_dropout: float = 0.0, + embedding_dropout_rate: float = 0.0, + rnn_dropout_rate: float = 0.0, ): """ Args: @@ -59,14 +59,14 @@ def __init__( embedding_dim=embedding_dim, padding_idx=blank_id, ) - self.embedding_dropout = nn.Dropout(embedding_dropout) + self.embedding_dropout = nn.Dropout(embedding_dropout_rate) # TODO(fangjun): Use layer normalized LSTM self.rnn = nn.LSTM( input_size=embedding_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True, - dropout=rnn_dropout, + dropout=rnn_dropout_rate, ) self.in_feats = in_feats @@ -75,6 +75,8 @@ def __init__( self.embedding_dim = embedding_dim self.num_layers = num_layers self.hidden_dim = hidden_dim + self.embedding_dropout_rate = embedding_dropout_rate + self.rnn_dropout_rate = rnn_dropout_rate self.output_linear = nn.Linear(hidden_dim, in_feats) def forward( @@ -112,6 +114,8 @@ def get_config(self): "embedding_dim": self.embedding_dim, "num_layers": self.num_layers, "hidden_dim": self.hidden_dim, + "embedding_dropout_rate": self.embedding_dropout_rate, + "rnn_dropout_rate": self.rnn_dropout_rate, } # base_config = super().get_config() @@ -126,6 +130,8 @@ def filter_args(**kwargs): "embedding_dim", "num_layers", "hidden_dim", + "embedding_dropout_rate", + "rnn_dropout_rate", ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) @@ -159,6 +165,14 @@ def add_class_args(parser, default=1024, type=int, help=("feature dimension")) + parser.add_argument("--embedding-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder input embeddings")) + parser.add_argument("--rnn-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder RNN ")) parser.add_argument("--num-layers", default=2, type=int, help=("")) From 43f6c4cdaa035c6a626f46c599c183c9cb5efc2f Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 15 Dec 2022 10:15:23 -0500 Subject: [PATCH 059/154] more transducer configs --- ...v2vec2xlsr300m_transducer_stage1_v2.0.yaml | 55 +++++++++++++++++++ ...v2vec2xlsr300m_transducer_stage1_v3.2.yaml | 55 +++++++++++++++++++ .../wav2vec2xlsr300m_transducer_do0.3.yaml | 13 +++++ .../wav2vec2xlsr300m_transducer_enclast.yaml | 11 ++++ .../v1/global_conf/config_transducer_v2.sh | 39 +++++++++++++ .../v1/global_conf/config_transducer_v3.2.sh | 39 +++++++++++++ 6 files changed, 212 insertions(+) create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml create mode 100644 egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.3.yaml create mode 100644 egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_enclast.yaml create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v2.sh create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v3.2.sh diff --git 
a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml new file mode 100644 index 00000000..aefddc7e --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml @@ -0,0 +1,55 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 85. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 30 + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2xlsr300m_transducer_enclast.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml new file mode 100644 index 00000000..d787a373 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml @@ -0,0 +1,55 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 85. 
+ min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 30 + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2xlsr300m_transducer_do0.3.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.3.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.3.yaml new file mode 100644 index 00000000..ca7c1995 --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.3.yaml @@ -0,0 +1,13 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus +transducer: + decoder: + embedding_dim: 1024 + num_layers: 2 + hidden_dim: 512 + embedding_dropout_rate: 0.3 + rnn_dropout_rate: 0.3 + joiner: + num_layers: 1 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_enclast.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_enclast.yaml new file mode 100644 index 00000000..1d46c33c --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_enclast.yaml @@ -0,0 +1,11 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus +transducer: + decoder: + embedding_dim: 1024 + num_layers: 2 + hidden_dim: 512 + joiner: + num_layers: 1 +feat_fusion_method: last + diff --git a/egs/librispeech/v1/global_conf/config_transducer_v2.sh b/egs/librispeech/v1/global_conf/config_transducer_v2.sh new file mode 100644 index 00000000..f663e2dd --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v2.sh @@ -0,0 +1,39 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean +# nnet_data=train_clean_small + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2transducer + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.2.sh b/egs/librispeech/v1/global_conf/config_transducer_v3.2.sh new file mode 100644 index 
00000000..2ff8d3c9 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v3.2.sh @@ -0,0 +1,39 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean +# nnet_data=train_clean_small + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2transducer + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v3.2 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth From 63a1216b2778565e840068f178ee2463d74fc550 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Tue, 20 Dec 2022 21:45:57 -0500 Subject: [PATCH 060/154] Merge and add decode for ASR --- ...et50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 68 ---- .../v1/global_conf/config_transducer_v1.sh | 5 +- egs/librispeech/v1/run_030_inference.sh | 52 +-- .../decode_wav2vec2transducer.sh | 80 ++++ egs/voxceleb/v2/path.sh | 2 +- hyperion/bin/decode_wav2transducer.py | 360 ++++++++++++++++++ hyperion/bin/finetune_wav2vec2xvector.py | 2 +- hyperion/bin/train_wav2vec2transducer.py | 3 +- hyperion/torch/models/transducer/joiner.py | 2 + .../models/wav2transducer/beam_search.py | 216 +++++++++++ hyperion/torch/trainers/transducer_trainer.py | 3 + hyperion/torch/utils/__init__.py | 1 + 12 files changed, 682 insertions(+), 112 deletions(-) delete mode 100644 egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh create mode 100755 egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh create mode 100755 hyperion/bin/decode_wav2transducer.py create mode 100644 hyperion/torch/models/wav2transducer/beam_search.py diff --git a/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh deleted file mode 100644 index bf6c2fb8..00000000 --- a/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ /dev/null @@ -1,68 +0,0 @@ -# LResNet34 x-vector with mixed precision training - -# acoustic features -feat_config=conf/fbank80_stmn_16k.yaml -feat_type=fbank80_stmn - -# x-vector training -nnet_data=voxcelebcat -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -ipe=$nnet_num_augs -min_chunk=4 -max_chunk=4 -lr=0.05 - -nnet_type=res2net50 -dropout=0 -embed_dim=256 -width_factor=1.625 -scale=4 -ws_tag=w26s4 - -s=30 -margin_warmup=20 -margin=0.3 - -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 
0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxcelebcat -else - plda_data=voxcelebcat_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - -diar_plda_num_augs=0 -if [ $diar_plda_num_augs -eq 0 ]; then - diar_plda_data=voxcelebcat -else - diar_plda_data=voxcelebcat_augx${plda_num_augs} -fi -diar_plda_type=splda -diar_lda_dim=150 -diar_plda_y_dim=150 -diar_plda_z_dim=150 - -diar_plda_name=lda${diar_lda_dim}_${diar_plda_type}y${diar_plda_y_dim}_v1_${diar_plda_data} -diar_thr=-7 -diar_dir=exp/diarization/$nnet_name/${diar_plda_name}/ahc_pcar1_thr${diar_thr} -diar_name=diar_res2net50w26s4_thr${diar_thr} diff --git a/egs/librispeech/v1/global_conf/config_transducer_v1.sh b/egs/librispeech/v1/global_conf/config_transducer_v1.sh index ca1ca29c..7fc1508f 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v1.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v1.sh @@ -9,6 +9,7 @@ hf_model_name=wav2vec2xlsr300m # x-vector training nnet_data=train_clean_100 dev_data=dev_clean +test_data=test_clean # nnet_data=train_clean_small bpe_model=data/lang_bpe_1000/bpe.model @@ -23,9 +24,9 @@ nnet_name=${hf_model_name}_transducer_v1.0 nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0060.pth +nnet_s1=${nnet_s1_dir}_pre/model_ep0060.pth -nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml nnet_s2_args="" nnet_s2_name=${nnet_name}.s2 nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name diff --git a/egs/librispeech/v1/run_030_inference.sh b/egs/librispeech/v1/run_030_inference.sh index 67122f85..fb76088b 100755 --- a/egs/librispeech/v1/run_030_inference.sh +++ b/egs/librispeech/v1/run_030_inference.sh @@ -7,20 +7,17 @@ . ./path.sh set -e -stage=2 config_file=default_config.sh use_gpu=false -nnet_stage=3 -hf_chunk_length=120 #seconds -xvec_chunk_length=120 #seconds +nnet_stage=1 . parse_options.sh || exit 1; . 
$config_file if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu true --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length" - xvec_cmd="$cuda_eval_cmd --mem 6G" + transducer_args="--use-gpu true" + transducer_cmd="$cuda_eval_cmd --mem 6G" else - xvec_cmd="$train_cmd --mem 12G" + transducer_cmd="$train_cmd --mem 12G" fi if [ $nnet_stage -eq 1 ];then @@ -34,41 +31,18 @@ elif [ $nnet_stage -eq 3 ];then nnet_name=$nnet_s3_name fi -xvector_dir=exp/xvectors/$nnet_name +transducer_dir=exp/transducer/$nnet_name -if [ $stage -le 1 ]; then - # Extract xvectors for training LDA/PLDA - for name in voxceleb2cat_train - do - if [ $plda_num_augs -eq 0 ]; then - steps_xvec/extract_wav2vec2xvectors.sh \ - --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ - --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ - $nnet data/${name} \ - $xvector_dir/${name} - else - steps_xvec/extract_wav2vec2xvectors.sh \ - --cmd "$xvec_cmd" --nj 300 ${xvec_args} \ - --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ - --aug-config $plda_aug_config --num-augs $plda_num_augs \ - $nnet data/${name} \ - $xvector_dir/${name}_augx${plda_num_augs} \ - data/${name}_augx${plda_num_augs} - fi - done -fi -if [ $stage -le 2 ]; then - # Extracts x-vectors for evaluation - for name in voxceleb1_test +test_data=test_clean + + +# Extracts x-vectors for evaluation +for name in $test_data do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 100 ? $num_spk:100)) - steps_xvec/extract_wav2vec2xvectors.sh \ - --cmd "$xvec_cmd" --nj $nj ${xvec_args} \ + nj=16 + steps_transducer/decode_wav2vec2transducer.sh --cmd "$transducer_cmd --mem 12G" --nj $nj ${transducer_args} \ $nnet data/$name \ - $xvector_dir/$name + $transducer_dir/$name $bpe_model done -fi - exit diff --git a/egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh b/egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh new file mode 100755 index 00000000..143087a5 --- /dev/null +++ b/egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# 2022 Johns Hopkins University (Author: Yen-Ju Lu) +# Apache 2.0. +nj=30 +cmd="run.pl" + +use_gpu=false +write_utt2num_frames=true # If true writes utt2num_frames. +stage=0 +num_augs=0 + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 3 ] && [ $# != 4 ]; then + echo "Usage: $0 [options] []" + echo " e.g.: $0 --feat-config conf/fbank_mvn.yml --aug-config conf/noise_aug.yml exp/xvector_nnet/model.pt data/train exp/xvectors_train [data/train_aug]" + echo "main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --use-gpu # If true, use GPU." + echo " --nj # Number of jobs" + echo " --stage # To control partial reruns" + echo " --use-bin-vad # If true, uses binary VAD from vad.scp" + echo " --write-utt2num-frames # If true, write utt2num_frames file." 
+ echo " --chunk-length # If provided, applies encoder with specified chunk-length and " + echo " # concatenates the chunks outputs before pooling" + echo " --feat-config # feature/mvn config file" + echo " --aug-config # augmentation config file" + echo " --random-utt-length # If true, extracts a random chunk from the utterance between " + echo " # min_utt_length and max_utt_length" + echo " --min-utt-length # " + echo " --max-utt-length # " + + +fi + +nnet_file=$1 +data_dir=$2 +output_dir=$3 +bpe_model=$4 + +for f in $data_dir/wav.scp ; do + [ ! -f $f ] && echo "No such file $f" && exit 1; +done + +log_dir=$output_dir/log +mkdir -p $log_dir + +num_gpus=0 +args="" +if [ "$use_gpu" == "true" ];then + cmd="$cmd --gpu 1" + num_gpus=1 + args="--use-gpu" +fi + +if [ "$write_utt2num_frames" == "true" ];then + write_num_frames_opt="--write-num-frames $output_dir/utt2num_frames.JOB" +fi + +if [ $stage -le 0 ];then + set +e + $cmd JOB=1:$nj $output_dir/log/decode_transducer.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + decode_wav2transducer.py \ + --part-idx JOB --num-parts $nj \ + --input $data_dir/wav.scp \ + --model-path $nnet_file \ + --bpe-model $bpe_model \ + --output $output_dir/transducer.JOB.text + set -e +fi + +if [ $stage -le 1 ];then + echo "compute wer" + cat $output_dir/transducer.*.text > $output_dir/transducer.text + compute-wer --text --mode=present ark:$data_dir/text ark:$output_dir/transducer.text +fi diff --git a/egs/voxceleb/v2/path.sh b/egs/voxceleb/v2/path.sh index 6994fdab..0dc5a9d5 100755 --- a/egs/voxceleb/v2/path.sh +++ b/egs/voxceleb/v2/path.sh @@ -2,4 +2,4 @@ export HYP_ROOT=$(readlink -f `pwd -P`/../../..) export TOOLS_ROOT=$HYP_ROOT/tools -. $TOOLS_ROOT/path.sh + . $TOOLS_ROOT/path.sh diff --git a/hyperion/bin/decode_wav2transducer.py b/hyperion/bin/decode_wav2transducer.py new file mode 100755 index 00000000..c71df79e --- /dev/null +++ b/hyperion/bin/decode_wav2transducer.py @@ -0,0 +1,360 @@ +#!/usr/bin/env python +""" + Copyright 2019 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + + + +from typing import Dict, List, Tuple + +import sentencepiece as spm +import torch.nn as nn + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np +import pandas as pd + +import torch + +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.utils import Utt2Info +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR +from hyperion.np.augment import SpeechAugment + +from hyperion.torch.utils import open_device +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch import TorchModelLoader as TML + +from hyperion.torch.models.wav2transducer.beam_search import greedy_search, beam_search + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def load_model(model_path, device): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("transducer-model={}".format(model)) + model.to(device) + model.eval() + return model + + + +# def decode_dataset( +# dl: torch.utils.data.DataLoader, +# params: AttributeDict, +# model: nn.Module, +# sp: spm.SentencePieceProcessor, +# ) -> Dict[str, 
List[Tuple[str, List[str], List[str]]]]: +# """Decode dataset. +# Args: +# dl: +# PyTorch's dataloader containing the dataset to decode. +# params: +# It is returned by :func:`get_params`. +# model: +# The neural model. +# sp: +# The BPE model. +# Returns: +# Return a dict, whose key may be "greedy_search" if greedy search +# is used, or it may be "beam_7" if beam size of 7 is used. +# Its value is a list of tuples. Each tuple contains two elements: +# The first is the reference transcript, and the second is the +# predicted result. +# """ +# num_cuts = 0 + +# try: +# num_batches = len(dl) +# except TypeError: +# num_batches = "?" + +# if decoding_method == "greedy_search": +# log_interval = 100 +# else: +# log_interval = 2 + +# results = defaultdict(list) +# for batch_idx, batch in enumerate(dl): +# texts = batch["supervisions"]["text"] +# cut_ids = [cut.id for cut in batch["supervisions"]["cut"]] + +# hyps_dict = decode_one_batch( +# params=params, +# model=model, +# sp=sp, +# batch=batch, +# ) + +# for name, hyps in hyps_dict.items(): +# this_batch = [] +# assert len(hyps) == len(texts) +# for cut_id, hyp_words, ref_text in zip(cut_ids, hyps, texts): +# ref_words = ref_text.split() +# this_batch.append((cut_id, ref_words, hyp_words)) + +# results[name].extend(this_batch) + +# num_cuts += len(texts) + +# if batch_idx % log_interval == 0: +# batch_str = f"{batch_idx}/{num_batches}" + +# logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") +# return results + + +def decode_one_batch( + model: nn.Module, + sp: spm.SentencePieceProcessor, + x: torch.Tensor, + decoding_method = "beam_search" +) -> Dict[str, List[List[str]]]: + """Decode one batch and return the result in a dict. The dict has the + following format: + - key: It indicates the setting used for decoding. For example, + if greedy_search is used, it would be "greedy_search" + If beam search with a beam size of 7 is used, it would be + "beam_7" + - value: It contains the decoding result. `len(value)` equals to + batch size. `value[i]` is the decoding result for the i-th + utterance in the given batch. + Args: + params: + It's the return value of :func:`get_params`. + model: + The neural model. + sp: + The BPE model. + batch: + It is the return value from iterating + `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation + for the format of the `batch`. + Returns: + Return the decoding result. See above description for the format of + the returned dict. 
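+    Note:
+        Only batch size 1 is supported: ``x`` must hold a single waveform,
+        i.e., x.shape[0] == 1.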
+ """ + device = model.device + feature = x #batch["inputs"] + assert x.shape[0] == 1 + assert feature.ndim == 2 + + feature = feature.to(device) + # at entry, feature is (N, T, C) + + feature_lens = torch.Tensor([x.shape[1]]).int() #batch["supervisions"] + # feature_lens = supervisions["num_frames"].to(device) + + # encoder_out, encoder_out_lens = model.encoder(x=feature, x_lens=feature_lens) + + # print("feature",feature.shape) + # print("feature_lens",feature_lens) + encoder_out, hid_feats, encoder_out_lens = model.forward_feats(x=feature, x_lengths=feature_lens) + + hyps = [] + batch_size = encoder_out.size(0) + + encoder_out = encoder_out.permute(0, 2, 1) # (N, C, T) ->(N, T, C) + + for i in range(batch_size): + # fmt: off + encoder_out_i = encoder_out[i:i+1, :encoder_out_lens[i]] + # fmt: on + if decoding_method == "greedy_search": + hyp = greedy_search(model=model, encoder_out=encoder_out_i) + elif decoding_method == "beam_search": + hyp = beam_search( + model=model, encoder_out=encoder_out_i, beam=5 + ) + else: + raise ValueError(f"Unsupported decoding method: {decoding_method}") + hyps.append(sp.decode(hyp).split()) + + logging.info("hyps:{}".format(" ".join(hyps[0]))) + + if decoding_method == "greedy_search": + return hyps[0] #{"greedy_search": hyps} + else: + return hyps[0] #{f"beam_{params.beam_size}": hyps} + + +def decode_transducer( + input_spec, + output_spec, + scp_sep, + model_path, + bpe_model, + use_gpu, + **kwargs +): + + device = init_device(use_gpu) + model = load_model(model_path, device) + + sp = spm.SentencePieceProcessor() + sp.load(bpe_model) + # blank_id = self.sp.piece_to_id("") + # vocab_size = self.sp.get_piece_size() + + # if write_num_frames_spec is not None: + # keys = [] + # info = [] + + augmenter = None + aug_df = None + num_augs = 1 + + ar_args = AR.filter_args(**kwargs) + logging.info("opening output: %s" % (output_spec)) + # with DWF.create(output_spec, scp_sep=scp_sep) as writer: + with open(output_spec,"w") as writer: + logging.info( + "opening input stream: {} with args={}".format(input_spec, ar_args) + ) + with AR(input_spec, **ar_args) as reader: + while not reader.eof(): + t1 = time.time() + key, x0, fs = reader.read(1) + if len(key) == 0: + break + + x0 = x0[0] + key0 = key[0] + t2 = time.time() + + logging.info("processing utt %s" % (key0)) + for aug_id in range(num_augs): + t3 = time.time() + key, x = key0, x0 #augment(key0, x0, augmenter, aug_df, aug_id) + t4 = time.time() + with torch.no_grad(): + x = torch.tensor( + x[None, :], dtype=torch.get_default_dtype() + ).to(device) + + t5 = time.time() + tot_frames = x.shape[1] + + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames" + % ( + key, + x.shape[1], + tot_frames, + x.shape[1] / tot_frames * 100, + ) + ) + + + t6 = time.time() + if x.shape[1] == 0: + y = np.zeros((model.embed_dim,), dtype=float_cpu()) + else: + # x = x.transpose(1, 2).contiguous() + # x = torch.unsqueeze(x,2) + # writer.write(key + ' ' + "abc") + y = decode_one_batch(model=model, sp=sp, x=x) + writer.write(key + ' ' + ' '.join(y) + "\n") + + # y = ( + # model.extract_embed( + # x, + # chunk_length=chunk_length, + # embed_layer=embed_layer, + # ) + # .cpu() + # .numpy()[0] + # ) + + t7 = time.time() + # writer.write([key], [y]) + # if write_num_frames_spec is not None: + # keys.append(key) + # info.append(str(x.shape[-1])) + + t8 = time.time() + read_time = t2 - t1 + tot_time = read_time + t8 - t3 + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f " + "aug-time=%.3f feat-time=%.3f " + 
"vad-time=%.3f embed-time=%.3f write-time=%.3f " + "rt-factor=%.2f" + ) + % ( + key, + tot_time, + read_time, + t4 - t3, + t5 - t4, + t6 - t5, + t7 - t6, + t8 - t7, + x0.shape[0] / fs[0] / tot_time, + ) + ) + + # if write_num_frames_spec is not None: + # logging.info("writing num-frames to %s" % (write_num_frames_spec)) + # u2nf = Utt2Info.create(keys, info) + # u2nf.save(write_num_frames_spec) + + # if aug_info_path is not None: + # aug_df = pd.concat(aug_df, ignore_index=True) + # aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") + + +if __name__ == "__main__": + + parser = ArgumentParser( + description=( + "Extracts x-vectors from waveform computing " "acoustic features on the fly" + ) + ) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--input", dest="input_spec", required=True) + parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) + + AR.add_class_args(parser) + + + AF.add_class_args(parser, prefix="feats") + + parser.add_argument("--model-path", required=True) + + parser.add_argument("--bpe-model", required=True) + + parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + decode_transducer(**namespace_to_dict(args)) diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py index 25722b35..adde6ed5 100755 --- a/hyperion/bin/finetune_wav2vec2xvector.py +++ b/hyperion/bin/finetune_wav2vec2xvector.py @@ -25,7 +25,7 @@ from hyperion.torch.utils import ddp from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.data import AudioDataset as AD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +# from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.models import ( HFWav2Vec2ResNet1dXVector, diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 8156f9b1..7313c661 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -18,7 +18,7 @@ import multiprocessing import numpy as np - +import soundfile as sf import torch import torch.nn as nn @@ -41,6 +41,7 @@ def transducer_collate(batch): audio_length = [] target = [] for record in batch: + # sf.write('/export/c06/ylu125/GSP/hyperion/egs/librispeech/v1/wavs/mix_{}.wav'.format(np.random.randn()), record[0], 16000) wav = torch.as_tensor(record[0]) audio.append(wav) audio_length.append(wav.shape[0]) diff --git a/hyperion/torch/models/transducer/joiner.py b/hyperion/torch/models/transducer/joiner.py index 0fc1fe51..57587992 100644 --- a/hyperion/torch/models/transducer/joiner.py +++ b/hyperion/torch/models/transducer/joiner.py @@ -40,6 +40,8 @@ def forward( Returns: Return a tensor of shape (N, T, U, C). 
""" + # print("encoder_out",encoder_out.shape) + # print("decoder_out",decoder_out.shape) assert encoder_out.ndim == decoder_out.ndim == 3 assert encoder_out.size(0) == decoder_out.size(0) assert encoder_out.size(2) == decoder_out.size(2) diff --git a/hyperion/torch/models/wav2transducer/beam_search.py b/hyperion/torch/models/wav2transducer/beam_search.py new file mode 100644 index 00000000..95f6fadb --- /dev/null +++ b/hyperion/torch/models/wav2transducer/beam_search.py @@ -0,0 +1,216 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple + +import torch + +from .hf_wav2transducer import HFWav2Transducer + + +def greedy_search(model: HFWav2Transducer, encoder_out: torch.Tensor) -> List[int]: + """ + Args: + model: + An instance of `Transducer`. + encoder_out: + A tensor of shape (N, T, C) from the encoder. Support only N==1 for now. + Returns: + Return the decoded result. + """ + assert encoder_out.ndim == 3 + + # support only batch_size == 1 for now + assert encoder_out.size(0) == 1, encoder_out.size(0) + blank_id = model.transducer.decoder.blank_id + device = model.device + + sos = torch.tensor([blank_id], device=device, dtype=torch.int64).reshape(1, 1) + decoder_out, (h, c) = model.transducer.decoder(sos) + T = encoder_out.size(1) + t = 0 + hyp = [] + + sym_per_frame = 0 + sym_per_utt = 0 + + max_sym_per_utt = 1000 + max_sym_per_frame = 3 + + while t < T and sym_per_utt < max_sym_per_utt: + # fmt: off + current_encoder_out = encoder_out[:, t:t+1, :] + # fmt: on + logits = model.transducer.joiner(current_encoder_out, decoder_out) + # logits is (1, 1, 1, vocab_size) + + log_prob = logits.log_softmax(dim=-1) + # log_prob is (1, 1, 1, vocab_size) + # TODO: Use logits.argmax() + y = log_prob.argmax() + if y != blank_id: + hyp.append(y.item()) + y = y.reshape(1, 1) + decoder_out, (h, c) = model.transducer.decoder(y, (h, c)) + + sym_per_utt += 1 + sym_per_frame += 1 + + if y == blank_id or sym_per_frame > max_sym_per_frame: + sym_per_frame = 0 + t += 1 + + return hyp + + +@dataclass +class Hypothesis: + ys: List[int] # the predicted sequences so far + log_prob: float # The log prob of ys + + # Optional decoder state. We assume it is LSTM for now, + # so the state is a tuple (h, c) + decoder_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None + + +def beam_search( + model: HFWav2Transducer, + encoder_out: torch.Tensor, + beam: int = 5, +) -> List[int]: + """ + It implements Algorithm 1 in https://arxiv.org/pdf/1211.3711.pdf + espnet/nets/beam_search_transducer.py#L247 is used as a reference. + Args: + model: + An instance of `Transducer`. + encoder_out: + A tensor of shape (N, T, C) from the encoder. Support only N==1 for now. + beam: + Beam size. + Returns: + Return the decoded result. 
+ """ + assert encoder_out.ndim == 3 + + # support only batch_size == 1 for now + assert encoder_out.size(0) == 1, encoder_out.size(0) + blank_id = model.transducer.decoder.blank_id + device = model.device + + sos = torch.tensor([blank_id], device=device).reshape(1, 1) + decoder_out, (h, c) = model.transducer.decoder(sos) + T = encoder_out.size(1) + t = 0 + B = [Hypothesis(ys=[blank_id], log_prob=0.0, decoder_state=None)] + max_u = 20000 # terminate after this number of steps + u = 0 + + cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]] = {} + + while t < T and u < max_u: + # fmt: off + current_encoder_out = encoder_out[:, t:t+1, :] + # fmt: on + A = B + B = [] + # for hyp in A: + # for h in A: + # if h.ys == hyp.ys[:-1]: + # # update the score of hyp + # decoder_input = torch.tensor( + # [h.ys[-1]], device=device + # ).reshape(1, 1) + # decoder_out, _ = model.decoder( + # decoder_input, h.decoder_state + # ) + # logits = model.joiner(current_encoder_out, decoder_out) + # log_prob = logits.log_softmax(dim=-1) + # log_prob = log_prob.squeeze() + # hyp.log_prob += h.log_prob + log_prob[hyp.ys[-1]].item() + + while u < max_u: + y_star = max(A, key=lambda hyp: hyp.log_prob) + A.remove(y_star) + + # Note: y_star.ys is unhashable, i.e., cannot be used + # as a key into a dict + cached_key = "_".join(map(str, y_star.ys)) + + if cached_key not in cache: + decoder_input = torch.tensor([y_star.ys[-1]], device=device).reshape( + 1, 1 + ) + + decoder_out, decoder_state = model.transducer.decoder( + decoder_input, + y_star.decoder_state, + ) + cache[cached_key] = (decoder_out, decoder_state) + else: + decoder_out, decoder_state = cache[cached_key] + + logits = model.transducer.joiner(current_encoder_out, decoder_out) + log_prob = logits.log_softmax(dim=-1) + # log_prob is (1, 1, 1, vocab_size) + log_prob = log_prob.squeeze() + # Now log_prob is (vocab_size,) + + # If we choose blank here, add the new hypothesis to B. 
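+            # (emitting blank ends the expansion for encoder frame t, so the
+            #  extended hypothesis becomes a finished candidate for this step)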
+ # Otherwise, add the new hypothesis to A + + # First, choose blank + skip_log_prob = log_prob[blank_id] + new_y_star_log_prob = y_star.log_prob + skip_log_prob.item() + + # ys[:] returns a copy of ys + new_y_star = Hypothesis( + ys=y_star.ys[:], + log_prob=new_y_star_log_prob, + # Caution: Use y_star.decoder_state here + decoder_state=y_star.decoder_state, + ) + B.append(new_y_star) + + # Second, choose other labels + for i, v in enumerate(log_prob.tolist()): + if i == blank_id: + continue + new_ys = y_star.ys + [i] + new_log_prob = y_star.log_prob + v + new_hyp = Hypothesis( + ys=new_ys, + log_prob=new_log_prob, + decoder_state=decoder_state, + ) + A.append(new_hyp) + u += 1 + # check whether B contains more than "beam" elements more probable + # than the most probable in A + A_most_probable = max(A, key=lambda hyp: hyp.log_prob) + B = sorted( + [hyp for hyp in B if hyp.log_prob > A_most_probable.log_prob], + key=lambda hyp: hyp.log_prob, + reverse=True, + ) + if len(B) >= beam: + B = B[:beam] + break + t += 1 + best_hyp = max(B, key=lambda hyp: hyp.log_prob / len(hyp.ys[1:])) + ys = best_hyp.ys[1:] # [1:] to remove the blank + return ys \ No newline at end of file diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 932c3ed4..1e36f9af 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -109,6 +109,7 @@ def __init__( cpu_offload=cpu_offload, ) + @record def train_epoch(self, data_loader): """Training epoch loop @@ -124,6 +125,8 @@ def train_epoch(self, data_loader): for batch, (data, audio_length, target) in enumerate(data_loader): self.loggers.on_batch_begin(batch) + # print("data",data.shape) + # print("audio_length",audio_length) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() diff --git a/hyperion/torch/utils/__init__.py b/hyperion/torch/utils/__init__.py index 3a4692dc..eec8a36a 100644 --- a/hyperion/torch/utils/__init__.py +++ b/hyperion/torch/utils/__init__.py @@ -5,6 +5,7 @@ from .devices import open_device from .metric_acc import MetricAcc +# from .recognition_acc import RecogAcc from .masking import seq_lengths_to_mask, scale_seq_lengths from .collation import collate_seq_1d, collate_seq_2d, collate_seq_nd from .eval_utils import eval_nnet_by_chunks, eval_nnet_overlap_add From 300d82b2f06e1946739d05dbd28fee4d91bf6720 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Tue, 20 Dec 2022 22:09:35 -0500 Subject: [PATCH 061/154] recover mistakenly deleted/changed files --- ...et50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 68 +++++++++++++++++++ .../v1/global_conf/config_transducer_v1.sh | 9 ++- egs/voxceleb/v2/path.sh | 2 +- hyperion/bin/finetune_wav2vec2xvector.py | 2 +- hyperion/bin/train_wav2vec2transducer.py | 2 - hyperion/torch/models/transducer/joiner.py | 2 - hyperion/torch/trainers/transducer_trainer.py | 3 - hyperion/torch/utils/__init__.py | 1 - 8 files changed, 74 insertions(+), 15 deletions(-) create mode 100644 egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh diff --git a/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh new file mode 100644 index 00000000..213380da --- /dev/null +++ b/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -0,0 +1,68 @@ +# LResNet34 x-vector with mixed 
precision training + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +# x-vector training +nnet_data=voxcelebcat +nnet_num_augs=6 +aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" + +batch_size_1gpu=32 +eff_batch_size=512 # effective batch size +ipe=$nnet_num_augs +min_chunk=4 +max_chunk=4 +lr=0.05 + +nnet_type=res2net50 +dropout=0 +embed_dim=256 +width_factor=1.625 +scale=4 +ws_tag=w26s4 + +s=30 +margin_warmup=20 +margin=0.3 + +nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" + +opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" +lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" + +nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_num_epochs=70 +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0070.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=6 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxcelebcat +else + plda_data=voxcelebcat_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + +diar_plda_num_augs=0 +if [ $diar_plda_num_augs -eq 0 ]; then + diar_plda_data=voxcelebcat +else + diar_plda_data=voxcelebcat_augx${plda_num_augs} +fi +diar_plda_type=splda +diar_lda_dim=150 +diar_plda_y_dim=150 +diar_plda_z_dim=150 + +diar_plda_name=lda${diar_lda_dim}_${diar_plda_type}y${diar_plda_y_dim}_v1_${diar_plda_data} +diar_thr=-7 +diar_dir=exp/diarization/$nnet_name/${diar_plda_name}/ahc_pcar1_thr${diar_thr} +diar_name=diar_res2net50w26s4_thr${diar_thr} \ No newline at end of file diff --git a/egs/librispeech/v1/global_conf/config_transducer_v1.sh b/egs/librispeech/v1/global_conf/config_transducer_v1.sh index 7fc1508f..b88fe2a7 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v1.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v1.sh @@ -1,4 +1,4 @@ -# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 # hugging face model hf_model_name=wav2vec2xlsr300m @@ -10,7 +10,6 @@ hf_model_name=wav2vec2xlsr300m nnet_data=train_clean_100 dev_data=dev_clean test_data=test_clean -# nnet_data=train_clean_small bpe_model=data/lang_bpe_1000/bpe.model # x-vector cfg @@ -24,9 +23,9 @@ nnet_name=${hf_model_name}_transducer_v1.0 nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name -nnet_s1=${nnet_s1_dir}_pre/model_ep0060.pth +nnet_s1=$nnet_s1_dir/model_ep0060.pth -nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml nnet_s2_args="" nnet_s2_name=${nnet_name}.s2 nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name @@ -37,4 +36,4 @@ nnet_s3_args="" nnet_s3_name=${nnet_name}.s3 nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name nnet_s3=$nnet_s3_dir/model_ep0002.pth -nnet_s3=$nnet_s3_dir/model_ep0005.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth \ No newline at end of file diff --git a/egs/voxceleb/v2/path.sh b/egs/voxceleb/v2/path.sh index 0dc5a9d5..6994fdab 100755 --- 
a/egs/voxceleb/v2/path.sh +++ b/egs/voxceleb/v2/path.sh @@ -2,4 +2,4 @@ export HYP_ROOT=$(readlink -f `pwd -P`/../../..) export TOOLS_ROOT=$HYP_ROOT/tools - . $TOOLS_ROOT/path.sh +. $TOOLS_ROOT/path.sh diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py index adde6ed5..25722b35 100755 --- a/hyperion/bin/finetune_wav2vec2xvector.py +++ b/hyperion/bin/finetune_wav2vec2xvector.py @@ -25,7 +25,7 @@ from hyperion.torch.utils import ddp from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.data import AudioDataset as AD -# from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.models import ( HFWav2Vec2ResNet1dXVector, diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 7313c661..0f1e8a3d 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -18,7 +18,6 @@ import multiprocessing import numpy as np -import soundfile as sf import torch import torch.nn as nn @@ -41,7 +40,6 @@ def transducer_collate(batch): audio_length = [] target = [] for record in batch: - # sf.write('/export/c06/ylu125/GSP/hyperion/egs/librispeech/v1/wavs/mix_{}.wav'.format(np.random.randn()), record[0], 16000) wav = torch.as_tensor(record[0]) audio.append(wav) audio_length.append(wav.shape[0]) diff --git a/hyperion/torch/models/transducer/joiner.py b/hyperion/torch/models/transducer/joiner.py index 57587992..0fc1fe51 100644 --- a/hyperion/torch/models/transducer/joiner.py +++ b/hyperion/torch/models/transducer/joiner.py @@ -40,8 +40,6 @@ def forward( Returns: Return a tensor of shape (N, T, U, C). 
""" - # print("encoder_out",encoder_out.shape) - # print("decoder_out",decoder_out.shape) assert encoder_out.ndim == decoder_out.ndim == 3 assert encoder_out.size(0) == decoder_out.size(0) assert encoder_out.size(2) == decoder_out.size(2) diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 1e36f9af..932c3ed4 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -109,7 +109,6 @@ def __init__( cpu_offload=cpu_offload, ) - @record def train_epoch(self, data_loader): """Training epoch loop @@ -125,8 +124,6 @@ def train_epoch(self, data_loader): for batch, (data, audio_length, target) in enumerate(data_loader): self.loggers.on_batch_begin(batch) - # print("data",data.shape) - # print("audio_length",audio_length) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() diff --git a/hyperion/torch/utils/__init__.py b/hyperion/torch/utils/__init__.py index eec8a36a..3a4692dc 100644 --- a/hyperion/torch/utils/__init__.py +++ b/hyperion/torch/utils/__init__.py @@ -5,7 +5,6 @@ from .devices import open_device from .metric_acc import MetricAcc -# from .recognition_acc import RecogAcc from .masking import seq_lengths_to_mask, scale_seq_lengths from .collation import collate_seq_1d, collate_seq_2d, collate_seq_nd from .eval_utils import eval_nnet_by_chunks, eval_nnet_overlap_add From 8984b349e56d4b6e6ae846658515d6c28b83422f Mon Sep 17 00:00:00 2001 From: neillu23 Date: Tue, 20 Dec 2022 22:18:11 -0500 Subject: [PATCH 062/154] fix typo --- egs/librispeech/v1/global_conf/config_transducer_v1.sh | 4 ++-- hyperion/bin/train_wav2vec2transducer.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/egs/librispeech/v1/global_conf/config_transducer_v1.sh b/egs/librispeech/v1/global_conf/config_transducer_v1.sh index b88fe2a7..39c4d90f 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v1.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v1.sh @@ -1,4 +1,4 @@ - WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 # hugging face model hf_model_name=wav2vec2xlsr300m @@ -36,4 +36,4 @@ nnet_s3_args="" nnet_s3_name=${nnet_name}.s3 nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name nnet_s3=$nnet_s3_dir/model_ep0002.pth -nnet_s3=$nnet_s3_dir/model_ep0005.pth \ No newline at end of file +nnet_s3=$nnet_s3_dir/model_ep0005.pth diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 0f1e8a3d..8156f9b1 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -18,6 +18,7 @@ import multiprocessing import numpy as np + import torch import torch.nn as nn From e14a840e6754405732bfb98f3ccc422d93f89273 Mon Sep 17 00:00:00 2001 From: neillu23 Date: Wed, 21 Dec 2022 00:09:37 -0500 Subject: [PATCH 063/154] Add fine-tune function for transducer ASR --- ...et50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 2 +- ...v2vec2xlsr300m_transducer_stage2_v3.2.yaml | 61 +++++ .../v1/global_conf/config_transducer_v3.2.sh | 4 +- egs/librispeech/v1/run_011_train_asr.sh | 50 ++-- egs/librispeech/v1/run_030_inference.sh | 2 +- hyperion/bin/decode_wav2transducer.py | 114 +-------- hyperion/bin/finetune_wav2vec2transducer.py | 238 ++++++++++++++++++ hyperion/bin/train_wav2vec2transducer.py | 2 +- hyperion/torch/models/transducer/decoder.py | 66 +++++ 
.../torch/models/transducer/transducer.py | 87 +++---- .../wav2transducer/hf_wav2transducer.py | 16 +- 11 files changed, 449 insertions(+), 193 deletions(-) create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml create mode 100755 hyperion/bin/finetune_wav2vec2transducer.py diff --git a/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index 213380da..bf6c2fb8 100644 --- a/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/chime5_spkdet/v1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -65,4 +65,4 @@ diar_plda_z_dim=150 diar_plda_name=lda${diar_lda_dim}_${diar_plda_type}y${diar_plda_y_dim}_v1_${diar_plda_data} diar_thr=-7 diar_dir=exp/diarization/$nnet_name/${diar_plda_name}/ahc_pcar1_thr${diar_thr} -diar_name=diar_res2net50w26s4_thr${diar_thr} \ No newline at end of file +diar_name=diar_res2net50w26s4_thr${diar_thr} diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml new file mode 100644 index 00000000..69c489b0 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml @@ -0,0 +1,61 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 85. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 30 + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + transducer: + decoder: + override_dropouts: true + embedding_dropout_rate: 0.3 + rnn_dropout_rate: 0.3 + +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.2.sh b/egs/librispeech/v1/global_conf/config_transducer_v3.2.sh index 2ff8d3c9..9185cc3f 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v3.2.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v3.2.sh @@ -23,9 +23,9 @@ nnet_name=${hf_model_name}_transducer_v3.2 nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0060.pth +nnet_s1=$nnet_s1_dir/model_ep0120.pth -nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml nnet_s2_args="" nnet_s2_name=${nnet_name}.s2 nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh index 868cf4d1..3d0e6eb1 100755 --- a/egs/librispeech/v1/run_011_train_asr.sh +++ b/egs/librispeech/v1/run_011_train_asr.sh @@ -1,6 +1,6 @@ #!/bin/bash # Copyright 
-# 2019 Johns Hopkins University (Author: Jesus Villalba) +# 2022 Johns Hopkins University (Author: Yen-Ju Lu) # Apache 2.0. # . ./cmd.sh @@ -68,23 +68,25 @@ if [ $stage -le 2 ]; then if [ "$use_wandb" == "true" ];then extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" fi - + mkdir -p $nnet_s2_dir/log $cuda_cmd \ --gpu $ngpu $nnet_s2_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - finetune_wav2vec2xvector.py $nnet_type \ + finetune_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ - --data.train.dataset.time-durs-file $list_dir/utt2dur \ - --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ - --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ - --data.val.dataset.time-durs-file $list_dir/utt2dur \ - --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ - --in-model-file $nnet_s1 \ + --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.segments-file $train_dir/utt2spk \ + --data.train.dataset.bpe-model $bpe_model \ + --data.train.dataset.text-file $train_dir/text \ + --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.segments-file $val_dir/utt2spk \ + --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s2_dir $args \ - --num-gpus $ngpu \ + --in-model-file $nnet_s1 \ + --data.train.dataset.time-durs-file $train_dir/utt2dur \ + --data.val.dataset.time-durs-file $val_dir/utt2dur \ + --num-gpus $ngpu fi @@ -94,22 +96,24 @@ if [ $stage -le 3 ]; then extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)" fi + mkdir -p $nnet_s3_dir/log $cuda_cmd \ --gpu $ngpu $nnet_s3_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - finetune_wav2vec2xvector.py $nnet_type \ + finetune_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ - --data.train.dataset.time-durs-file $list_dir/utt2dur \ - --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ - --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ - --data.val.dataset.time-durs-file $list_dir/utt2dur \ - --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ - --in-model-file $nnet_s2 \ + --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.segments-file $train_dir/utt2spk \ + --data.train.dataset.bpe-model $bpe_model \ + --data.train.dataset.text-file $train_dir/text \ + --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.segments-file $val_dir/utt2spk \ + --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s3_dir $args \ - --num-gpus $ngpu \ - + --in-model-file $nnet_s2 \ + --data.train.dataset.time-durs-file $train_dir/utt2dur \ + --data.val.dataset.time-durs-file $val_dir/utt2dur \ + --num-gpus $ngpu fi diff --git a/egs/librispeech/v1/run_030_inference.sh b/egs/librispeech/v1/run_030_inference.sh index fb76088b..73ac2b8f 100755 --- a/egs/librispeech/v1/run_030_inference.sh +++ b/egs/librispeech/v1/run_030_inference.sh @@ -1,6 +1,6 @@ #!/bin/bash # Copyright -# 2020 Johns Hopkins University (Author: Jesus Villalba) +# 2022 Johns Hopkins University (Author: Yen-Ju Lu) # Apache 2.0. # . 
./cmd.sh diff --git a/hyperion/bin/decode_wav2transducer.py b/hyperion/bin/decode_wav2transducer.py index c71df79e..265a3536 100755 --- a/hyperion/bin/decode_wav2transducer.py +++ b/hyperion/bin/decode_wav2transducer.py @@ -1,6 +1,6 @@ #!/usr/bin/env python """ - Copyright 2019 Jesus Villalba (Johns Hopkins University) + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ @@ -55,73 +55,6 @@ def load_model(model_path, device): model.eval() return model - - -# def decode_dataset( -# dl: torch.utils.data.DataLoader, -# params: AttributeDict, -# model: nn.Module, -# sp: spm.SentencePieceProcessor, -# ) -> Dict[str, List[Tuple[str, List[str], List[str]]]]: -# """Decode dataset. -# Args: -# dl: -# PyTorch's dataloader containing the dataset to decode. -# params: -# It is returned by :func:`get_params`. -# model: -# The neural model. -# sp: -# The BPE model. -# Returns: -# Return a dict, whose key may be "greedy_search" if greedy search -# is used, or it may be "beam_7" if beam size of 7 is used. -# Its value is a list of tuples. Each tuple contains two elements: -# The first is the reference transcript, and the second is the -# predicted result. -# """ -# num_cuts = 0 - -# try: -# num_batches = len(dl) -# except TypeError: -# num_batches = "?" - -# if decoding_method == "greedy_search": -# log_interval = 100 -# else: -# log_interval = 2 - -# results = defaultdict(list) -# for batch_idx, batch in enumerate(dl): -# texts = batch["supervisions"]["text"] -# cut_ids = [cut.id for cut in batch["supervisions"]["cut"]] - -# hyps_dict = decode_one_batch( -# params=params, -# model=model, -# sp=sp, -# batch=batch, -# ) - -# for name, hyps in hyps_dict.items(): -# this_batch = [] -# assert len(hyps) == len(texts) -# for cut_id, hyp_words, ref_text in zip(cut_ids, hyps, texts): -# ref_words = ref_text.split() -# this_batch.append((cut_id, ref_words, hyp_words)) - -# results[name].extend(this_batch) - -# num_cuts += len(texts) - -# if batch_idx % log_interval == 0: -# batch_str = f"{batch_idx}/{num_batches}" - -# logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") -# return results - - def decode_one_batch( model: nn.Module, sp: spm.SentencePieceProcessor, @@ -160,13 +93,8 @@ def decode_one_batch( feature = feature.to(device) # at entry, feature is (N, T, C) - feature_lens = torch.Tensor([x.shape[1]]).int() #batch["supervisions"] - # feature_lens = supervisions["num_frames"].to(device) + feature_lens = torch.Tensor([x.shape[1]]).int() - # encoder_out, encoder_out_lens = model.encoder(x=feature, x_lens=feature_lens) - - # print("feature",feature.shape) - # print("feature_lens",feature_lens) encoder_out, hid_feats, encoder_out_lens = model.forward_feats(x=feature, x_lengths=feature_lens) hyps = [] @@ -191,9 +119,9 @@ def decode_one_batch( logging.info("hyps:{}".format(" ".join(hyps[0]))) if decoding_method == "greedy_search": - return hyps[0] #{"greedy_search": hyps} + return hyps[0] else: - return hyps[0] #{f"beam_{params.beam_size}": hyps} + return hyps[0] def decode_transducer( @@ -211,12 +139,6 @@ def decode_transducer( sp = spm.SentencePieceProcessor() sp.load(bpe_model) - # blank_id = self.sp.piece_to_id("") - # vocab_size = self.sp.get_piece_size() - - # if write_num_frames_spec is not None: - # keys = [] - # info = [] augmenter = None aug_df = None @@ -268,27 +190,10 @@ def decode_transducer( if x.shape[1] == 0: y = np.zeros((model.embed_dim,), dtype=float_cpu()) else: - # x = x.transpose(1, 
2).contiguous() - # x = torch.unsqueeze(x,2) - # writer.write(key + ' ' + "abc") y = decode_one_batch(model=model, sp=sp, x=x) - writer.write(key + ' ' + ' '.join(y) + "\n") - - # y = ( - # model.extract_embed( - # x, - # chunk_length=chunk_length, - # embed_layer=embed_layer, - # ) - # .cpu() - # .numpy()[0] - # ) t7 = time.time() - # writer.write([key], [y]) - # if write_num_frames_spec is not None: - # keys.append(key) - # info.append(str(x.shape[-1])) + writer.write(key + ' ' + ' '.join(y) + "\n") t8 = time.time() read_time = t2 - t1 @@ -313,15 +218,6 @@ def decode_transducer( ) ) - # if write_num_frames_spec is not None: - # logging.info("writing num-frames to %s" % (write_num_frames_spec)) - # u2nf = Utt2Info.create(keys, info) - # u2nf.save(write_num_frames_spec) - - # if aug_info_path is not None: - # aug_df = pd.concat(aug_df, ignore_index=True) - # aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") - if __name__ == "__main__": diff --git a/hyperion/bin/finetune_wav2vec2transducer.py b/hyperion/bin/finetune_wav2vec2transducer.py new file mode 100755 index 00000000..b940c024 --- /dev/null +++ b/hyperion/bin/finetune_wav2vec2transducer.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python +""" + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from pathlib import Path +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import k2 +import time +import logging +import multiprocessing + +import numpy as np + +import torch +import torch.nn as nn + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.utils import ddp +from hyperion.torch.trainers import TransducerTrainer as Trainer +from hyperion.torch.data import AudioDataset as AD + +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.data import SegSamplerFactory +from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import HFWav2Vec2Transducer +from torch.nn.utils.rnn import pad_sequence + + +model_dict = { + "hf_wav2vec2transducer": HFWav2Vec2Transducer, +} + + +def transducer_collate(batch): + audio = [] + audio_length = [] + target = [] + for record in batch: + wav = torch.as_tensor(record[0]) + audio.append(wav) + audio_length.append(wav.shape[0]) + target.append(record[1]) + audio = pad_sequence(audio) + audio_length = torch.as_tensor(audio_length) + target = k2.RaggedTensor(target) + return torch.transpose(audio,0,1), audio_length, target + + + +def init_data(partition, rank, num_gpus, **kwargs): + data_kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**data_kwargs["dataset"]) + sampler_args = data_kwargs["sampler"] + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + sampler = SegSamplerFactory.create(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = data_kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = 
torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate) + return data_loader + + +def init_model(in_model_file, rank, model_class, **kwargs): + model_args = model_class.filter_finetune_args(**kwargs["model"]) + # model_args = model_class.filter_args(**kwargs["model"]) + if rank == 0: + logging.info("model network ft args={}".format(model_args)) + model = TML.load(in_model_file) + model.change_config(**model_args) + if rank == 0: + logging.info("model={}".format(model)) + return model + + + + + +def train_model(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + # # for Debug + # rank = 0 + # kwargs["rank"] = 0 + # device = "cpu" + # world_size=1 + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + model = init_model(**kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {} + trainer = Trainer( + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(model_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + train_parser = ArgumentParser(prog="") + AD.add_class_args(train_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + + + parser.add_argument( + "--data.train.dataset.text_file", + type=str, + ) + + parser.add_argument("--data.val.dataset.text_file", type=str) + + parser.add_argument( + "--data.train.dataset.bpe_model", + type=str, + ) + + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + + parser.link_arguments( + "data.train.dataset.bpe_model", "data.val.dataset.bpe_model" + ) + + + parser.add_argument("--in-model-file", required=True) + model_class.add_finetune_args(parser, prefix="model") + # model_class.add_class_args(parser, prefix="model") + Trainer.add_class_args( + parser, prefix="trainer", train_modes=model_class.valid_train_modes() + ) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +if __name__ == "__main__": + parser = ArgumentParser(description="Fine-tune Wav2Vec2Transducer model from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = 
parser.add_subcommands() + + for k, v in model_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + model_type = args.subcommand + args_sc = vars(args)[model_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.model_class = model_dict[model_type] + # torch docs recommend using forkserver + # multiprocessing.set_start_method("forkserver") + train_model(gpu_id, args_sc) diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 8156f9b1..ee60080a 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -1,6 +1,6 @@ #!/usr/bin/env python """ - Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import sys diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py index 833394d0..e7a40ec0 100644 --- a/hyperion/torch/models/transducer/decoder.py +++ b/hyperion/torch/models/transducer/decoder.py @@ -15,8 +15,10 @@ # limitations under the License. from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +import logging from typing import Optional, Tuple +import logging import torch import torch.nn as nn @@ -137,6 +139,17 @@ def filter_args(**kwargs): return args + + @staticmethod + def filter_finetune_args(**kwargs): + valid_args = ( + "embedding_dropout_rate", + "rnn_dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + return args + @staticmethod def add_class_args(parser, prefix=None, @@ -181,3 +194,56 @@ def add_class_args(parser, if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + + def change_config( + self, + override_dropouts=False, + embedding_dropout_rate: float = 0.0, + rnn_dropout_rate: float = 0.0, + ): + logging.info("changing decoder config") + + if override_dropouts: + logging.info("overriding decoder dropouts") + + # for module in self.modules(): + # if isinstance(module, DropConnect1d): + # module.p *= drop_connect_rate / self.drop_connect_rate + + self.rnn_dropout_rate = rnn_dropout_rate + self.rnn.p = self.rnn_dropout_rate + + self.embedding_dropout_rate = embedding_dropout_rate + self.embedding_dropout = nn.Dropout(self.embedding_dropout_rate) + + + + @staticmethod + def add_finetune_args(parser, + prefix=None, + skip=set(["in_feats", "blank_id", "vocab_size"])): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument("--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model." 
+ )) + parser.add_argument("--embedding-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder input embeddings")) + parser.add_argument("--rnn-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder RNN ")) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index 2d523b7c..8d2a09e8 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -23,6 +23,7 @@ except ModuleNotFoundError: from ...utils import dummy_k2 as k2 +import logging import torch import torch.nn as nn import torchaudio @@ -210,51 +211,41 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # def change_config( - # self, - # override_dropouts=False, - # dropout_rate=0, - # num_classes=None, - # loss_type="arc-softmax", - # cos_scale=64, - # margin=0.3, - # margin_warmup_epochs=10, - # intertop_k=5, - # intertop_margin=0.0, - # num_subcenters=2, - # ): - # logging.info("changing x-vector config") - # self.rebuild_output_layer( - # num_classes=num_classes, - # loss_type=loss_type, - # cos_scale=cos_scale, - # margin=margin, - # margin_warmup_epochs=margin_warmup_epochs, - # intertop_k=intertop_k, - # intertop_margin=intertop_margin, - # num_subcenters=num_subcenters, - # ) - - # if override_dropouts: - # logging.info("overriding x-vector dropouts") - # self.encoder_net.change_dropouts(dropout_rate) - # self.classif_net.change_dropouts(dropout_rate) - - # @staticmethod - # def filter_finetune_args(**kwargs): - # valid_args = ( - # ) - # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - # return args - - # @staticmethod - # def add_finetune_args(parser, prefix=None): - # if prefix is not None: - # outer_parser = parser - # parser = ArgumentParser(prog="") - - # if prefix is not None: - # outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - - # add_argparse_args = add_class_args - # add_argparse_finetune_args = add_finetune_args + def change_config(self, + decoder, + # joiner, + ): + logging.info("changing transducer config") + self.decoder.change_config(**decoder) + # self.joiner.change_config(**joiner) + + @staticmethod + def filter_finetune_args(**kwargs): + # get arguments for pooling + decoder_args = Decoder.filter_finetune_args(**kwargs["decoder"]) + # joiner_args = Joiner.filter_finetune_args(**kwargs["joiner"]) + + valid_args = ( + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + args["decoder"] = decoder_args + # args["joiner"] = joiner_args + return args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + Decoder.add_finetune_args(parser, prefix="decoder") + # Joiner.add_finetune_args(parser, prefix="joiner") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + add_argparse_args = add_class_args + add_argparse_finetune_args = add_finetune_args + + diff --git a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py index d21bb777..ec4c83b0 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py @@ -263,11 +263,11 @@ def 
set_train_mode(self, mode): self.unfreeze() elif mode == "frozen": self.freeze() - elif mode == "ft-embed-affine": - self.unfreeze() - self.freeze_feat_fuser() - self.freeze_hf_feats() - self.transducer.freeze_preembed_layers() + # elif mode == "ft-embed-affine": + # self.unfreeze() + # self.freeze_feat_fuser() + # self.freeze_hf_feats() + # self.transducer.freeze_preembed_layers() elif mode in ["ft-transducer", "ft-transducer-nograd"]: self.unfreeze() self.freeze_hf_feats() @@ -295,9 +295,9 @@ def _train(self, train_mode: str): if train_mode in ["full", "frozen"]: super()._train(train_mode) - elif train_mode == "ft-embed-affine": - self.hf_feats.train() - self.transducer._train("ft-embed_affine") + # elif train_mode == "ft-embed-affine": + # self.hf_feats.train() + # self.transducer._train("ft-embed_affine") elif train_mode in [ "ft-transducer", "hf-feats-frozen", From 90c97af3a1543eb39c2b46eeed7b2bbbaa9ada0b Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 22 Dec 2022 06:09:37 -0500 Subject: [PATCH 064/154] more transducer configs --- ...v2vec2xlsr300m_transducer_stage1_v3.3.yaml | 55 +++++++++++++++++++ ...v2vec2xlsr300m_transducer_stage1_v4.3.yaml | 55 +++++++++++++++++++ ...v2vec2xlsr300m_transducer_stage1_v4.4.yaml | 55 +++++++++++++++++++ .../wav2vec2xlsr300m_transducer_do0.4.yaml | 13 +++++ .../v1/global_conf/config_transducer_v3.3.sh | 39 +++++++++++++ .../v1/global_conf/config_transducer_v4.3.sh | 39 +++++++++++++ .../v1/global_conf/config_transducer_v4.4.sh | 39 +++++++++++++ 7 files changed, 295 insertions(+) create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml create mode 100644 egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.4.yaml create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v3.3.sh create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v4.3.sh create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v4.4.sh diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml new file mode 100644 index 00000000..76d676f2 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml @@ -0,0 +1,55 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 85. 
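+      # bucketing groups segments of similar duration; max_batch_length caps
+      # the total audio per batch (assumed to be in seconds)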
+ min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 30 + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2xlsr300m_transducer_do0.4.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml new file mode 100644 index 00000000..35b2b47c --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml @@ -0,0 +1,55 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 80. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 80 + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2xlsr300m_transducer_do0.4.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml new file mode 100644 index 00000000..855bfc98 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml @@ -0,0 +1,55 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 80. 
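+      # batching matches v4.3; this config differs below in a ~10x longer
+      # LR schedule (decay/hold/warmup steps and total epochs)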
+ min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + #sampler_type: 'seg_sampler' + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 80 + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2xlsr300m_transducer_do0.4.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 42000 + hold_steps: 15000 + min_lr: 4e-5 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 1200 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.4.yaml b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.4.yaml new file mode 100644 index 00000000..9fed09e7 --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.4.yaml @@ -0,0 +1,13 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h #microsoft/wavlm-base #facebook/wav2vec2-base #microsoft/wavlm-base-plus +transducer: + decoder: + embedding_dim: 1024 + num_layers: 2 + hidden_dim: 512 + embedding_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + joiner: + num_layers: 1 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh b/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh new file mode 100644 index 00000000..41f9e21f --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh @@ -0,0 +1,39 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean +# nnet_data=train_clean_small + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2transducer + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v3.3 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth diff --git a/egs/librispeech/v1/global_conf/config_transducer_v4.3.sh b/egs/librispeech/v1/global_conf/config_transducer_v4.3.sh new file mode 100644 index 00000000..de00c55a --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v4.3.sh @@ -0,0 +1,39 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_all +dev_data=dev_all +# nnet_data=train_clean_small + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2transducer + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml 
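+# v4.3 differs from v3.3 mainly in the training data: train_all/dev_all
+# instead of train_clean_100/dev_clean (see nnet_data/dev_data above)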
+nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v4.3 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth diff --git a/egs/librispeech/v1/global_conf/config_transducer_v4.4.sh b/egs/librispeech/v1/global_conf/config_transducer_v4.4.sh new file mode 100644 index 00000000..3114af61 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v4.4.sh @@ -0,0 +1,39 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_all +dev_data=dev_all +# nnet_data=train_clean_small + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2transducer + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v4.4 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0060.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/transducer_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth From 02bb457d5034dd2243107ccd0f0f578a45eca243 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Sat, 24 Dec 2022 14:05:53 -0500 Subject: [PATCH 065/154] fix beam search --- egs/librispeech/v1/conf/clsp.conf | 2 +- .../v1/global_conf/config_transducer_v3.3.sh | 2 +- hyperion/bin/decode_wav2transducer.py | 125 ++++++++---------- .../models/wav2transducer/beam_search.py | 38 ++++-- 4 files changed, 85 insertions(+), 82 deletions(-) diff --git a/egs/librispeech/v1/conf/clsp.conf b/egs/librispeech/v1/conf/clsp.conf index 4ed38246..959c62a7 100644 --- a/egs/librispeech/v1/conf/clsp.conf +++ b/egs/librispeech/v1/conf/clsp.conf @@ -7,5 +7,5 @@ option num_threads=* -pe smp $0 option num_threads=1 # Do not add anything to qsub_opts option max_jobs_run=* -tc $0 default gpu=0 -option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[1345679]*|c2[12357]*' option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh b/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh index 41f9e21f..490baba7 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh @@ -23,7 +23,7 @@ nnet_name=${hf_model_name}_transducer_v3.3 nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0060.pth +nnet_s1=$nnet_s1_dir/model_ep0120.pth 
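[Editor's note] All four transducer recipes above share the same sgd + exp_lr trainer block. A minimal sketch of how those scheduler fields plausibly combine; the parameter names suggest linear warmup, a hold phase, then exponential decay floored at min_lr, but hyperion's exact scheduler code may differ:

# Hypothetical reading of the exp_lr fields used in the configs above.
def exp_lr(step, lr=0.003, warmup_steps=1500, hold_steps=1500,
           decay_rate=0.5, decay_steps=4200, min_lr=4e-5):
    if step < warmup_steps:  # linear warmup from 0 to the base lr
        return lr * step / warmup_steps
    if step < warmup_steps + hold_steps:  # hold at the base lr
        return lr
    n = step - warmup_steps - hold_steps
    return max(min_lr, lr * decay_rate ** (n / decay_steps))

print(exp_lr(1000), exp_lr(2500), exp_lr(20000))

Note that v4.4 scales warmup_steps, hold_steps, and decay_steps by 10x (and epochs to 1200), stretching the same schedule shape over a much longer run.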
From 02bb457d5034dd2243107ccd0f0f578a45eca243 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Sat, 24 Dec 2022 14:05:53 -0500
Subject: [PATCH 065/154] fix beam search

---
 egs/librispeech/v1/conf/clsp.conf             |   2 +-
 .../v1/global_conf/config_transducer_v3.3.sh  |   2 +-
 hyperion/bin/decode_wav2transducer.py         | 125 ++++++++----------
 .../models/wav2transducer/beam_search.py      |  38 ++++--
 4 files changed, 85 insertions(+), 82 deletions(-)

diff --git a/egs/librispeech/v1/conf/clsp.conf b/egs/librispeech/v1/conf/clsp.conf
index 4ed38246..959c62a7 100644
--- a/egs/librispeech/v1/conf/clsp.conf
+++ b/egs/librispeech/v1/conf/clsp.conf
@@ -7,5 +7,5 @@ option num_threads=* -pe smp $0
 option num_threads=1  # Do not add anything to qsub_opts
 option max_jobs_run=* -tc $0
 default gpu=0
-option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*'
+option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[1345679]*|c2[12357]*'
 option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0'
diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh b/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh
index 41f9e21f..490baba7 100644
--- a/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh
+++ b/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh
@@ -23,7 +23,7 @@ nnet_name=${hf_model_name}_transducer_v3.3
 nnet_s1_name=$nnet_name.s1
 
 nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name
-nnet_s1=$nnet_s1_dir/model_ep0060.pth
+nnet_s1=$nnet_s1_dir/model_ep0120.pth
 
 nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml
 nnet_s2_args=""
diff --git a/hyperion/bin/decode_wav2transducer.py b/hyperion/bin/decode_wav2transducer.py
index 265a3536..bbcd0dc7 100755
--- a/hyperion/bin/decode_wav2transducer.py
+++ b/hyperion/bin/decode_wav2transducer.py
@@ -4,8 +4,6 @@
  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-
-
 from typing import Dict, List, Tuple
 
 import sentencepiece as spm
@@ -39,6 +37,7 @@ from hyperion.torch.models.wav2transducer.beam_search import greedy_search, beam_search
 
+
 def init_device(use_gpu):
     set_float_cpu("float32")
     num_gpus = 1 if use_gpu else 0
@@ -55,12 +54,12 @@ def load_model(model_path, device):
     model.eval()
     return model
 
+
 def decode_one_batch(
-    model: nn.Module,
-    sp: spm.SentencePieceProcessor,
-    x: torch.Tensor,
-    decoding_method = "beam_search"
-) -> Dict[str, List[List[str]]]:
+        model: nn.Module,
+        sp: spm.SentencePieceProcessor,
+        x: torch.Tensor,
+        decoding_method="beam_search") -> Dict[str, List[List[str]]]:
     """Decode one batch and return the result in a dict.
     The dict has the following format:
 
         - key: It indicates the setting used for decoding. For example,
@@ -86,7 +85,7 @@ def decode_one_batch(
       the returned dict.
     """
     device = model.device
-    feature = x #batch["inputs"]
+    feature = x  #batch["inputs"]
 
     assert x.shape[0] == 1
     assert feature.ndim == 2
@@ -95,8 +94,9 @@ def decode_one_batch(
 
     feature_lens = torch.Tensor([x.shape[1]]).int()
 
-    encoder_out, hid_feats, encoder_out_lens = model.forward_feats(x=feature, x_lengths=feature_lens)
-
+    encoder_out, hid_feats, encoder_out_lens = model.forward_feats(
+        x=feature, x_lengths=feature_lens)
+
     hyps = []
     batch_size = encoder_out.size(0)
 
@@ -104,40 +104,31 @@ def decode_one_batch(
     for i in range(batch_size):
         # fmt: off
-        encoder_out_i = encoder_out[i:i+1, :encoder_out_lens[i]]
+        encoder_out_i = encoder_out[i:i + 1, :encoder_out_lens[i]]
         # fmt: on
         if decoding_method == "greedy_search":
             hyp = greedy_search(model=model, encoder_out=encoder_out_i)
         elif decoding_method == "beam_search":
-            hyp = beam_search(
-                model=model, encoder_out=encoder_out_i, beam=5
-            )
+            hyp = beam_search(model=model, encoder_out=encoder_out_i, beam=5)
         else:
             raise ValueError(f"Unsupported decoding method: {decoding_method}")
         hyps.append(sp.decode(hyp).split())
-
+
     logging.info("hyps:{}".format(" ".join(hyps[0])))
-
+
     if decoding_method == "greedy_search":
         return hyps[0]
     else:
         return hyps[0]
 
 
-def decode_transducer(
-    input_spec,
-    output_spec,
-    scp_sep,
-    model_path,
-    bpe_model,
-    use_gpu,
-    **kwargs
-):
+def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model,
+                      use_gpu, **kwargs):
 
     device = init_device(use_gpu)
     model = load_model(model_path, device)
-    sp = spm.SentencePieceProcessor()
+    sp = spm.SentencePieceProcessor()
     sp.load(bpe_model)
 
     augmenter = None
@@ -147,10 +138,9 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model,
     ar_args = AR.filter_args(**kwargs)
     logging.info("opening output: %s" % (output_spec))
     # with DWF.create(output_spec, scp_sep=scp_sep) as writer:
-    with open(output_spec,"w") as writer:
-        logging.info(
-            "opening input stream: {} with args={}".format(input_spec, ar_args)
-        )
+    with open(output_spec, "w") as writer:
+        logging.info("opening input stream: {} with args={}".format(
+            input_spec, ar_args))
         with AR(input_spec, **ar_args) as reader:
             while not reader.eof():
                 t1 = time.time()
@@ -165,30 +155,28 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model,
                 logging.info("processing utt %s" % (key0))
                 for aug_id in range(num_augs):
                     t3 = time.time()
-                    key, x = key0, x0 #augment(key0, x0, augmenter, aug_df, aug_id)
+                    key, x = key0, x0  #augment(key0, x0, augmenter, aug_df, aug_id)
                     t4 = time.time()
                     with torch.no_grad():
                         x = torch.tensor(
-                            x[None, :], dtype=torch.get_default_dtype()
-                        ).to(device)
+                            x[None, :],
+                            dtype=torch.get_default_dtype()).to(device)
 
                         t5 = time.time()
                         tot_frames = x.shape[1]
                         logging.info(
-                            "utt %s detected %d/%d (%.2f %%) speech frames"
-                            % (
+                            "utt %s detected %d/%d (%.2f %%) speech frames" % (
                                 key,
                                 x.shape[1],
                                 tot_frames,
                                 x.shape[1] / tot_frames * 100,
-                            )
-                        )
-
+                            ))
 
                         t6 = time.time()
                         if x.shape[1] == 0:
-                            y = np.zeros((model.embed_dim,), dtype=float_cpu())
+                            y = np.zeros((model.embed_dim, ),
+                                         dtype=float_cpu())
                         else:
                             y = decode_one_batch(model=model, sp=sp, x=x)
@@ -199,41 +187,36 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model,
                     read_time = t2 - t1
                     tot_time = read_time + t8 - t3
                     logging.info(
-                        (
-                            "utt %s total-time=%.3f read-time=%.3f "
-                            "aug-time=%.3f feat-time=%.3f "
-                            "vad-time=%.3f embed-time=%.3f write-time=%.3f "
-                            "rt-factor=%.2f"
-                        )
-                        % (
-                            key,
-                            tot_time,
-                            read_time,
-                            t4 - t3,
-                            t5 - t4,
-                            t6 - t5,
-                            t7 - t6,
-                            t8 - t7,
-                            x0.shape[0] / fs[0] / tot_time,
-                        )
-                    )
+                        ("utt %s total-time=%.3f read-time=%.3f "
+                         "aug-time=%.3f feat-time=%.3f "
+                         "vad-time=%.3f embed-time=%.3f write-time=%.3f "
+                         "rt-factor=%.2f") % (
+                             key,
+                             tot_time,
+                             read_time,
+                             t4 - t3,
+                             t5 - t4,
+                             t6 - t5,
+                             t7 - t6,
+                             t8 - t7,
+                             x0.shape[0] / fs[0] / tot_time,
+                         ))
 
 
 if __name__ == "__main__":
 
     parser = ArgumentParser(
-        description=(
-            "Extracts x-vectors from waveform computing " "acoustic features on the fly"
-        )
-    )
+        description=("Extracts x-vectors from waveform computing "
+                     "acoustic features on the fly"))
 
     parser.add_argument("--cfg", action=ActionConfigFile)
     parser.add_argument("--input", dest="input_spec", required=True)
-    parser.add_argument("--scp-sep", default=" ", help=("scp file field separator"))
+    parser.add_argument("--scp-sep",
+                        default=" ",
+                        help=("scp file field separator"))
 
     AR.add_class_args(parser)
-    AF.add_class_args(parser, prefix="feats")
 
     parser.add_argument("--model-path", required=True)
@@ -241,12 +224,16 @@ if __name__ == "__main__":
     parser.add_argument("--bpe-model", required=True)
 
     parser.add_argument("--output", dest="output_spec", required=True)
-    parser.add_argument(
-        "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu"
-    )
-    parser.add_argument(
-        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
-    )
+    parser.add_argument("--use-gpu",
+                        default=False,
+                        action="store_true",
+                        help="extract xvectors in gpu")
+    parser.add_argument("-v",
+                        "--verbose",
+                        dest="verbose",
+                        default=1,
+                        choices=[0, 1, 2, 3],
+                        type=int)
 
     args = parser.parse_args()
     config_logger(args.verbose)
diff --git a/hyperion/torch/models/wav2transducer/beam_search.py b/hyperion/torch/models/wav2transducer/beam_search.py
index 95f6fadb..b23a0769 100644
--- a/hyperion/torch/models/wav2transducer/beam_search.py
+++ b/hyperion/torch/models/wav2transducer/beam_search.py
@@ -22,7 +22,8 @@ from .hf_wav2transducer import HFWav2Transducer
 
 
-def greedy_search(model: HFWav2Transducer, encoder_out: torch.Tensor) -> List[int]:
+def greedy_search(model: HFWav2Transducer,
+                  encoder_out: torch.Tensor) -> List[int]:
     """
     Args:
       model:
@@ -39,7 +40,8 @@ def greedy_search(model: HFWav2Transducer,
     blank_id = model.transducer.decoder.blank_id
     device = model.device
 
-    sos = torch.tensor([blank_id], device=device, dtype=torch.int64).reshape(1, 1)
+    sos = torch.tensor([blank_id], device=device,
+                       dtype=torch.int64).reshape(1, 1)
     decoder_out, (h, c) = model.transducer.decoder(sos)
     T = encoder_out.size(1)
     t = 0
@@ -53,7 +55,7 @@ def greedy_search(model: HFWav2Transducer,
     while t < T and sym_per_utt < max_sym_per_utt:
         # fmt: off
-        current_encoder_out = encoder_out[:, t:t+1, :]
+        current_encoder_out = encoder_out[:, t:t + 1, :]
         # fmt: on
         logits = model.transducer.joiner(current_encoder_out, decoder_out)
         # logits is (1, 1, 1, vocab_size)
@@ -120,11 +122,12 @@ def beam_search(
     max_u = 20000  # terminate after this number of steps
     u = 0
 
-    cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]] = {}
+    cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor,
+                                               torch.Tensor]]] = {}
 
     while t < T and u < max_u:
         # fmt: off
-        current_encoder_out = encoder_out[:, t:t+1, :]
+        current_encoder_out = encoder_out[:, t:t + 1, :]
         # fmt: on
         A = B
         B = []
@@ -152,9 +155,8 @@ def beam_search(
             cached_key = "_".join(map(str, y_star.ys))
 
             if cached_key not in cache:
-                decoder_input = torch.tensor([y_star.ys[-1]], device=device).reshape(
-                    1, 1
-                )
+                decoder_input = torch.tensor([y_star.ys[-1]],
+                                             device=device).reshape(1, 1)
 
                 decoder_out, decoder_state = model.transducer.decoder(
                     decoder_input,
@@ -176,7 +178,8 @@ def beam_search(
             # First, choose blank
             skip_log_prob = log_prob[blank_id]
             new_y_star_log_prob = y_star.log_prob + skip_log_prob.item()
-
+            # print("tuAB0", t, u, len(y_star.ys), y_star.log_prob,
+            #       skip_log_prob.item(), new_y_star_log_prob)
             # ys[:] returns a copy of ys
             new_y_star = Hypothesis(
                 ys=y_star.ys[:],
@@ -186,8 +189,13 @@ def beam_search(
             )
             B.append(new_y_star)
 
+            topk_log_prob = log_prob.topk(beam, dim=-1)
+
             # Second, choose other labels
-            for i, v in enumerate(log_prob.tolist()):
+            #for i, v in enumerate(log_prob.tolist()):
+            for v, i in zip(*topk_log_prob):
+                v = v.item()
+                i = i.item()
                 if i == blank_id:
                     continue
                 new_ys = y_star.ys + [i]
@@ -202,15 +210,23 @@ def beam_search(
         # check whether B contains more than "beam" elements more probable
         # than the most probable in A
         A_most_probable = max(A, key=lambda hyp: hyp.log_prob)
+        #print("tuAB1", t, u, len(A), A_most_probable.log_prob, len(B))
         B = sorted(
             [hyp for hyp in B if hyp.log_prob > A_most_probable.log_prob],
             key=lambda hyp: hyp.log_prob,
            reverse=True,
         )
+        # print("tuAB2",
+        #       t,
+        #       u,
+        #       len(A),
+        #       A_most_probable.log_prob,
+        #       len(B),
+        #       flush=True)
         if len(B) >= beam:
             B = B[:beam]
             break
 
         t += 1
 
     best_hyp = max(B, key=lambda hyp: hyp.log_prob / len(hyp.ys[1:]))
     ys = best_hyp.ys[1:]  # [1:] to remove the blank
-    return ys
\ No newline at end of file
+    return ys
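[Editor's note] The substantive change in beam_search above replaces a loop over the entire vocabulary with log_prob.topk(beam): since at most `beam` hypotheses survive the pruning against A_most_probable, only the `beam` highest-scoring non-blank expansions of y_star can ever matter, so the two loops keep the same candidates. A standalone toy check of that equivalence (not hyperion code):

import torch

log_prob = torch.log_softmax(torch.randn(100), dim=-1)
beam = 5

# old style: score every token, then keep the beam best
full = sorted(enumerate(log_prob.tolist()), key=lambda kv: kv[1], reverse=True)[:beam]
# new style: ask for the beam best directly; topk returns (values, indices),
# which is why the patched loop iterates "for v, i in zip(*topk_log_prob)"
pruned = [(i.item(), v.item()) for v, i in zip(*log_prob.topk(beam, dim=-1))]

assert [i for i, _ in full] == [i for i, _ in pruned]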
From 3bfb7f08a672ffdbae6511e965227992ba96c09e Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Mon, 26 Dec 2022 05:46:48 -0500
Subject: [PATCH 066/154] updated binaries to train w2v2 x-vectors

---
 hyperion/bin/extract_wav2vec2xvectors.py      | 32 +++++++++++++--
 hyperion/bin/finetune_wav2vec2xvector.py      | 40 ++++++-------------
 hyperion/bin/finetune_xvector_from_wav.py     |  2 -
 hyperion/bin/train_wav2vec2xvector.py         | 31 --------------
 hyperion/torch/data/audio_dataset.py          |  8 ++++
 .../data/class_weighted_seg_chunk_sampler.py  |  1 -
 .../models/wav2xvectors/hf_wav2xvector.py     |  4 ++
 hyperion/torch/models/xvectors/xvector.py     |  2 +-
 8 files changed, 53 insertions(+), 67 deletions(-)

diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py
index cfa28f0a..a09e5c11 100755
--- a/hyperion/bin/extract_wav2vec2xvectors.py
+++ b/hyperion/bin/extract_wav2vec2xvectors.py
@@ -19,6 +19,7 @@
 import pandas as pd
 
 import torch
+import torchaudio.transforms as tat
 
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.utils import Utt2Info
@@ -30,6 +31,25 @@ from hyperion.torch.utils import open_device
 from hyperion.torch import TorchModelLoader as TML
 
+resamplers = {}
+
+
+def get_resampler(source_fs, target_fs):
+    if source_fs in resamplers:
+        return resamplers[source_fs]
+
+    resampler = tat.Resample(
+        int(source_fs),
+        int(target_fs),
+        lowpass_filter_width=64,
+        rolloff=0.9475937167399596,
+        resampling_method="kaiser_window",
+        beta=14.769656459379492,
+    )
+    resampler_f = lambda x: resampler(torch.from_numpy(x)).numpy()
+    resamplers[source_fs] = resampler_f
+    return resampler_f
+
 
 def init_device(use_gpu):
     set_float_cpu("float32")
@@ -102,7 +122,7 @@ def extract_xvectors(
     num_augs,
     aug_info_path,
     use_gpu,
-    **kwargs
+    **kwargs,
 ):
 
     rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"])
@@ -122,12 +142,11 @@ def extract_xvectors(
         num_augs = 1
 
     ar_args = AR.filter_args(**kwargs)
+    ar_args["wav_scale"] = 1.0
     logging.info("opening output stream: %s", output_spec)
     with DWF.create(output_spec, scp_sep=scp_sep) as writer:
 
-        logging.info(
-            "opening input stream: {} with args={}".format(input_spec, ar_args)
-        )
+        logging.info(f"opening input stream: {input_spec} with args={ar_args}")
         with AR(input_spec, **ar_args) as reader:
 
             if vad_spec is not None:
@@ -146,6 +165,11 @@ def extract_xvectors(
                 key0 = key[0]
                 fs = fs[0]
                 t2 = time.time()
+                if fs != model.sample_frequency:
+                    resampler = get_resampler(fs, model.sample_frequency)
+                    print(f"x01 {x0.shape} {np.max(x0)}")
+                    x0 = resampler(x0)
+                    print(f"x01 {x0.shape} {np.max(x0)}")
 
                 logging.info("processing utt %s", key0)
                 for aug_id in range(num_augs):
diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py
index 25722b35..718aeeb9 100755
--- a/hyperion/bin/finetune_wav2vec2xvector.py
+++ b/hyperion/bin/finetune_wav2vec2xvector.py
@@ -25,7 +25,8 @@
 from hyperion.torch.utils import ddp
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.data import AudioDataset as AD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SegSamplerFactory
+
 from hyperion.torch.metrics import CategoricalAccuracy
 from hyperion.torch.models import (
     HFWav2Vec2ResNet1dXVector,
@@ -45,19 +46,21 @@ def init_data(partition, rank, num_gpus, **kwargs):
 
     kwargs = kwargs["data"][partition]
     ad_args = AD.filter_args(**kwargs["dataset"])
-    sampler_args = Sampler.filter_args(**kwargs["sampler"])
+    sampler_args = kwargs["sampler"]
     if rank == 0:
         logging.info("{} audio dataset args={}".format(partition, ad_args))
         logging.info("{} sampler args={}".format(partition, sampler_args))
         logging.info("init %s dataset", partition)
 
-    ad_args["is_val"] = partition == "val"
+    is_val = partition == "val"
+    ad_args["is_val"] = is_val
+    sampler_args["shuffle"] = not is_val
     dataset = AD(**ad_args)
 
     if rank == 0:
         logging.info("init %s samplers", partition)
 
-    sampler = Sampler(dataset, **sampler_args)
+    sampler = SegSamplerFactory.create(dataset, **sampler_args)
 
     if rank == 0:
         logging.info("init %s dataloader", partition)
@@ -71,18 +74,6 @@ def init_data(partition, rank, num_gpus, **kwargs):
     return data_loader
 
 
-# def init_model(num_classes, in_model_file, rank, **kwargs):
-#     xvec_args = kwargs["model"]["xvector"]
-#     if rank == 0:
-#         logging.info("xvector network ft args={}".format(xvec_args))
-#     xvec_args["num_classes"] = num_classes
-#     model = TML.load(in_model_file)
-#     model.rebuild_output_layer(**xvec_args)
-#     if rank == 0:
-#         logging.info("model={}".format(model))
-#     return model
-
-
 def init_model(num_classes, in_model_file, rank, **kwargs):
     model_args = kwargs["model"]
     if rank == 0:
@@ -127,7 +118,7 @@ def train_model(gpu_id, args):
     train_loader = init_data(partition="train", **kwargs)
     val_loader = init_data(partition="val", **kwargs)
 
-    model = init_model(train_loader.dataset.num_classes, **kwargs)
+    model = init_model(list(train_loader.dataset.num_classes.values())[0], **kwargs)
 
     init_hard_prototype_mining(model, train_loader, val_loader, rank)
     trn_args = Trainer.filter_args(**kwargs["trainer"])
@@ -135,11 +126,7 @@ def train_model(gpu_id, args):
         logging.info("trainer args={}".format(trn_args))
     metrics = {"acc": CategoricalAccuracy()}
     trainer = Trainer(
-        model,
-        device=device,
-        metrics=metrics,
-        ddp=world_size > 1,
-        **trn_args,
+        model, device=device, metrics=metrics, ddp=world_size > 1, **trn_args,
     )
     trainer.load_last_checkpoint()
     trainer.fit(train_loader, val_loader)
@@ -153,7 +140,7 @@ def make_parser(model_class):
     parser.add_argument("--cfg", action=ActionConfigFile)
     train_parser = ArgumentParser(prog="")
     AD.add_class_args(train_parser, prefix="dataset", skip={})
-    Sampler.add_class_args(train_parser, prefix="sampler")
+    SegSamplerFactory.add_class_args(train_parser, prefix="sampler")
     train_parser.add_argument(
         "--data_loader.num-workers",
         type=int,
@@ -163,7 +150,7 @@ def make_parser(model_class):
 
     val_parser = ArgumentParser(prog="")
     AD.add_class_args(val_parser, prefix="dataset", skip={})
-    Sampler.add_class_args(val_parser, prefix="sampler")
+    SegSamplerFactory.add_class_args(val_parser, prefix="sampler")
     val_parser.add_argument(
         "--data_loader.num-workers",
         type=int,
@@ -175,14 +162,11 @@ def make_parser(model_class):
     data_parser.add_argument("--val", action=ActionParser(parser=val_parser))
     parser.add_argument("--data", action=ActionParser(parser=data_parser))
     parser.link_arguments(
-        "data.train.dataset.class_file", "data.val.dataset.class_file"
+        "data.train.dataset.class_files", "data.val.dataset.class_files"
     )
     parser.link_arguments(
         "data.train.data_loader.num_workers", "data.val.data_loader.num_workers"
     )
-    parser.link_arguments(
-        "data.train.sampler.batch_size", "data.val.sampler.batch_size"
-    )
 
     parser.add_argument("--in-model-file", required=True)
     model_class.add_finetune_args(parser, prefix="model")
diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py
index 0f23fb0a..c6239b45 100755
--- a/hyperion/bin/finetune_xvector_from_wav.py
+++ b/hyperion/bin/finetune_xvector_from_wav.py
@@ -21,11 +21,9 @@
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch.utils import ddp
 
-# from hyperion.torch.models import XVector as XVec
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.data import AudioDataset as AD
 
-# from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
 from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.data import SegSamplerFactory
 from hyperion.torch.metrics import CategoricalAccuracy
diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py
index f1281904..7187c13c 100755
--- a/hyperion/bin/train_wav2vec2xvector.py
+++ b/hyperion/bin/train_wav2vec2xvector.py
@@ -27,7 +27,6 @@
 from hyperion.torch.data import AudioDataset as AD
 from hyperion.torch.data import SegSamplerFactory
 
-# from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
 from hyperion.torch.metrics import CategoricalAccuracy
 from hyperion.torch.models import (
     HFWav2Vec2ResNet1dXVector,
@@ -74,36 +73,6 @@ def init_data(partition, rank, num_gpus, **kwargs):
     return data_loader
 
 
-# def init_data(partition, rank, num_gpus, **kwargs):
-
-#     kwargs = kwargs["data"][partition]
-#     ad_args = AD.filter_args(**kwargs["dataset"])
-#     sampler_args = Sampler.filter_args(**kwargs["sampler"])
-#     if rank == 0:
-#         logging.info("{} audio dataset args={}".format(partition, ad_args))
-#         logging.info("{} sampler args={}".format(partition, sampler_args))
-#         logging.info("init %s dataset", partition)
-
-#     ad_args["is_val"] = partition == "val"
-#     dataset = AD(**ad_args)
-
-#     if rank == 0:
-#         logging.info("init %s samplers", partition)
-
-#     sampler = Sampler(dataset, **sampler_args)
-
-#     if rank == 0:
-#         logging.info("init %s dataloader", partition)
-
-#     num_workers = kwargs["data_loader"]["num_workers"]
-#     num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus)
-#     largs = (
-#         {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {}
-#     )
-#     data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs)
-#     return data_loader
-
-
 def init_model(num_classes, rank, model_class, **kwargs):
     model_args = model_class.filter_args(**kwargs["model"])
     if rank == 0:
diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py
index 439c00ba..a52e7ab3 100644
--- a/hyperion/torch/data/audio_dataset.py
+++ b/hyperion/torch/data/audio_dataset.py
@@ -697,6 +697,14 @@ def __getitem__(self, segment):
         else:
             r = [x]
 
+        # try:
+        #     import soundfile as sf
+
+        #     for i, z in enumerate(r):
+        #         sf.write(f"file_{seg_id}.wav", z, fs, "PCM_16")
+        # except:
+        #     print("soundfile failed", flush=True)
+
         # adds the segment labels
         seg_info = self._get_segment_info(seg_id)
         r.extend(seg_info)
diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py
index 07a61b8f..72b094d0 100644
--- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py
+++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py
@@ -235,7 +235,6 @@ def set_hard_prototypes(self, affinity_matrix):
             if np.all(mask_i == 0):
                 affinity_matrix[:, i] = -1000
 
-        # affinity_matrix[np.diag(affinity_matrix.shape[0])] = -1.0
         # hard prototypes for a class are itself and k-1 closest to it.
         self.hard_prototypes = torch.topk(
             affinity_matrix, self.num_hard_prototypes, dim=-1
diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py
index 3fed7143..bd1ec4cd 100644
--- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py
+++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py
@@ -83,6 +83,10 @@ def _fuse_hid_feats(self, hid_feats):
 
         return feats
 
+    @property
+    def sample_frequency(self):
+        return self.hf_feats.sample_frequency
+
     def compute_prototype_affinity(self):
         return self.xvector.compute_prototype_affinity()
 
diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py
index 15f0ce86..2939db5b 100644
--- a/hyperion/torch/models/xvectors/xvector.py
+++ b/hyperion/torch/models/xvectors/xvector.py
@@ -892,7 +892,7 @@ def add_finetune_args(parser, prefix=None):
         parser.add_argument(
             "--num-subcenters",
             default=2,
-            type=float,
+            type=int,
             help="number of subcenters in subcenter losses",
         )
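[Editor's note] The resampler cache added to extract_wav2vec2xvectors.py above is keyed on source_fs alone, which is harmless here because target_fs is always model.sample_frequency, but a process mixing target rates would silently reuse the wrong resampler. A standalone sketch of the same idea keyed on the (source, target) pair; the filter parameters are illustrative, not the tuned values from the patch:

import numpy as np
import torch
import torchaudio.transforms as tat

_resamplers = {}

def get_resampler(source_fs, target_fs):
    key = (int(source_fs), int(target_fs))
    if key not in _resamplers:
        resampler = tat.Resample(key[0], key[1], lowpass_filter_width=64)
        # wrap so numpy waveforms go in and come out, as in the patch
        _resamplers[key] = lambda x: resampler(torch.from_numpy(x)).numpy()
    return _resamplers[key]

x = np.random.randn(8000).astype("float32")
y = get_resampler(8000, 16000)(x)
assert y.shape[0] == 16000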
From 13dd879c04ebc962df89f3af6ce3379eb3ff9826 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Tue, 27 Dec 2022 10:45:18 -0500
Subject: [PATCH 067/154] update configs for w2v

---
 .../v1.1/conf/train_data_default.yaml         |  28 +++---
 egs/voxceleb/v1.1/conf/val_data_default.yaml  |  28 +++---
 ...c2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...c2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml |  40 ++++++--
 ...c2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml |  39 ++++++--
 ...vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml |  40 ++++++--
 ...vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml |  40 ++++++--
 ...baseplus6l_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...s6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...baseplus9l_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...s9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...lmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml |  43 +++++++++
 ...lmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml |  40 ++++++--
 ...lmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml |  40 ++++++--
 ...lus_linfus_ecapatdnn512x3_stage1_v1.0.yaml |  42 ++++++---
 ...lmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...lmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml |  40 ++++++--
 ...vlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml |  40 ++++++--
 ...wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml |  40 ++++++--
 ...wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml |  40 ++++++--
 egs/voxceleb/v2/run_011_train_xvector.sh      |  18 ++--
 hyperion/torch/trainers/__init__.py           |  18 ++--
 hyperion/torch/trainers/ae_trainer.py         |  86 ++++++++++------
 hyperion/torch/trainers/dvae_trainer.py       |  93 ++++++++++++------
 hyperion/torch/trainers/plda_trainer.py       |  70 +++++++-------
 hyperion/torch/trainers/torch_trainer.py      |  88 ++++++++++--------
 hyperion/torch/trainers/vae_trainer.py        |  85 ++++++++++------
 hyperion/torch/trainers/vq_dvae_trainer.py    |  92 +++++++++++-------
 hyperion/torch/trainers/vq_vae_trainer.py     |  86 ++++++++++------
 .../torch/trainers/xvector_adv_trainer.py     |  73 ++++++++-------
 .../trainers/xvector_adv_trainer_from_wav.py  |  75 ++++++++-------
 hyperion/torch/trainers/xvector_trainer.py    |  70 +++++++-------
 .../trainers/xvector_trainer_deep_feat_reg.py |  69 +++++++-------
 .../xvector_trainer_deep_feat_reg_from_wav.py |  79 +++++++++------
 .../trainers/xvector_trainer_from_wav.py      |  69 +++++++-------
 hyperion/torch/utils/devices.py               |  45 +++++++++
 hyperion/utils/__init__.py                    |  23 +++--
 hyperion/utils/ext_segment_list.py            |   4 +-
 hyperion/utils/fold_list.py                   |   2 +-
 hyperion/utils/info_table.py                  |   2 +-
 hyperion/utils/kaldi_matrix.py                |   1 +
 hyperion/utils/list_utils.py                  |   5 +-
 hyperion/utils/misc.py                        |  27 ++++++
 hyperion/utils/plotting.py                    |   6 +-
 hyperion/utils/queues.py                      |   8 +-
 hyperion/utils/rttm.py                        |   2 +-
 hyperion/utils/scp_list.py                    |   2 +-
 hyperion/utils/segment_list.py                |   2 +-
 hyperion/utils/sparse_trial_key.py            |   4 +-
 hyperion/utils/sparse_trial_scores.py         |  13 +--
 hyperion/utils/time_units.py                  |   1 +
 hyperion/utils/train_val_eval_list.py         |   2 +-
 hyperion/utils/trial_key.py                   |   4 +-
 hyperion/utils/trial_ndx.py                   |   4 +-
 hyperion/utils/trial_scores.py                |   8 +-
 hyperion/utils/trial_stats.py                 |   6 +-
 hyperion/utils/utt2info.py                    |   2 +-
 59 files changed, 1395 insertions(+), 709 deletions(-)

diff --git a/egs/voxceleb/v1.1/conf/train_data_default.yaml b/egs/voxceleb/v1.1/conf/train_data_default.yaml
index acd088e6..1f96d1f6 100644
--- a/egs/voxceleb/v1.1/conf/train_data_default.yaml
+++ b/egs/voxceleb/v1.1/conf/train_data_default.yaml
@@ -1,17 +1,19 @@
 dataset:
-  class_names:
+  dataset:
+    class_names:
     - class_id
-  aug_cfgs:
+    aug_cfgs:
     - conf/reverb_noise_aug.yaml
-  return_segment_info:
+    return_segment_info:
     - class_id
-sampler:
-  sampler_type: class_weighted_random_seg_chunk_sampler
-  batch_size: 32
-  max_chunk_length: 4.0
-  min_chunk_length: 4.0
-  num_chunks_per_seg_epoch: 6
-  class_name: class_id
-data_loader:
-  num_workers: 8
-  
\ No newline at end of file
+  sampler:
+    sampler_type: class_weighted_random_seg_chunk_sampler
+    min_batch_size: 32
+    max_chunk_length: 4.0
+    min_chunk_length: 4.0
+    num_chunks_per_seg_epoch: 6
+    class_name: class_id
+    seg_weight_mode: uniform
+    num_hard_prototypes: 0
+  data_loader:
+    num_workers: 8
diff --git a/egs/voxceleb/v1.1/conf/val_data_default.yaml b/egs/voxceleb/v1.1/conf/val_data_default.yaml
index acd088e6..1f96d1f6 100644
--- a/egs/voxceleb/v1.1/conf/val_data_default.yaml
+++ b/egs/voxceleb/v1.1/conf/val_data_default.yaml
@@ -1,17 +1,19 @@
 dataset:
-  class_names:
+  dataset:
+    class_names:
    - class_id
-  aug_cfgs:
+    aug_cfgs:
     - conf/reverb_noise_aug.yaml
-  return_segment_info:
+    return_segment_info:
     - class_id
-sampler:
-  sampler_type: class_weighted_random_seg_chunk_sampler
-  batch_size: 32
-  max_chunk_length: 4.0
-  min_chunk_length: 4.0
-  num_chunks_per_seg_epoch: 6
-  class_name: class_id
-data_loader:
-  num_workers: 8
-  
\ No newline at end of file
+  sampler:
+    sampler_type: class_weighted_random_seg_chunk_sampler
+    min_batch_size: 32
+    max_chunk_length: 4.0
+    min_chunk_length: 4.0
+    num_chunks_per_seg_epoch: 6
+    class_name: class_id
+    seg_weight_mode: uniform
+    num_hard_prototypes: 0
+  data_loader:
+    num_workers: 8
diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml
index e1d1b1ea..01ad8897 100644
--- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model: wav2vec2xlsr300m_ecapatdnn512x3.yaml
diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml
index 1298a056..90e3b14f 100644
--- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model:
diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml
index fb264a53..7a2f7bba 100644
--- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml
@@ -1,25 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 6.0
-      min_chunk_length: 6.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 16
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 16
+      max_chunk_length: 6.0
+      min_chunk_length: 6.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
       num_hard_prototypes: 8
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 8
     data_loader:
       num_workers: 8
 model:
diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml
index 247f8a7c..f424275d 100644
--- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage1_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
      num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model: wav2vec2xlsr53_ecapatdnn512x3.yaml
diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml
index 1298a056..90e3b14f 100644
--- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model:
diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml
index 2867cfef..69bcc097 100644
--- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 6.0
-      min_chunk_length: 6.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 16
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 16
+      max_chunk_length: 6.0
+      min_chunk_length: 6.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 8
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 8
     data_loader:
       num_workers: 8
 model:
diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml
index 570aad6a..86dec831 100644
--- a/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_ecapatdnn512x3_stage1_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model: wavlmbaseplus6l_ecapatdnn512x3.yaml
diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml
index 9838b855..e22620ca 100644
--- a/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus6l_linfus_ecapatdnn512x3_stage1_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model: wavlmbaseplus6l_linfus_ecapatdnn512x3.yaml
diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml
index 1028f79a..9860abfa 100644
--- a/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model: wavlmbaseplus9l_ecapatdnn512x3.yaml
diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml
index 2c2e5b64..18b910d1 100644
--- a/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_linfus_ecapatdnn512x3_stage1_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model: wavlmbaseplus9l_linfus_ecapatdnn512x3.yaml
diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml
index f62b2e14..34c6e8dc 100644
--- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml
@@ -1,5 +1,48 @@
 data:
   train:
+    dataset:
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
+      wav_scale: 1
+    sampler:
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
+    data_loader:
+      num_workers: 8
+  val:
+    dataset:
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
+      wav_scale: 1
+    sampler:
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
+    data_loader:
+      num_workers: 8
+
+train:
   dataset:
     max_chunk_length: 3.0
     min_chunk_length: 3.0
diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml
index 1298a056..90e3b14f 100644
--- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml
+++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml
@@ -1,24 +1,44 @@
 data:
   train:
     dataset:
-      max_chunk_length: 3.0
-      min_chunk_length: 3.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 3.0
+      min_chunk_length: 3.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
   val:
     dataset:
-      max_chunk_length: 4.0
-      min_chunk_length: 4.0
-      aug_cfg: conf/reverb_noise_aug.yaml
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+      target_sample_freq: 16000
       wav_scale: 1
     sampler:
-      batch_size: 32
-      iters_per_epoch: 6
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+      seg_weight_mode: uniform
+      num_hard_prototypes: 0
     data_loader:
       num_workers: 8
 model:
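[Editor's note] Every config diff in this commit is the same mechanical migration: the old batch_size/iters_per_epoch sampler options become an explicit class_weighted_random_seg_chunk_sampler block. A simplified, hypothetical sketch of what those fields control (uniform seg_weight_mode, num_hard_prototypes: 0); hyperion's real sampler additionally handles class weighting, hard-prototype mining, and distributed sharding:

import random

def sample_epoch(segs, num_chunks_per_seg_epoch=6, min_chunk_length=3.0,
                 max_chunk_length=3.0, min_batch_size=32):
    chunks = []
    for seg_id, dur in segs.items():  # visit each segment a fixed number of times
        for _ in range(num_chunks_per_seg_epoch):
            length = random.uniform(min_chunk_length, max_chunk_length)
            start = random.uniform(0.0, max(0.0, dur - length))
            chunks.append((seg_id, start, length))
    random.shuffle(chunks)
    # with fixed-length chunks, batches are simply groups of min_batch_size
    return [chunks[i:i + min_batch_size]
            for i in range(0, len(chunks), min_batch_size)]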
conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 data_loader: num_workers: 8 val: dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 data_loader: num_workers: 8 model: diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml index eb32ce0c..8c00d0fa 100644 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_linfus_ecapatdnn512x3_stage1_v1.0.yaml @@ -1,24 +1,44 @@ data: train: dataset: - max_chunk_length: 3.0 - min_chunk_length: 3.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 data_loader: - num_workers: 8 + num_workers: 0 val: dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 model: wavlmbaseplus_linfus_ecapatdnn512x3.yaml diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml index 895bcb2b..ad699556 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v1.0.yaml @@ -1,24 +1,44 @@ data: train: dataset: - max_chunk_length: 3.0 - min_chunk_length: 3.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 val: dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: 
conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 model: wavlmlarge12l_ecapatdnn512x3.yaml diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml index 1721e337..69bcc097 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml @@ -1,24 +1,44 @@ data: train: dataset: - max_chunk_length: 6.0 - min_chunk_length: 6.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 data_loader: num_workers: 8 val: dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 data_loader: num_workers: 8 model: diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml index 181d8fd7..9602d562 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge6l_ecapatdnn512x3_stage1_v1.0.yaml @@ -1,24 +1,44 @@ data: train: dataset: - max_chunk_length: 3.0 - min_chunk_length: 3.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 val: dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 model: wavlmlarge6l_ecapatdnn512x3.yaml diff --git 
a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml index 1af241ea..37b085f3 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v1.0.yaml @@ -1,24 +1,44 @@ data: train: dataset: - max_chunk_length: 3.0 - min_chunk_length: 3.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 val: dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 model: wavlmlarge_ecapatdnn512x3.yaml diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml index 1298a056..90e3b14f 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml @@ -1,24 +1,44 @@ data: train: dataset: - max_chunk_length: 3.0 - min_chunk_length: 3.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 val: dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 data_loader: num_workers: 8 model: diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml index 2867cfef..69bcc097 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml @@ -1,24 +1,44 @@ data: train: dataset: - max_chunk_length: 6.0 - min_chunk_length: 6.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + 
target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 16 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 data_loader: num_workers: 8 val: dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 wav_scale: 1 sampler: - batch_size: 32 - iters_per_epoch: 6 + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 data_loader: num_workers: 8 model: diff --git a/egs/voxceleb/v2/run_011_train_xvector.sh b/egs/voxceleb/v2/run_011_train_xvector.sh index dc4e1dee..0eddb1a6 100755 --- a/egs/voxceleb/v2/run_011_train_xvector.sh +++ b/egs/voxceleb/v2/run_011_train_xvector.sh @@ -49,11 +49,11 @@ if [ $stage -le 1 ]; then --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ - --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ - --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ --data.val.dataset.audio-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ - --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_s1_dir $args \ --num-gpus $ngpu @@ -73,11 +73,11 @@ if [ $stage -le 2 ]; then --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ - --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ - --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ --data.val.dataset.audio-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ - --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --in-model-file $nnet_s1 \ --trainer.exp-path $nnet_s2_dir $args \ --num-gpus $ngpu \ @@ -98,11 +98,11 @@ if [ $stage -le 3 ]; then --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ - --data.train.dataset.key-file $list_dir/lists_xvec/train.scp \ - --data.train.dataset.class-file $list_dir/lists_xvec/class2int \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ --data.val.dataset.audio-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ - --data.val.dataset.key-file $list_dir/lists_xvec/val.scp \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --in-model-file $nnet_s2 \ --trainer.exp-path $nnet_s3_dir $args \ --num-gpus $ngpu \ diff --git a/hyperion/torch/trainers/__init__.py 
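The recipe changes above rename --data.*.dataset.key-file to segments-file and pluralize class-file to class-files, matching the dataset's new class_names list. A reduced jsonargparse sketch of the renamed options; the parser built here is illustrative, not the training script's real one, which assembles these dotted keys through nested parsers.

from jsonargparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument("--data.train.dataset.segments-file")
parser.add_argument("--data.train.dataset.class-files", nargs="+")

args = parser.parse_args([
    "--data.train.dataset.segments-file", "lists_xvec/train.scp",
    "--data.train.dataset.class-files", "lists_xvec/class2int",
])
# class-files is plural now: one class file is expected per entry in
# class_names; these recipes use a single class_id class, so the list
# has one element.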
b/hyperion/torch/trainers/__init__.py index 8fef7df5..d8f0e908 100644 --- a/hyperion/torch/trainers/__init__.py +++ b/hyperion/torch/trainers/__init__.py @@ -3,17 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .dvae_trainer import DVAETrainer from .torch_trainer import TorchTrainer - +from .vae_trainer import VAETrainer +from .vq_dvae_trainer import VQDVAETrainer +from .vq_vae_trainer import VQVAETrainer +from .xvector_adv_trainer import XVectorAdvTrainer +from .xvector_adv_trainer_from_wav import XVectorAdvTrainerFromWav from .xvector_trainer import XVectorTrainer from .xvector_trainer_deep_feat_reg import XVectorTrainerDeepFeatReg -from .xvector_adv_trainer import XVectorAdvTrainer - +from .xvector_trainer_deep_feat_reg_from_wav import \ + XVectorTrainerDeepFeatRegFromWav from .xvector_trainer_from_wav import XVectorTrainerFromWav -from .xvector_trainer_deep_feat_reg_from_wav import XVectorTrainerDeepFeatRegFromWav -from .xvector_adv_trainer_from_wav import XVectorAdvTrainerFromWav - -from .vae_trainer import VAETrainer -from .dvae_trainer import DVAETrainer -from .vq_vae_trainer import VQVAETrainer -from .vq_dvae_trainer import VQDVAETrainer diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 21d53d32..6faaf684 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -3,14 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os from collections import OrderedDict as ODict -import logging - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .torch_trainer import TorchTrainer @@ -44,7 +45,8 @@ class AETrainer(TorchTrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp - + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -75,38 +77,44 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="x", ): if loss is None: loss = nn.MSELoss() - super().__init__( - model, - loss, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # loss, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) def train_epoch(self, data_loader): """Training epoch loop @@ -191,3 +199,19 @@ def validation_epoch(self, data_loader, swa_update_bn=False): logs = metric_acc.metrics logs = ODict((log_tag + k, v) for k, v in logs.items()) return logs + + @staticmethod + def add_class_args(parser, prefix=None, train_modes=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + super().add_class_args(parser, train_modes, skip=skip.union({"target_key"})) + if "target_key" not in skip: + parser.add_argument( + "--target-key", default="x", help="dict. key for nnet targets" + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index b75a94ab..3300d152 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -3,14 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os from collections import OrderedDict as ODict -import logging - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .torch_trainer import TorchTrainer @@ -43,7 +44,8 @@ class DVAETrainer(TorchTrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp - + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -73,36 +75,40 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x_aug", + target_key="x", ): - - super().__init__( - model, - None, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # None, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) def train_epoch(self, data_loader): """Training epoch loop @@ -203,3 +209,26 @@ def validation_epoch(self, data_loader, swa_update_bn=False): logs = metric_acc.metrics logs = ODict((log_tag + k, v) for k, v in logs.items()) return logs + + @staticmethod + def add_class_args(parser, prefix=None, train_modes=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + super().add_class_args( + parser, train_modes, skip=skip.union({"input_key", "target_key"}) + ) + if "input_key" not in skip: + parser.add_argument( + "--input-key", default="x_aug", help="dict. key for nnet input" + ) + + if "target_key" not in skip: + parser.add_argument( + "--target-key", default="x", help="dict. key for nnet targets" + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/plda_trainer.py b/hyperion/torch/trainers/plda_trainer.py index ea5e57af..1c27c30d 100644 --- a/hyperion/torch/trainers/plda_trainer.py +++ b/hyperion/torch/trainers/plda_trainer.py @@ -2,17 +2,17 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os from collections import OrderedDict as ODict -import logging - import torch import torch.nn as nn +from ...utils.misc import filter_func_args +from ..losses import BCEWithLLR from ..utils import MetricAcc from ..utils.misc import get_selfsim_tarnon -from ..losses import BCEWithLLR from .torch_trainer import TorchTrainer @@ -48,6 +48,8 @@ class PLDATrainer(TorchTrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -80,38 +82,44 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="class_id", ): if loss is None: loss = nn.CrossEntropyLoss() - super().__init__( - model, - loss, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # loss, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) self.loss_bce = BCEWithLLR(p_tar) self.loss_weights = loss_weights diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 5f573904..5cadd57c 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -3,28 +3,28 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os -import math import contextlib +import logging +import math +import os from collections import OrderedDict as ODict from enum import Enum -from jsonargparse import ArgumentParser, ActionParser -import logging from pathlib import Path import torch -import torch.nn as nn import torch.cuda.amp as amp -from torch.optim.swa_utils import AveragedModel, SWALR import torch.distributed as dist - +import torch.nn as nn from fairscale.optim.grad_scaler import ShardedGradScaler +from jsonargparse import ActionParser, ArgumentParser +from torch.optim.swa_utils import SWALR, AveragedModel -from ..utils import MetricAcc, TorchDDP, FairShardedDDP, FairFullyShardedDDP -from ..loggers import LoggerList, CSVLogger, ProgLogger, TensorBoardLogger, WAndBLogger -from ..optim import OptimizerFactory as OF -from ..lr_schedulers import LRSchedulerFactory as LRSF +from ...utils.misc import filter_func_args +from ..loggers import CSVLogger, LoggerList, ProgLogger, TensorBoardLogger, WAndBLogger from ..lr_schedulers import LRScheduler as LRS +from ..lr_schedulers import LRSchedulerFactory as LRSF +from ..optim import OptimizerFactory as OF +from ..utils import FairFullyShardedDDP, FairShardedDDP, MetricAcc, TorchDDP class DDPType(str, Enum): @@ -66,6 +66,8 @@ class TorchTrainer(object): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -96,6 +98,8 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="class_id", ): self.model = model @@ -126,6 +130,8 @@ def __init__( self.swa_lr = swa_lr self.swa_anneal_epochs = swa_anneal_epochs self.amp_args = {} + self.input_key = input_key + self.target_key = target_key self.set_train_mode() @@ -150,9 +156,7 @@ def __init__( oss = False if ddp_type == DDPType.DDP else True self.optimizer = self._make_optimizer(optim, self.model, oss=oss) self.model = TorchDDP( - self.model, - device_ids=[device], - output_device=device, + self.model, device_ids=[device], output_device=device, ) elif ddp_type == DDPType.OSS_SHARDED_DDP: self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) @@ -616,32 +620,34 @@ def load_last_checkpoint(self): @staticmethod def filter_args(**kwargs): - valid_args = ( - "grad_acc_steps", - "eff_batch_size", - "epochs", - "log_interval", - "use_amp", - "ddp_type", - "grad_clip", - "grad_clip_norm", - "swa_start", - "swa_lr", - "swa_anneal_epochs", - "exp_path", - "optim", - "lrsched", - "cpu_offload", - "use_tensorboard", - "use_wandb", - "wandb", - "train_mode", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args = filter_func_args(TorchTrainer.__init__, kwargs) + + # valid_args = ( + # "grad_acc_steps", + # "eff_batch_size", + # "epochs", + # "log_interval", + # "use_amp", + # "ddp_type", + # "grad_clip", + # "grad_clip_norm", + # "swa_start", + # "swa_lr", + # "swa_anneal_epochs", + # "exp_path", + # "optim", + # "lrsched", + # "cpu_offload", + # "use_tensorboard", + # "use_wandb", + # "wandb", + # "train_mode", + # ) + # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) return args @staticmethod - def add_class_args(parser, prefix=None, train_modes=None, skip=[]): + def add_class_args(parser, prefix=None, train_modes=None, skip={}): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") @@ -744,6 +750,14 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=[]): ) parser.add_argument("--exp-path", help="experiment path") + if "input_key" not in skip: + parser.add_argument( + "--input-key", default="x", help="dict. key for nnet input" + ) + if "target_key" not in skip: + parser.add_argument( + "--target-key", default="class_id", help="dict. key for nnet targets" + ) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index 284f07d0..8e75d768 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -3,14 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os from collections import OrderedDict as ODict -import logging - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .torch_trainer import TorchTrainer @@ -43,7 +44,8 @@ class VAETrainer(TorchTrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp - + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -73,36 +75,41 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="x", ): - super().__init__( - model, - None, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # None, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) def train_epoch(self, data_loader): @@ -189,3 +196,19 @@ def validation_epoch(self, data_loader, swa_update_bn=False): logs = metric_acc.metrics logs = ODict((log_tag + k, v) for k, v in logs.items()) return logs + + @staticmethod + def add_class_args(parser, prefix=None, train_modes=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + super().add_class_args(parser, train_modes, skip=skip.union({"target_key"})) + if "target_key" not in skip: + parser.add_argument( + "--target-key", default="x", help="dict. key for nnet targets" + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index 30d2d3b3..bac95b78 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -2,15 +2,16 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os -from collections import OrderedDict as ODict - import logging import math +import os +from collections import OrderedDict as ODict import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .dvae_trainer import DVAETrainer @@ -43,7 +44,8 @@ class VQDVAETrainer(DVAETrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp - + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -73,35 +75,40 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x_aug", + target_key="x", ): - super().__init__( - model, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) def train_epoch(self, data_loader): @@ -199,3 +206,26 @@ def validation_epoch(self, data_loader, swa_update_bn=False): logs = metric_acc.metrics logs = ODict((log_tag + k, v) for k, v in logs.items()) return logs + + @staticmethod + def add_class_args(parser, prefix=None, train_modes=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + super().add_class_args( + parser, train_modes, skip=skip.union({"input_key", "target_key"}) + ) + if "input_key" not in skip: + parser.add_argument( + "--input-key", default="x_aug", help="dict. key for nnet input" + ) + + if "target_key" not in skip: + parser.add_argument( + "--target-key", default="x", help="dict. key for nnet targets" + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index c484b5c7..c4b046c0 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -2,15 +2,16 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os -from collections import OrderedDict as ODict - import logging import math +import os +from collections import OrderedDict as ODict import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .vae_trainer import VAETrainer @@ -43,7 +44,8 @@ class VQVAETrainer(VAETrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp - + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -73,35 +75,39 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="x", ): - - super().__init__( - model, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) def train_epoch(self, data_loader): @@ -199,3 +205,19 @@ def validation_epoch(self, data_loader, swa_update_bn=False): logs = metric_acc.metrics logs = ODict((log_tag + k, v) for k, v in logs.items()) return logs + + @staticmethod + def add_class_args(parser, prefix=None, train_modes=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + super().add_class_args(parser, train_modes, skip=skip.union({"target_key"})) + if "target_key" not in skip: + parser.add_argument( + "--target-key", default="x", help="dict. key for nnet targets" + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 961597e5..22971deb 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -2,16 +2,16 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os -from collections import OrderedDict as ODict - import time -import logging -from jsonargparse import ArgumentParser, ActionParser +from collections import OrderedDict as ODict import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .xvector_trainer import XVectorTrainer @@ -48,6 +48,8 @@ class XVectorAdvTrainer(XVectorTrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -81,36 +83,41 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="class_id", ): - super().__init__( - model, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - loss=loss, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # loss=loss, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) self.attack = attack self.attack.to(device) @@ -230,7 +237,7 @@ def filter_args(**kwargs): return args @staticmethod - def add_class_args(parser, prefix=None, skip=[]): + def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 036ee46e..ac28b95a 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -2,16 +2,16 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os -from collections import OrderedDict as ODict - import time -import logging -from jsonargparse import ArgumentParser, ActionParser +from collections import OrderedDict as ODict import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .xvector_trainer_from_wav import XVectorTrainerFromWav @@ -49,6 +49,8 @@ class XVectorAdvTrainerFromWav(XVectorTrainerFromWav): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -83,37 +85,42 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="class_id", ): - super().__init__( - model, - feat_extractor, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - loss=loss, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # feat_extractor, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # loss=loss, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) self.attack = attack self.attack.to(device) @@ -239,7 +246,7 @@ def filter_args(**kwargs): return args @staticmethod - def add_class_args(parser, prefix=None, skip=[]): + def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 9b04fdd0..6703ea5d 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -2,17 +2,17 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os from collections import OrderedDict as ODict -import logging - import torch import torch.nn as nn +from torch.distributed.elastic.multiprocessing.errors import record +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .torch_trainer import TorchTrainer -from torch.distributed.elastic.multiprocessing.errors import record class XVectorTrainer(TorchTrainer): @@ -45,6 +45,8 @@ class XVectorTrainer(TorchTrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -75,38 +77,44 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="class_id", ): if loss is None: loss = nn.CrossEntropyLoss() - super().__init__( - model, - loss, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # loss, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) @record def train_epoch(self, data_loader): diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 1c0c26b7..fdb2627e 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -2,15 +2,15 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os -from jsonargparse import ArgumentParser, ActionParser from collections import OrderedDict as ODict -import logging - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .xvector_trainer import XVectorTrainer @@ -50,6 +50,8 @@ class XVectorTrainerDeepFeatReg(XVectorTrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -86,36 +88,41 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="class_id", ): - super().__init__( - model, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - loss=loss, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # loss=loss, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) self.prior_model = prior_model if reg_loss is None or reg_loss == "l1": diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 1ad4d24a..07882f31 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -2,14 +2,14 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os from collections import OrderedDict as ODict -import logging - import torch import torch.nn as nn +from ...utils.misc import filter_func_args from ..utils import MetricAcc from .torch_trainer import TorchTrainer from .xvector_trainer_deep_feat_reg import XVectorTrainerDeepFeatReg @@ -50,6 +50,8 @@ class XVectorTrainerDeepFeatRegFromWav(XVectorTrainerDeepFeatReg): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -87,42 +89,47 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="class_id", ): - super().__init__( - model, - prior_model, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - reg_layers_enc=reg_layers_enc, - reg_layers_classif=reg_layers_classif, - reg_weight_enc=reg_weight_enc, - reg_weight_classif=reg_weight_classif, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - loss=loss, - reg_loss=reg_loss, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # prior_model, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # reg_layers_enc=reg_layers_enc, + # reg_layers_classif=reg_layers_classif, + # reg_weight_enc=reg_weight_enc, + # reg_weight_classif=reg_weight_classif, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # loss=loss, + # reg_loss=reg_loss, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) self.feat_extractor = feat_extractor if device is not None: diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 64a1d187..dfbd8e00 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -2,14 +2,14 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os from collections import OrderedDict as ODict -import logging - import torch import torch.nn as nn +from ...utils.misc import filter_func_args from ..utils import MetricAcc, TorchDDP from .xvector_trainer import XVectorTrainer @@ -44,6 +44,8 @@ class XVectorTrainerFromWav(XVectorTrainer): swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. 
""" def __init__( @@ -75,44 +77,45 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="class_id", ): - super().__init__( - model, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - loss=loss, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + # super().__init__( + # model, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # loss=loss, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) self.feat_extractor = feat_extractor if device is not None: self.feat_extractor.to(device) - # if ddp: - # self.feat_extractor = TorchDDP(self.feat_extractor) - def train_epoch(self, data_loader): """Training epoch loop diff --git a/hyperion/torch/utils/devices.py b/hyperion/torch/utils/devices.py index 16c61a48..cb77f1e5 100644 --- a/hyperion/torch/utils/devices.py +++ b/hyperion/torch/utils/devices.py @@ -42,3 +42,48 @@ def find_free_gpus(num_gpus): except: gpu_ids = "0" return gpu_ids + + +def tensors_to_device(data, device): + if isinstance(data, dict): + for k in data: + data[k] = data[k].to(device) + elif isinstance(data, list): + for i, value in enumerate(data): + data[i] = value.to(device) + elif isinstance(data, torch.Tensor): + data = data.to(device) + else: + raise Exception(f"Unknown data type for {data}") + + return data + + +def tensors_to_cpu(data): + if isinstance(data, dict): + for k in data: + data[k] = data[k].cpu() + elif isinstance(data, list): + for i, value in enumerate(data): + data[i] = value.cpu() + elif isinstance(data, torch.Tensor): + data = data.cpu() + else: + raise Exception(f"Unknown data type for {data}") + + return data + + +def tensors_to_numpy(data): + if isinstance(data, dict): + for k in data: + data[k] = data[k].cpu().numpy() + elif isinstance(data, list): + for i, value in enumerate(data): + data[i] = value.cpu().numpy() + elif isinstance(data, torch.Tensor): + data = data.cpu().numpy() + else: + raise Exception(f"Unknown data type for {data}") + + return data diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index 251361ae..67f492f9 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -3,19 +3,18 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .trial_ndx import TrialNdx -from .trial_key import TrialKey -from .trial_scores import TrialScores -from .sparse_trial_key import SparseTrialKey -from .sparse_trial_scores import SparseTrialScores +from .class_info import ClassInfo +from .feature_set import FeatureSet +from .kaldi_matrix import 
KaldiCompressedMatrix, KaldiMatrix +from .recording_set import RecordingSet +from .rttm import RTTM from .scp_list import SCPList -from .utt2info import Utt2Info - # from .ext_segment_list import ExtSegmentList from .segment_list import SegmentList -from .kaldi_matrix import KaldiMatrix, KaldiCompressedMatrix -from .rttm import RTTM -from .recording_set import RecordingSet -from .class_info import ClassInfo from .segment_set import SegmentSet -from .feature_set import FeatureSet +from .sparse_trial_key import SparseTrialKey +from .sparse_trial_scores import SparseTrialScores +from .trial_key import TrialKey +from .trial_ndx import TrialNdx +from .trial_scores import TrialScores +from .utt2info import Utt2Info diff --git a/hyperion/utils/ext_segment_list.py b/hyperion/utils/ext_segment_list.py index 9c7d81d3..132cf7ff 100644 --- a/hyperion/utils/ext_segment_list.py +++ b/hyperion/utils/ext_segment_list.py @@ -3,10 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os.path as path import logging -from copy import deepcopy +import os.path as path from collections import OrderedDict +from copy import deepcopy import numpy as np import pandas as pd diff --git a/hyperion/utils/fold_list.py b/hyperion/utils/fold_list.py index d5731f10..f22263cf 100644 --- a/hyperion/utils/fold_list.py +++ b/hyperion/utils/fold_list.py @@ -5,8 +5,8 @@ Class to make/read/write k-fold x-validation lists """ -import os.path as path import logging +import os.path as path from collections import OrderedDict from copy import deepcopy diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 80199a33..f2262217 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -3,10 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from pathlib import Path import logging from collections import OrderedDict from copy import deepcopy +from pathlib import Path import numpy as np import pandas as pd diff --git a/hyperion/utils/kaldi_matrix.py b/hyperion/utils/kaldi_matrix.py index 11726cc7..c8e26cbb 100644 --- a/hyperion/utils/kaldi_matrix.py +++ b/hyperion/utils/kaldi_matrix.py @@ -6,6 +6,7 @@ """ import struct + import numpy as np from ..hyp_defs import float_cpu diff --git a/hyperion/utils/list_utils.py b/hyperion/utils/list_utils.py index 6e805a25..4375183d 100644 --- a/hyperion/utils/list_utils.py +++ b/hyperion/utils/list_utils.py @@ -5,9 +5,10 @@ Utilities for lists. 
""" -import numpy as np -from operator import itemgetter from itertools import groupby +from operator import itemgetter + +import numpy as np def list2ndarray(a, dtype=None): diff --git a/hyperion/utils/misc.py b/hyperion/utils/misc.py index c185b9a3..b9bdf12b 100644 --- a/hyperion/utils/misc.py +++ b/hyperion/utils/misc.py @@ -4,6 +4,7 @@ Miscellaneous functions """ +from inspect import signature import numpy as np @@ -88,3 +89,29 @@ def filter_args(valid_args, kwargs): Dictionary with only valid_args keys if they exists """ return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + +def filter_func_args(func, kwargs, skip=set()): + """Filters arguments expected by a function + + Args: + func: function object + kwargs: dictionary containing arguments + skip: set with keys of func arguments to remove from kwargs + + Returns + Dictionary with arguments expected by the target function + """ + sig = signature(func) + valid_args = sig.parameters.keys() + skip.add("self") + for param in skip: + if param in kwargs: + del kwargs[param] + + my_kwargs = filter_args(valid_args, kwargs) + if "kwargs" in kwargs: + my_kwargs.update(kwargs["kwargs"]) + + args = sig.bind(**my_kwargs).arguments + return args diff --git a/hyperion/utils/plotting.py b/hyperion/utils/plotting.py index 7b87dbee..2341beb4 100644 --- a/hyperion/utils/plotting.py +++ b/hyperion/utils/plotting.py @@ -3,13 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np -import scipy.linalg as la - import matplotlib - # matplotlib.use('Agg') import matplotlib.pyplot as plt +import numpy as np +import scipy.linalg as la import scipy.stats as stats from mpl_toolkits.mplot3d import Axes3D as plt3d diff --git a/hyperion/utils/queues.py b/hyperion/utils/queues.py index ad4298be..8bfd0166 100644 --- a/hyperion/utils/queues.py +++ b/hyperion/utils/queues.py @@ -3,15 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import warnings import copy -import time -import numpy as np import multiprocessing import threading -import six +import time +import warnings from abc import abstractmethod +import numpy as np +import six try: import queue diff --git a/hyperion/utils/rttm.py b/hyperion/utils/rttm.py index 2ff3a4b0..c691fc17 100644 --- a/hyperion/utils/rttm.py +++ b/hyperion/utils/rttm.py @@ -9,8 +9,8 @@ import pandas as pd from .list_utils import * -from .vad_utils import * from .segment_list import SegmentList +from .vad_utils import * class RTTM(object): diff --git a/hyperion/utils/scp_list.py b/hyperion/utils/scp_list.py index 8109d905..5abf76f2 100644 --- a/hyperion/utils/scp_list.py +++ b/hyperion/utils/scp_list.py @@ -3,11 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os import os.path as path from collections import OrderedDict from copy import deepcopy -import logging import numpy as np diff --git a/hyperion/utils/segment_list.py b/hyperion/utils/segment_list.py index 33b432bd..0151e967 100644 --- a/hyperion/utils/segment_list.py +++ b/hyperion/utils/segment_list.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os.path as path import logging +import os.path as path from copy import deepcopy import numpy as np diff --git a/hyperion/utils/sparse_trial_key.py b/hyperion/utils/sparse_trial_key.py index f18dee3b..5afc72a0 100644 --- a/hyperion/utils/sparse_trial_key.py +++ b/hyperion/utils/sparse_trial_key.py @@ -3,15 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os.path as 
path import copy +import os.path as path import numpy as np import scipy.sparse as sparse from .list_utils import * -from .trial_ndx import TrialNdx from .trial_key import TrialKey +from .trial_ndx import TrialNdx class SparseTrialKey(TrialKey): diff --git a/hyperion/utils/sparse_trial_scores.py b/hyperion/utils/sparse_trial_scores.py index 0684c57e..7ed9a1d1 100644 --- a/hyperion/utils/sparse_trial_scores.py +++ b/hyperion/utils/sparse_trial_scores.py @@ -4,22 +4,23 @@ """ -import os.path as path -import logging import copy +import logging +import os.path as path import numpy as np import scipy.sparse as sparse -# import h5py - from ..hyp_defs import float_cpu from .list_utils import * -from .trial_ndx import TrialNdx -from .trial_key import TrialKey from .sparse_trial_key import SparseTrialKey +from .trial_key import TrialKey +from .trial_ndx import TrialNdx from .trial_scores import TrialScores +# import h5py + + class SparseTrialScores(TrialScores): diff --git a/hyperion/utils/time_units.py b/hyperion/utils/time_units.py index f8ed0846..6004329b 100644 --- a/hyperion/utils/time_units.py +++ b/hyperion/utils/time_units.py @@ -3,6 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import math + import numpy as np diff --git a/hyperion/utils/train_val_eval_list.py b/hyperion/utils/train_val_eval_list.py index f8cc4ca0..fd17e240 100644 --- a/hyperion/utils/train_val_eval_list.py +++ b/hyperion/utils/train_val_eval_list.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os.path as path import logging +import os.path as path from collections import OrderedDict from copy import deepcopy diff --git a/hyperion/utils/trial_key.py b/hyperion/utils/trial_key.py index b22babda..9552d7c0 100644 --- a/hyperion/utils/trial_key.py +++ b/hyperion/utils/trial_key.py @@ -3,11 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os.path as path import copy +import os.path as path -import numpy as np import h5py +import numpy as np from .list_utils import * from .trial_ndx import TrialNdx diff --git a/hyperion/utils/trial_ndx.py b/hyperion/utils/trial_ndx.py index 58a36aa7..e26d19e2 100644 --- a/hyperion/utils/trial_ndx.py +++ b/hyperion/utils/trial_ndx.py @@ -3,11 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os.path as path import copy +import os.path as path -import numpy as np import h5py +import numpy as np from .list_utils import * diff --git a/hyperion/utils/trial_scores.py b/hyperion/utils/trial_scores.py index 164b39df..a486647d 100644 --- a/hyperion/utils/trial_scores.py +++ b/hyperion/utils/trial_scores.py @@ -4,17 +4,17 @@ """ -import os.path as path -import logging import copy +import logging +import os.path as path -import numpy as np import h5py +import numpy as np from ..hyp_defs import float_cpu from .list_utils import * -from .trial_ndx import TrialNdx from .trial_key import TrialKey +from .trial_ndx import TrialNdx class TrialScores(object): diff --git a/hyperion/utils/trial_stats.py b/hyperion/utils/trial_stats.py index 229bad3c..7d9d74d1 100644 --- a/hyperion/utils/trial_stats.py +++ b/hyperion/utils/trial_stats.py @@ -4,16 +4,16 @@ """ -import os.path as path -import logging import copy +import logging +import os.path as path import numpy as np import pandas as pd from ..hyp_defs import float_cpu -from .trial_ndx import TrialNdx from .trial_key import TrialKey +from .trial_ndx import TrialNdx class TrialStats(object): diff --git a/hyperion/utils/utt2info.py 
b/hyperion/utils/utt2info.py index 3cf4179b..9785d021 100644 --- a/hyperion/utils/utt2info.py +++ b/hyperion/utils/utt2info.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os.path as path import logging +import os.path as path from collections import OrderedDict from copy import deepcopy From 4ba13a742f5eb7889f17f5cedf93052e9901f5a6 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 27 Dec 2022 10:52:30 -0500 Subject: [PATCH 068/154] isort data --- hyperion/torch/data/__init__.py | 4 +--- hyperion/torch/data/audio_dataset.py | 22 +++++++------------ hyperion/torch/data/bucketing_seg_sampler.py | 8 +++---- .../data/class_weighted_seg_chunk_sampler.py | 6 ++--- hyperion/torch/data/embed_dataset.py | 12 +++++----- hyperion/torch/data/feat_seq_dataset.py | 16 ++++++-------- hyperion/torch/data/hyp_sampler.py | 7 +++--- .../torch/data/paired_feat_seq_dataset.py | 5 ++--- hyperion/torch/data/seg_chunk_sampler.py | 8 +++---- hyperion/torch/data/seg_sampler.py | 6 ++--- hyperion/torch/data/seg_sampler_factory.py | 13 ++++++----- hyperion/torch/data/weighted_embed_sampler.py | 3 +-- hyperion/torch/data/weighted_seq_sampler.py | 7 +++--- 13 files changed, 53 insertions(+), 64 deletions(-) diff --git a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index 752cf0f5..ae2efca6 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -3,12 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .audio_dataset import AudioDataset # datasets from .feat_seq_dataset import FeatSeqDataset from .paired_feat_seq_dataset import PairedFeatSeqDataset - -from .audio_dataset import AudioDataset - # samplers # from .weighted_seq_sampler import ClassWeightedSeqSampler from .seg_sampler_factory import SegSamplerFactory diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index a52e7ab3..fc100d12 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -4,27 +4,25 @@ """ import logging -from jsonargparse import ActionYesNo, ArgumentParser, ActionParser -import time import math +import time import numpy as np import pandas as pd - import torch +import torch.distributed as dist import torchaudio.transforms as tat -from ..torch_defs import floatstr_torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from torch.utils.data import Dataset + from ...io import RandomAccessAudioReader as AR # from ...utils.utt2info import Utt2Info from ...np.augment import SpeechAugment - -from torch.utils.data import Dataset -import torch.distributed as dist - -from hyperion.np import augment - +from ...utils.class_info import ClassInfo +from ...utils.segment_set import SegmentSet +from ..torch_defs import floatstr_torch # class AudioDataset1(Dataset): # def __init__( @@ -449,10 +447,6 @@ # add_argparse_args = add_class_args -from ...utils.class_info import ClassInfo -from ...utils.segment_set import SegmentSet - - class AudioDataset(Dataset): def __init__( self, diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 224660bb..02497f3b 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -3,16 +3,16 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import math -from jsonargparse import ArgumentParser, ActionParser import logging +import math import numpy as np - import torch +import torch.distributed as dist 
+from jsonargparse import ActionParser, ArgumentParser + from .hyp_sampler import HypSampler from .seg_sampler import SegSampler -import torch.distributed as dist class BucketingSegSampler(HypSampler): diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 72b094d0..184c4ab0 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -3,15 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import math -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import logging +import math import time import numpy as np import pandas as pd - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/embed_dataset.py b/hyperion/torch/data/embed_dataset.py index aa244d81..e489e3a3 100644 --- a/hyperion/torch/data/embed_dataset.py +++ b/hyperion/torch/data/embed_dataset.py @@ -8,18 +8,20 @@ import logging import time -# import copy - import numpy as np import pandas as pd - import torch +from torch.utils.data import Dataset -from ..torch_defs import floatstr_torch from ...io import RandomAccessDataReaderFactory as RF from ...utils.utt2info import Utt2Info +from ..torch_defs import floatstr_torch + +# import copy + + + -from torch.utils.data import Dataset class EmbedDataset(Dataset): diff --git a/hyperion/torch/data/feat_seq_dataset.py b/hyperion/torch/data/feat_seq_dataset.py index 462bfe41..61fbd576 100644 --- a/hyperion/torch/data/feat_seq_dataset.py +++ b/hyperion/torch/data/feat_seq_dataset.py @@ -3,24 +3,22 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -import logging -from jsonargparse import ArgumentParser, ActionParser -import time import copy +import logging +import os +import sys import threading +import time import numpy as np import pandas as pd - import torch +from jsonargparse import ActionParser, ArgumentParser +from torch.utils.data import Dataset -from ..torch_defs import floatstr_torch from ...io import RandomAccessDataReaderFactory as RF from ...utils.utt2info import Utt2Info - -from torch.utils.data import Dataset +from ..torch_defs import floatstr_torch class FeatSeqDataset(Dataset): diff --git a/hyperion/torch/data/hyp_sampler.py b/hyperion/torch/data/hyp_sampler.py index 18ae4b5d..c5097723 100644 --- a/hyperion/torch/data/hyp_sampler.py +++ b/hyperion/torch/data/hyp_sampler.py @@ -1,12 +1,11 @@ -import math -from jsonargparse import ArgumentParser, ActionParser import logging +import math import numpy as np - import torch -from torch.utils.data import Sampler import torch.distributed as dist +from jsonargparse import ActionParser, ArgumentParser +from torch.utils.data import Sampler class HypSampler(Sampler): diff --git a/hyperion/torch/data/paired_feat_seq_dataset.py b/hyperion/torch/data/paired_feat_seq_dataset.py index 671bb6bf..fc17593e 100644 --- a/hyperion/torch/data/paired_feat_seq_dataset.py +++ b/hyperion/torch/data/paired_feat_seq_dataset.py @@ -4,13 +4,12 @@ """ import logging -import numpy as np +import numpy as np import torch -from ..torch_defs import floatstr_torch - from ...utils.utt2info import Utt2Info +from ..torch_defs import floatstr_torch from .feat_seq_dataset import FeatSeqDataset diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py index 2f5cc610..76054cd8 100644 --- 
a/hyperion/torch/data/seg_chunk_sampler.py +++ b/hyperion/torch/data/seg_chunk_sampler.py @@ -3,18 +3,18 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import math -from jsonargparse import ArgumentParser, ActionParser import logging +import math import numpy as np import pandas as pd - import torch +import torch.distributed as dist +from jsonargparse import ActionParser, ArgumentParser + from ...utils.segment_set import SegmentSet from .hyp_sampler import HypSampler from .seg_sampler import SegSampler -import torch.distributed as dist class SegChunkSampler(HypSampler): diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 73319dca..1c54a021 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -3,13 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import math -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import logging +import math import numpy as np - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/seg_sampler_factory.py b/hyperion/torch/data/seg_sampler_factory.py index 251d937b..64dbb89e 100644 --- a/hyperion/torch/data/seg_sampler_factory.py +++ b/hyperion/torch/data/seg_sampler_factory.py @@ -2,17 +2,18 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from typing import Union, Optional import logging -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +from typing import Optional, Union + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from .audio_dataset import AudioDataset +from .bucketing_seg_sampler import BucketingSegSampler +from .class_weighted_seg_chunk_sampler import \ + ClassWeightedRandomSegChunkSampler from .feat_seq_dataset import FeatSeqDataset - -from .seg_sampler import SegSampler -from .class_weighted_seg_chunk_sampler import ClassWeightedRandomSegChunkSampler from .seg_chunk_sampler import SegChunkSampler -from .bucketing_seg_sampler import BucketingSegSampler +from .seg_sampler import SegSampler sampler_dict = { "class_weighted_random_seg_chunk_sampler": ClassWeightedRandomSegChunkSampler, diff --git a/hyperion/torch/data/weighted_embed_sampler.py b/hyperion/torch/data/weighted_embed_sampler.py index 2c381365..22da93f9 100644 --- a/hyperion/torch/data/weighted_embed_sampler.py +++ b/hyperion/torch/data/weighted_embed_sampler.py @@ -2,12 +2,11 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging # import os import math -import logging import numpy as np - import torch from torch.utils.data import Sampler diff --git a/hyperion/torch/data/weighted_seq_sampler.py b/hyperion/torch/data/weighted_seq_sampler.py index c50d577d..345c2429 100644 --- a/hyperion/torch/data/weighted_seq_sampler.py +++ b/hyperion/torch/data/weighted_seq_sampler.py @@ -2,16 +2,15 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging # import os import math -from jsonargparse import ArgumentParser, ActionParser -import logging import numpy as np - import torch -from torch.utils.data import Sampler import torch.distributed as dist +from jsonargparse import ActionParser, ArgumentParser +from torch.utils.data import Sampler class ClassWeightedSeqSampler(Sampler): From 
dd7b3edac34f48a9619a2af6b2f9b73c7af0edbc Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 27 Dec 2022 11:28:41 -0500 Subject: [PATCH 069/154] fixed filter_func_args --- hyperion/utils/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyperion/utils/misc.py b/hyperion/utils/misc.py index b9bdf12b..6813c6b7 100644 --- a/hyperion/utils/misc.py +++ b/hyperion/utils/misc.py @@ -113,5 +113,5 @@ def filter_func_args(func, kwargs, skip=set()): if "kwargs" in kwargs: my_kwargs.update(kwargs["kwargs"]) - args = sig.bind(**my_kwargs).arguments + args = sig.bind_partial(**my_kwargs).arguments return args From b2748955ea4b2c74009fdfaf4c49a782e1edc5cd Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 28 Dec 2022 05:55:10 -0500 Subject: [PATCH 070/154] trainers use dictionaries --- hyperion/torch/data/audio_dataset.py | 89 ++++++++++++++----- hyperion/torch/trainers/ae_trainer.py | 33 +++---- hyperion/torch/trainers/dvae_trainer.py | 35 +++----- hyperion/torch/trainers/plda_trainer.py | 22 ++--- hyperion/torch/trainers/torch_trainer.py | 30 ++++--- hyperion/torch/trainers/vae_trainer.py | 41 +++++---- hyperion/torch/trainers/vq_dvae_trainer.py | 38 +++----- hyperion/torch/trainers/vq_vae_trainer.py | 37 +++----- .../torch/trainers/xvector_adv_trainer.py | 31 ++++--- .../trainers/xvector_adv_trainer_from_wav.py | 33 ++++--- hyperion/torch/trainers/xvector_trainer.py | 10 +-- .../trainers/xvector_trainer_deep_feat_reg.py | 16 ++-- .../xvector_trainer_deep_feat_reg_from_wav.py | 22 ++--- .../trainers/xvector_trainer_from_wav.py | 30 ++++--- hyperion/torch/utils/__init__.py | 8 +- hyperion/torch/utils/devices.py | 18 ++++ 16 files changed, 262 insertions(+), 231 deletions(-) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index fc100d12..4368ec94 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -615,8 +615,22 @@ def _read_audio(self, seg_id, start, duration): x, fs = self.r.read([recording_id], time_offset=start, time_durs=read_duration) return x[0].astype(floatstr_torch(), copy=False), fs[0] + # def _apply_augs(self, x, num_samples, reverb_context_samples): + # x_augs = [] + # # for each type of augmentation + # for i, augmenter in enumerate(self.augmenters): + # # we do n_augs per augmentation type + # for j in range(self.num_augs): + # # augment x + # x_aug, aug_info = augmenter(x) + # # remove the extra left context used to compute the reverberation. + # x_aug = x_aug[reverb_context_samples : len(x)] + # x_augs.append(x_aug.astype(floatstr_torch(), copy=False)) + + # return x_augs + def _apply_augs(self, x, num_samples, reverb_context_samples): - x_augs = [] + x_augs = {} # for each type of augmentation for i, augmenter in enumerate(self.augmenters): # we do n_augs per augmentation type @@ -625,26 +639,31 @@ def _apply_augs(self, x, num_samples, reverb_context_samples): x_aug, aug_info = augmenter(x) # remove the extra left context used to compute the reverberation. 
x_aug = x_aug[reverb_context_samples : len(x)] - x_augs.append(x_aug.astype(floatstr_torch(), copy=False)) + x_aug = x_aug.astype(floatstr_torch(), copy=False) + x_augs[f"x_aug_{i}_{j}"] = x_aug + + if not self.return_orig and len(x_augs) == 1: + # if we just have one aug and we don't return the clean version, + # we just call x to the aug version + x_augs["x"] = x_augs.pop("x_aug_0_0") return x_augs def _get_segment_info(self, seg_id): - r = [] + seg_info = {} # converts the class_ids to integers for info_name in self.return_segment_info: - seg_info = self.seg_set.loc[seg_id, info_name] + seg_info_i = self.seg_set.loc[seg_id, info_name] if info_name in self.class_info: # if the type of information is a class-id # we use the class information table to # convert from id to integer class_info = self.class_info[info_name] - idx = class_info.loc[seg_info, "class_idx"] - seg_info = idx + seg_info_i = class_info.loc[seg_info_i, "class_idx"] - r.append(seg_info) + seg_info[info_name] = seg_info_i - return r + return seg_info def _get_resampler(self, fs): if fs in self.resamplers: @@ -671,39 +690,65 @@ def _resample(self, x, fs): except: return x, fs + # def __getitem__(self, segment): + + # seg_id, start, duration = self._parse_segment_item(segment) + # x, fs = self._read_audio(seg_id, start, duration) + # x, fs = self._resample(x, fs) + # if self.augmenters: + # # augmentations + # num_samples = int(duration * fs) + # reverb_context_samples = len(x) - num_samples + # x_augs = self._apply_augs(x, num_samples, reverb_context_samples) + # r = x_augs + + # # add original non augmented audio + # if self.return_orig: + # x_orig = x[reverb_context_samples:] + # r.append(x_orig) + + # else: + # r = [x] + + # # try: + # # import soundfile as sf + + # # for i, z in enumerate(r): + # # sf.write(f"file_{seg_id}.wav", z, fs, "PCM_16") + # # except: + # # print("soundfile failed", flush=True) + + # # adds the segment labels + # seg_info = self._get_segment_info(seg_id) + # r.extend(seg_info) + + # return (*r,) + def __getitem__(self, segment): seg_id, start, duration = self._parse_segment_item(segment) x, fs = self._read_audio(seg_id, start, duration) x, fs = self._resample(x, fs) + data = {"seg_id": seg_id, "sample_freq": fs} if self.augmenters: # augmentations num_samples = int(duration * fs) reverb_context_samples = len(x) - num_samples x_augs = self._apply_augs(x, num_samples, reverb_context_samples) - r = x_augs + data.update(x_augs) # add original non augmented audio if self.return_orig: x_orig = x[reverb_context_samples:] - r.append(x_orig) + data["x"] = x_orig else: - r = [x] - - # try: - # import soundfile as sf - - # for i, z in enumerate(r): - # sf.write(f"file_{seg_id}.wav", z, fs, "PCM_16") - # except: - # print("soundfile failed", flush=True) + data["x"] = x # adds the segment labels seg_info = self._get_segment_info(seg_id) - r.extend(seg_info) - - return (*r,) + data.update(seg_info) + return data @staticmethod def filter_args(**kwargs): diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 6faaf684..7d6c7f07 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -12,7 +12,7 @@ from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .torch_trainer import TorchTrainer @@ -122,26 +122,21 @@ def train_epoch(self, data_loader): Args: data_loader: pytorch data loader returning features and 
class labels. """ - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() for batch, data in enumerate(data_loader): - - if isinstance(data, (tuple, list)): - data, _ = data - self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - data = data.to(self.device) - batch_size = data.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(data) - loss = self.loss(output, data).mean() / self.grad_acc_steps + output = self.model(input_data) + loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -155,7 +150,7 @@ def train_epoch(self, data_loader): batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, data) + batch_metrics[k] = metric(output, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -170,6 +165,7 @@ def train_epoch(self, data_loader): def validation_epoch(self, data_loader, swa_update_bn=False): + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() with torch.no_grad(): @@ -181,18 +177,15 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.eval() for batch, data in enumerate(data_loader): - if isinstance(data, (tuple, list)): - data, _ = data - - data = data.to(self.device) - batch_size = data.shape[0] + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(data) - loss = self.loss(output, data) + output = self.model(input_data) + loss = self.loss(output, target) batch_metrics["loss"] = loss.mean().item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, data) + batch_metrics[k] = metric(output, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index 3300d152..383a26d1 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -12,7 +12,7 @@ from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .torch_trainer import TorchTrainer @@ -116,28 +116,21 @@ def train_epoch(self, data_loader): Args: data_loader: pytorch data loader returning noisy and clean features """ - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() for batch, data in enumerate(data_loader): - - assert isinstance(data, (tuple, list)) - x = data[0] - x_target = data[1] - self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - x = x.to(self.device) - x_target = x_target.to(self.device) - batch_size = x.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(x, x_target=x_target, return_x_mean=True) + output = self.model(input_data, x_target=target, return_x_mean=True) elbo = output["elbo"].mean() loss = -elbo / self.grad_acc_steps @@ -157,7 +150,7 @@ def train_epoch(self, data_loader): for metric in ["log_px", "kldiv_z"]: 
batch_metrics[metric] = output[metric].mean().item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(x_hat, x_target) + batch_metrics[k] = metric(x_hat, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -175,6 +168,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): Args: data_loader: PyTorch data loader return input/output pairs """ + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() with torch.no_grad(): @@ -186,23 +180,16 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.eval() for batch, data in enumerate(data_loader): - - assert isinstance(data, (tuple, list)) - x = data[0] - x_target = data[1] - - x = x.to(self.device) - x_target = x_target.to(self.device) - batch_size = x.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(x, x_target=x_target, return_x_mean=True) + output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] for metric in ["elbo", "log_px", "kldiv_z"]: batch_metrics[metric] = output[metric].mean().item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(x_hat, x_target) + batch_metrics[k] = metric(x_hat, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/plda_trainer.py b/hyperion/torch/trainers/plda_trainer.py index 1c27c30d..54c4205f 100644 --- a/hyperion/torch/trainers/plda_trainer.py +++ b/hyperion/torch/trainers/plda_trainer.py @@ -11,7 +11,7 @@ from ...utils.misc import filter_func_args from ..losses import BCEWithLLR -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from ..utils.misc import get_selfsim_tarnon from .torch_trainer import TorchTrainer @@ -130,7 +130,7 @@ def train_epoch(self, data_loader): Args: data_loader: pytorch data loader returning features and class labels. 
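
Across these trainers the data loader now yields a dictionary per batch, and each trainer picks the tensors it needs by key through the tensors_subset helper added to hyperion/torch/utils/devices.py later in this same patch. A minimal standalone sketch of that convention; the key names and shapes below are illustrative placeholders, not taken from a real dataset:

    import torch

    # toy batch in the dict format produced by the updated AudioDataset
    batch = {
        "seg_id": ["seg_0", "seg_1"],      # non-tensor metadata
        "x": torch.randn(2, 16000),        # input waveforms
        "class_id": torch.tensor([3, 7]),  # integer class labels
    }
    batch_keys = ["x", "class_id"]
    # equivalent to tensors_subset(batch, batch_keys) with no device move
    input_data, target = tuple(batch[k] for k in batch_keys)
    assert input_data.size(0) == target.size(0) == 2
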
""" - + batch_keys = [self.input_key, self.target_key] self.model.update_margin(self.cur_epoch) return_multi = self.loss_weights["multi"] > 0 @@ -140,20 +140,20 @@ def train_epoch(self, data_loader): metric_acc = MetricAcc() batch_metrics = ODict() self.model.train() - for batch, (data, target) in enumerate(data_loader): + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) if return_bin: target_bin, mask_bin = get_selfsim_tarnon(target, return_mask=True) with self.amp_autocast(): output = self.model( - data, + input_data, target, return_multi=return_multi, return_bin=return_bin, @@ -204,7 +204,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): Args: data_loader: PyTorch data loader return input/output pairs """ - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc() batch_metrics = ODict() return_multi = self.loss_weights["multi"] > 0 @@ -218,15 +218,15 @@ def validation_epoch(self, data_loader, swa_update_bn=False): log_tag = "val_" self.model.eval() - for batch, (data, target) in enumerate(data_loader): - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + for batch, data in enumerate(data_loader): + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) if return_bin: target_bin, mask_bin = get_selfsim_tarnon(target, return_mask=True) with self.amp_autocast(): output = self.model( - data, return_multi=return_multi, return_bin=return_bin + input_data, return_multi=return_multi, return_bin=return_bin ) loss = 0 if return_multi: diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 5cadd57c..5099d829 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -24,7 +24,13 @@ from ..lr_schedulers import LRScheduler as LRS from ..lr_schedulers import LRSchedulerFactory as LRSF from ..optim import OptimizerFactory as OF -from ..utils import FairFullyShardedDDP, FairShardedDDP, MetricAcc, TorchDDP +from ..utils import ( + FairFullyShardedDDP, + FairShardedDDP, + MetricAcc, + TorchDDP, + tensors_subset, +) class DDPType(str, Enum): @@ -286,18 +292,20 @@ def train_epoch(self, data_loader): Args: data_loader: PyTorch data loader return input/output pairs """ + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() - for batch, (data, target) in enumerate(data_loader): + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) + with self.amp_autocast(): - output = self.model(data) + output = self.model(input_data) loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: @@ -310,7 +318,6 @@ def train_epoch(self, data_loader): self.lr_scheduler.on_opt_step() self.update_model() - self._reduce_metric(loss) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, 
target) @@ -333,7 +340,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): data_loader: PyTorch data loader return input/output pairs. sw_update_bn: wheter or not, update batch-norm layers in SWA. """ - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(self.device) batch_metrics = ODict() with torch.no_grad(): @@ -344,12 +351,11 @@ def validation_epoch(self, data_loader, swa_update_bn=False): log_tag = "val_" self.model.eval() - for batch, (data, target) in enumerate(data_loader): - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] - + for batch, data in enumerate(data_loader): + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(data) + output = self.model(input_data) loss = self.loss(output, target) batch_metrics["loss"] = loss.mean().item() diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index 8e75d768..2db9b24b 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -12,7 +12,7 @@ from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .torch_trainer import TorchTrainer @@ -112,26 +112,27 @@ def __init__( # ) def train_epoch(self, data_loader): + """Training epoch loop + Args: + data_loader: pytorch data loader returning noisy and clean features + """ + + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() for batch, data in enumerate(data_loader): - - if isinstance(data, (tuple, list)): - data, _ = data - self.loggers.on_batch_begin(batch) - if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - data = data.to(self.device) - batch_size = data.shape[0] + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(data, return_x_mean=True) + output = self.model(input_data, x_target=target, return_x_mean=True) elbo = output["elbo"].mean() loss = -elbo / self.grad_acc_steps x_hat = output["x_mean"] @@ -150,20 +151,26 @@ def train_epoch(self, data_loader): for metric in ["log_px", "kldiv_z"]: batch_metrics[metric] = output[metric].mean().item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(x_hat, data) + batch_metrics[k] = metric(x_hat, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs = ODict(("train_" + k, v) for k, v in logs.items()) logs["lr"] = self._get_lr() self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics + logs = ODict(("train_" + k, v) for k, v in logs.items()) logs["lr"] = self._get_lr() return logs def validation_epoch(self, data_loader, swa_update_bn=False): + """Validation epoch loop + Args: + data_loader: PyTorch data loader return input/output pairs + """ + + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() with torch.no_grad(): @@ -175,21 +182,17 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.eval() for batch, data in enumerate(data_loader): - if isinstance(data, (tuple, list)): - data, _ = data - - data = data.to(self.device) - batch_size = data.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + 
batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(data, return_x_mean=True) + output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] for metric in ["elbo", "log_px", "kldiv_z"]: batch_metrics[metric] = output[metric].mean().item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(x_hat, data) + batch_metrics[k] = metric(x_hat, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index bac95b78..1f345f7d 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -12,7 +12,7 @@ from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .dvae_trainer import DVAETrainer @@ -112,27 +112,20 @@ def __init__( def train_epoch(self, data_loader): + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() for batch, data in enumerate(data_loader): - - assert isinstance(data, (tuple, list)) - x = data[0] - x_target = data[1] - self.loggers.on_batch_begin(batch) - if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - x = x.to(self.device) - x_target = x_target.to(self.device) - batch_size = x.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(x, x_target=x_target, return_x_mean=True) + output = self.model(input_data, x_target=target, return_x_mean=True) loss = output["loss"] x_hat = output["x_mean"] loss = loss.mean() / self.grad_acc_steps @@ -154,7 +147,7 @@ def train_epoch(self, data_loader): output["log_perplexity"].mean().item() ) for k, metric in self.metrics.items(): - batch_metrics[k] = metric(x_hat, x_target) + batch_metrics[k] = metric(x_hat, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -167,29 +160,22 @@ def train_epoch(self, data_loader): return logs def validation_epoch(self, data_loader, swa_update_bn=False): - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() with torch.no_grad(): if swa_update_bn: - log_tag = "train" + log_tag = "train_" self.model.train() else: log_tag = "val_" self.model.eval() for batch, data in enumerate(data_loader): - - assert isinstance(data, (tuple, list)) - x = data[0] - x_target = data[1] - - x = x.to(self.device) - x_target = x_target.to(self.device) - batch_size = x.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(x, x_target=x_target, return_x_mean=True) + output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] for metric in ["loss", "elbo", "log_px", "kldiv_z", "vq_loss"]: @@ -199,7 +185,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): ) for k, metric in self.metrics.items(): - batch_metrics[k] = metric(x_hat, x_target) + batch_metrics[k] = metric(x_hat, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index c4b046c0..83a30cc8 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -12,7 +12,7 
@@ from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .vae_trainer import VAETrainer @@ -110,28 +110,20 @@ def __init__( # ) def train_epoch(self, data_loader): - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() for batch, data in enumerate(data_loader): - - if isinstance(data, (tuple, list)): - x = data[0] - else: - x = data - self.loggers.on_batch_begin(batch) - if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - x = x.to(self.device) - batch_size = x.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(x, return_x_mean=True) + output = self.model(input_data, x_target=target, return_x_mean=True) loss = output["loss"] x_hat = output["x_mean"] loss = loss.mean() / self.grad_acc_steps @@ -153,7 +145,7 @@ def train_epoch(self, data_loader): output["log_perplexity"].mean().item() ) for k, metric in self.metrics.items(): - batch_metrics[k] = metric(x_hat, x) + batch_metrics[k] = metric(x_hat, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -166,7 +158,7 @@ def train_epoch(self, data_loader): return logs def validation_epoch(self, data_loader, swa_update_bn=False): - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() with torch.no_grad(): @@ -178,17 +170,10 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.eval() for batch, data in enumerate(data_loader): - - if isinstance(data, (tuple, list)): - x = data[0] - else: - x = data - - x = x.to(self.device) - batch_size = x.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(x, return_x_mean=True) + output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] for metric in ["loss", "elbo", "log_px", "kldiv_z", "vq_loss"]: @@ -198,7 +183,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): ) for k, metric in self.metrics.items(): - batch_metrics[k] = metric(x_hat, x) + batch_metrics[k] = metric(x_hat, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 22971deb..0a45c97f 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -12,7 +12,7 @@ from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .xvector_trainer import XVectorTrainer @@ -136,34 +136,33 @@ def __init__( ) def train_epoch(self, data_loader): - + batch_keys = [self.input_key, self.target_key] self.model.update_loss_margin(self.cur_epoch) metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() - for batch, (data, target) in enumerate(data_loader): + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) - - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) if batch % self.grad_acc_steps == 0: 
if torch.rand(1) < self.p_attack: # generate adversarial attacks - logging.info("generating adv attack for batch=%d" % (batch)) + logging.info("generating adv attack for batch=%d", batch) self.model.eval() - data_adv = self.attack.generate(data, target) + data_adv = self.attack.generate(input_data, target) - max_delta = torch.max(torch.abs(data_adv - data)).item() + max_delta = torch.max(torch.abs(data_adv - input_data)).item() - logging.info("adv attack max perturbation=%f" % (max_delta)) - data = data_adv + logging.info("adv attack max perturbation=%f", max_delta) + input_data = data_adv self.model.train() self.optimizer.zero_grad() with self.amp_autocast(): - output = self.model(data, target) + output = self.model(input_data, target) loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: @@ -191,7 +190,7 @@ def train_epoch(self, data_loader): return logs def validation_epoch(self, data_loader, swa_update_bn=False): - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() @@ -202,14 +201,14 @@ def validation_epoch(self, data_loader, swa_update_bn=False): log_tag = "val_" self.model.eval() - for batch, (data, target) in enumerate(data_loader): - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + for batch, data in enumerate(data_loader): + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) if torch.rand(1) < self.p_val_attack: # generate adversarial attacks self.model.eval() - data = self.attack.generate(data, target) + data = self.attack.generate(input_data, target) if swa_update_bn: self.model.train() diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index ac28b95a..1a25e9a9 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -12,7 +12,7 @@ from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .xvector_trainer_from_wav import XVectorTrainerFromWav @@ -134,41 +134,41 @@ def __init__( "first step of the gradient acc. 
loop given that" "adv optimization over-writes the gradients " "stored in the model" - ) - % (p_attack, 1.0 / self.grad_acc_steps) + ), + p_attack, + 1.0 / self.grad_acc_steps, ) def train_epoch(self, data_loader): - + batch_keys = [self.input_key, self.target_key] self.model.update_loss_margin(self.cur_epoch) metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() - for batch, (data, target) in enumerate(data_loader): + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) - - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) if batch % self.grad_acc_steps == 0: if torch.rand(1) < self.p_attack: # generate adversarial attacks # logging.info('generating adv attack for batch=%d' % (batch)) self.model.eval() - data_adv = self.attack.generate(data, target) + data_adv = self.attack.generate(input_data, target) - max_delta = torch.max(torch.abs(data_adv - data)).item() + max_delta = torch.max(torch.abs(data_adv - input_data)).item() # z = torch.abs(data_adv-data) > 100 # logging.info('zz {} {}'.format(data[z], data_adv[z])) # logging.info('adv attack max perturbation=%f' % (max_delta)) - data = data_adv + input_data = data_adv self.model.train() self.optimizer.zero_grad() with torch.no_grad(): - feats = self.feat_extractor(data) + feats = self.feat_extractor(input_data) with self.amp_autocast(): output = self.model(feats, y=target) @@ -199,7 +199,7 @@ def train_epoch(self, data_loader): return logs def validation_epoch(self, data_loader, swa_update_bn=False): - + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() @@ -211,18 +211,17 @@ self.model.eval() - for batch, (data, target) in enumerate(data_loader): - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] - + for batch, data in enumerate(data_loader): + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) if torch.rand(1) < self.p_val_attack: # generate adversarial attacks self.model.eval() - data = self.attack.generate(data, target) + input_data = self.attack.generate(input_data, target) if swa_update_bn: self.model.train() with torch.no_grad(): - feats = self.feat_extractor(data) + feats = self.feat_extractor(input_data) with self.amp_autocast(): output = self.model(feats) loss = self.loss(output, target) diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 6703ea5d..9b97fb63 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -11,7 +11,7 @@ from torch.distributed.elastic.multiprocessing.errors import record from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .torch_trainer import TorchTrainer @@ -123,7 +123,7 @@ def train_epoch(self, data_loader): Args: data_loader: pytorch data loader returning features and class labels.
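
All of these train_epoch loops share one gradient-accumulation scheme: gradients are zeroed every grad_acc_steps batches and each loss is divided by grad_acc_steps, so the accumulated gradient averages over the window. A self-contained sketch of the scheme with a placeholder model, optimizer, and loader (the real trainers defer the optimizer step to update_model()):

    import torch

    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    loss_fn = torch.nn.CrossEntropyLoss()
    grad_acc_steps = 4
    loader = [(torch.randn(8, 10), torch.randint(0, 2, (8,))) for _ in range(8)]

    for batch, (x, y) in enumerate(loader):
        if batch % grad_acc_steps == 0:
            optimizer.zero_grad()
        # scale so the summed backward passes equal the window average
        loss = loss_fn(model(x), y) / grad_acc_steps
        loss.backward()
        if (batch + 1) % grad_acc_steps == 0:
            optimizer.step()
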
""" - + batch_keys = [self.input_key, self.target_key] self.model.update_loss_margin(self.cur_epoch) metric_acc = MetricAcc(device=self.device) @@ -135,10 +135,10 @@ def train_epoch(self, data_loader): if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): - output = self.model(data, y=target) + output = self.model(input_data, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index fdb2627e..5dbdfd0f 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -11,7 +11,7 @@ from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .xvector_trainer import XVectorTrainer @@ -144,24 +144,23 @@ def train_epoch(self, data_loader): Args: data_loader: PyTorch data loader return input/output pairs """ + batch_keys = [self.input_key, self.target_key] self.model.update_loss_margin(self.cur_epoch) metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() - for batch, (data, target) in enumerate(data_loader): + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) - if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with self.amp_autocast(): outputs = self.model( - data, + input_data, y=target, return_enc_layers=self.reg_layers_enc, return_classif_layers=self.reg_layers_classif, @@ -179,7 +178,7 @@ def train_epoch(self, data_loader): batch_metrics["loss-classif"] = loss.item() prior_outputs = self.prior_model( - data, + input_data, return_enc_layers=self.reg_layers_enc, return_classif_layers=self.reg_layers_classif, return_output=False, @@ -230,7 +229,6 @@ def train_epoch(self, data_loader): logs = ODict(("train_" + k, v) for k, v in logs.items()) logs["lr"] = self._get_lr() self.loggers.on_batch_end(logs=logs, batch_size=batch_size) - # total_batches +=1 logs = metric_acc.metrics logs["lr"] = self._get_lr() diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 07882f31..6a9aa067 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -10,7 +10,7 @@ import torch.nn as nn from ...utils.misc import filter_func_args -from ..utils import MetricAcc +from ..utils import MetricAcc, tensors_subset from .torch_trainer import TorchTrainer from .xvector_trainer_deep_feat_reg import XVectorTrainerDeepFeatReg @@ -141,23 +141,22 @@ def train_epoch(self, data_loader): Args: data_loader: PyTorch data loader return input/output pairs """ + batch_keys = [self.input_key, self.target_key] self.model.update_loss_margin(self.cur_epoch) metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() - for batch, (data, target) in enumerate(data_loader): + for batch, 
data in enumerate(data_loader): self.loggers.on_batch_begin(batch) - if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] - + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with torch.no_grad(): - feats = self.feat_extractor(data) + feats = self.feat_extractor(input_data) with self.amp_autocast(): outputs = self.model( @@ -241,6 +240,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): Args: data_loader: PyTorch data loader return input/output pairs """ + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() with torch.no_grad(): @@ -251,11 +251,11 @@ def validation_epoch(self, data_loader, swa_update_bn=False): log_tag = "val_" self.model.eval() - for batch, (data, target) in enumerate(data_loader): - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + for batch, data in enumerate(data_loader): + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) - feats = self.feat_extractor(data) + feats = self.feat_extractor(input_data) with self.amp_autocast(): output = self.model(feats) loss = self.loss(output, target) diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index dfbd8e00..4a66f0eb 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -7,10 +7,11 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from ...utils.misc import filter_func_args -from ..utils import MetricAcc, TorchDDP +from ..utils import MetricAcc, TorchDDP, tensors_subset from .xvector_trainer import XVectorTrainer @@ -122,24 +123,28 @@ def train_epoch(self, data_loader): Args: data_loader: pytorch data loader returning features and class labels. """ - + batch_keys = [self.input_key, self.target_key] self.model.update_loss_margin(self.cur_epoch) metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.feat_extractor.train() self.model.train() - for batch, (data, target) in enumerate(data_loader): + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + # input_data, target = ( + # data[self.input_key].to(self.device), + # data[self.target_key].to(self.device), + # ) + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) with torch.no_grad(): - feats = self.feat_extractor(data) + feats = self.feat_extractor(input_data) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(feats, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps @@ -174,6 +179,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): data_loader: PyTorch data loader return input/output pairs. sw_update_bn: wheter or not, update batch-norm layers in SWA. 
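
The from-wav trainers split the forward pass in two: acoustic features are computed under torch.no_grad(), since the feature extractor is not being optimized here, and only the x-vector forward runs under autocast. A standalone sketch of that split; the Identity and Linear modules are stand-ins for the filter-bank frontend and the x-vector network:

    import torch
    import torch.cuda.amp as amp

    use_amp = torch.cuda.is_available()
    feat_extractor = torch.nn.Identity()  # placeholder frontend
    model = torch.nn.Linear(16000, 4)     # placeholder x-vector network
    wav = torch.randn(2, 16000)

    with torch.no_grad():                 # no graph for feature extraction
        feats = feat_extractor(wav)
    with amp.autocast(enabled=use_amp):   # mixed precision only for the model
        output = model(feats)
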
""" + batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.feat_extractor.eval() @@ -185,12 +191,12 @@ def validation_epoch(self, data_loader, swa_update_bn=False): log_tag = "val_" self.model.eval() - for batch, (data, target) in enumerate(data_loader): - data, target = data.to(self.device), target.to(self.device) - batch_size = data.shape[0] + for batch, data in enumerate(data_loader): + input_data, target = tensors_subset(data, batch_keys, self.device) + batch_size = input_data.size(0) - feats = self.feat_extractor(data) - with self.amp_autocast(): + feats = self.feat_extractor(input_data) + with amp.autocast(enabled=self.use_amp): output = self.model(feats) loss = self.loss(output, target) diff --git a/hyperion/torch/utils/__init__.py b/hyperion/torch/utils/__init__.py index 3a4692dc..da4a3773 100644 --- a/hyperion/torch/utils/__init__.py +++ b/hyperion/torch/utils/__init__.py @@ -3,7 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .devices import open_device +from .devices import ( + open_device, + tensors_to_device, + tensors_to_cpu, + tensors_to_numpy, + tensors_subset, +) from .metric_acc import MetricAcc from .masking import seq_lengths_to_mask, scale_seq_lengths from .collation import collate_seq_1d, collate_seq_2d, collate_seq_nd diff --git a/hyperion/torch/utils/devices.py b/hyperion/torch/utils/devices.py index cb77f1e5..19c124b2 100644 --- a/hyperion/torch/utils/devices.py +++ b/hyperion/torch/utils/devices.py @@ -51,6 +51,8 @@ def tensors_to_device(data, device): elif isinstance(data, list): for i, value in enumerate(data): data[i] = value.to(device) + elif isinstance(data, tuple): + data = tuple(value.to(device) for value in data) elif isinstance(data, torch.Tensor): data = data.to(device) else: @@ -66,6 +68,8 @@ def tensors_to_cpu(data): elif isinstance(data, list): for i, value in enumerate(data): data[i] = value.cpu() + elif isinstance(data, tuple): + data = tuple(value.cpu() for value in data) elif isinstance(data, torch.Tensor): data = data.cpu() else: @@ -81,9 +85,23 @@ def tensors_to_numpy(data): elif isinstance(data, list): for i, value in enumerate(data): data[i] = value.cpu().numpy() + elif isinstance(data, tuple): + data = tuple(value.cpu().numpy() for value in data) elif isinstance(data, torch.Tensor): data = data.cpu().numpy() else: raise Exception(f"Unknown data type for {data}") return data + + +def tensors_subset(data, keys, device=None, return_dict=False): + if return_dict: + data = {k: data[k] for k in keys} + else: + data = tuple(data[k] for k in keys) + + if device is not None: + data = tensors_to_device(data, device) + + return data From 4f7bc3ddec8e2ebc63e71dca37ea63199765b2f7 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 28 Dec 2022 06:06:40 -0500 Subject: [PATCH 071/154] fixed amp --- hyperion/torch/trainers/ae_trainer.py | 5 +++-- hyperion/torch/trainers/dvae_trainer.py | 5 +++-- hyperion/torch/trainers/plda_trainer.py | 7 +++++-- hyperion/torch/trainers/torch_trainer.py | 4 ++-- hyperion/torch/trainers/vae_trainer.py | 5 +++-- hyperion/torch/trainers/vq_dvae_trainer.py | 5 +++-- hyperion/torch/trainers/vq_vae_trainer.py | 5 +++-- hyperion/torch/trainers/xvector_adv_trainer.py | 5 +++-- hyperion/torch/trainers/xvector_adv_trainer_from_wav.py | 5 +++-- hyperion/torch/trainers/xvector_trainer.py | 3 ++- hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py | 3 ++- .../trainers/xvector_trainer_deep_feat_reg_from_wav.py | 5 +++-- 12 files changed, 
35 insertions(+), 22 deletions(-) diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 7d6c7f07..69e97cc6 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -8,6 +8,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser @@ -134,7 +135,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data) loss = self.loss(output, target).mean() / self.grad_acc_steps @@ -179,7 +180,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data) loss = self.loss(output, target) diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index 383a26d1..0523ad44 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -8,6 +8,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser @@ -129,7 +130,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) elbo = output["elbo"].mean() @@ -182,7 +183,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] diff --git a/hyperion/torch/trainers/plda_trainer.py b/hyperion/torch/trainers/plda_trainer.py index 54c4205f..d6761e87 100644 --- a/hyperion/torch/trainers/plda_trainer.py +++ b/hyperion/torch/trainers/plda_trainer.py @@ -7,6 +7,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from ...utils.misc import filter_func_args @@ -151,7 +152,8 @@ def train_epoch(self, data_loader): if return_bin: target_bin, mask_bin = get_selfsim_tarnon(target, return_mask=True) - with self.amp_autocast(): + + with amp.autocast(enabled=self.use_amp): output = self.model( input_data, target, @@ -224,7 +226,8 @@ def validation_epoch(self, data_loader, swa_update_bn=False): if return_bin: target_bin, mask_bin = get_selfsim_tarnon(target, return_mask=True) - with self.amp_autocast(): + + with amp.autocast(enabled=self.use_amp): output = self.model( input_data, return_multi=return_multi, return_bin=return_bin ) diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 5099d829..ad3df161 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -304,7 +304,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = 
input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data) loss = self.loss(output, target).mean() / self.grad_acc_steps @@ -354,7 +354,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data) loss = self.loss(output, target) diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index 2db9b24b..ba401cb7 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -8,6 +8,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser @@ -131,7 +132,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) elbo = output["elbo"].mean() loss = -elbo / self.grad_acc_steps @@ -184,7 +185,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index 1f345f7d..03800e0d 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -8,6 +8,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser @@ -124,7 +125,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) loss = output["loss"] x_hat = output["x_mean"] @@ -174,7 +175,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index 83a30cc8..40b6b10d 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -8,6 +8,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser @@ -122,7 +123,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) loss = output["loss"] 
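
This commit replaces the self.amp_autocast() wrapper with an explicit amp.autocast(enabled=self.use_amp) context. Autocast only chooses per-op precision; the loss scaling that keeps fp16 gradients from underflowing comes from the GradScaler calls already visible in the train loops (self.grad_scaler.scale(loss).backward()). A minimal sketch of the full pairing, using a placeholder model and data, with AMP simply disabled when no GPU is present:

    import torch
    import torch.cuda.amp as amp

    use_amp = torch.cuda.is_available()
    device = "cuda" if use_amp else "cpu"
    model = torch.nn.Linear(10, 2).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scaler = amp.GradScaler(enabled=use_amp)
    x = torch.randn(8, 10, device=device)
    y = torch.randint(0, 2, (8,), device=device)

    optimizer.zero_grad()
    with amp.autocast(enabled=use_amp):
        loss = torch.nn.functional.cross_entropy(model(x), y)
    scaler.scale(loss).backward()  # scale the loss to avoid fp16 underflow
    scaler.step(optimizer)         # unscales grads; skips step on inf/nan
    scaler.update()
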
x_hat = output["x_mean"] @@ -172,7 +173,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 0a45c97f..af915d6b 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -8,6 +8,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser @@ -161,7 +162,7 @@ def train_epoch(self, data_loader): self.optimizer.zero_grad() - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, target) loss = self.loss(output, target).mean() / self.grad_acc_steps @@ -213,7 +214,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.train() with torch.no_grad(): - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(data, **self.amp_args) loss = self.loss(output, target) diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 1a25e9a9..1e1b1778 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -8,6 +8,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser @@ -170,7 +171,7 @@ def train_epoch(self, data_loader): with torch.no_grad(): feats = self.feat_extractor(input_data) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(feats, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps @@ -222,7 +223,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): with torch.no_grad(): feats = self.feat_extractor(input_data) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(feats) loss = self.loss(output, target) diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 9b97fb63..a9a9d98f 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -7,6 +7,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from torch.distributed.elastic.multiprocessing.errors import record @@ -137,7 +138,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(input_data, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 5dbdfd0f..4e791347 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -7,6 +7,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn 
as nn from jsonargparse import ActionParser, ArgumentParser @@ -158,7 +159,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): outputs = self.model( input_data, y=target, diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 6a9aa067..6d06eac8 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -7,6 +7,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn from ...utils.misc import filter_func_args @@ -158,7 +159,7 @@ def train_epoch(self, data_loader): with torch.no_grad(): feats = self.feat_extractor(input_data) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): outputs = self.model( feats, y=target, @@ -256,7 +257,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_size = input_data.size(0) feats = self.feat_extractor(input_data) - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp): output = self.model(feats) loss = self.loss(output, target) From 5a138e8ff6251760febfd3897b80ddc9b8602406 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 29 Dec 2022 06:29:52 -0500 Subject: [PATCH 072/154] update feature, embeddings datasets and samplers, added fwse and cfwse, added IDRnD Resnets, ResNet should work with x_lengths --- hyperion/torch/data/__init__.py | 3 + hyperion/torch/data/audio_dataset.py | 8 +- .../data/class_weighted_embed_sampler.py | 280 ++++++++++ hyperion/torch/data/embed_dataset.py | 187 ++++--- hyperion/torch/data/embed_sampler.py | 112 ++++ hyperion/torch/data/embed_sampler_factory.py | 125 +++++ hyperion/torch/data/feat_seq_dataset.py | 369 +++++-------- hyperion/torch/layer_blocks/__init__.py | 10 +- .../torch/layer_blocks/res2net1d_blocks.py | 4 +- .../torch/layer_blocks/res2net2d_blocks.py | 4 +- hyperion/torch/layer_blocks/res2net_blocks.py | 35 +- hyperion/torch/layer_blocks/se_blocks.py | 99 +++- .../torch/layer_blocks/seresnet_blocks.py | 44 +- hyperion/torch/layers/spec_augment.py | 46 +- hyperion/torch/models/xvectors/xvector.py | 39 +- hyperion/torch/narchs/dc1d_decoder.py | 41 +- hyperion/torch/narchs/dc1d_encoder.py | 42 +- hyperion/torch/narchs/dc2d_decoder.py | 41 +- hyperion/torch/narchs/dc2d_encoder.py | 41 +- hyperion/torch/narchs/resnet.py | 483 +++++++++++++++++- hyperion/torch/narchs/resnet1d_decoder.py | 43 +- hyperion/torch/narchs/resnet1d_encoder.py | 44 +- hyperion/torch/narchs/resnet2d_decoder.py | 41 +- hyperion/torch/narchs/resnet2d_encoder.py | 42 +- hyperion/torch/narchs/resnet_factory.py | 83 ++- hyperion/torch/trainers/torch_trainer.py | 2 +- 26 files changed, 1729 insertions(+), 539 deletions(-) create mode 100644 hyperion/torch/data/class_weighted_embed_sampler.py create mode 100644 hyperion/torch/data/embed_sampler.py create mode 100644 hyperion/torch/data/embed_sampler_factory.py diff --git a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index ae2efca6..65608a0c 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -4,9 +4,12 @@ """ from .audio_dataset import AudioDataset + # datasets from .feat_seq_dataset import FeatSeqDataset from .paired_feat_seq_dataset import PairedFeatSeqDataset + # samplers # from 
.weighted_seq_sampler import ClassWeightedSeqSampler from .seg_sampler_factory import SegSamplerFactory +from .embed_sampler_factory import EmbedSamplerFactory diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 4368ec94..02b81efa 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -482,16 +482,16 @@ def __init__( self.r = AR(audio_file, wav_scale=wav_scale) if rank == 0: - logging.info("loading segments file %s" % segments_file) + logging.info("loading segments file %s", segments_file) self.seg_set = SegmentSet.load(segments_file) if rank == 0: - logging.info("dataset contains %d seqs" % len(self.seg_set)) + logging.info("dataset contains %d seqs", len(self.seg_set)) self.is_val = is_val if time_durs_file is not None: if rank == 0: - logging.info("loading durations file %s" % time_durs_file) + logging.info("loading durations file %s", time_durs_file) time_durs = SegmentSet.load(time_durs_file) self.seg_set["duration"] = time_durs.loc[ @@ -771,7 +771,7 @@ def filter_args(**kwargs): return args @staticmethod - def add_class_args(parser, prefix=None, skip={}): + def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/data/class_weighted_embed_sampler.py b/hyperion/torch/data/class_weighted_embed_sampler.py new file mode 100644 index 00000000..aed9105d --- /dev/null +++ b/hyperion/torch/data/class_weighted_embed_sampler.py @@ -0,0 +1,280 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import math +import time + +import numpy as np +import pandas as pd +import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from .hyp_sampler import HypSampler + + +class ClassWeightedEmbedSampler(HypSampler): + def __init__( + self, + embed_set, + class_info, + batch_size=1, + num_embeds_per_class=1, + weight_exponent=1.0, + weight_mode="custom", + num_hard_prototypes=0, + affinity_matrix=None, + class_name="class_id", + shuffle=False, + seed=1234, + ): + super().__init__(shuffle=shuffle, seed=seed) + self.class_name = class_name + self.embed_set = embed_set + self.class_info = class_info + self.batch_size = batch_size + self.avg_batch_size = batch_size + + self.num_embeds_per_class = num_embeds_per_class + + self.weight_exponent = weight_exponent + self.weight_mode = weight_mode + + self.num_hard_prototypes = num_hard_prototypes + self.batch = 0 + + self._compute_len() + self._compute_num_classes_per_batch() + self._gather_class_info() + self._set_class_weights() + + self.set_hard_prototypes(affinity_matrix) + + logging.info( + ("sampler batches/epoch=%d batch-size=%d, " "classes/batch=%.2f "), + self._len, + self.batch_size, + self.num_classes_per_batch, + ) + + def _set_seed(self): + if self.shuffle: + self.rng.manual_seed(self.seed + 10 * self.epoch + 100 * self.rank) + else: + self.rng.manual_seed(self.seed + 100 * self.rank) + + def _compute_len(self): + self._len = int( + math.ceil(len(self.embed_set) / self.avg_batch_size / self.world_size) + ) + + def __len__(self): + return self._len + + def _gather_class_info(self): + # we get some extra info that we need for the classes. 
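+        # assumed table layout (not enforced here): class_info has columns
+        # "id", "class_idx" and "weights", and embed_set.df has one row per
+        # embedding with an "id" column plus a column named self.class_name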
+        # we need the mapping from class index to id
+        self.map_class_idx_to_ids = self.class_info[["class_idx", "id"]]
+        self.map_class_idx_to_ids.set_index("class_idx", inplace=True)
+
+        # we need the list of embeddings from each class
+        # to speed up embedding sampling;
+        # searching them in each batch is too slow
+        map_class_to_embeds = self.embed_set.df[["id", self.class_name]].set_index(
+            self.class_name
+        )
+        self.map_class_to_embed_idx = {}
+        for class_id in self.class_info["id"].values:
+            if class_id in map_class_to_embeds.index:
+                embed_ids = map_class_to_embeds.loc[class_id, "id"]
+                if isinstance(embed_ids, str):
+                    embed_ids = [embed_ids]
+                else:
+                    embed_ids = embed_ids.values
+
+                embed_idx = self.embed_set.get_loc(embed_ids)
+            else:
+                embed_idx = []
+                self.class_info.loc[class_id, "weights"] = 0.0
+                self.class_info.renorm_weights()
+
+            self.map_class_to_embed_idx[class_id] = embed_idx
+
+    def _set_class_weights(self):
+        if self.weight_mode == "uniform":
+            self.class_info.set_uniform_weights()
+        elif self.weight_mode == "data-prior":
+            weights = self.class_info["total_duration"].values
+            self.class_info.set_weights(weights)
+
+        if self.weight_exponent != 1.0:
+            self.class_info.exp_weights(self.weight_exponent)
+
+    @property
+    def hard_prototype_mining(self):
+        return self.num_hard_prototypes > 1
+
+    def set_hard_prototypes(self, affinity_matrix):
+        if affinity_matrix is None:
+            self.hard_prototypes = None
+            return
+
+        # don't sample hard negatives from classes with zero weight or absent classes
+        zero_w = self.class_info["weights"] == 0
+        if np.any(zero_w):
+            zero_w_idx = self.class_info.loc[zero_w, "class_idx"].values
+            affinity_matrix[:, zero_w_idx] = -1000
+
+        for i in range(affinity_matrix.size(1)):
+            mask_i = self.class_info["class_idx"] == i
+            if np.all(mask_i == 0):
+                affinity_matrix[:, i] = -1000
+
+        # hard prototypes for a class are itself and the k-1 classes closest to it.
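+        # torch.topk along the class dimension keeps, for each class, the
+        # num_hard_prototypes classes with highest affinity (the class itself
+        # should rank first), so batches pair each class with its confusables.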
+ self.hard_prototypes = torch.topk( + affinity_matrix, self.num_hard_prototypes, dim=-1 + ).indices + + def get_hard_prototypes(self, class_idx): + return self.hard_prototypes[class_idx].flatten().numpy() + + def _compute_num_classes_per_batch(self): + num_classes = self.batch_size / self.num_embeds_per_class + if self.hard_prototype_mining: + num_classes /= self.num_hard_prototypes + self.num_classes_per_batch = int(math.ceil(num_classes)) + + def _get_class_weights(self,): + return torch.as_tensor(self.class_info["weights"].values) + + def _sample_classes(self): + weights = self._get_class_weights() + row_idx = torch.multinomial( + weights, + num_samples=self.num_classes_per_batch, + replacement=True, + generator=self.rng, + ).numpy() + + class_ids = self.class_info.iloc[row_idx].id.values + if self.hard_prototype_mining: + # map class ids to class indexes + class_idx = self.class_info.loc[class_ids, "class_idx"].values + class_idx = self.get_hard_prototypes(class_idx) + # map back to class ids + class_ids = self.map_class_idx_to_ids.loc[class_idx, "id"].values + + return class_ids + + def _sample_embeds(self, class_ids): + + id_col_idx = self.embed_set.get_col_idx("id") + embed_ids = [] + for c in class_ids: + # get embeds belonging to c + embed_idx_c = self.map_class_to_embed_idx[c] + # sample num_embeds_per_class randomly + if len(embed_idx_c) == 0: + logging.error("no embeddings found with class=%s", c) + + sel_idx = torch.randint( + low=0, + high=len(embed_idx_c), + size=(self.num_embeds_per_class,), + generator=self.rng, + ).numpy() + + sel_embed_idx_c = embed_idx_c[sel_idx] + sel_embed_ids_c = list(self.embed_set.iloc[sel_embed_idx_c, id_col_idx]) + embed_ids.extend(sel_embed_ids_c) + + return embed_ids + + def __next__(self): + + if self.batch == self._len: + raise StopIteration + + class_ids = self._sample_classes() + embed_ids = self._sample_embeds(class_ids) + if self.batch == 0: + logging.info("batch 0 uttidx=%s", str(embed_ids[:10])) + + self.batch += 1 + return embed_ids + + @staticmethod + def filter_args(**kwargs): + + valid_args = ( + "batch_size", + "num_embeds_per_class", + "weight_exponent", + "weight_mode", + "num_hard_prototypes", + "class_name", + "shuffle", + "seed", + ) + + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--batch-size", type=int, default=1, help=("batch size per gpu"), + ) + + parser.add_argument( + "--num-embeds-per-class", + type=int, + default=1, + help=("number of embeds per class in batch"), + ) + parser.add_argument( + "--weight-exponent", + default=1.0, + type=float, + help=("exponent for class weights"), + ) + parser.add_argument( + "--weight-mode", + default="custom", + choices=["custom", "uniform", "data-prior"], + help=("method to get the class weights"), + ) + + parser.add_argument( + "--num-hard-prototypes", + type=int, + default=0, + help=("number of hard prototype classes per batch"), + ) + + parser.add_argument( + "--shuffle", + action=ActionYesNo, + help="shuffles the embeddings at the beginning of the epoch", + ) + + parser.add_argument( + "--seed", + type=int, + default=1234, + help=("seed for sampler random number generator"), + ) + + parser.add_argument( + "--class-name", + default="class_id", + help="which column in the info table indicates the class", + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, 
action=ActionParser(parser=parser))

diff --git a/hyperion/torch/data/embed_dataset.py b/hyperion/torch/data/embed_dataset.py
index e489e3a3..2963854d 100644
--- a/hyperion/torch/data/embed_dataset.py
+++ b/hyperion/torch/data/embed_dataset.py
@@ -11,56 +11,63 @@
 import numpy as np
 import pandas as pd
 import torch
+import torch.distributed as dist
+
+from jsonargparse import ActionParser, ArgumentParser, ActionYesNo
 from torch.utils.data import Dataset
 
 from ...io import RandomAccessDataReaderFactory as RF
-from ...utils.utt2info import Utt2Info
+from ...utils.misc import filter_func_args
+from ...utils.class_info import ClassInfo
+from ...utils.info_table import InfoTable
 from ..torch_defs import floatstr_torch
 
-# import copy
-
-
-
-
-
 class EmbedDataset(Dataset):
     def __init__(
         self,
         embeds=None,
-        class_ids=None,
-        class_weights=None,
-        rspecifier=None,
-        key_file=None,
-        class_file=None,
+        embed_info=None,
+        class_info=None,
+        embed_file=None,
+        embed_info_file=None,
+        class_names=None,
+        class_files=None,
+        return_segment_info=None,
         path_prefix=None,
         preload_embeds=False,
-        return_class=True,
         is_val=False,
     ):
-        assert embeds is not None or rspecifier is not None
-        assert rspecifier is None or key_file is not None
-        assert class_ids is not None or key_file is not None
+        assert embeds is not None or embed_file is not None
+        assert embed_info is not None or embed_info_file is not None
+        assert class_info is not None or class_files is not None
+        super().__init__()
+        try:
+            rank = dist.get_rank()
+            world_size = dist.get_world_size()
+        except:
+            rank = 0
+            world_size = 1
+
+        self.rank = rank
+        self.world_size = world_size
 
         self.preload_embeds = preload_embeds
-        if key_file is not None:
-            if isinstance(key_file, Utt2Info):
-                self.u2c = key_file
-            else:
-                logging.info("loading utt2info file %s", key_file)
-                self.u2c = Utt2Info.load(key_file, sep=" ")
-            self.num_embeds = len(self.u2c)
-        else:
-            assert embeds is not None
-            self.u2c = None
-            self.num_embeds = len(embeds)
+
+        if embed_info is None:
+            embed_info = InfoTable.load(embed_info_file)
+
+        self.embed_info = embed_info
+        if rank == 0:
+            logging.info("dataset contains %d embeddings", len(self.embed_info))
 
         if embeds is None:
-            logging.info("opening dataset %s", rspecifier)
-            self.r = RF.create(rspecifier, path_prefix=path_prefix, scp_sep=" ")
+            if rank == 0:
+                logging.info("opening dataset %s", embed_file)
+            self.r = RF.create(embed_file, path_prefix=path_prefix, scp_sep=" ")
             if self.preload_embeds:
-                self.embeds = self.r.load(u2c.key, squeeze=True).astype(
+                self.embeds = self.r.load(embed_info["id"], squeeze=True).astype(
                     floatstr_torch(), copy=False
                 )
                 del self.r
@@ -70,65 +77,80 @@ def __init__(
             self.embeds = embeds.astype(floatstr_torch(), copy=False)
 
         self.is_val = is_val
-        self._prepare_class_info(class_file, class_ids, class_weights)
-        self.return_class = return_class
-
-        logging.info("dataset contains %d embeds", self.num_embeds)
+        if rank == 0:
+            logging.info("loading class-info files")
+        self._load_class_infos(class_names, class_files, is_val)
+
+        self.return_segment_info = (
+            [] if return_segment_info is None else return_segment_info
+        )
+
+    def _load_class_infos(self, class_names, class_files, is_val):
+        self.class_info = {}
+        if class_names is None:
+            assert class_files is None
+            return
+
+        assert len(class_names) == len(class_files)
+        for name, file in zip(class_names, class_files):
+            assert (
+                name in self.embed_info
+            ), f"class_name {name} not present in the embedding set"
+            if self.rank == 0:
+                logging.info("loading class-info file %s", file)
+            table = ClassInfo.load(file)
+            self.class_info[name] = table
+            if not is_val:
+                # check that all classes are present in the training embeddings
+                class_ids = table["id"]
+                embed_class_ids = self.embed_info[name].unique()
+                for c_id in class_ids:
+                    if c_id not in embed_class_ids:
+                        logging.warning(
+                            "%s class: %s not present in dataset", name, c_id
+                        )
+
+    @property
+    def num_embeds(self):
+        return len(self.embed_info)
 
     def __len__(self):
         return self.num_embeds
 
-    def _prepare_class_info(self, class_file, class_idx=None, class_weights=None):
-        if class_file is None:
-            if self.u2c is not None:
-                classes, class_idx = np.unique(self.u2c.info, return_inverse=True)
-            self.num_classes = np.max(class_idx) + 1
-        else:
-            logging.info("reading class-file %s", class_file)
-            class_info = pd.read_csv(class_file, header=None, sep=" ")
-            class2idx = {str(k): i for i, k in enumerate(class_info[0])}
-            self.num_classes = len(class2idx)
-            class_idx = np.array([class2idx[k] for k in self.u2c.info], dtype=int)
-            if class_info.shape[1] == 2:
-                class_weights = np.array(class_info[1]).astype(
-                    floatstr_torch(), copy=False
-                )
+    @property
+    def num_classes(self):
+        return {k: t.num_classes for k, t in self.class_info.items()}
 
-        class2utt_idx = {}
-        class2num_utt = np.zeros((self.num_classes,), dtype=int)
-
-        for k in range(self.num_classes):
-            idx = (class_idx == k).nonzero()[0]
-            class2utt_idx[k] = idx
-            class2num_utt[k] = len(idx)
-            if class2num_utt[k] == 0:
-                if not self.is_val:
-                    logging.warning("class %d doesn't have any samples", k)
-                if class_weights is None:
-                    class_weights = np.ones((self.num_classes,), dtype=floatstr_torch())
-                class_weights[k] = 0
-
-        count_empty = np.sum(class2num_utt == 0)
-        if count_empty > 0:
-            logging.warning("%d classes have 0 samples", count_empty)
-
-        self.utt_idx2class = class_idx
-        self.class2utt_idx = class2utt_idx
-        self.class2num_utt = class2num_utt
-        if class_weights is not None:
-            class_weights /= np.sum(class_weights)
-            class_weights = torch.Tensor(class_weights)
-        self.class_weights = class_weights
-
-    def __getitem__(self, index):
+    def _read_embeds(self, embed_id):
         if self.preload_embeds:
+            index = self.embed_info.index.get_loc(embed_id)
             x = self.embeds[index]
         else:
-            key = self.u2c.key[index]
-            x = self.r.read([key])[0].astype(floatstr_torch(), copy=False)
-
-        if not self.return_class:
-            return x
-
-        class_idx = self.utt_idx2class[index]
-        return x, class_idx
+            x = self.r.read([embed_id])[0].astype(floatstr_torch(), copy=False)
+        return x
+
+    def _get_embed_info(self, embed_id):
+        embed_info = {}
+        # converts the class_ids to integers
+        for info_name in self.return_segment_info:
+            embed_info_i = self.embed_info.loc[embed_id, info_name]
+            if info_name in self.class_info:
+                # if the type of information is a class-id
+                # we use the class information table to
+                # convert from id to integer
+                class_info = self.class_info[info_name]
+                embed_info_i = class_info.loc[embed_info_i, "class_idx"]
+
+            embed_info[info_name] = embed_info_i
+
+        return embed_info
+
+    def __getitem__(self, embed_id):
+
+        x = self._read_embeds(embed_id)
+
+        data = {"embed_id": embed_id, "x": x}
+        # adds the embed labels
+        embed_info = self._get_embed_info(embed_id)
+        data.update(embed_info)
+        return data

diff --git a/hyperion/torch/data/embed_sampler.py b/hyperion/torch/data/embed_sampler.py
new file mode 100644
index 00000000..8836fe2a
--- /dev/null
+++ b/hyperion/torch/data/embed_sampler.py
@@ -0,0 +1,112 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+""" + +import logging +import math + +import numpy as np +import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from .hyp_sampler import HypSampler + + +class EmbedSampler(HypSampler): + def __init__( + self, embed_set, batch_size=1, shuffle=False, drop_last=False, seed=1234, + ): + super().__init__(shuffle=shuffle, seed=seed) + self.embed_set = embed_set + self.batch_size = batch_size + self.avg_batch_size = batch_size + + num_batches = len(self.embed_set) / batch_size / self.world_size + if drop_last: + self._len = int(num_batches) + else: + self._len = int(math.ceil(num_batches)) + + self._permutation = None + + def __len__(self): + return self._len + + def _shuffle_embeds(self): + self._permutation = torch.randperm( + len(self.embed_set), generator=self.rng + ).numpy() + + def __iter__(self): + super().__iter__() + if self.shuffle: + self._shuffle_segs() + + self.start = self.rank + return self + + def __next__(self): + + if self.batch == self._len: + raise StopIteration + + stop = min( + self.start + self.world_size * self.min_batch_size, len(self.embed_set) + ) + if self.shuffle: + idx = self._permutation[self.start : stop : self.world_size] + else: + idx = slice(self.start, stop, self.world_size) + + self.start += self.world_size * self.min_batch_size + + embed_ids = self.embed_set.iloc[idx].id + + if self.batch == 0: + logging.info("batch 0 chunks=%s", str(embed_ids[:10])) + + self.batch += 1 + return embed_ids + + @staticmethod + def filter_args(**kwargs): + + valid_args = ( + "batch_size", + "shuffle", + "drop_last", + "seed", + ) + + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--batch-size", type=int, default=1, help=("minimum batch size per gpu"), + ) + + parser.add_argument( + "--drop-last", action=ActionYesNo, help="drops the last batch of the epoch", + ) + + parser.add_argument( + "--shuffle", + action=ActionYesNo, + help="shuffles the segments or chunks at the beginning of the epoch", + ) + + parser.add_argument( + "--seed", + type=int, + default=1234, + help=("seed for sampler random number generator"), + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/data/embed_sampler_factory.py b/hyperion/torch/data/embed_sampler_factory.py new file mode 100644 index 00000000..43d00b1d --- /dev/null +++ b/hyperion/torch/data/embed_sampler_factory.py @@ -0,0 +1,125 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from typing import Optional, Union + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from .embed_dataset import EmbedDataset +from .bucketing_seg_sampler import BucketingSegSampler +from .class_weighted_embed_sampler import ClassWeightedEmbedSampler +from .embed_sampler import EmbedSampler + +sampler_dict = { + "class_weighted_embed_sampler": ClassWeightedEmbedSampler, + "embed_sampler": EmbedSampler, +} + + +class EmbedSamplerFactory(object): + """Factory class to create different types of samplers for + embeddings like x-vectors. 
+ """ + + @staticmethod + def create( + dataset: EmbedDataset, + sampler_type: str = "class_weighted_embed_sampler", + **kwargs, + ): + """Functions that creates a sampler based on a dataset, sampler_type and sampler arguments. + + Args: + dataset: embeddings dataset object containing the data info + sampler_type: string indicating the sampler type. + """ + + sampler_class = sampler_dict[sampler_type] + sampler_kwargs = sampler_class.filter_args(**kwargs) + + if sampler_type in ["class_weighted_embed_sampler"]: + try: + class_name = sampler_kwargs["class_name"] + except: + class_name = "class_id" + sampler_kwargs["class_info"] = dataset.class_info[class_name] + + logging.info(f"sampler-args={sampler_kwargs}") + + return sampler_class(dataset.embed_info, **sampler_kwargs) + + @staticmethod + def filter_args(**kwargs): + + valid_args = ( + "batch_size", + "num_embeds_per_class", + "weight_exponent", + "weight_mode", + "num_hard_prototypes", + "class_name", + "shuffle", + "seed", + ) + + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--batch-size", type=int, default=1, help=("batch size per gpu"), + ) + + parser.add_argument( + "--num-embeds-per-class", + type=int, + default=1, + help=("number of embeds per class in batch"), + ) + parser.add_argument( + "--weight-exponent", + default=1.0, + type=float, + help=("exponent for class weights"), + ) + parser.add_argument( + "--weight-mode", + default="custom", + choices=["custom", "uniform", "data-prior"], + help=("method to get the class weights"), + ) + + parser.add_argument( + "--num-hard-prototypes", + type=int, + default=0, + help=("number of hard prototype classes per batch"), + ) + + parser.add_argument( + "--shuffle", + action=ActionYesNo, + help="shuffles the embeddings at the beginning of the epoch", + ) + + parser.add_argument( + "--seed", + type=int, + default=1234, + help=("seed for sampler random number generator"), + ) + + parser.add_argument( + "--class-name", + default="class_id", + help="which column in the info table indicates the class", + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/data/feat_seq_dataset.py b/hyperion/torch/data/feat_seq_dataset.py index 61fbd576..1605ead3 100644 --- a/hyperion/torch/data/feat_seq_dataset.py +++ b/hyperion/torch/data/feat_seq_dataset.py @@ -13,11 +13,15 @@ import numpy as np import pandas as pd import torch -from jsonargparse import ActionParser, ArgumentParser +import torch.distributed as dist + +from jsonargparse import ActionParser, ArgumentParser, ActionYesNo from torch.utils.data import Dataset from ...io import RandomAccessDataReaderFactory as RF -from ...utils.utt2info import Utt2Info +from ...utils.misc import filter_func_args +from ...utils.class_info import ClassInfo +from ...utils.segment_set import SegmentSet from ..torch_defs import floatstr_torch @@ -25,82 +29,100 @@ class FeatSeqDataset(Dataset): def __init__( self, feat_file, - key_file, - class_file=None, + segments_file, + class_names=None, + class_files=None, num_frames_file=None, + return_segment_info=None, path_prefix=None, - min_chunk_length=1, - max_chunk_length=None, - return_fullseqs=False, - return_class=True, transpose_input=True, is_val=False, ): - logging.info("opening dataset %s", feat_file) + super().__init__() + try: + rank = 
dist.get_rank() + world_size = dist.get_world_size() + except: + rank = 0 + world_size = 1 + + if rank == 0: + logging.info("opening feature reader %s", feat_file) + self.r = RF.create(feat_file, path_prefix=path_prefix, scp_sep=" ") - logging.info("loading utt2info file %s" % key_file) - self.u2c = Utt2Info.load(key_file, sep=" ") - logging.info("dataset contains %d seqs" % self.num_seqs) + + if rank == 0: + logging.info("loading segments file %s" % segments_file) + + self.seg_set = SegmentSet.load(segments_file) + if rank == 0: + logging.info("dataset contains %d seqs", len(self.seg_set)) self.is_val = is_val - self._seq_lengths = None if num_frames_file is not None: - self._read_num_frames_file(num_frames_file) - self._prune_short_seqs(min_chunk_length) - - self.short_seq_exist = self._seq_shorter_than_max_length_exists( - max_chunk_length - ) + if rank == 0: + logging.info("loading durations file %s", num_frames_file) - self._prepare_class_info(class_file) + time_durs = SegmentSet.load(num_frames_file) + self.seg_set["num_frames"] = time_durs.loc[ + self.seg_set["id"] + ].class_id.values.astype(int, copy=False) + else: + assert "num_frames" in self.seg_set - if max_chunk_length is None: - max_chunk_length = min_chunk_length - self._min_chunk_length = min_chunk_length - self._max_chunk_length = max_chunk_length + logging.info("loading class-info files") + self._load_class_infos(class_names, class_files, is_val) - self.return_fullseqs = return_fullseqs - self.return_class = return_class + self.return_segment_info = ( + [] if return_segment_info is None else return_segment_info + ) self.transpose_input = transpose_input - def _read_num_frames_file(self, file_path): - logging.info("reading num_frames file %s" % file_path) - nf_df = pd.read_csv(file_path, header=None, sep=" ") - nf_df.index = nf_df[0] - self._seq_lengths = nf_df.loc[self.u2c.key, 1].values + def _load_class_infos(self, class_names, class_files, is_val): + self.class_info = {} + if class_names is None: + assert class_files is None + return + + assert len(class_names) == len(class_files) + for name, file in zip(class_names, class_files): + assert ( + name in self.seg_set + ), f"class_name {name} not present in the segment set" + if self.rank == 0: + logging.info("loading class-info file %s" % file) + table = ClassInfo.load(file) + self.class_info[name] = table + if not is_val: + # check that all classes are present in the training segments + class_ids = table["id"] + segment_class_ids = self.seg_set[name].unique() + for c_id in class_ids: + if c_id not in segment_class_ids: + logging.warning( + "%s class: %s not present in dataset", name, c_id + ) + + def set_epoch(self, epoch): + self.epoch = epoch @property def num_seqs(self): - return len(self.u2c) + return len(self.seg_set) def __len__(self): return self.num_seqs @property def seq_lengths(self): - if self._seq_lengths is None: - self._seq_lengths = self.r.read_num_rows(self.u2c.key) - - return self._seq_lengths + return self.seg_set["num_frames"] @property def total_length(self): return np.sum(self.seq_lengths) - @property - def min_chunk_length(self): - if self.return_fullseqs: - self._min_chunk_length = np.min(self.seq_lengths) - return self._min_chunk_length - - @property - def max_chunk_length(self): - if self._max_chunk_length is None: - self._max_chunk_length = np.max(self.seq_lengths) - return self._max_chunk_length - @property def min_seq_length(self): return np.min(self.seq_lengths) @@ -109,239 +131,114 @@ def min_seq_length(self): def max_seq_length(self): 
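+        """Returns the number of frames of the longest segment."""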
return np.max(self.seq_lengths) - def _prune_short_seqs(self, min_length): - logging.info("pruning short seqs") - keep_idx = self.seq_lengths >= min_length - self.u2c = self.u2c.filter_index(keep_idx) - self._seq_lengths = self.seq_lengths[keep_idx] - logging.info( - "pruned seqs with min_length < %d," - "keep %d/%d seqs" % (min_length, self.num_seqs, len(keep_idx)) - ) - - def _prepare_class_info(self, class_file): - class_weights = None - if class_file is None: - classes, class_idx = np.unique(self.u2c.info, return_inverse=True) - class2idx = {k: i for i, k in enumerate(classes)} - else: - logging.info("reading class-file %s" % (class_file)) - class_info = pd.read_csv(class_file, header=None, sep=" ") - class2idx = {str(k): i for i, k in enumerate(class_info[0])} - class_idx = np.array([class2idx[k] for k in self.u2c.info], dtype=int) - if class_info.shape[1] == 2: - class_weights = np.array(class_info[1]).astype( - floatstr_torch(), copy=False - ) - - self.num_classes = len(class2idx) - - class2utt_idx = {} - class2num_utt = np.zeros((self.num_classes,), dtype=int) - - for k in range(self.num_classes): - idx = (class_idx == k).nonzero()[0] - class2utt_idx[k] = idx - class2num_utt[k] = len(idx) - if class2num_utt[k] == 0: - if not self.is_val: - logging.warning("class %d doesn't have any samples" % (k)) - if class_weights is None: - class_weights = np.ones((self.num_classes,), dtype=floatstr_torch()) - class_weights[k] = 0 - - count_empty = np.sum(class2num_utt == 0) - if count_empty > 0: - logging.warning("%d classes have 0 samples" % (count_empty)) - - self.utt_idx2class = class_idx - self.class2utt_idx = class2utt_idx - self.class2num_utt = class2num_utt - if class_weights is not None: - class_weights /= np.sum(class_weights) - class_weights = torch.Tensor(class_weights) - self.class_weights = class_weights - - if self.short_seq_exist: - # if there are seq shorter than max_chunk_lenght we need some extra variables - # we will need class_weights to put to 0 classes that have all utts shorter than the batch chunk length - if self.class_weights is None: - self.class_weights = torch.ones((self.num_classes,)) - - # we need the max length of the utterances of each class - class2max_length = torch.zeros((self.num_classes,), dtype=torch.int) - for c in range(self.num_classes): - if class2num_utt[c] > 0: - class2max_length[c] = int( - np.max(self.seq_lengths[self.class2utt_idx[c]]) - ) - - self.class2max_length = class2max_length - - def _seq_shorter_than_max_length_exists(self, max_length): - return np.any(self.seq_lengths < max_length) - @property - def var_chunk_length(self): - return self.min_chunk_length < self.max_chunk_length - - def get_random_chunk_length(self): - - if self.var_chunk_length: - return torch.randint( - low=self.min_chunk_length, high=self.max_chunk_length + 1, size=(1,) - ).item() - - return self.max_chunk_length - - # def get_random_chunk_length(self, index): - - # if self.min_chunk_length < self.max_chunk_length: - # if self.short_seq_exist: - # max_chunk_length = min(int(np.min(self.seq_lengths[index])), - # self.max_chunk_length) - # else: - # max_chunk_length = self.max_chunk_length + def num_classes(self): + return {k: t.num_classes for k, t in self.class_info.items()} + + def _parse_segment_item(self, segment): + if isinstance(segment, (tuple, list)): + seg_id, start, num_frames = segment + assert num_frames <= self.seg_set.loc[seg_id].num_frames, ( + f"{seg_id} with start={start} num_frames " + f"({self.seg_set.loc[seg_id].num_frames}) < " + f"chunk duration 
({num_frames})" + ) + else: + seg_id, start, num_frames = segment, 0, 0 - # chunk_length = torch.randint( - # low=self.min_chunk_length, high=max_chunk_length+1, size=(1,)).item() + if "start" in self.seg_set: + start += self.seg_set.loc[seg_id].start - # # logging.info('{} {} {} set_random_chunk_length={}'.format( - # # self,os.getpid(), threading.get_ident(), chunk_length)) - # return chunk_length + return seg_id, int(start), int(num_frames) - # return self.max_chunk_length + def _read_feats(self, seg_id, start, num_frames): + x = self.r.read(seg_id, row_offset=start, num_rows=num_frames)[0].astype( + floatstr_torch(), copy=False + ) + return x - def __getitem__(self, index): - # logging.info('{} {} {} get item {}'.format( - # self, os.getpid(), threading.get_ident(), index)) - if self.return_fullseqs: - return self._get_fullseq(index) - else: - return self._get_random_chunk(index) + def _get_segment_info(self, seg_id): + seg_info = {} + # converts the class_ids to integers + for info_name in self.return_segment_info: + seg_info_i = self.seg_set.loc[seg_id, info_name] + if info_name in self.class_info: + # if the type of information is a class-id + # we use the class information table to + # convert from id to integer + class_info = self.class_info[info_name] + seg_info_i = class_info.loc[seg_info_i, "class_idx"] - def _get_fullseq(self, index): - key = self.u2c.key[index] - x = self.r.read([key])[0].astype(floatstr_torch(), copy=False) - if self.transpose_input: - x = x.T - if not self.return_class: - return x + seg_info[info_name] = seg_info_i - class_idx = self.utt_idx2class[index] - return x, class_idx + return seg_info - def _get_random_chunk(self, index): + def __getitem__(self, segment): - if len(index) == 2: - index, chunk_length = index - else: - chunk_length = self.max_chunk_length - - key = self.u2c.key[index] - full_seq_length = int(self.seq_lengths[index]) - assert ( - chunk_length <= full_seq_length - ), "chunk_length(%d) <= full_seq_length(%d)" % (chunk_length, full_seq_length) - first_frame = torch.randint( - low=0, high=full_seq_length - chunk_length + 1, size=(1,) - ).item() - - x = self.r.read([key], row_offset=first_frame, num_rows=chunk_length)[0].astype( - floatstr_torch(), copy=False - ) + seg_id, start, num_frames = self._parse_segment_item(segment) + x = self._read_feats(seg_id, start, num_frames) + num_frames = x.shape[0] if self.transpose_input: x = x.T - if not self.return_class: - return x + data = {"seg_id": seg_id, "x": x, "x_lengths": num_frames} - class_idx = self.utt_idx2class[index] - return x, class_idx + # adds the segment labels + seg_info = self._get_segment_info(seg_id) + data.update(seg_info) + return data @staticmethod def filter_args(**kwargs): - valid_args = ( - "feat_file", - "key_file", - "path_prefix", - "class_file", - "num_frames_file", - "min_chunk_length", - "max_chunk_length", - "return_fullseqs", - "part_idx", - "num_parts", - ) - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return filter_func_args(FeatSeqDataset.__init__, kwargs) @staticmethod - def add_class_args(parser, prefix=None, skip={"feat_file", "key_file"}): + def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") if "feat_file" not in skip: parser.add_argument( - "--feat-file", - required=True, - help=("acoustic features manifest file"), + "--audio-file", required=True, help=("feature manifest file"), ) - if "key_file" not in skip: + if "segments_file" not in skip: 
parser.add_argument( - "--key-file", - required=True, - help=("key manifest file"), + "--segments-file", required=True, help=("segments manifest file"), ) parser.add_argument( - "--path-prefix", default="", help=("path prefix for rspecifier scp file") + "--class-names", + default=None, + nargs="+", + help=( + "list with the names of the types of classes in the datasets, e.g., speaker, language" + ), ) parser.add_argument( - "--class-file", - default=None, - help=("ordered list of classes keys, it can contain class weights"), + "--class-files", default=None, nargs="+", help=("list of class info files"), ) parser.add_argument( "--num-frames-file", default=None, - help=( - "utt to num_frames file, if None it reads from the dataset " - "but it is slow" - ), + help=("segment to num-frames file, if durations are not in segments_file"), ) parser.add_argument( - "--min-chunk-length", - type=int, - default=None, - help=("minimum length of sequence chunks"), - ) - parser.add_argument( - "--max-chunk-length", - type=int, + "--return-segment-info", default=None, - help=("maximum length of sequence chunks"), + nargs="+", + help=( + "list of columns of the segment file which should be returned as supervisions" + ), ) parser.add_argument( - "--return-fullseqs", - default=False, - action="store_true", - help=("returns full sequences instead of chunks"), + "--path-prefix", default="", help=("path prefix for rspecifier scp file") ) - - # parser.add_argument('--part-idx', - # type=int, default=1, - # help=('splits the list of files in num-parts and process part_idx')) - # parser.add_argument('--num-parts', - # type=int, default=1, - # help=('splits the list of files in num-parts and process part_idx')) + RF.add_class_args(parser) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='feature sequence dataset options') - - add_argparse_args = add_class_args diff --git a/hyperion/torch/layer_blocks/__init__.py b/hyperion/torch/layer_blocks/__init__.py index c292f09a..2fa71766 100644 --- a/hyperion/torch/layer_blocks/__init__.py +++ b/hyperion/torch/layer_blocks/__init__.py @@ -4,7 +4,15 @@ """ from .fc_blocks import FCBlock -from .se_blocks import SEBlock2D, TSEBlock2D, SEBlock2d, TSEBlock2d, SEBlock1d +from .se_blocks import ( + SEBlock2D, + TSEBlock2D, + SEBlock2d, + TSEBlock2d, + FwSEBlock2d, + CFwSEBlock2d, + SEBlock1d, +) from .tdnn_blocks import TDNNBlock from .etdnn_blocks import ETDNNBlock from .resetdnn_blocks import ResETDNNBlock diff --git a/hyperion/torch/layer_blocks/res2net1d_blocks.py b/hyperion/torch/layer_blocks/res2net1d_blocks.py index 59706f61..804dbbd3 100644 --- a/hyperion/torch/layer_blocks/res2net1d_blocks.py +++ b/hyperion/torch/layer_blocks/res2net1d_blocks.py @@ -213,7 +213,7 @@ def forward(self, x, x_mask=None): x_i = self.bn1s[i](x_i) x_i = self.act1(x_i) if not self.norm_before: - x_i = self.bn1(x_i) + x_i = self.bn1s[i](x_i) x.append(x_i) if self.scale > 1: @@ -405,7 +405,7 @@ def forward(self, x, x_mask=None): x_i = self.bn2s[i](x_i) x_i = self.act2(x_i) if not self.norm_before: - x_i = self.bn2(x_i) + x_i = self.bn2s[i](x_i) x.append(x_i) if self.scale > 1: diff --git a/hyperion/torch/layer_blocks/res2net2d_blocks.py b/hyperion/torch/layer_blocks/res2net2d_blocks.py index e426d809..26d19a9a 100644 --- a/hyperion/torch/layer_blocks/res2net2d_blocks.py +++ b/hyperion/torch/layer_blocks/res2net2d_blocks.py @@ -213,7 +213,7 @@ def forward(self, x, x_mask=None): x_i = self.bn1s[i](x_i) x_i = self.act1(x_i) if not 
self.norm_before:
-                x_i = self.bn1(x_i)
+                x_i = self.bn1s[i](x_i)
 
             x.append(x_i)
 
         if self.scale > 1:
@@ -402,7 +402,7 @@ def forward(self, x, x_mask=None):
                 x_i = self.bn2s[i](x_i)
             x_i = self.act2(x_i)
             if not self.norm_before:
-                x_i = self.bn2(x_i)
+                x_i = self.bn2s[i](x_i)
 
             x.append(x_i)
 
         if self.scale > 1:

diff --git a/hyperion/torch/layer_blocks/res2net_blocks.py b/hyperion/torch/layer_blocks/res2net_blocks.py
index 83ce7601..072926c9 100644
--- a/hyperion/torch/layer_blocks/res2net_blocks.py
+++ b/hyperion/torch/layer_blocks/res2net_blocks.py
@@ -8,7 +8,7 @@
 from torch.nn import Conv2d, BatchNorm2d, Dropout2d
 
 from ..layers import ActivationFactory as AF
-from .se_blocks import SEBlock2D, TSEBlock2D
+from .se_blocks import SEBlock2d, TSEBlock2d, FwSEBlock2d, CFwSEBlock2d
 
 
 def _conv3x3(in_channels, out_channels, stride=1, groups=1, dilation=1, bias=False):
@@ -80,6 +80,7 @@ def __init__(
         norm_layer=None,
         norm_before=True,
         se_r=None,
+        se_type="cw-se",
         time_se=False,
         num_feats=None,
     ):
@@ -148,9 +149,16 @@ def __init__(
 
         if se_r is not None:
             if time_se:
-                self.se_layer = TSEBlock2D(channels, num_feats, se_r, activation)
-            else:
-                self.se_layer = SEBlock2D(channels, se_r, activation)
+                se_type = "t-se"
+
+            if se_type == "t-se":
+                self.se_layer = TSEBlock2d(channels, num_feats, se_r, activation)
+            elif se_type == "cw-se":
+                self.se_layer = SEBlock2d(channels, se_r, activation)
+            elif se_type == "fw-se":
+                self.se_layer = FwSEBlock2d(num_feats, se_r, activation)
+            elif se_type == "cfw-se":
+                self.se_layer = CFwSEBlock2d(channels, num_feats, se_r, activation)
         else:
             self.se_layer = None
 
@@ -255,6 +263,7 @@ def __init__(
         norm_layer=None,
         norm_before=True,
         se_r=None,
+        se_type="cw-se",
         time_se=False,
         num_feats=None,
     ):
@@ -318,11 +327,17 @@ def __init__(
 
         if se_r is not None:
             if time_se:
-                self.se_layer = TSEBlock2D(
-                    channels * self.expansion, num_feats, se_r, activation
-                )
-            else:
-                self.se_layer = SEBlock2D(channels * self.expansion, se_r, activation)
+                se_type = "t-se"
+
+            se_channels = channels * self.expansion
+            if se_type == "t-se":
+                self.se_layer = TSEBlock2d(se_channels, num_feats, se_r, activation)
+            elif se_type == "cw-se":
+                self.se_layer = SEBlock2d(se_channels, se_r, activation)
+            elif se_type == "fw-se":
+                self.se_layer = FwSEBlock2d(num_feats, se_r, activation)
+            elif se_type == "cfw-se":
+                self.se_layer = CFwSEBlock2d(se_channels, num_feats, se_r, activation)
         else:
             self.se_layer = None
 
@@ -362,7 +377,7 @@ def forward(self, x, x_mask=None):
                 x_i = self.bn2s[i](x_i)
             x_i = self.act2(x_i)
             if not self.norm_before:
-                x_i = self.bn2(x_i)
+                x_i = self.bn2s[i](x_i)
 
             x.append(x_i)
 
         if self.scale > 1:

diff --git a/hyperion/torch/layer_blocks/se_blocks.py b/hyperion/torch/layer_blocks/se_blocks.py
index c3ba8e20..e99d545e 100644
--- a/hyperion/torch/layer_blocks/se_blocks.py
+++ b/hyperion/torch/layer_blocks/se_blocks.py
@@ -10,7 +10,7 @@
 from ..layers import ActivationFactory as AF
 
 
-class SEBlock2D(nn.Module):
+class SEBlock2d(nn.Module):
     """Squeeze-excitation block 2d
        from https://arxiv.org/abs/1709.01507.
 
@@ -43,8 +43,8 @@ def _standardize_mask(self, mask):
 
         return mask
 
-    def forward(self, x, x_mask=None):
-        """Forward function.
+    def compute_scale_logits(self, x, x_mask=None):
+        """Computes the scale logits before the sigmoid.
 
         Args:
           x: input tensor with shape = (batch, channels, heigh, width).
@@ -61,12 +61,26 @@ def forward(self, x, x_mask=None):
             total = torch.mean(x_mask, dim=(2, 3), keepdim=True)
             z = torch.mean(x * x_mask, dim=(2, 3), keepdim=True) / total
 
-        scale = self.sigmoid(self.conv2(self.act(self.conv1(z))))
+        return self.conv2(self.act(self.conv1(z)))
+
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, channels, height, width).
+          x_mask: Binary mask indicating which spatial dimensions are valid of
+                  shape=(batch, time), (batch, 1, time), (batch, height, width)
+
+        Returns:
+          Tensor with shape = (batch, channels, height, width).
+        """
+        scale_logits = self.compute_scale_logits(x, x_mask)
+        scale = self.sigmoid(scale_logits)
         y = scale * x
         return y
 
 
-class TSEBlock2D(nn.Module):
+class TSEBlock2d(nn.Module):
     """From https://arxiv.org/abs/1709.01507
        Modified to do pooling only in time dimension.
 
@@ -138,6 +152,77 @@ def forward(self, x, x_mask=None):
         return y
 
 
+class FwSEBlock2d(SEBlock2d):
+    """Frequency-wise squeeze-excitation block 2d.
+
+    Attributes:
+      num_feats: number of input/output frequency bins.
+      r: Squeeze-excitation compression ratio.
+      activation: Non-linear activation object, string of configuration dictionary.
+
+    """
+
+    def __init__(self, num_feats, r=16, activation={"name": "relu", "inplace": True}):
+        super().__init__(num_feats, r, activation)
+
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, channels, height, width).
+          x_mask: Binary mask indicating which spatial dimensions are valid of
+                  shape=(batch, time), (batch, 1, time)
+        Returns:
+          Tensor with shape = (batch, channels, height, width).
+        """
+        x = x.transpose(1, 2)
+        y = super().forward(x, x_mask)
+        y = y.transpose(1, 2).contiguous()
+        return y
+
+
+class CFwSEBlock2d(nn.Module):
+    """2-d channel and frequency wise squeeze-excitation block
+
+    Attributes:
+      num_channels: input/output channels.
+      num_feats: Number of features in dimension 2.
+      r: Squeeze-excitation compression ratio.
+      activation: Non-linear activation object, string of configuration dictionary.
+
+    """
+
+    def __init__(
+        self,
+        num_channels,
+        num_feats,
+        r=16,
+        activation={"name": "relu", "inplace": True},
+    ):
+        super().__init__()
+        self.cw_se = SEBlock2d(num_channels, r, activation)
+        self.fw_se = SEBlock2d(num_feats, r, activation)
+
+    def forward(self, x, x_mask=None):
+        """Forward function.
+
+        Args:
+          x: input tensor with shape = (batch, channels, height, width).
+          x_mask: Binary mask indicating which spatial dimensions are valid of
+                  shape=(batch, time), (batch, 1, time)
+        Returns:
+          Tensor with shape = (batch, channels, height, width).
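+
+        Note: the channel-wise and frequency-wise scale logits are computed
+        separately, summed, and passed through a single sigmoid, so the two
+        gates are fused rather than applied one after the other.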
+ """ + cw_scale_logits = self.cw_se.compute_scale_logits(x, x_mask) + fw_scale_logits = self.fw_se.compute_scale_logits( + x.transpose(1, 2), x_mask + ).transpose(1, 2) + scale_logits = cw_scale_logits + fw_scale_logits + scale = torch.sigmoid(scale_logits) + y = scale * x + return y + + class SEBlock1d(nn.Module): """1d Squeeze Excitation version of https://arxiv.org/abs/1709.01507 @@ -191,5 +276,5 @@ def forward(self, x, x_mask=None): # aliases to mantein backwards compatibility -SEBlock2d = SEBlock2D -TSEBlock2d = TSEBlock2D +SEBlock2D = SEBlock2d +TSEBlock2D = TSEBlock2d diff --git a/hyperion/torch/layer_blocks/seresnet_blocks.py b/hyperion/torch/layer_blocks/seresnet_blocks.py index 7f8d0bae..5074f466 100644 --- a/hyperion/torch/layer_blocks/seresnet_blocks.py +++ b/hyperion/torch/layer_blocks/seresnet_blocks.py @@ -8,7 +8,7 @@ from torch.nn import Conv2d, Linear, BatchNorm2d, Dropout2d from ..layers import ActivationFactory as AF -from .se_blocks import SEBlock2D, TSEBlock2D +from .se_blocks import SEBlock2d, TSEBlock2d, FwSEBlock2d, CFwSEBlock2d from .resnet_blocks import ResNetBasicBlock, ResNetBNBlock @@ -42,6 +42,7 @@ def __init__( norm_layer=None, norm_before=True, se_r=16, + se_type="cw-se", time_se=False, num_feats=None, ): @@ -59,16 +60,24 @@ def __init__( ) if time_se: - self.se_layer = TSEBlock2D(channels, num_feats, se_r, activation) - else: - self.se_layer = SEBlock2D(channels, se_r, activation) + se_type = "t-se" + + if se_type == "t-se": + self.se_layer = TSEBlock2d(channels, num_feats, se_r, activation) + elif se_type == "cw-se": + self.se_layer = SEBlock2d(channels, se_r, activation) + elif se_type == "fw-se": + self.se_layer = FwSEBlock2d(num_feats, se_r, activation) + elif se_type == "cfw-se": + self.se_layer = CFwSEBlock2d(channels, num_feats, se_r, activation) def forward(self, x, x_mask=None): """Forward function. Args: x: input tensor with shape = (batch, in_channels, in_heigh, in_width). - x_mask: unused. + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) Returns: Tensor with shape = (batch, out_channels, out_heigh, out_width). @@ -92,7 +101,7 @@ def forward(self, x, x_mask=None): if self.downsample is not None: residual = self.downsample(residual) - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) x += residual x = self.act2(x) @@ -135,6 +144,7 @@ def __init__( norm_layer=None, norm_before=True, se_r=16, + se_type="cw-se", time_se=False, num_feats=None, ): @@ -152,18 +162,26 @@ def __init__( ) if time_se: - self.se_layer = TSEBlock2D( - channels * self.expansion, num_feats, se_r, activation - ) - else: - self.se_layer = SEBlock2D(channels * self.expansion, se_r, activation) + se_type = "t-se" + + se_channels = channels * self.expansion + if se_type == "t-se": + self.se_layer = TSEBlock2d(se_channels, num_feats, se_r, activation) + elif se_type == "cw-se": + self.se_layer = SEBlock2d(se_channels, se_r, activation) + elif se_type == "fw-se": + self.se_layer = FwSEBlock2d(num_feats, se_r, activation) + elif se_type == "cfw-se": + self.se_layer = CFwSEBlock2d(se_channels, num_feats, se_r, activation) def forward(self, x, x_mask=None): """Forward function. Args: x: input tensor with shape = (batch, in_channels, in_heigh, in_width). - x_mask: unused. + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time), (batch, height, width) + Returns: Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" @@ -190,7 +208,7 @@ def forward(self, x, x_mask=None): if self.downsample is not None: residual = self.downsample(residual) - x = self.se_layer(x) + x = self.se_layer(x, x_mask=x_mask) x += residual x = self.act3(x) diff --git a/hyperion/torch/layers/spec_augment.py b/hyperion/torch/layers/spec_augment.py index 1366172b..a7ebcfb1 100644 --- a/hyperion/torch/layers/spec_augment.py +++ b/hyperion/torch/layers/spec_augment.py @@ -22,7 +22,7 @@ class AxisMasker(nn.Module): min_num_mask: minimum number of masks. max_num_mask: maximum number of masks. dim: axis where we apply the mask - fill_value: masking value + mask_value: masking value """ def __init__( @@ -32,7 +32,8 @@ def __init__( min_num_masks=1, max_num_masks=2, dim=-1, - fill_value=0, + mask_method="constant", + mask_value=0, ): super().__init__() assert min_width >= 0 @@ -45,13 +46,14 @@ def __init__( self.min_num_masks = min_num_masks self.max_num_masks = max_num_masks self.dim = dim - self.fill_value = fill_value + self.mask_method = mask_method + self.mask_value = mask_value def __repr__(self): s = ( "{}(min_width={}, max_width={}, " "min_num_masks={}, max_num_masks={}, " - "dim={}, fill_value={})" + "dim={}, mask_method={}, mask_value={})" ).format( self.__class__.__name__, self.min_width, @@ -59,7 +61,8 @@ def __repr__(self): self.min_num_masks, self.max_num_masks, self.dim, - self.fill_value, + self.mask_method, + self.mask_value, ) return s @@ -111,7 +114,14 @@ def forward(self, x): else: mask = mask.unsqueeze(-1) - x = x.masked_fill(mask, self.fill_value) + if self.mask_method == "mean": + mask_value = x.mean().item() + elif self.mask_method == "min": + mask_value = x.min().item() + else: + mask_value = self.mask_value + + x = x.masked_fill(mask, mask_value) if ndim > 3: x = x.view(in_shape) @@ -225,7 +235,7 @@ class SpecAugment(nn.Module): freq_max_width: maximum width of the frequency mask. freq_min_num_mask: minimum number of frequency masks. freq_max_num_mask: maximum number of frequency masks. - fill_value: masking value. + mask_value: masking value. """ def __init__( @@ -243,7 +253,8 @@ def __init__( freq_mask_max_width=20, freq_mask_min_num_masks=1, freq_mask_max_num_masks=2, - fill_value=0, + mask_method="constant", + mask_value=0, ): super().__init__() @@ -260,7 +271,7 @@ def __init__( self.freq_mask_max_width = freq_mask_max_width self.freq_mask_min_num_masks = freq_mask_min_num_masks self.freq_mask_max_num_masks = freq_mask_max_num_masks - self.fill_value = fill_value + self.mask_value = mask_value self.time_masker = None self.freq_masker = None @@ -273,7 +284,8 @@ def __init__( min_num_masks=time_mask_min_num_masks, max_num_masks=time_mask_max_num_masks, dim=-2, - fill_value=fill_value, + mask_method=mask_method, + mask_value=mask_value, ) if self.freq_mask_prob > 0: @@ -283,7 +295,8 @@ def __init__( min_num_masks=freq_mask_min_num_masks, max_num_masks=freq_mask_max_num_masks, dim=-1, - fill_value=fill_value, + mask_method=mask_method, + mask_value=mask_value, ) if self.time_warp_prob > 0: @@ -368,7 +381,8 @@ def filter_args(**kwargs): "freq_mask_min_width", "freq_mask_max_num_masks", "freq_mask_min_num_masks", - "fill_value", + "mask_value", + "mask_method", ) d = dict((k, kwargs[k]) for k in valid_args if k in kwargs) @@ -463,9 +477,15 @@ def add_class_args(parser, prefix=None): default=2, help="max. 
number of freq mask",
         )
+        parser.add_argument(
+            "--mask-method",
+            default="constant",
+            choices=["constant", "min", "mean"],
+            help="method to get the masked value",
+        )
 
         parser.add_argument(
-            "--fill-value",
+            "--mask-value",
             type=float,
             default=0.0,
             help="filling value for the masked spec. bins",

diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py
index 2939db5b..2072241d 100644
--- a/hyperion/torch/models/xvectors/xvector.py
+++ b/hyperion/torch/models/xvectors/xvector.py
@@ -655,13 +655,13 @@ def valid_train_modes():
 
     @staticmethod
     def filter_args(**kwargs):
-        if "wo_norm" in kwargs:
-            kwargs["use_norm"] = not kwargs["wo_norm"]
-            del kwargs["wo_norm"]
+        # if "wo_norm" in kwargs:
+        #     kwargs["use_norm"] = not kwargs["wo_norm"]
+        #     del kwargs["wo_norm"]
 
-        if "norm_after" in kwargs:
-            kwargs["norm_before"] = not kwargs["norm_after"]
-            del kwargs["norm_after"]
+        # if "norm_after" in kwargs:
+        #     kwargs["norm_before"] = not kwargs["norm_after"]
+        #     del kwargs["norm_after"]
 
         # get arguments for pooling
         pool_args = PF.filter_args(**kwargs["pool_net"])
@@ -792,18 +792,31 @@ def add_class_args(parser, prefix=None, skip=set()):
         except:
             pass
 
+        # parser.add_argument(
+        #     "--wo-norm",
+        #     default=False,
+        #     action="store_true",
+        #     help="without batch normalization",
+        # )
+
+        # parser.add_argument(
+        #     "--norm-after",
+        #     default=False,
+        #     action="store_true",
+        #     help="batch normalizaton after activation",
+        # )
         parser.add_argument(
-            "--wo-norm",
-            default=False,
-            action="store_true",
-            help="without batch normalization",
+            "--use-norm",
+            default=True,
+            action=ActionYesNo,
+            help="use batch normalization",
         )
 
         parser.add_argument(
-            "--norm-after",
-            default=False,
-            action="store_true",
-            help="batch normalizaton after activation",
+            "--norm-before",
+            default=True,
+            action=ActionYesNo,
+            help="batch normalization before activation",
         )
 
         try:

diff --git a/hyperion/torch/narchs/dc1d_decoder.py b/hyperion/torch/narchs/dc1d_decoder.py
index 82ac5a8a..22f63de6 100644
--- a/hyperion/torch/narchs/dc1d_decoder.py
+++ b/hyperion/torch/narchs/dc1d_decoder.py
@@ -4,7 +4,7 @@
 import math
 
-from jsonargparse import ArgumentParser, ActionParser
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
 
 import torch
 import torch.nn as nn
@@ -279,13 +279,13 @@ def get_config(self):
 
     @staticmethod
     def filter_args(**kwargs):
-        if "wo_norm" in kwargs:
-            kwargs["use_norm"] = not kwargs["wo_norm"]
-            del kwargs["wo_norm"]
+        # if "wo_norm" in kwargs:
+        #     kwargs["use_norm"] = not kwargs["wo_norm"]
+        #     del kwargs["wo_norm"]
 
-        if "norm_after" in kwargs:
-            kwargs["norm_before"] = not kwargs["norm_after"]
-            del kwargs["norm_after"]
+        # if "norm_after" in kwargs:
+        #     kwargs["norm_before"] = not kwargs["norm_after"]
+        #     del kwargs["norm_after"]
 
         valid_args = (
             "in_channels",
@@ -418,18 +418,31 @@ def add_class_args(parser, prefix=None, head_channels=False):
         except:
             pass
 
+        # parser.add_argument(
+        #     "--wo-norm",
+        #     default=False,
+        #     action="store_true",
+        #     help="without batch normalization",
+        # )
+
+        # parser.add_argument(
+        #     "--norm-after",
+        #     default=False,
+        #     action="store_true",
+        #     help="batch normalizaton after activation",
+        # )
         parser.add_argument(
-            "--wo-norm",
-            default=False,
-            action="store_true",
-            help="without batch normalization",
+            "--use-norm",
+            default=True,
+            action=ActionYesNo,
+            help="use batch normalization",
        )
 
         parser.add_argument(
-            "--norm-after",
-            default=False,
-            action="store_true",
-            help="batch normalizaton after activation",
+            "--norm-before",
+            default=True,
+            action=ActionYesNo,
+            help="batch normalization before activation",
         )
 
         if prefix is not None:

diff --git a/hyperion/torch/narchs/dc1d_encoder.py b/hyperion/torch/narchs/dc1d_encoder.py
index c2fb3d02..619851bb 100644
--- a/hyperion/torch/narchs/dc1d_encoder.py
+++ b/hyperion/torch/narchs/dc1d_encoder.py
@@ -2,7 +2,7 @@
  Copyright 2019 Johns Hopkins University (Author: Jesus Villalba)
  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-from jsonargparse import ArgumentParser, ActionParser
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
 import math
 
 import torch
@@ -252,13 +252,13 @@ def get_config(self):
 
     @staticmethod
     def filter_args(**kwargs):
-        if "wo_norm" in kwargs:
-            kwargs["use_norm"] = not kwargs["wo_norm"]
-            del kwargs["wo_norm"]
+        # if "wo_norm" in kwargs:
+        #     kwargs["use_norm"] = not kwargs["wo_norm"]
+        #     del kwargs["wo_norm"]
 
-        if "norm_after" in kwargs:
-            kwargs["norm_before"] = not kwargs["norm_after"]
-            del kwargs["norm_after"]
+        # if "norm_after" in kwargs:
+        #     kwargs["norm_before"] = not kwargs["norm_after"]
+        #     del kwargs["norm_after"]
 
         valid_args = (
             "in_feats",
@@ -392,18 +392,32 @@ def add_class_args(parser, prefix=None, head_channels=False, in_feats=False):
         except:
             pass
 
+        # parser.add_argument(
+        #     "--wo-norm",
+        #     default=False,
+        #     action="store_true",
+        #     help="without batch normalization",
+        # )
+
+        # parser.add_argument(
+        #     "--norm-after",
+        #     default=False,
+        #     action="store_true",
+        #     help="batch normalizaton after activation",
+        # )
+
         parser.add_argument(
-            "--wo-norm",
-            default=False,
-            action="store_true",
-            help="without batch normalization",
+            "--use-norm",
+            default=True,
+            action=ActionYesNo,
+            help="use batch normalization",
         )
 
         parser.add_argument(
-            "--norm-after",
-            default=False,
-            action="store_true",
-            help="batch normalizaton after activation",
+            "--norm-before",
+            default=True,
+            action=ActionYesNo,
+            help="batch normalization before activation",
         )
 
         if prefix is not None:

diff --git a/hyperion/torch/narchs/dc2d_decoder.py b/hyperion/torch/narchs/dc2d_decoder.py
index e21d615a..0166baca 100644
--- a/hyperion/torch/narchs/dc2d_decoder.py
+++ b/hyperion/torch/narchs/dc2d_decoder.py
@@ -3,7 +3,7 @@
  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 
-from jsonargparse import ArgumentParser, ActionParser
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
 import math
 
 import torch
@@ -300,13 +300,13 @@ def get_config(self):
 
     @staticmethod
     def filter_args(**kwargs):
-        if "wo_norm" in kwargs:
-            kwargs["use_norm"] = not kwargs["wo_norm"]
-            del kwargs["wo_norm"]
+        # if "wo_norm" in kwargs:
+        #     kwargs["use_norm"] = not kwargs["wo_norm"]
+        #     del kwargs["wo_norm"]
 
-        if "norm_after" in kwargs:
-            kwargs["norm_before"] = not kwargs["norm_after"]
-            del kwargs["norm_after"]
+        # if "norm_after" in kwargs:
+        #     kwargs["norm_before"] = not kwargs["norm_after"]
+        #     del kwargs["norm_after"]
 
         valid_args = (
             "in_channels",
@@ -439,18 +439,31 @@ def add_class_args(parser, prefix=None, head_channels=False):
         except:
             pass
 
+        # parser.add_argument(
+        #     "--wo-norm",
+        #     default=False,
+        #     action="store_true",
+        #     help="without batch normalization",
+        # )
+
+        # parser.add_argument(
+        #     "--norm-after",
+        #     default=False,
+        #     action="store_true",
+        #     help="batch normalizaton after activation",
+        # )
         parser.add_argument(
-            "--wo-norm",
-            default=False,
-            action="store_true",
-            help="without batch normalization",
+            "--use-norm",
+            default=True,
+            action=ActionYesNo,
+            help="use batch normalization",
         )
 
         parser.add_argument(
-            "--norm-after",
-            default=False,
-            action="store_true",
-            help="batch normalizaton after activation",
+            "--norm-before",
+
default=True, + action=ActionYesNo, + help="batch normalizaton before activation", ) if prefix is not None: diff --git a/hyperion/torch/narchs/dc2d_encoder.py b/hyperion/torch/narchs/dc2d_encoder.py index 4102c4f7..e847dbb6 100644 --- a/hyperion/torch/narchs/dc2d_encoder.py +++ b/hyperion/torch/narchs/dc2d_encoder.py @@ -4,7 +4,7 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import torch import torch.nn as nn @@ -258,13 +258,13 @@ def get_config(self): @staticmethod def filter_args(**kwargs): - if "wo_norm" in kwargs: - kwargs["use_norm"] = not kwargs["wo_norm"] - del kwargs["wo_norm"] + # if "wo_norm" in kwargs: + # kwargs["use_norm"] = not kwargs["wo_norm"] + # del kwargs["wo_norm"] - if "norm_after" in kwargs: - kwargs["norm_before"] = not kwargs["norm_after"] - del kwargs["norm_after"] + # if "norm_after" in kwargs: + # kwargs["norm_before"] = not kwargs["norm_after"] + # del kwargs["norm_after"] valid_args = ( "in_channels", @@ -397,18 +397,31 @@ def add_class_args(parser, prefix=None, head_channels=False): except: pass + # parser.add_argument( + # "--wo-norm", + # default=False, + # action="store_true", + # help="without batch normalization", + # ) + + # parser.add_argument( + # "--norm-after", + # default=False, + # action="store_true", + # help="batch normalizaton after activation", + # ) parser.add_argument( - "--wo-norm", - default=False, - action="store_true", + "--use-norm", + default=True, + action=ActionYesNo, help="without batch normalization", ) parser.add_argument( - "--norm-after", - default=False, - action="store_true", - help="batch normalizaton after activation", + "--norm-before", + default=True, + action=ActionYesNo, + help="batch normalizaton before activation", ) if prefix is not None: diff --git a/hyperion/torch/narchs/resnet.py b/hyperion/torch/narchs/resnet.py index 9185964c..34ac9b81 100644 --- a/hyperion/torch/narchs/resnet.py +++ b/hyperion/torch/narchs/resnet.py @@ -10,6 +10,7 @@ import torch.nn as nn from torch.nn import Conv1d, Linear, BatchNorm1d +from ..utils import seq_lengths_to_mask, scale_seq_lengths from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF from ..layer_blocks import ( @@ -89,10 +90,12 @@ def __init__( do_maxpool=True, in_norm=True, se_r=16, - time_se=False, + se_type="cw-se", in_feats=None, res2net_scale=4, res2net_width_factor=1, + resb_channels=None, + time_se=False, ): super().__init__() @@ -100,6 +103,7 @@ def __init__( self.block = block self.has_se = False self.is_res2net = False + if isinstance(block, str): if block == "basic": self._block = ResNetBasicBlock @@ -117,7 +121,7 @@ def __init__( elif block == "res2bn": self._block = Res2NetBNBlock self.is_res2net = True - elif block == "seres2bn" or block == "tseres2bn": + elif block in ("seres2bn", "tseres2bn"): self._block = Res2NetBNBlock self.has_se = True self.is_res2net = True @@ -140,9 +144,13 @@ def __init__( # self.width_per_group = width_per_group self.se_r = se_r self.time_se = time_se + if time_se: + se_type = "t-se" + self.se_type = se_type self.in_feats = in_feats self.res2net_scale = res2net_scale self.res2net_width_factor = res2net_width_factor + self.resb_channels = resb_channels self.multilevel = multilevel self.endpoint_channels = endpoint_channels @@ -186,25 +194,31 @@ def __init__( self._context = self.in_block.context self._downsample_factor = self.in_block.downsample_factor + if resb_channels is None: + resb_channels = 
[base_channels * (2 ** i) for i in range(4)]
+
         self.cur_in_channels = conv_channels
-        self.layer1 = self._make_layer(self._block, base_channels, num_layers[0])
+        self.layer1 = self._make_layer(self._block, resb_channels[0], num_layers[0])
         self.layer2 = self._make_layer(
             self._block,
-            2 * base_channels,
+            # 2 * base_channels,
+            resb_channels[1],
             num_layers[1],
             stride=2,
             dilate=replace_stride_with_dilation[0],
         )
         self.layer3 = self._make_layer(
             self._block,
-            4 * base_channels,
+            # 4 * base_channels,
+            resb_channels[2],
             num_layers[2],
             stride=2,
             dilate=replace_stride_with_dilation[1],
         )
         self.layer4 = self._make_layer(
             self._block,
-            8 * base_channels,
+            # 8 * base_channels,
+            resb_channels[3],
             num_layers[3],
             stride=2,
             dilate=replace_stride_with_dilation[2],
@@ -277,8 +291,6 @@ def __init__(
                     nn.init.constant_(m.bn2.weight, 0)

     def _make_layer(self, block, channels, num_blocks, stride=1, dilate=False):
-        norm_layer = self._norm_layer
-
         downsample = None
         previous_dilation = self.dilation
         if dilate:
             self.dilation *= stride
@@ -286,11 +298,11 @@ def _make_layer(self, block, channels, num_blocks, stride=1, dilate=False):

         kwargs = {}
         if self.has_se:
-            if self.time_se:
+            if self.se_type == "cw-se":
+                kwargs = {"se_r": self.se_r}
+            else:
                 num_feats = int(self.in_feats / (self._downsample_factor * stride))
                 kwargs = {"se_r": self.se_r, "time_se": True, "num_feats": num_feats}
-            else:
-                kwargs = {"se_r": self.se_r}

         if self.is_res2net:
             kwargs["scale"] = self.res2net_scale
@@ -401,6 +413,15 @@ def out_shape(self, in_shape=None):

         return (in_shape[0], self.layer4[-1].out_channels, H, W)

+    def _forward_layer_with_lens(self, layer, x, in_lengths, max_in_length):
+        x_lengths = scale_seq_lengths(in_lengths, x.size(-1), max_in_length)
+        x_mask = seq_lengths_to_mask(x_lengths, x.size(-1), time_dim=3)
+
+        for sub_layer in layer:
+            x = sub_layer(x, x_mask)
+
+        return x
+
     def forward(self, x, x_lengths=None):
         """forward function

@@ -414,21 +435,39 @@
            otherwise, it returns tensor of represeantions of size=(batch, Cout, Hout, Wout)

         """
+        if x_lengths is not None:
+            # if all lengths are eq.
to the max length, we set x_lengths to None + max_length = x.size(-1) + if torch.all(x_lengths == max_length): + x_lengths = None if self.in_norm: x = self.in_bn(x) feats = [] x = self.in_block(x) - x = self.layer1(x) - x = self.layer2(x) - if self.multilevel: - feats.append(x) - x = self.layer3(x) - if self.multilevel: - feats.append(x) - x = self.layer4(x) - if self.multilevel: - feats.append(x) + + if x_lengths is None: + x = self.layer1(x) + x = self.layer2(x) + if self.multilevel: + feats.append(x) + x = self.layer3(x) + if self.multilevel: + feats.append(x) + x = self.layer4(x) + if self.multilevel: + feats.append(x) + else: + x = self._forward_layer_with_lens(self.layer1, x, x_lengths, max_length) + x = self._forward_layer_with_lens(self.layer2, x, x_lengths, max_length) + if self.multilevel: + feats.append(x) + x = self._forward_layer_with_lens(self.layer3, x, x_lengths, max_length) + if self.multilevel: + feats.append(x) + x = self._forward_layer_with_lens(self.layer4, x, x_lengths, max_length) + if self.multilevel: + feats.append(x) if self.multilevel: out2 = self.endpoint2(feats[0]) @@ -547,9 +586,11 @@ def get_config(self): "out_act": out_act, "hid_act": hid_act, "se_r": self.se_r, + "se_type": self.se_type, "in_feats": self.in_feats, "res2net_scale": self.res2net_scale, "res2net_width_factor": self.res2net_width_factor, + "resb_channels": self.resb_channels, } base_config = super().get_config() @@ -608,6 +649,20 @@ def __init__(self, in_channels, **kwargs): super().__init__("bn", [3, 4, 23, 3], in_channels, **kwargs) +class IdRndResNet100(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["resb_channels"] = [128, 128, 256, 256] + super().__init__("basic", [6, 16, 24, 3], in_channels, **kwargs) + + +class IdRndResNet202(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["resb_channels"] = [128, 128, 256, 256] + super().__init__("basic", [6, 16, 75, 3], in_channels, **kwargs) + + class LResNet18(ResNet): def __init__(self, in_channels, **kwargs): kwargs["conv_channels"] = 16 @@ -636,6 +691,16 @@ def __init__(self, in_channels, **kwargs): super().__init__("bn", [3, 4, 6, 3], in_channels, **kwargs) +# multi-level feature ResNet +class LResNet34_345(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["conv_channels"] = 16 + kwargs["base_channels"] = 16 + kwargs["multilevel"] = True + kwargs["endpoint_channels"] = 64 + super().__init__("basic", [3, 4, 6, 3], in_channels, **kwargs) + + # Squezee-Excitation ResNets @@ -813,6 +878,228 @@ def __init__(self, in_channels, **kwargs): super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) +# Freq-wise Squezee-Excitation ResNets + + +class FwSEResNet18(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("sebasic", [2, 2, 2, 2], in_channels, **kwargs) + + +class FwSEResNet34(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("sebasic", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSEResNet50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSEResNet101(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("sebn", [3, 4, 23, 3], in_channels, **kwargs) + + +class FwSEResNet152(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + 
super().__init__("sebn", [3, 8, 36, 3], in_channels, **kwargs) + + +class FwSEResNext50_32x4d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 32 + kwargs["base_channels"] = 128 + kwargs["se_type"] = "fw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSEResNext101_32x8d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 32 + kwargs["base_channels"] = 256 + kwargs["se_type"] = "fw-se" + super().__init__("sebn", [3, 4, 23, 3], in_channels, **kwargs) + + +class FwSEWideResNet50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["se_type"] = "fw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSEWideResNet101(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["se_type"] = "fw-se" + super().__init__("sebn", [3, 4, 23, 3], in_channels, **kwargs) + + +class FwSELResNet18(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["conv_channels"] = 16 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "fw-se" + super().__init__("sebasic", [2, 2, 2, 2], in_channels, **kwargs) + + +class FwSELResNet34(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["conv_channels"] = 16 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "fw-se" + super().__init__("sebasic", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSELResNet50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["conv_channels"] = 16 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "fw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSELResNext50_4x4d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 4 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "fw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSEIdRndResNet100(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["resb_channels"] = [128, 128, 256, 256] + kwargs["se_type"] = "fw-se" + super().__init__("basic", [6, 16, 24, 3], in_channels, **kwargs) + + +class FwSEIdRndResNet202(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["resb_channels"] = [128, 128, 256, 256] + kwargs["se_type"] = "fw-se" + super().__init__("basic", [6, 16, 75, 3], in_channels, **kwargs) + + +# Channel-Freq-wise Squezee-Excitation ResNets + + +class CFwSEResNet18(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("sebasic", [2, 2, 2, 2], in_channels, **kwargs) + + +class CFwSEResNet34(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("sebasic", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSEResNet50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSEResNet101(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", [3, 4, 23, 3], in_channels, **kwargs) + + +class CFwSEResNet152(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", [3, 8, 36, 3], in_channels, **kwargs) + + +class CFwSEResNext50_32x4d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 32 + kwargs["base_channels"] = 128 + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", 
[3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSEResNext101_32x8d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 32 + kwargs["base_channels"] = 256 + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", [3, 4, 23, 3], in_channels, **kwargs) + + +class CFwSEWideResNet50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSEWideResNet101(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", [3, 4, 23, 3], in_channels, **kwargs) + + +class CFwSELResNet18(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["conv_channels"] = 16 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "cfw-se" + super().__init__("sebasic", [2, 2, 2, 2], in_channels, **kwargs) + + +class CFwSELResNet34(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["conv_channels"] = 16 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "cfw-se" + super().__init__("sebasic", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSELResNet50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["conv_channels"] = 16 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSELResNext50_4x4d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 4 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "cfw-se" + super().__init__("sebn", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSEIdRndResNet100(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["resb_channels"] = [128, 128, 256, 256] + kwargs["se_type"] = "cfw-se" + super().__init__("basic", [6, 16, 24, 3], in_channels, **kwargs) + + +class CFwSEIdRndResNet202(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["resb_channels"] = [128, 128, 256, 256] + kwargs["se_type"] = "cfw-se" + super().__init__("basic", [6, 16, 75, 3], in_channels, **kwargs) + + #################### Res2Net variants ######################## # Standard Res2Nets @@ -1024,11 +1311,155 @@ def __init__(self, in_channels, **kwargs): super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) -# multi-level feature ResNet -class LResNet34_345(ResNet): +# frequency-wise Squezee-Excitation Res2Nets +class FwSERes2Net18(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("se2basic", [2, 2, 2, 2], in_channels, **kwargs) + + +class FwSERes2Net34(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("se2basic", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSERes2Net50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSERes2Net101(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("seres2bn", [3, 4, 23, 3], in_channels, **kwargs) + + +class FwSERes2Net152(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "fw-se" + super().__init__("seres2bn", [3, 8, 36, 3], in_channels, **kwargs) + + +class FwSERes2Next50_32x4d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 32 + kwargs["base_channels"] = 128 + kwargs["se_type"] = 
"fw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSERes2Next101_32x8d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 32 + kwargs["base_channels"] = 256 + kwargs["se_type"] = "fw-se" + super().__init__("seres2bn", [3, 4, 23, 3], in_channels, **kwargs) + + +class FwSEWideRes2Net50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["se_type"] = "fw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSEWideRes2Net101(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["se_type"] = "fw-se" + super().__init__("seres2bn", [3, 4, 23, 3], in_channels, **kwargs) + + +class FwSELRes2Net50(ResNet): def __init__(self, in_channels, **kwargs): kwargs["conv_channels"] = 16 kwargs["base_channels"] = 16 - kwargs["multilevel"] = True - kwargs["endpoint_channels"] = 64 - super().__init__("basic", [3, 4, 6, 3], in_channels, **kwargs) + kwargs["se_type"] = "fw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +class FwSELRes2Next50_4x4d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 4 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "fw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +# channel-frequency-wise Squezee-Excitation Res2Nets +class CFwSERes2Net18(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("se2basic", [2, 2, 2, 2], in_channels, **kwargs) + + +class CFwSERes2Net34(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("se2basic", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSERes2Net50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSERes2Net101(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 4, 23, 3], in_channels, **kwargs) + + +class CFwSERes2Net152(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 8, 36, 3], in_channels, **kwargs) + + +class CFwSERes2Next50_32x4d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 32 + kwargs["base_channels"] = 128 + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSERes2Next101_32x8d(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["groups"] = 32 + kwargs["base_channels"] = 256 + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 4, 23, 3], in_channels, **kwargs) + + +class CFwSEWideRes2Net50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSEWideRes2Net101(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["base_channels"] = 128 + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 4, 23, 3], in_channels, **kwargs) + + +class CFwSELRes2Net50(ResNet): + def __init__(self, in_channels, **kwargs): + kwargs["conv_channels"] = 16 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) + + +class CFwSELRes2Next50_4x4d(ResNet): + def __init__(self, in_channels, **kwargs): + 
kwargs["groups"] = 4 + kwargs["base_channels"] = 16 + kwargs["se_type"] = "cfw-se" + super().__init__("seres2bn", [3, 4, 6, 3], in_channels, **kwargs) diff --git a/hyperion/torch/narchs/resnet1d_decoder.py b/hyperion/torch/narchs/resnet1d_decoder.py index f24887fe..3ab454ae 100644 --- a/hyperion/torch/narchs/resnet1d_decoder.py +++ b/hyperion/torch/narchs/resnet1d_decoder.py @@ -2,7 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import math import torch @@ -323,13 +323,13 @@ def get_config(self): @staticmethod def filter_args(**kwargs): - if "wo_norm" in kwargs: - kwargs["use_norm"] = not kwargs["wo_norm"] - del kwargs["wo_norm"] + # if "wo_norm" in kwargs: + # kwargs["use_norm"] = not kwargs["wo_norm"] + # del kwargs["wo_norm"] - if "norm_after" in kwargs: - kwargs["norm_before"] = not kwargs["norm_after"] - del kwargs["norm_after"] + # if "norm_after" in kwargs: + # kwargs["norm_before"] = not kwargs["norm_after"] + # del kwargs["norm_after"] valid_args = ( "in_channels", @@ -349,7 +349,7 @@ def filter_args(**kwargs): "head_act", "dropout_rate", "use_norm", - "norm-layer", + "norm_layer", "norm_before", ) @@ -478,18 +478,31 @@ def add_class_args(parser, prefix=None): except: pass + # parser.add_argument( + # "--wo-norm", + # default=False, + # action="store_true", + # help="without batch normalization", + # ) + + # parser.add_argument( + # "--norm-after", + # default=False, + # action="store_true", + # help="batch normalizaton after activation", + # ) parser.add_argument( - "--wo-norm", - default=False, - action="store_true", + "--use-norm", + default=True, + action=ActionYesNo, help="without batch normalization", ) parser.add_argument( - "--norm-after", - default=False, - action="store_true", - help="batch normalizaton after activation", + "--norm-before", + default=True, + action=ActionYesNo, + help="batch normalizaton before activation", ) parser.add_argument( diff --git a/hyperion/torch/narchs/resnet1d_encoder.py b/hyperion/torch/narchs/resnet1d_encoder.py index 794f8144..deef9c59 100644 --- a/hyperion/torch/narchs/resnet1d_encoder.py +++ b/hyperion/torch/narchs/resnet1d_encoder.py @@ -537,13 +537,13 @@ def change_dropouts(self, dropout_rate, drop_connect_rate): @staticmethod def filter_args(**kwargs): - if "wo_norm" in kwargs: - kwargs["use_norm"] = not kwargs["wo_norm"] - del kwargs["wo_norm"] + # if "wo_norm" in kwargs: + # kwargs["use_norm"] = not kwargs["wo_norm"] + # del kwargs["wo_norm"] - if "norm_after" in kwargs: - kwargs["norm_before"] = not kwargs["norm_after"] - del kwargs["norm_after"] + # if "norm_after" in kwargs: + # kwargs["norm_before"] = not kwargs["norm_after"] + # del kwargs["norm_after"] valid_args = ( "in_feats", @@ -722,18 +722,31 @@ def add_class_args(parser, prefix=None, skip=set(["in_feats"])): except: pass + # parser.add_argument( + # "--wo-norm", + # default=False, + # action="store_true", + # help="without batch normalization", + # ) + + # parser.add_argument( + # "--norm-after", + # default=False, + # action="store_true", + # help="batch normalizaton after activation", + # ) parser.add_argument( - "--wo-norm", - default=False, - action="store_true", + "--use-norm", + default=True, + action=ActionYesNo, help="without batch normalization", ) parser.add_argument( - "--norm-after", - default=False, - action="store_true", - help="batch normalizaton 
after activation", + "--norm-before", + default=True, + action=ActionYesNo, + help="batch normalizaton before activation", ) parser.add_argument( @@ -754,10 +767,7 @@ def add_class_args(parser, prefix=None, skip=set(["in_feats"])): ) parser.add_argument( - "--res2net-scale", - default=1, - type=int, - help=("res2net scaling parameter "), + "--res2net-scale", default=1, type=int, help=("res2net scaling parameter "), ) parser.add_argument( diff --git a/hyperion/torch/narchs/resnet2d_decoder.py b/hyperion/torch/narchs/resnet2d_decoder.py index 6457ada1..22b1e7a7 100644 --- a/hyperion/torch/narchs/resnet2d_decoder.py +++ b/hyperion/torch/narchs/resnet2d_decoder.py @@ -4,7 +4,7 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import torch import torch.nn as nn @@ -330,13 +330,13 @@ def get_config(self): @staticmethod def filter_args(**kwargs): - if "wo_norm" in kwargs: - kwargs["use_norm"] = not kwargs["wo_norm"] - del kwargs["wo_norm"] + # if "wo_norm" in kwargs: + # kwargs["use_norm"] = not kwargs["wo_norm"] + # del kwargs["wo_norm"] - if "norm_after" in kwargs: - kwargs["norm_before"] = not kwargs["norm_after"] - del kwargs["norm_after"] + # if "norm_after" in kwargs: + # kwargs["norm_before"] = not kwargs["norm_after"] + # del kwargs["norm_after"] valid_args = ( "in_channels", @@ -485,18 +485,31 @@ def add_class_args(parser, prefix=None): except: pass + # parser.add_argument( + # "--wo-norm", + # default=False, + # action="store_true", + # help="without batch normalization", + # ) + + # parser.add_argument( + # "--norm-after", + # default=False, + # action="store_true", + # help="batch normalizaton after activation", + # ) parser.add_argument( - "--wo-norm", - default=False, - action="store_true", + "--use-norm", + default=True, + action=ActionYesNo, help="without batch normalization", ) parser.add_argument( - "--norm-after", - default=False, - action="store_true", - help="batch normalizaton after activation", + "--norm-before", + default=True, + action=ActionYesNo, + help="batch normalizaton before activation", ) parser.add_argument( diff --git a/hyperion/torch/narchs/resnet2d_encoder.py b/hyperion/torch/narchs/resnet2d_encoder.py index b27e883d..3af174cf 100644 --- a/hyperion/torch/narchs/resnet2d_encoder.py +++ b/hyperion/torch/narchs/resnet2d_encoder.py @@ -4,7 +4,7 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import logging import torch @@ -372,13 +372,13 @@ def change_dropouts(self, dropout_rate, drop_connect_rate): @staticmethod def filter_args(**kwargs): - if "wo_norm" in kwargs: - kwargs["use_norm"] = not kwargs["wo_norm"] - del kwargs["wo_norm"] + # if "wo_norm" in kwargs: + # kwargs["use_norm"] = not kwargs["wo_norm"] + # del kwargs["wo_norm"] - if "norm_after" in kwargs: - kwargs["norm_before"] = not kwargs["norm_after"] - del kwargs["norm_after"] + # if "norm_after" in kwargs: + # kwargs["norm_before"] = not kwargs["norm_after"] + # del kwargs["norm_after"] valid_args = ( "in_channels", @@ -540,18 +540,32 @@ def add_class_args(parser, prefix=None, skip=set()): except: pass + # parser.add_argument( + # "--wo-norm", + # default=False, + # action="store_true", + # help="without batch normalization", + # ) + + # parser.add_argument( + # "--norm-after", + # default=False, + # action="store_true", + # help="batch normalizaton after activation", + # ) + parser.add_argument( - "--wo-norm", - 
default=False, - action="store_true", + "--use-norm", + default=True, + action=ActionYesNo, help="without batch normalization", ) parser.add_argument( - "--norm-after", - default=False, - action="store_true", - help="batch normalizaton after activation", + "--norm-before", + default=True, + action=ActionYesNo, + help="batch normalizaton before activation", ) parser.add_argument( diff --git a/hyperion/torch/narchs/resnet_factory.py b/hyperion/torch/narchs/resnet_factory.py index 645b7f2b..c9d5806e 100644 --- a/hyperion/torch/narchs/resnet_factory.py +++ b/hyperion/torch/narchs/resnet_factory.py @@ -21,6 +21,7 @@ "lresnet34": LResNet34, "lresnet50": LResNet50, "lresnext50_4x4d": LResNext50_4x4d, + "lresnet34_345": LResNet34_345, "seresnet18": SEResNet18, "seresnet34": SEResNet34, "seresnet50": SEResNet50, @@ -47,6 +48,32 @@ "tselresnet34": TSELResNet34, "tselresnet50": TSELResNet50, "tselresnext50_4x4d": TSELResNext50_4x4d, + "fwseresnet18": FwSEResNet18, + "fwseresnet34": FwSEResNet34, + "fwseresnet50": FwSEResNet50, + "fwseresnet101": FwSEResNet101, + "fwseresnet152": FwSEResNet152, + "fwseresnext50_32x4d": FwSEResNext50_32x4d, + "fwseresnext101_32x8d": FwSEResNext101_32x8d, + "fwsewideresnet50": FwSEWideResNet50, + "fwsewideresnet101": FwSEWideResNet101, + "fwselresnet18": FwSELResNet18, + "fwselresnet34": FwSELResNet34, + "fwselresnet50": FwSELResNet50, + "fwselresnext50_4x4d": FwSELResNext50_4x4d, + "cfwseresnet18": CFwSEResNet18, + "cfwseresnet34": CFwSEResNet34, + "cfwseresnet50": CFwSEResNet50, + "cfwseresnet101": CFwSEResNet101, + "cfwseresnet152": CFwSEResNet152, + "cfwseresnext50_32x4d": CFwSEResNext50_32x4d, + "cfwseresnext101_32x8d": CFwSEResNext101_32x8d, + "cfwsewideresnet50": CFwSEWideResNet50, + "cfwsewideresnet101": CFwSEWideResNet101, + "cfwselresnet18": CFwSELResNet18, + "cfwselresnet34": CFwSELResNet34, + "cfwselresnet50": CFwSELResNet50, + "cfwselresnext50_4x4d": CFwSELResNext50_4x4d, "res2net18": Res2Net18, "res2net34": Res2Net34, "res2net50": Res2Net50, @@ -80,7 +107,34 @@ "tsewideres2net101": TSEWideRes2Net101, "tselres2net50": TSELRes2Net50, "tselres2next50_4x4d": TSELRes2Next50_4x4d, - "lresnet34_345": LResNet34_345, + "fwseres2net18": FwSERes2Net18, + "fwseres2net34": FwSERes2Net34, + "fwseres2net50": FwSERes2Net50, + "fwseres2net101": FwSERes2Net101, + "fwseres2net152": FwSERes2Net152, + "fwseres2next50_32x4d": FwSERes2Next50_32x4d, + "fwseres2next101_32x8d": FwSERes2Next101_32x8d, + "fwsewideres2net50": FwSEWideRes2Net50, + "fwsewideres2net101": FwSEWideRes2Net101, + "fwselres2net50": FwSELRes2Net50, + "fwselres2next50_4x4d": FwSELRes2Next50_4x4d, + "cfwseres2net18": CFwSERes2Net18, + "cfwseres2net34": CFwSERes2Net34, + "cfwseres2net50": CFwSERes2Net50, + "cfwseres2net101": CFwSERes2Net101, + "cfwseres2net152": CFwSERes2Net152, + "cfwseres2next50_32x4d": CFwSERes2Next50_32x4d, + "cfwseres2next101_32x8d": CFwSERes2Next101_32x8d, + "cfwsewideres2net50": CFwSEWideRes2Net50, + "cfwsewideres2net101": CFwSEWideRes2Net101, + "cfwselres2net50": CFwSELRes2Net50, + "cfwselres2next50_4x4d": CFwSELRes2Next50_4x4d, + "idrndresnet100": IdRndResNet100, + "idrndresnet202": IdRndResNet202, + "fwseidrndresnet100": FwSEIdRndResNet100, + "fwseidrndresnet202": FwSEIdRndResNet202, + "cfwseidrndresnet100": CFwSEIdRndResNet100, + "cfwseidrndresnet202": CFwSEIdRndResNet202, } @@ -141,9 +195,9 @@ def create( return resnet def filter_args(**kwargs): - if "norm_after" in kwargs: - kwargs["norm_before"] = not kwargs["norm_after"] - del kwargs["norm_after"] + # if "norm_after" in kwargs: + # 
kwargs["norm_before"] = not kwargs["norm_after"] + # del kwargs["norm_after"] if "no_maxpool" in kwargs: kwargs["do_maxpool"] = not kwargs["no_maxpool"] @@ -247,21 +301,21 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--in-norm", default=False, - action="store_true", + action=ActionYesNo, help="batch normalization at the input", ) parser.add_argument( "--no-maxpool", default=False, - action="store_true", + action=ActionYesNo, help="don't do max pooling after first convolution", ) parser.add_argument( "--zero-init-residual", default=False, - action="store_true", + action=ActionYesNo, help="Zero-initialize the last BN in each residual branch", ) @@ -293,11 +347,18 @@ def add_class_args(parser, prefix=None): try: parser.add_argument( - "--norm-after", - default=False, - action="store_true", - help="batch normalizaton after activation", + "--norm-before", + default=True, + action=ActionYesNo, + help="batch normalizaton before activation", ) + + # parser.add_argument( + # "--norm-after", + # default=False, + # action="store_true", + # help="batch normalizaton after activation", + # ) except: pass diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index ad3df161..93571acf 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -653,7 +653,7 @@ def filter_args(**kwargs): return args @staticmethod - def add_class_args(parser, prefix=None, train_modes=None, skip={}): + def add_class_args(parser, prefix=None, train_modes=None, skip=set()): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") From 15db427110988b942dfc3c6caf58c02e7aed13d9 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 29 Dec 2022 07:59:34 -0500 Subject: [PATCH 073/154] added scripts to extract xvectors from feats back to bin --- hyperion/bin/compute_mfcc_feats.py | 116 ++++++ hyperion/bin/extract_xvectors_from_feats.py | 247 ++++++++++++ hyperion/bin/extract_xvectors_from_wav.py | 48 ++- .../extract_xvectors_slidwin_from_feats.py | 306 +++++++++++++++ .../bin/extract_xvectors_slidwin_from_wav.py | 358 ++++++++++++++++++ 5 files changed, 1050 insertions(+), 25 deletions(-) create mode 100755 hyperion/bin/compute_mfcc_feats.py create mode 100755 hyperion/bin/extract_xvectors_from_feats.py create mode 100755 hyperion/bin/extract_xvectors_slidwin_from_feats.py create mode 100755 hyperion/bin/extract_xvectors_slidwin_from_wav.py diff --git a/hyperion/bin/compute_mfcc_feats.py b/hyperion/bin/compute_mfcc_feats.py new file mode 100755 index 00000000..b7e90056 --- /dev/null +++ b/hyperion/bin/compute_mfcc_feats.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +""" + Copyright 2018 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.io import SequentialAudioReader as AR +from hyperion.io import SequentialDataReaderFactory as DRF +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import compression_methods +from hyperion.np.feats import MFCC + + +def compute_mfcc_feats( + input_path, output_path, compress, compression_method, write_num_frames, **kwargs +): + + mfcc_args = MFCC.filter_args(**kwargs) + mfcc = MFCC(**mfcc_args) + + if mfcc.input_step == "wave": + input_args = AR.filter_args(**kwargs) 
+ reader = AR(input_path, **input_args) + else: + input_args = DRF.filter_args(**kwargs) + reader = DRF.create(input_path, **input_args) + + writer = DWF.create( + output_path, + scp_sep=" ", + compress=compress, + compression_method=compression_method, + ) + + if write_num_frames is not None: + f_num_frames = open(write_num_frames, "w") + + for data in reader: + if mfcc.input_step == "wave": + key, x, fs = data + else: + key, x = data + logging.info("Extracting MFCC for %s num_samples=%d" % (key, len(x))) + t1 = time.time() + y = mfcc.compute(x) + dt = (time.time() - t1) * 1000 + rtf = dt / (mfcc.frame_shift * y.shape[0]) + logging.info( + "Extracted MFCC for %s num-frames=%d elapsed-time=%.2f ms. real-time-factor=%.2f" + % (key, y.shape[0], dt, rtf) + ) + writer.write([key], [y]) + + if write_num_frames is not None: + f_num_frames.write("%s %d\n" % (key, y.shape[0])) + + mfcc.reset() + + if write_num_frames is not None: + f_num_frames.close() + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Compute MFCC features") + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--input", dest="input_path", required=True) + parser.add_argument("--output", dest="output_path", required=True) + parser.add_argument("--write-num-frames", default=None) + + AR.add_class_args(parser) + DRF.add_class_args(parser) + MFCC.add_class_args(parser) + parser.add_argument( + "--compress", + dest="compress", + default=False, + action="store_true", + help="Compress the features", + ) + parser.add_argument( + "--compression-method", + dest="compression_method", + default="auto", + choices=compression_methods, + help="Compression method", + ) + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int, + help="Verbose level", + ) + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + compute_mfcc_feats(**namespace_to_dict(args)) diff --git a/hyperion/bin/extract_xvectors_from_feats.py b/hyperion/bin/extract_xvectors_from_feats.py new file mode 100755 index 00000000..9fb1006c --- /dev/null +++ b/hyperion/bin/extract_xvectors_from_feats.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python +""" + Copyright 2019 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np + +import torch + +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.utils import Utt2Info +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialDataReaderFactory as DRF +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.feats import MeanVarianceNorm as MVN + +from hyperion.torch.utils import open_device +from hyperion.torch import TorchModelLoader as TML + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def init_mvn(device, **kwargs): + mvn_args = MVN.filter_args(**kwargs["mvn"]) + logging.info("mvn args={}".format(mvn_args)) + mvn = MVN(**mvn_args) + if mvn.norm_mean or mvn.norm_var: + return mvn + return None + + +def load_model(model_path, device): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + 
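
# Illustration: the core call pattern extract_xvectors_from_feats.py is built
# around, stripped of I/O, VAD, and logging. The checkpoint path and feature
# matrix here are hypothetical; extract_embed expects features shaped
# (batch, feat_dim, time), hence the transpose, and chunk_length=0 means the
# whole utterance goes through the encoder in one forward pass:

import numpy as np
import torch
from hyperion.torch import TorchModelLoader as TML

model = TML.load("exp/xvector_nnet/model.pth")   # hypothetical checkpoint
model.eval()
x = np.random.randn(500, 80).astype("float32")   # (frames, feat_dim)
xx = torch.tensor(x.T[None, :], dtype=torch.get_default_dtype())
with torch.no_grad():
    y = model.extract_embed(xx, chunk_length=0, embed_layer=None)
print(y.shape)                                   # (1, embed_dim)
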
logging.info("xvector-model={}".format(model)) + model.to(device) + model.eval() + return model + + +def select_random_chunk(key, x, min_utt_length, max_utt_length, rng): + utt_length = rng.randint(low=min_utt_length, high=max_utt_length + 1) + if utt_length < x.shape[1]: + first_frame = rng.randint(low=0, high=x.shape[1] - utt_length) + x = x[:, first_frame : first_frame + utt_length] + logging.info( + "extract-random-utt %s of length=%d first-frame=%d" + % (key, x.shape[1], first_frame) + ) + return x + + +def extract_xvectors( + input_spec, + output_spec, + vad_spec, + write_num_frames_spec, + vad_path_prefix, + model_path, + chunk_length, + embed_layer, + random_utt_length, + min_utt_length, + max_utt_length, + use_gpu, + **kwargs +): + + logging.info("initializing") + rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + device = init_device(use_gpu) + mvn = init_mvn(device, **kwargs) + model = load_model(model_path, device) + + if write_num_frames_spec is not None: + keys = [] + info = [] + + dr_args = DRF.filter_args(**kwargs) + logging.info("opening output stream: %s" % (output_spec)) + with DWF.create(output_spec) as writer: + + logging.info("opening input stream: %s" % (input_spec)) + with DRF.create(input_spec, **dr_args) as reader: + if vad_spec is not None: + logging.info("opening VAD stream: %s" % (vad_spec)) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) + + while not reader.eof(): + t1 = time.time() + key, data = reader.read(1) + if len(key) == 0: + break + t2 = time.time() + logging.info("processing utt %s" % (key[0])) + x = data[0] + if mvn is not None: + x = mvn.normalize(x) + t3 = time.time() + tot_frames = x.shape[0] + if vad_spec is not None: + vad = v_reader.read(key, num_frames=x.shape[0])[0].astype( + "bool", copy=False + ) + x = x[vad] + + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames" + % (key[0], x.shape[0], tot_frames, x.shape[0] / tot_frames * 100) + ) + + if random_utt_length: + x = select_random_chunk(key, x, min_utt_length, max_utt_length, rng) + + t4 = time.time() + if x.shape[0] == 0: + y = np.zeros((model.embed_dim,), dtype=float_cpu()) + else: + xx = torch.tensor(x.T[None, :], dtype=torch.get_default_dtype()) + with torch.no_grad(): + y = ( + model.extract_embed( + xx, chunk_length=chunk_length, embed_layer=embed_layer + ) + .detach() + .cpu() + .numpy()[0] + ) + + t5 = time.time() + writer.write(key, [y]) + if write_num_frames_spec is not None: + keys.append(key[0]) + info.append(str(x.shape[0])) + t6 = time.time() + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f mvn-time=%.3f " + "vad-time=%.3f embed-time=%.3f write-time=%.3f " + "rt-factor=%.2f" + ) + % ( + key[0], + t6 - t1, + t2 - t1, + t3 - t2, + t4 - t3, + t5 - t4, + t6 - t5, + x.shape[0] * 1e-2 / (t6 - t1), + ) + ) + + if write_num_frames_spec is not None: + logging.info("writing num-frames to %s" % (write_num_frames_spec)) + u2nf = Utt2Info.create(keys, info) + u2nf.save(write_num_frames_spec) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Extracts x-vectors from features") + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--input", dest="input_spec", required=True) + DRF.add_class_args(parser) + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--write-num-frames", dest="write_num_frames_spec", default=None + ) + parser.add_argument( + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad") + ) + + MVN.add_class_args(parser, 
prefix="mvn") + + parser.add_argument("--model-path", required=True) + parser.add_argument( + "--chunk-length", + type=int, + default=0, + help=( + "number of frames used in each forward pass of the x-vector encoder," + "if 0 the full utterance is used" + ), + ) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from," + "if None the layer set in training phase is used" + ), + ) + + parser.add_argument( + "--random-utt-length", + default=False, + action="store_true", + help="calculates x-vector from a random chunk of the utterance", + ) + parser.add_argument( + "--min-utt-length", + type=int, + default=500, + help=("minimum utterance length when using random utt length"), + ) + parser.add_argument( + "--max-utt-length", + type=int, + default=12000, + help=("maximum utterance length when using random utt length"), + ) + + parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + extract_xvectors(**namespace_to_dict(args)) diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py index e9746897..c1cdf02d 100755 --- a/hyperion/bin/extract_xvectors_from_wav.py +++ b/hyperion/bin/extract_xvectors_from_wav.py @@ -88,8 +88,10 @@ def select_random_chunk(key, x, min_utt_length, max_utt_length, rng): first_frame = rng.randint(low=0, high=x.shape[1] - utt_length) x = x[:, first_frame : first_frame + utt_length] logging.info( - "extract-random-utt %s of length=%d first-frame=%d" - % (key, x.shape[1], first_frame) + "extract-random-utt %s of length=%d first-frame=%d", + key, + x.shape[1], + first_frame, ) return x @@ -132,7 +134,7 @@ def extract_xvectors( num_augs = 1 ar_args = AR.filter_args(**kwargs) - logging.info("opening output stream: %s" % (output_spec)) + logging.info("opening output stream: %s", output_spec) with DWF.create(output_spec, scp_sep=scp_sep) as writer: logging.info( @@ -141,7 +143,7 @@ def extract_xvectors( with AR(input_spec, **ar_args) as reader: if vad_spec is not None: - logging.info("opening VAD stream: %s" % (vad_spec)) + logging.info("opening VAD stream: %s", vad_spec) v_reader = VRF.create( vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep ) @@ -156,7 +158,7 @@ def extract_xvectors( key0 = key[0] t2 = time.time() - logging.info("processing utt %s" % (key0)) + logging.info("processing utt %s", key0) for aug_id in range(num_augs): t3 = time.time() key, x = augment(key0, x0, augmenter, aug_df, aug_id) @@ -175,13 +177,11 @@ def extract_xvectors( x = x[:, vad] logging.info( - "utt %s detected %d/%d (%.2f %%) speech frames" - % ( - key, - x.shape[1], - tot_frames, - x.shape[1] / tot_frames * 100, - ) + "utt %s detected %d/%d (%.2f %%) speech frames", + key, + x.shape[1], + tot_frames, + x.shape[1] / tot_frames * 100, ) if random_utt_length: @@ -219,22 +219,20 @@ def extract_xvectors( "aug-time=%.3f feat-time=%.3f " "vad-time=%.3f embed-time=%.3f write-time=%.3f " "rt-factor=%.2f" - ) - % ( - key, - tot_time, - read_time, - t4 - t3, - t5 - t4, - t6 - t5, - t7 - t6, - t8 - t7, - x0.shape[0] / fs[0] / tot_time, - ) + ), + key, + tot_time, + read_time, + t4 - t3, + t5 - t4, + t6 - t5, + t7 - t6, + t8 - t7, + x0.shape[0] / fs[0] / tot_time, ) if 
write_num_frames_spec is not None: - logging.info("writing num-frames to %s" % (write_num_frames_spec)) + logging.info("writing num-frames to %s", write_num_frames_spec) u2nf = Utt2Info.create(keys, info) u2nf.save(write_num_frames_spec) diff --git a/hyperion/bin/extract_xvectors_slidwin_from_feats.py b/hyperion/bin/extract_xvectors_slidwin_from_feats.py new file mode 100755 index 00000000..d14f16f3 --- /dev/null +++ b/hyperion/bin/extract_xvectors_slidwin_from_feats.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python +""" + Copyright 2019 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np + +import torch +import yaml + +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.utils import Utt2Info +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialDataReaderFactory as DRF +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.feats import MeanVarianceNorm as MVN + +from hyperion.torch.utils import open_device +from hyperion.torch import TorchModelLoader as TML + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def init_mvn(device, **kwargs): + mvn_args = MVN.filter_args(**kwargs["mvn"]) + logging.info("mvn args={}".format(mvn_args)) + mvn = MVN(**mvn_args) + if mvn.norm_mean or mvn.norm_var: + return mvn + return None + + +def load_model(model_path, device): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("xvector-model={}".format(model)) + model.to(device) + model.eval() + return model + + +def extract_xvectors( + input_spec, + output_spec, + vad_spec, + write_timestamps_spec, + slidwin_params_path, + vad_path_prefix, + model_path, + chunk_length, + embed_layer, + win_length, + win_shift, + snip_edges, + feat_frame_length, + feat_frame_shift, + feat_snip_edges, + use_gpu, + **kwargs +): + + logging.info("initializing") + rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + device = init_device(use_gpu) + mvn = init_mvn(device, **kwargs) + model = load_model(model_path, device) + + if write_timestamps_spec is not None: + time_writer = DWF.create(write_timestamps_spec, scp_sep=" ") + + dr_args = DRF.filter_args(**kwargs) + logging.info("opening output stream: %s" % (output_spec)) + with DWF.create(output_spec) as writer: + + logging.info("opening input stream: %s" % (output_spec)) + with DRF.create(input_spec, **dr_args) as reader: + if vad_spec is not None: + logging.info("opening VAD stream: %s" % (vad_spec)) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) + + while not reader.eof(): + t1 = time.time() + key, data = reader.read(1) + if len(key) == 0: + break + t2 = time.time() + logging.info("processing utt %s" % (key[0])) + x = data[0] + if mvn is not None: + x = mvn.normalize(x) + t3 = time.time() + tot_frames = x.shape[0] + if vad_spec is not None: + vad = v_reader.read(key, num_frames=x.shape[0])[0].astype( + "bool", copy=False + ) + x = x[vad] + + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames" + % (key[0], x.shape[0], tot_frames, x.shape[0] / tot_frames * 100) + ) + + t4 = time.time() + if x.shape[0] == 0: + y = 
np.zeros((1, model.embed_dim,), dtype=float_cpu(),)
+                else:
+                    xx = torch.tensor(x.T[None, :], dtype=torch.get_default_dtype())
+                    with torch.no_grad():
+                        y = (
+                            model.extract_embed_slidwin(
+                                xx,
+                                win_length,
+                                win_shift,
+                                snip_edges=snip_edges,
+                                feat_frame_length=feat_frame_length,
+                                feat_frame_shift=feat_frame_shift,
+                                chunk_length=chunk_length,
+                                embed_layer=embed_layer,
+                                detach_chunks=True,
+                            )
+                            .detach()
+                            .cpu()
+                            .numpy()[0]
+                        )
+
+                t5 = time.time()
+                y = y.T
+                writer.write(key, [y])
+
+                if write_timestamps_spec is not None:
+                    num_wins = y.shape[0]
+                    timestamps = model.compute_slidwin_timestamps(
+                        num_wins,
+                        win_length,
+                        win_shift,
+                        snip_edges,
+                        feat_frame_length,
+                        feat_frame_shift,
+                        feat_snip_edges,
+                    ).numpy()
+                    logging.info("{}".format(timestamps))
+                    time_writer.write(key, [timestamps])
+                t6 = time.time()
+                logging.info(
+                    (
+                        "utt %s total-time=%.3f read-time=%.3f mvn-time=%.3f "
+                        "vad-time=%.3f embed-time=%.3f write-time=%.3f "
+                        "rt-factor=%.2f"
+                    )
+                    % (
+                        key[0],
+                        t6 - t1,
+                        t2 - t1,
+                        t3 - t2,
+                        t4 - t3,
+                        t5 - t4,
+                        t6 - t5,
+                        x.shape[0] * 1e-2 / (t6 - t1),
+                    )
+                )
+
+    if write_timestamps_spec is not None:
+        time_writer.close()
+
+    if slidwin_params_path is not None:
+        params = {
+            "padding": model.compute_slidwin_left_padding(
+                win_length,
+                win_shift,
+                snip_edges,
+                feat_frame_length,
+                feat_frame_shift,
+                feat_snip_edges,
+            ),
+            "win_length": win_length,
+            "win_shift": win_shift,
+        }
+        with open(slidwin_params_path, "w") as f:
+            yaml.dump(params, f)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Extract x-vectors over a sliding window")
+
+    parser.add_argument("--cfg", action=ActionConfigFile)
+    parser.add_argument("--input", dest="input_spec", required=True)
+    DRF.add_class_args(parser)
+    parser.add_argument("--vad", dest="vad_spec", default=None)
+    parser.add_argument(
+        "--write-timestamps", dest="write_timestamps_spec", default=None
+    )
+    parser.add_argument("--slidwin-params-path", default=None)
+    parser.add_argument(
+        "--vad-path-prefix",
+        dest="vad_path_prefix",
+        default=None,
+        help=("scp file_path prefix for vad"),
+    )
+
+    MVN.add_class_args(parser, prefix="mvn")
+
+    parser.add_argument("--model-path", required=True)
+    parser.add_argument(
+        "--win-length",
+        type=float,
+        default=1.5,
+        help=("window length for x-vector extraction in seconds"),
+    )
+    parser.add_argument(
+        "--win-shift",
+        type=float,
+        default=0.25,
+        help=("window shift for x-vector extraction in seconds"),
+    )
+    parser.add_argument(
+        "--snip-edges",
+        default=False,
+        action="store_true",
+        help=(
+            "If true, end effects will be handled by outputting "
+            "only windows that completely fit in the file, "
+            "and the number of windows depends on the window-length. "
+            "If false, the number of windows depends only on "
+            "the window-shift, and we reflect the data at the ends."
+        ),
+    )
+
+    parser.add_argument(
+        "--feat-frame-length",
+        type=float,
+        default=25,
+        help=("frame-length used to compute the acoustic features in msecs"),
+    )
+    parser.add_argument(
+        "--feat-frame-shift",
+        type=float,
+        default=10,
+        help=("frame-shift used to compute the acoustic features in msecs"),
+    )
+    parser.add_argument(
+        "--feat-snip-edges",
+        default=False,
+        action="store_true",
+        help=(
+            "If true, end effects will be handled by outputting only windows "
+            "that completely fit in the file, and the number of windows "
+            "depends on the feat-frame-length.
" + "If false, the number of feature frames depends only on the " + "feat-frame-shift, and we reflect the waveform at the ends." + ), + ) + + parser.add_argument( + "--chunk-length", + type=int, + default=0, + help=( + "number of frames used in each forward pass of the x-vector encoder," + "if 0 the full utterance is used" + ), + ) + + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from," + "if None the layer set in training phase is used" + ), + ) + + parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + extract_xvectors(**namespace_to_dict(args)) diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py new file mode 100755 index 00000000..b4bd2b0d --- /dev/null +++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python +""" + Copyright 2019 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np +import pandas as pd +import yaml + +import torch + +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.utils import Utt2Info +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.augment import SpeechAugment + +from hyperion.torch.utils import open_device +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch import TorchModelLoader as TML + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def init_feats(device, **kwargs): + feat_args = AF.filter_args(**kwargs["feats"]) + logging.info("feat args={}".format(feat_args)) + logging.info("initializing feature extractor") + feat_extractor = AF(trans=False, **feat_args) + logging.info("feat-extractor={}".format(feat_extractor)) + feat_extractor.eval() + feat_extractor.to(device) + return feat_extractor + + +def load_model(model_path, device): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("xvector-model={}".format(model)) + model.to(device) + model.eval() + return model + + +def augment(key0, x0, augmenter, aug_df, aug_id): + if augmenter is None: + x = x0 + key = key0 + else: + x, aug_info = augmenter(x0) + key = "%s-aug-%02d" % (key0, aug_id) + aug_df_row = { + "key_aug": key, + "key_orig": key0, + "noise_type": aug_info["noise"]["noise_type"], + "snr": aug_info["noise"]["snr"], + "rir_type": aug_info["reverb"]["rir_type"], + "srr": aug_info["reverb"]["srr"], + "sdr": aug_info["sdr"], + } + + aug_df.append(pd.DataFrame(aug_df_row, index=[0])) + + return key, x + + +def extract_xvectors( + input_spec, + output_spec, + vad_spec, + write_timestamps_spec, + slidwin_params_path, + scp_sep, + vad_path_prefix, + model_path, + chunk_length, + 
embed_layer, + win_length, + win_shift, + snip_edges, + aug_cfg, + num_augs, + aug_info_path, + use_gpu, + **kwargs +): + + rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + device = init_device(use_gpu) + feat_extractor = init_feats(device, **kwargs) + model = load_model(model_path, device) + + feat_args = kwargs["feats"]["audio_feats"] + feat_frame_length = feat_args["frame_length"] + feat_frame_shift = feat_args["frame_shift"] + feat_snip_edges = feat_args["snip_edges"] + + if write_timestamps_spec is not None: + time_writer = DWF.create(write_timestamps_spec, scp_sep=scp_sep) + + if aug_cfg is not None: + augmenter = SpeechAugment.create(aug_cfg, rng=rng) + aug_df = [] + else: + augmenter = None + aug_df = None + num_augs = 1 + + ar_args = AR.filter_args(**kwargs) + logging.info("opening output stream: %s", output_spec) + with DWF.create(output_spec, scp_sep=scp_sep) as writer: + + logging.info( + "opening input stream: {} with args={}".format(input_spec, ar_args) + ) + with AR(input_spec, **ar_args) as reader: + + if vad_spec is not None: + logging.info("opening VAD stream: %s", vad_spec) + v_reader = VRF.create( + vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep + ) + + while not reader.eof(): + t1 = time.time() + key, x0, fs = reader.read(1) + if len(key) == 0: + break + + x0 = x0[0] + key0 = key[0] + t2 = time.time() + + logging.info("processing utt %s", key0) + for aug_id in range(num_augs): + t3 = time.time() + key, x = augment(key0, x0, augmenter, aug_df, aug_id) + t4 = time.time() + with torch.no_grad(): + x = torch.tensor( + x[None, :], dtype=torch.get_default_dtype() + ).to(device) + + x = feat_extractor(x) + t5 = time.time() + tot_frames = x.shape[1] + if vad_spec is not None: + vad = v_reader.read(key0, num_frames=tot_frames)[0] + vad = torch.tensor(vad, dtype=torch.bool).to(device) + x = x[:, vad] + + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames" + % ( + key, + x.shape[1], + tot_frames, + x.shape[1] / tot_frames * 100, + ) + ) + + t6 = time.time() + if x.shape[1] == 0: + y = np.zeros((1, model.embed_dim,), dtype=float_cpu(),) + else: + x = x.transpose(1, 2).contiguous() + y = ( + model.extract_embed_slidwin( + x, + win_length, + win_shift, + snip_edges=snip_edges, + feat_frame_length=feat_frame_length, + feat_frame_shift=feat_frame_shift, + chunk_length=chunk_length, + embed_layer=embed_layer, + detach_chunks=True, + ) + .detach() + .cpu() + .numpy()[0] + ) + + t7 = time.time() + y = y.T + writer.write([key], [y]) + + if write_timestamps_spec is not None: + num_wins = y.shape[0] + timestamps = model.compute_slidwin_timestamps( + num_wins, + win_length, + win_shift, + snip_edges, + feat_frame_length, + feat_frame_shift, + feat_snip_edges, + ).numpy() + logging.info("{}".format(timestamps)) + time_writer.write([key], [timestamps]) + + t8 = time.time() + read_time = t2 - t1 + tot_time = read_time + t8 - t3 + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f " + "aug-time=%.3f feat-time=%.3f " + "vad-time=%.3f embed-time=%.3f write-time=%.3f " + "rt-factor=%.2f" + ), + key, + tot_time, + read_time, + t4 - t3, + t5 - t4, + t6 - t5, + t7 - t6, + t8 - t7, + x0.shape[0] / fs[0] / tot_time, + ) + + if write_timestamps_spec is not None: + time_writer.close() + + if aug_info_path is not None: + aug_df = pd.concat(aug_df, ignore_index=True) + aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") + + if slidwin_params_path is not None: + params = { + "padding": model.compute_slidwin_left_padding( + win_length, + win_shift,
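+ # compute_slidwin_left_padding presumably converts the x-vector window
+ # geometry (seconds) and the acoustic-feature framing (msecs) into the
+ # left padding the extractor assumed, so that tools reading the dumped
+ # YAML can realign window timestamps with the original waveform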
+ snip_edges, + feat_frame_length, + feat_frame_shift, + feat_snip_edges, + ), + "win_length": win_length, + "win_shift": win_shift, + } + with open(slidwin_params_path, "w") as f: + yaml.dump(params, f) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description=( + "Extract x-vectors over a sliding window " + "from waveform computing " + "acoustic features on the fly" + ) + ) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--input", dest="input_spec", required=True) + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--write-timestamps", dest="write_timestamps_spec", default=None + ) + parser.add_argument("--slidwin-params-path", default=None) + + parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) + parser.add_argument( + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad") + ) + + AR.add_argparse_args(parser) + + parser.add_argument("--aug-cfg", default=None) + parser.add_argument("--aug-info-path", default=None) + parser.add_argument( + "--num-augs", default=1, type=int, help="number of augmentations per utterance" + ) + + AF.add_class_args(parser, prefix="feats") + + parser.add_argument("--model-path", required=True) + parser.add_argument( + "--win-length", + type=float, + default=1.5, + help=("window length for x-vector extraction in seconds"), + ) + parser.add_argument( + "--win-shift", + type=float, + default=0.25, + help=("window shift for x-vector extraction in seconds"), + ) + parser.add_argument( + "--snip-edges", + default=False, + action="store_true", + help=( + "If true, end effects will be handled by outputting " + "only windows that completely fit in the file, " + "and the number of windows depends on the window-length. " + "If false, the number of windows depends only on " + "the window-shift, and we reflect the data at the ends." 
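+ # rough sketch of the two modes, assuming Kaldi-style rounding, for an
+ # utterance of T seconds:
+ #   snip_edges=True  -> num_wins = 1 + floor((T - win_length) / win_shift)
+ #   snip_edges=False -> num_wins = round(T / win_shift), with the signal
+ #                       reflected at the edges to fill partial windows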
+ ), + ) + + parser.add_argument( + "--chunk-length", + type=int, + default=0, + help=( + "number of frames used in each forward pass " + "of the x-vector encoder," + "if 0 the full utterance is used" + ), + ) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from, " + "if None, it uses layer set in training phase" + ), + ) + + parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + extract_xvectors(**namespace_to_dict(args)) From f9e4a2409b7edc6144cb92711849f056fb98f0c3 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 29 Dec 2022 08:08:35 -0500 Subject: [PATCH 074/154] isorted everything --- hyperion/__init__.py | 7 +- hyperion/bin/compute_energy_vad.py | 14 ++-- hyperion/bin/compute_mfcc_feats.py | 14 ++-- hyperion/bin/extract_wav2vec2xvectors.py | 20 ++--- hyperion/bin/extract_xvectors_from_feats.py | 18 ++--- hyperion/bin/extract_xvectors_from_wav.py | 20 ++--- .../extract_xvectors_slidwin_from_feats.py | 20 ++--- .../bin/extract_xvectors_slidwin_from_wav.py | 20 ++--- hyperion/bin/finetune_wav2vec2xvector.py | 32 +++----- .../bin/finetune_xvector_dfr_from_feats.py | 27 +++---- hyperion/bin/finetune_xvector_dfr_from_wav.py | 23 +++--- hyperion/bin/finetune_xvector_from_feats.py | 27 +++---- hyperion/bin/finetune_xvector_from_wav.py | 34 ++++----- hyperion/bin/make_babble_noise_audio_files.py | 20 ++--- hyperion/bin/pack_wav_rirs.py | 16 ++-- hyperion/bin/plot_embedding_tsne.py | 20 ++--- hyperion/bin/plot_embedding_tsne_per_class.py | 23 ++---- hyperion/bin/preprocess_audio_files.py | 20 ++--- hyperion/bin/train_wav2vec2xvector.py | 28 +++---- hyperion/bin/train_xvector_from_feats.py | 29 +++---- hyperion/bin/train_xvector_from_wav.py | 33 ++++---- hyperion/bin_deprec/ark2hyp.py | 4 +- hyperion/bin_deprec/arkvad2nist.py | 7 +- hyperion/bin_deprec/compute-gmm-post.py | 13 ++-- .../bin_deprec/eval-2class-performance.py | 10 +-- hyperion/bin_deprec/eval-elbo-ubm.py | 10 +-- .../bin_deprec/eval-q-scoring-homo-gbe.py | 14 ++-- hyperion/bin_deprec/eval-score-norm.py | 8 +- hyperion/bin_deprec/h5vad2nist.py | 6 +- hyperion/bin_deprec/init-ubm.py | 11 ++- hyperion/bin_deprec/scores2lre_format.py | 8 +- .../torch-train-conformer-enc-v1-vq-dvae.py | 17 ++--- .../torch-train-conformer-enc-v1-vq-vae.py | 17 ++--- hyperion/bin_deprec/torch-train-dc1d-dvae.py | 17 ++--- hyperion/bin_deprec/torch-train-dc1d-vae.py | 17 ++--- hyperion/bin_deprec/torch-train-dc2d-dvae.py | 17 ++--- hyperion/bin_deprec/torch-train-dc2d-vae.py | 17 ++--- .../bin_deprec/torch-train-resnet1d-dvae.py | 17 ++--- .../bin_deprec/torch-train-resnet1d-vae.py | 17 ++--- .../torch-train-resnet1d-vq-dvae.py | 17 ++--- .../bin_deprec/torch-train-resnet1d-vq-vae.py | 17 ++--- .../bin_deprec/torch-train-resnet2d-dvae.py | 17 ++--- .../bin_deprec/torch-train-resnet2d-vae.py | 27 +++---- .../torch-train-resnet2d-vq-dvae.py | 17 ++--- .../bin_deprec/torch-train-resnet2d-vq-vae.py | 17 ++--- .../torch-train-transformer-enc-v1-dvae.py | 17 ++--- .../torch-train-transformer-enc-v1-vae.py | 17 ++--- .../torch-train-transformer-enc-v1-vq-dvae.py | 17 ++--- .../torch-train-transformer-enc-v1-vq-vae.py | 17 ++--- 
hyperion/bin_deprec/torch-train-xvector.py | 25 +++--- .../bin_deprec/train-q-scoring-homo-gbe.py | 10 +-- hyperion/bin_deprec/vectors2scores.py | 6 +- .../bin_deprec2/apply-mvn-select-frames.py | 20 ++--- hyperion/bin_deprec2/compute-mfcc-feats.py | 14 ++-- hyperion/bin_deprec2/copy-feats.py | 7 +- hyperion/bin_deprec2/eval-cos-1vs1.py | 12 +-- hyperion/bin_deprec2/eval-linear-gbe-up.py | 16 ++-- hyperion/bin_deprec2/eval-linear-gbe.py | 16 ++-- hyperion/bin_deprec2/eval-linear-svmc.py | 16 ++-- .../bin_deprec2/eval-logistic-regression.py | 16 ++-- hyperion/bin_deprec2/eval-plda-1vs1.py | 14 ++-- hyperion/bin_deprec2/eval-plda-nvs1.py | 14 ++-- hyperion/bin_deprec2/merge-h5-files.py | 5 +- hyperion/bin_deprec2/pack-audio-files.py | 12 +-- hyperion/bin_deprec2/plot-vector-hist.py | 10 +-- hyperion/bin_deprec2/rttm-to-bin-vad.py | 8 +- hyperion/bin_deprec2/segments-to-bin-vad.py | 14 ++-- .../torch-adv-finetune-xvec-from-wav.py | 28 +++---- .../bin_deprec2/torch-adv-finetune-xvec.py | 29 +++---- .../bin_deprec2/torch-compute-mfcc-feats.py | 16 ++-- hyperion/bin_deprec2/torch-eval-vae.py | 18 ++--- ...osine-scoring-from-adv-test-wav-wavegan.py | 36 ++++----- ...l-xvec-cosine-scoring-from-adv-test-wav.py | 31 +++----- ...l-xvec-cosine-scoring-from-art-test-wav.py | 36 ++++----- ...-eval-xvec-cosine-scoring-from-test-wav.py | 24 +++--- ...sine-scoring-from-transfer-adv-test-wav.py | 31 +++----- ...sine-scoring-from-transfer-art-test-wav.py | 36 ++++----- .../torch-eval-xvec-logits-from-wav.py | 20 ++--- ...rch-extract-xvectors-from-wav-with-rttm.py | 20 ++--- ...torch-extract-xvectors-slidwin-from-wav.py | 20 ++--- .../torch-extract-xvectors-slidwin.py | 18 ++--- .../torch-extract-xvectors-vae-preproc.py | 18 ++--- .../bin_deprec2/torch-extract-xvectors.py | 18 ++--- ...ch-generate-adv-attacks-xvector-classif.py | 25 +++--- ...orch-generate-adv-attacks-xvector-verif.py | 31 +++----- hyperion/bin_deprec2/torch-train-dc1d-ae.py | 17 ++--- hyperion/bin_deprec2/torch-train-dvae.py | 34 ++++----- .../torch-train-efficientnet-xvec-from-wav.py | 22 ++---- .../torch-train-efficientnet-xvec.py | 26 +++---- .../torch-train-resnet-xvec-from-wav.py | 37 ++++----- .../bin_deprec2/torch-train-resnet-xvec.py | 26 +++---- .../torch-train-resnet1d-xvec-from-wav.py | 25 +++--- .../torch-train-spinenet-xvec-from-wav.py | 21 ++--- .../torch-train-tdnn-xvec-from-wav.py | 22 ++---- hyperion/bin_deprec2/torch-train-tdnn-xvec.py | 26 +++---- ...orch-train-transformer-xvec-v1-from-wav.py | 22 ++---- .../torch-train-transformer-xvec-v1.py | 26 +++---- hyperion/bin_deprec2/torch-train-vae.py | 34 ++++----- hyperion/bin_deprec2/torch-train-vq-dvae.py | 34 ++++----- hyperion/bin_deprec2/torch-train-vq-vae.py | 34 ++++----- hyperion/bin_deprec2/train-cw-up.py | 10 +-- hyperion/bin_deprec2/train-cw.py | 10 +-- hyperion/bin_deprec2/train-gaussianizer.py | 10 +-- hyperion/bin_deprec2/train-lda.py | 10 +-- hyperion/bin_deprec2/train-linear-gbe-up.py | 10 +-- hyperion/bin_deprec2/train-linear-gbe.py | 10 +-- hyperion/bin_deprec2/train-linear-svmc.py | 10 +-- .../bin_deprec2/train-logistic-regression.py | 10 +-- hyperion/bin_deprec2/train-mvn.py | 10 +-- hyperion/bin_deprec2/train-nda.py | 10 +-- hyperion/bin_deprec2/train-pca.py | 10 +-- hyperion/bin_deprec2/train-plda.py | 10 +-- hyperion/helpers/__init__.py | 10 +-- hyperion/helpers/classif_trial_data_reader.py | 10 +-- .../helpers/multi_test_trial_data_reader.py | 8 +- .../multi_test_trial_data_reader_v2.py | 8 +- hyperion/helpers/plda_factory.py | 2 +- 
hyperion/helpers/tracking_data_reader.py | 8 +- hyperion/helpers/trial_data_reader.py | 10 +-- hyperion/helpers/vector_class_reader.py | 10 +-- hyperion/helpers/vector_reader.py | 10 +-- hyperion/io/__init__.py | 30 +++----- hyperion/io/ark_data_reader.py | 10 ++- hyperion/io/ark_data_writer.py | 6 +- hyperion/io/audio_reader.py | 6 +- hyperion/io/audio_writer.py | 4 +- hyperion/io/bin_vad_reader.py | 3 +- hyperion/io/data_reader.py | 5 +- hyperion/io/data_rw_factory.py | 16 ++-- hyperion/io/h5_data_reader.py | 11 +-- hyperion/io/h5_data_writer.py | 7 +- hyperion/io/h5_merger.py | 1 + hyperion/io/hyp_data_reader.py | 5 +- hyperion/io/hyp_data_writer.py | 5 +- hyperion/io/kaldi_data_reader.py | 5 +- hyperion/io/packed_audio_reader.py | 10 +-- hyperion/io/packed_audio_writer.py | 2 +- hyperion/io/segment_vad_reader.py | 3 +- hyperion/io/vad_reader.py | 1 + hyperion/io/vad_rw_factory.py | 3 +- hyperion/np/augment/__init__.py | 4 +- hyperion/np/augment/noise_augment.py | 2 +- hyperion/np/augment/reverb_augment.py | 4 +- hyperion/np/augment/speech_augment.py | 3 +- hyperion/np/augment/speed_augment.py | 3 +- .../np/calibration/unsup_gauss_calibration.py | 1 + hyperion/np/classifiers/__init__.py | 6 +- hyperion/np/classifiers/greedy_fusion.py | 4 +- hyperion/np/classifiers/linear_gbe.py | 5 +- hyperion/np/classifiers/linear_gbe_up.py | 10 +-- hyperion/np/classifiers/linear_svmc.py | 6 +- .../np/classifiers/logistic_regression.py | 4 +- hyperion/np/classifiers/q_scoring_homo_gbe.py | 3 +- hyperion/np/classifiers/svmc.py | 8 +- hyperion/np/clustering/__init__.py | 2 +- hyperion/np/clustering/ahc.py | 6 +- hyperion/np/clustering/kmeans.py | 5 +- hyperion/np/diarization/diar_ahc_plda.py | 3 +- hyperion/np/feats/__init__.py | 10 +-- hyperion/np/feats/feature_normalization.py | 2 +- hyperion/np/feats/filter_banks.py | 2 +- hyperion/np/feats/mfcc.py | 2 +- hyperion/np/metrics/__init__.py | 5 +- hyperion/np/metrics/confusion_matrix.py | 3 +- hyperion/np/metrics/roc.py | 2 +- hyperion/np/metrics/utils.py | 2 +- hyperion/np/metrics/verification_evaluator.py | 8 +- hyperion/np/np_model.py | 6 +- hyperion/np/pdfs/__init__.py | 4 +- hyperion/np/pdfs/core/__init__.py | 4 +- hyperion/np/pdfs/core/normal.py | 20 ++--- hyperion/np/pdfs/core/normal_diag_cov.py | 10 +-- hyperion/np/pdfs/hmm/hmm.py | 2 +- hyperion/np/pdfs/jfa/jfa_total.py | 9 +-- hyperion/np/pdfs/mixtures/__init__.py | 4 +- .../np/pdfs/mixtures/exp_family_mixture.py | 6 +- hyperion/np/pdfs/mixtures/gmm.py | 26 ++----- hyperion/np/pdfs/mixtures/gmm_diag_cov.py | 14 ++-- .../np/pdfs/mixtures/gmm_tied_diag_cov.py | 14 ++-- hyperion/np/pdfs/plda/__init__.py | 4 +- hyperion/np/pdfs/plda/plda_base.py | 2 +- hyperion/np/score_norm/__init__.py | 6 +- hyperion/np/score_norm/adapt_s_norm.py | 2 +- hyperion/np/score_norm/s_norm.py | 2 +- hyperion/np/score_norm/t_norm.py | 2 +- hyperion/np/score_norm/zt_norm.py | 2 +- hyperion/np/transforms/__init__.py | 19 +++-- hyperion/np/transforms/cent_whiten.py | 5 +- hyperion/np/transforms/cent_whiten_up.py | 3 +- hyperion/np/transforms/coral.py | 3 +- hyperion/np/transforms/gaussianizer.py | 4 +- hyperion/np/transforms/lda.py | 3 +- hyperion/np/transforms/lnorm.py | 2 +- hyperion/np/transforms/lnorm_up.py | 2 +- hyperion/np/transforms/mvn.py | 3 +- hyperion/np/transforms/nap.py | 3 +- hyperion/np/transforms/nda.py | 5 +- hyperion/np/transforms/pca.py | 7 +- hyperion/np/transforms/sb_sw.py | 5 +- hyperion/np/transforms/skl_tsne.py | 3 +- hyperion/np/transforms/transform_list.py | 13 ++-- 
hyperion/torch/adv_attacks/__init__.py | 13 ++-- .../torch/adv_attacks/art_attack_factory.py | 2 +- hyperion/torch/adv_attacks/attack_factory.py | 12 +-- .../torch/adv_attacks/carlini_wagner_l2.py | 2 +- hyperion/torch/adv_attacks/pgd_attack.py | 3 +- .../adv_attacks/random_attack_factory.py | 4 +- hyperion/torch/adv_defenses/wave_gan_white.py | 5 +- hyperion/torch/data/__init__.py | 4 +- hyperion/torch/data/audio_dataset.py | 7 +- hyperion/torch/data/bucketing_seg_sampler.py | 3 +- .../data/class_weighted_embed_sampler.py | 3 +- .../data/class_weighted_seg_chunk_sampler.py | 3 +- hyperion/torch/data/embed_dataset.py | 6 +- hyperion/torch/data/embed_sampler.py | 3 +- hyperion/torch/data/embed_sampler_factory.py | 2 +- hyperion/torch/data/feat_seq_dataset.py | 6 +- hyperion/torch/data/hyp_sampler.py | 3 +- .../torch/data/paired_feat_seq_dataset.py | 1 + hyperion/torch/data/seg_chunk_sampler.py | 3 +- hyperion/torch/data/seg_sampler.py | 3 +- hyperion/torch/data/weighted_embed_sampler.py | 1 + hyperion/torch/data/weighted_seq_sampler.py | 3 +- hyperion/torch/layer_blocks/__init__.py | 76 +++++++------------ .../layer_blocks/conformer_encoder_v1.py | 2 +- hyperion/torch/layer_blocks/dc1d_blocks.py | 2 +- hyperion/torch/layer_blocks/dc2d_blocks.py | 2 +- hyperion/torch/layer_blocks/etdnn_blocks.py | 2 +- hyperion/torch/layer_blocks/fc_blocks.py | 2 +- hyperion/torch/layer_blocks/mbconv_blocks.py | 5 +- .../torch/layer_blocks/res2net1d_blocks.py | 5 +- .../torch/layer_blocks/res2net2d_blocks.py | 3 +- hyperion/torch/layer_blocks/res2net_blocks.py | 5 +- .../torch/layer_blocks/resetdnn_blocks.py | 2 +- .../torch/layer_blocks/resnet1d_blocks.py | 4 +- .../torch/layer_blocks/resnet2d_blocks.py | 2 +- hyperion/torch/layer_blocks/resnet_blocks.py | 2 +- hyperion/torch/layer_blocks/se_blocks.py | 2 +- .../torch/layer_blocks/seresnet_blocks.py | 4 +- hyperion/torch/layer_blocks/spine_blocks.py | 8 +- hyperion/torch/layer_blocks/tdnn_blocks.py | 2 +- hyperion/torch/layers/__init__.py | 35 ++++----- hyperion/torch/layers/activation_factory.py | 1 + hyperion/torch/layers/audio_feats.py | 4 +- hyperion/torch/layers/audio_feats_factory.py | 5 +- hyperion/torch/layers/global_pool.py | 1 + hyperion/torch/layers/margin_losses.py | 4 +- hyperion/torch/layers/mvn.py | 2 +- hyperion/torch/layers/pdf_storage.py | 2 +- hyperion/torch/layers/pool_factory.py | 3 +- hyperion/torch/layers/spec_augment.py | 3 +- hyperion/torch/layers/tensor2pdf.py | 2 +- hyperion/torch/layers/tensor2pdf1.py | 2 +- hyperion/torch/layers/vq.py | 2 +- hyperion/torch/loggers/__init__.py | 2 +- hyperion/torch/loggers/csv_logger.py | 3 +- hyperion/torch/loggers/logger.py | 1 + hyperion/torch/loggers/logger_list.py | 1 + hyperion/torch/loggers/prog_logger.py | 2 +- hyperion/torch/loggers/tensorboard_logger.py | 1 + hyperion/torch/loggers/wandb_logger.py | 2 +- hyperion/torch/lr_schedulers/__init__.py | 8 +- hyperion/torch/lr_schedulers/cos_lr.py | 2 +- hyperion/torch/lr_schedulers/factory.py | 6 +- hyperion/torch/lr_schedulers/noam_lr.py | 5 +- hyperion/torch/lr_schedulers/triangular_lr.py | 2 +- hyperion/torch/metrics/__init__.py | 4 +- hyperion/torch/metrics/accuracy.py | 2 +- hyperion/torch/models/__init__.py | 23 +++--- hyperion/torch/models/ae/ae.py | 2 +- hyperion/torch/models/plda/plda_base.py | 4 +- hyperion/torch/models/plda/splda.py | 2 +- hyperion/torch/models/tvector/__init__.py | 2 +- .../torch/models/tvector/resnet_tvector.py | 2 +- hyperion/torch/models/tvector/tvector.py | 7 +- hyperion/torch/models/vae/vae.py | 8 +- 
hyperion/torch/models/vae/vq_vae.py | 6 +- .../torch/models/wav2xvectors/__init__.py | 12 ++- .../hf_hubert2resnet1d_xvector.py | 7 +- .../hf_wav2vec2resnet1d_xvector.py | 7 +- .../models/wav2xvectors/hf_wav2xvector.py | 10 ++- .../wav2xvectors/hf_wavlm2resnet1d_xvector.py | 7 +- .../wav2xvectors/wav2resnet1d_xvector.py | 5 +- .../models/wav2xvectors/wav2resnet_xvector.py | 5 +- .../torch/models/wav2xvectors/wav2xvector.py | 5 +- hyperion/torch/models/xvectors/__init__.py | 10 +-- .../models/xvectors/efficient_net_xvector.py | 5 +- .../torch/models/xvectors/resnet1d_xvector.py | 5 +- .../torch/models/xvectors/resnet_xvector.py | 5 +- .../torch/models/xvectors/spinenet_xvector.py | 5 +- .../torch/models/xvectors/tdnn_xvector.py | 5 +- .../models/xvectors/transformer_xvector_v1.py | 5 +- hyperion/torch/models/xvectors/xvector.py | 5 +- hyperion/torch/narchs/__init__.py | 42 ++++------ hyperion/torch/narchs/audio_feats_mvn.py | 2 +- hyperion/torch/narchs/classif_head.py | 7 +- hyperion/torch/narchs/conformer_encoder_v1.py | 9 ++- hyperion/torch/narchs/dc1d_decoder.py | 8 +- hyperion/torch/narchs/dc1d_encoder.py | 5 +- hyperion/torch/narchs/dc2d_decoder.py | 8 +- hyperion/torch/narchs/dc2d_encoder.py | 5 +- hyperion/torch/narchs/efficient_net.py | 7 +- hyperion/torch/narchs/etdnn.py | 2 +- hyperion/torch/narchs/fcnet.py | 4 +- hyperion/torch/narchs/resetdnn.py | 4 +- hyperion/torch/narchs/resnet.py | 18 ++--- hyperion/torch/narchs/resnet1d_decoder.py | 11 ++- hyperion/torch/narchs/resnet1d_encoder.py | 20 ++--- hyperion/torch/narchs/resnet2d_decoder.py | 11 ++- hyperion/torch/narchs/resnet2d_encoder.py | 14 ++-- hyperion/torch/narchs/resnet_factory.py | 2 +- hyperion/torch/narchs/spinenet.py | 12 +-- hyperion/torch/narchs/spinenet_factory.py | 2 +- hyperion/torch/narchs/tdnn.py | 2 +- hyperion/torch/narchs/tdnn_factory.py | 4 +- hyperion/torch/narchs/torch_na_loader.py | 34 ++++----- .../torch/narchs/transformer_encoder_v1.py | 6 +- hyperion/torch/optim/__init__.py | 2 +- hyperion/torch/optim/factory.py | 5 +- hyperion/torch/optim/radam.py | 1 + hyperion/torch/seq_embed/__init__.py | 8 +- hyperion/torch/torch_defs.py | 1 - hyperion/torch/torch_model_loader.py | 4 +- hyperion/torch/tpm/__init__.py | 2 +- hyperion/torch/tpm/hf/__init__.py | 2 +- hyperion/torch/tpm/hf/hf_hubert.py | 12 +-- hyperion/torch/tpm/hf/hf_wav2vec2.py | 12 +-- hyperion/torch/tpm/hf/hf_wav2vec_base.py | 13 ++-- hyperion/torch/tpm/hf/hf_wavlm.py | 12 +-- hyperion/torch/trainers/ae_trainer.py | 3 +- hyperion/torch/trainers/dvae_trainer.py | 3 +- hyperion/torch/trainers/torch_trainer.py | 17 ++--- hyperion/torch/trainers/vae_trainer.py | 3 +- hyperion/torch/trainers/vq_dvae_trainer.py | 3 +- hyperion/torch/trainers/vq_vae_trainer.py | 3 +- .../torch/trainers/xvector_adv_trainer.py | 3 +- .../trainers/xvector_adv_trainer_from_wav.py | 3 +- .../trainers/xvector_trainer_deep_feat_reg.py | 3 +- hyperion/torch/utils/__init__.py | 17 ++--- hyperion/torch/utils/ddp.py | 13 ++-- hyperion/torch/utils/devices.py | 2 +- hyperion/torch/utils/eval_utils.py | 1 + hyperion/torch/utils/metric_acc.py | 1 + hyperion/torch/utils/misc.py | 2 +- 344 files changed, 1557 insertions(+), 1952 deletions(-) diff --git a/hyperion/__init__.py b/hyperion/__init__.py index 055441cd..fc35423c 100644 --- a/hyperion/__init__.py +++ b/hyperion/__init__.py @@ -4,11 +4,6 @@ """ -from . import utils -from . import np -from . import io -from . import torch -from . import helpers - +from . 
import helpers, io, np, torch, utils __version__ = "0.4.0a" diff --git a/hyperion/bin/compute_energy_vad.py b/hyperion/bin/compute_energy_vad.py index 99f562cf..15d74f3a 100755 --- a/hyperion/bin/compute_energy_vad.py +++ b/hyperion/bin/compute_energy_vad.py @@ -3,22 +3,18 @@ Copyright 2018 Jesus Villalba (Johns Hopkins University) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) from hyperion.hyp_defs import config_logger -from hyperion.io import SequentialAudioReader as AR from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR from hyperion.np.feats import EnergyVAD diff --git a/hyperion/bin/compute_mfcc_feats.py b/hyperion/bin/compute_mfcc_feats.py index b7e90056..a83f95d1 100755 --- a/hyperion/bin/compute_mfcc_feats.py +++ b/hyperion/bin/compute_mfcc_feats.py @@ -3,23 +3,19 @@ Copyright 2018 Jesus Villalba (Johns Hopkins University) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) from hyperion.hyp_defs import config_logger +from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import SequentialDataReaderFactory as DRF -from hyperion.io import DataWriterFactory as DWF from hyperion.io import compression_methods from hyperion.np.feats import MFCC diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py index a09e5c11..2a92a83e 100755 --- a/hyperion/bin/extract_wav2vec2xvectors.py +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -4,32 +4,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np import pandas as pd - -import torch import torchaudio.transforms as tat +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) +import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.np.augment import SpeechAugment - -from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info resamplers = {} diff --git a/hyperion/bin/extract_xvectors_from_feats.py b/hyperion/bin/extract_xvectors_from_feats.py index 9fb1006c..926e0bcc 100755 --- a/hyperion/bin/extract_xvectors_from_feats.py +++ b/hyperion/bin/extract_xvectors_from_feats.py @@ -4,30 +4,24 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys 
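+# NOTE: across this commit, imports are regrouped stdlib -> third-party ->
+# first-party and long from-imports are wrapped in isort's default grid
+# style (presumably a plain `isort .` run with no profile configured)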
import time -import logging import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF from hyperion.np.feats import MeanVarianceNorm as MVN - -from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py index c1cdf02d..addabbcf 100755 --- a/hyperion/bin/extract_xvectors_from_wav.py +++ b/hyperion/bin/extract_xvectors_from_wav.py @@ -4,32 +4,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np import pandas as pd +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.np.augment import SpeechAugment - -from hyperion.torch.utils import open_device -from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin/extract_xvectors_slidwin_from_feats.py b/hyperion/bin/extract_xvectors_slidwin_from_feats.py index d14f16f3..e3d2fcbb 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_feats.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_feats.py @@ -4,31 +4,25 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np - -import torch import yaml +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) +import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF from hyperion.np.feats import MeanVarianceNorm as MVN - -from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py index b4bd2b0d..2b1bba3b 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_wav.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py @@ -4,33 +4,27 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import 
( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np import pandas as pd import yaml +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.np.augment import SpeechAugment - -from hyperion.torch.utils import open_device -from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py index 718aeeb9..b3edd9b5 100755 --- a/hyperion/bin/finetune_wav2vec2xvector.py +++ b/hyperion/bin/finetune_wav2vec2xvector.py @@ -3,37 +3,29 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory - from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch.models import ( - HFWav2Vec2ResNet1dXVector, - HFHubert2ResNet1dXVector, - HFWavLM2ResNet1dXVector, -) -from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.models import (HFHubert2ResNet1dXVector, + HFWav2Vec2ResNet1dXVector, + HFWavLM2ResNet1dXVector) +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp model_dict = { "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, diff --git a/hyperion/bin/finetune_xvector_dfr_from_feats.py b/hyperion/bin/finetune_xvector_dfr_from_feats.py index a26c14fb..2ac01025 100755 --- a/hyperion/bin/finetune_xvector_dfr_from_feats.py +++ b/hyperion/bin/finetune_xvector_dfr_from_feats.py @@ -4,32 +4,27 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device, ddp -from hyperion.torch.models import XVector as XVec -from hyperion.torch.trainers import XVectorTrainerDeepFeatReg as Trainer -from hyperion.torch.data import FeatSeqDataset as 
SD +from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.models import XVector as XVec +from hyperion.torch.trainers import XVectorTrainerDeepFeatReg as Trainer +from hyperion.torch.utils import ddp, open_device def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin/finetune_xvector_dfr_from_wav.py b/hyperion/bin/finetune_xvector_dfr_from_wav.py index 437c76f0..ff97d3ca 100755 --- a/hyperion/bin/finetune_xvector_dfr_from_wav.py +++ b/hyperion/bin/finetune_xvector_dfr_from_wav.py @@ -3,32 +3,27 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device, ddp -from hyperion.torch.models import XVector as XVec -from hyperion.torch.trainers import XVectorTrainerDeepFeatRegFromWav as Trainer +from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import XVector as XVec from hyperion.torch.narchs import AudioFeatsMVN as AF -from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.trainers import XVectorTrainerDeepFeatRegFromWav as Trainer +from hyperion.torch.utils import ddp, open_device def init_data( diff --git a/hyperion/bin/finetune_xvector_from_feats.py b/hyperion/bin/finetune_xvector_from_feats.py index ec6386c8..7a1fb5a9 100755 --- a/hyperion/bin/finetune_xvector_from_feats.py +++ b/hyperion/bin/finetune_xvector_from_feats.py @@ -3,31 +3,26 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device, ddp -from hyperion.torch.models import XVector as XVec -from hyperion.torch.trainers import XVectorTrainer as Trainer -from hyperion.torch.data import FeatSeqDataset as SD +from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.models import XVector as XVec +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import 
ddp, open_device def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py index c6239b45..7b68b9dd 100755 --- a/hyperion/bin/finetune_xvector_from_wav.py +++ b/hyperion/bin/finetune_xvector_from_wav.py @@ -3,37 +3,31 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path -import torch +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) +import torch from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import ddp - -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.data import AudioDataset as AD - from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch.narchs import AudioFeatsMVN as AF -from hyperion.torch.models import ResNetXVector as RXVec -from hyperion.torch.models import ResNet1dXVector as R1dXVec from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import ResNet1dXVector as R1dXVec +from hyperion.torch.models import ResNetXVector as RXVec +from hyperion.torch.models import SpineNetXVector as SpineXVec from hyperion.torch.models import TDNNXVector as TDXVec from hyperion.torch.models import TransformerXVectorV1 as TFXVec -from hyperion.torch.models import SpineNetXVector as SpineXVec +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.utils import ddp xvec_dict = { "resnet": RXVec, diff --git a/hyperion/bin/make_babble_noise_audio_files.py b/hyperion/bin/make_babble_noise_audio_files.py index 460f4044..972ff01f 100755 --- a/hyperion/bin/make_babble_noise_audio_files.py +++ b/hyperion/bin/make_babble_noise_audio_files.py @@ -3,26 +3,22 @@ Copyright 2020 Jesus Villalba (Johns Hopkins University) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging +import math import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging -import math import numpy as np -from scipy import signal, ndimage +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) +from scipy import ndimage, signal from hyperion.hyp_defs import config_logger -from hyperion.utils import Utt2Info -from hyperion.io import RandomAccessAudioReader as AR from hyperion.io import AudioWriter as Writer +from hyperion.io import RandomAccessAudioReader as AR from hyperion.io import VADReaderFactory as VRF +from hyperion.utils import Utt2Info def make_noise(xs): diff --git a/hyperion/bin/pack_wav_rirs.py b/hyperion/bin/pack_wav_rirs.py index 00177988..dccf58da 100755 --- a/hyperion/bin/pack_wav_rirs.py +++ b/hyperion/bin/pack_wav_rirs.py @@ -3,23 +3,19 @@ Copyright 2020 Jesus Villalba (Johns Hopkins University) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import 
logging +import math import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging -import math import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) from hyperion.hyp_defs import config_logger -from hyperion.io import SequentialAudioReader as AR from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR def pack_wav_rirs(input_path, output_spec, **kwargs): diff --git a/hyperion/bin/plot_embedding_tsne.py b/hyperion/bin/plot_embedding_tsne.py index e514252f..e011dfe8 100755 --- a/hyperion/bin/plot_embedding_tsne.py +++ b/hyperion/bin/plot_embedding_tsne.py @@ -4,28 +4,22 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -import sys import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, - ActionYesNo, -) +import sys import time from pathlib import Path -import numpy as np -import pandas as pd import matplotlib - import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo, + ArgumentParser, namespace_to_dict) from hyperion.hyp_defs import config_logger -from hyperion.utils import SegmentSet from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.np.transforms import PCA, SklTSNE, LNorm +from hyperion.np.transforms import PCA, LNorm, SklTSNE +from hyperion.utils import SegmentSet matplotlib.use("Agg") colors = ["b", "g", "r", "c", "m", "y", "k"] diff --git a/hyperion/bin/plot_embedding_tsne_per_class.py b/hyperion/bin/plot_embedding_tsne_per_class.py index 5e832bff..6f35f074 100755 --- a/hyperion/bin/plot_embedding_tsne_per_class.py +++ b/hyperion/bin/plot_embedding_tsne_per_class.py @@ -4,31 +4,24 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -import sys import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, - ActionYesNo, -) +import sys import time from pathlib import Path -import numpy as np -import pandas as pd import matplotlib - import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo, + ArgumentParser, namespace_to_dict) from hyperion.hyp_defs import config_logger -from hyperion.utils import SegmentSet -from hyperion.utils.math import cosine_scoring from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.np.transforms import PCA, SklTSNE, LNorm from hyperion.np.clustering import AHC - +from hyperion.np.transforms import PCA, LNorm, SklTSNE +from hyperion.utils import SegmentSet +from hyperion.utils.math import cosine_scoring matplotlib.use("Agg") colors = ["b", "g", "r", "c", "m", "y", "k"] diff --git a/hyperion/bin/preprocess_audio_files.py b/hyperion/bin/preprocess_audio_files.py index 67b1cf61..2f4e5cbc 100755 --- a/hyperion/bin/preprocess_audio_files.py +++ b/hyperion/bin/preprocess_audio_files.py @@ -3,26 +3,22 @@ Copyright 2020 Jesus Villalba (Johns Hopkins University) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging +import math import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging -import math import numpy as np -from scipy import signal, ndimage +from jsonargparse 
import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) +from scipy import ndimage, signal from hyperion.hyp_defs import config_logger -from hyperion.utils import Utt2Info -from hyperion.io import SequentialAudioReader as AR from hyperion.io import AudioWriter as Writer +from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF +from hyperion.utils import Utt2Info def process_vad(vad, length, fs, dilation, erosion): diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index 7187c13c..cb2b4a6d 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -3,36 +3,28 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging +import multiprocessing # import sys import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) import time -import logging -import multiprocessing +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory - from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch.models import ( - HFWav2Vec2ResNet1dXVector, - HFHubert2ResNet1dXVector, - HFWavLM2ResNet1dXVector, -) +from hyperion.torch.models import (HFHubert2ResNet1dXVector, + HFWav2Vec2ResNet1dXVector, + HFWavLM2ResNet1dXVector) +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp model_dict = { "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, diff --git a/hyperion/bin/train_xvector_from_feats.py b/hyperion/bin/train_xvector_from_feats.py index c09f15a4..7f4ab0fa 100755 --- a/hyperion/bin/train_xvector_from_feats.py +++ b/hyperion/bin/train_xvector_from_feats.py @@ -3,36 +3,31 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainer as Trainer -from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch.models import ResNetXVector as RXVec -from hyperion.torch.models import ResNet1dXVector as R1dXVec from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import ResNet1dXVector as R1dXVec +from hyperion.torch.models import ResNetXVector as RXVec +from hyperion.torch.models import SpineNetXVector as SpineXVec from 
hyperion.torch.models import TDNNXVector as TDXVec from hyperion.torch.models import TransformerXVectorV1 as TFXVec -from hyperion.torch.models import SpineNetXVector as SpineXVec +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp xvec_dict = { "resnet": RXVec, diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py index 0e074977..57a33b56 100755 --- a/hyperion/bin/train_xvector_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -3,36 +3,31 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path -import torch +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) +import torch from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.data import AudioDataset as AD - # from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch.narchs import AudioFeatsMVN as AF -from hyperion.torch.models import ResNetXVector as RXVec -from hyperion.torch.models import ResNet1dXVector as R1dXVec from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import ResNet1dXVector as R1dXVec +from hyperion.torch.models import ResNetXVector as RXVec +from hyperion.torch.models import SpineNetXVector as SpineXVec from hyperion.torch.models import TDNNXVector as TDXVec from hyperion.torch.models import TransformerXVectorV1 as TFXVec -from hyperion.torch.models import SpineNetXVector as SpineXVec +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.utils import ddp xvec_dict = { "resnet": RXVec, diff --git a/hyperion/bin_deprec/ark2hyp.py b/hyperion/bin_deprec/ark2hyp.py index 45a20712..abcb4457 100755 --- a/hyperion/bin_deprec/ark2hyp.py +++ b/hyperion/bin_deprec/ark2hyp.py @@ -7,9 +7,9 @@ Converts from Ark format to h5 format (deprecated, use copy-feats.py) """ -import sys -import os import argparse +import os +import sys import time import numpy as np diff --git a/hyperion/bin_deprec/arkvad2nist.py b/hyperion/bin_deprec/arkvad2nist.py index bd15592a..559371be 100755 --- a/hyperion/bin_deprec/arkvad2nist.py +++ b/hyperion/bin_deprec/arkvad2nist.py @@ -7,15 +7,14 @@ Converts from Ark format to NIST OpenSAT """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np - from hyperion.io import KaldiDataReader diff --git a/hyperion/bin_deprec/compute-gmm-post.py b/hyperion/bin_deprec/compute-gmm-post.py index 1b0a8d04..58675336 100755 --- a/hyperion/bin_deprec/compute-gmm-post.py +++ b/hyperion/bin_deprec/compute-gmm-post.py @@ -7,21 +7,20 @@ Computes GMM posteriors """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np - from keras import backend as K -from hyperion.hyp_defs import 
set_float_cpu, float_cpu, config_logger -from hyperion.io import HypDataWriter from hyperion.helpers import SequenceReader as SR -from hyperion.transforms import TransformList +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import HypDataWriter from hyperion.pdfs import DiagGMM +from hyperion.transforms import TransformList def to_sparse(r, num_comp): diff --git a/hyperion/bin_deprec/eval-2class-performance.py b/hyperion/bin_deprec/eval-2class-performance.py index a10ec5c0..eff16830 100755 --- a/hyperion/bin_deprec/eval-2class-performance.py +++ b/hyperion/bin_deprec/eval-2class-performance.py @@ -7,18 +7,18 @@ Evals EER, DCF, DET """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np from hyperion.hyp_defs import config_logger -from hyperion.utils.trial_scores import TrialScores -from hyperion.utils.trial_key import TrialKey from hyperion.metrics import compute_eer +from hyperion.utils.trial_key import TrialKey +from hyperion.utils.trial_scores import TrialScores def eval_2class_performance(score_file, key_file, output_path): diff --git a/hyperion/bin_deprec/eval-elbo-ubm.py b/hyperion/bin_deprec/eval-elbo-ubm.py index 5cf1aa0d..bf4839db 100755 --- a/hyperion/bin_deprec/eval-elbo-ubm.py +++ b/hyperion/bin_deprec/eval-elbo-ubm.py @@ -7,18 +7,18 @@ Evaluate the likelihood of the ubm on some data """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import float_cpu, config_logger from hyperion.helpers import SequenceReader as SR -from hyperion.transforms import TransformList +from hyperion.hyp_defs import config_logger, float_cpu from hyperion.pdfs import DiagGMM +from hyperion.transforms import TransformList def eval_elbo( diff --git a/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py b/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py index 9e2880f8..4548e49b 100755 --- a/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py +++ b/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py @@ -7,21 +7,21 @@ Evals Q-scoring back-end """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np +from hyperion.classifiers import QScoringHomoGBE as GBE +from hyperion.helpers import ClassifTrialDataReader as TDR from hyperion.hyp_defs import config_logger -from hyperion.utils.trial_ndx import TrialNdx -from hyperion.utils.trial_scores import TrialScores from hyperion.io import HypDataWriter as HDW -from hyperion.helpers import ClassifTrialDataReader as TDR from hyperion.transforms import TransformList -from hyperion.classifiers import QScoringHomoGBE as GBE +from hyperion.utils.trial_ndx import TrialNdx +from hyperion.utils.trial_scores import TrialScores def eval_qscoring_gbe( diff --git a/hyperion/bin_deprec/eval-score-norm.py b/hyperion/bin_deprec/eval-score-norm.py index fd6e2e00..4b620518 100755 --- a/hyperion/bin_deprec/eval-score-norm.py +++ b/hyperion/bin_deprec/eval-score-norm.py @@ -7,18 +7,18 @@ Score Normalization """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np from hyperion.hyp_defs import config_logger from hyperion.score_norm import * -from hyperion.utils.trial_scroes import TrialScores from hyperion.utils.trial_ndx import TrialNdx +from hyperion.utils.trial_scroes import TrialScores def load_scores(score_file, enr_coh_file, coh_test_file, coh_coh_file): diff 
--git a/hyperion/bin_deprec/h5vad2nist.py b/hyperion/bin_deprec/h5vad2nist.py index 804c8637..fb45c22b 100755 --- a/hyperion/bin_deprec/h5vad2nist.py +++ b/hyperion/bin_deprec/h5vad2nist.py @@ -7,11 +7,11 @@ Converts from Ark format to NIST OpenSAT """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np diff --git a/hyperion/bin_deprec/init-ubm.py b/hyperion/bin_deprec/init-ubm.py index 8a162314..204ca855 100755 --- a/hyperion/bin_deprec/init-ubm.py +++ b/hyperion/bin_deprec/init-ubm.py @@ -8,20 +8,19 @@ Initialize UBM """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np - from keras import backend as K -from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger -from hyperion.utils.multithreading import threadsafe_generator from hyperion.helpers import SequenceReader as SR +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.pdfs import DiagGMM +from hyperion.utils.multithreading import threadsafe_generator @threadsafe_generator diff --git a/hyperion/bin_deprec/scores2lre_format.py b/hyperion/bin_deprec/scores2lre_format.py index 50e9147f..717c1535 100755 --- a/hyperion/bin_deprec/scores2lre_format.py +++ b/hyperion/bin_deprec/scores2lre_format.py @@ -4,12 +4,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import argparse -import time -import re import logging +import os +import re +import sys +import time import numpy as np diff --git a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py index 9adb2cfd..608a5271 100755 --- a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py +++ b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py @@ -3,27 +3,26 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import PairedSeqDataset as SD from hyperion.torch.helpers import OptimizerFactory as OF from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF -from hyperion.torch.narchs import ConformerEncoderV1 as Encoder -from hyperion.torch.narchs import ConformerEncoderV1 as Decoder from hyperion.torch.models import VQVAE as VAE +from hyperion.torch.narchs import ConformerEncoderV1 as Decoder +from hyperion.torch.narchs import ConformerEncoderV1 as Encoder from hyperion.torch.trainers import VQDVAETrainer as Trainer -from hyperion.torch.data import PairedSeqDataset as SD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.utils import open_device def train_vae( diff --git a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py index d227a8b2..a4cc54e6 100755 --- a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py +++ b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py @@ -3,27 +3,26 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import 
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import ConformerEncoderV1 as Encoder
-from hyperion.torch.narchs import ConformerEncoderV1 as Decoder
 from hyperion.torch.models import VQVAE as VAE
+from hyperion.torch.narchs import ConformerEncoderV1 as Decoder
+from hyperion.torch.narchs import ConformerEncoderV1 as Encoder
 from hyperion.torch.trainers import VQVAETrainer as Trainer
-from hyperion.torch.data import SeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-dc1d-dvae.py b/hyperion/bin_deprec/torch-train-dc1d-dvae.py
index 343807c2..1b88beba 100755
--- a/hyperion/bin_deprec/torch-train-dc1d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-dc1d-dvae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import PairedSeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import DC1dEncoder as Encoder
-from hyperion.torch.narchs import DC1dDecoder as Decoder
 from hyperion.torch.models import VAE
+from hyperion.torch.narchs import DC1dDecoder as Decoder
+from hyperion.torch.narchs import DC1dEncoder as Encoder
 from hyperion.torch.trainers import DVAETrainer as Trainer
-from hyperion.torch.data import PairedSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-dc1d-vae.py b/hyperion/bin_deprec/torch-train-dc1d-vae.py
index daa67b3e..dd5d2e72 100755
--- a/hyperion/bin_deprec/torch-train-dc1d-vae.py
+++ b/hyperion/bin_deprec/torch-train-dc1d-vae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs.dc1d_encoder import DC1dEncoder as Encoder
-from hyperion.torch.narchs.dc1d_decoder import DC1dDecoder as Decoder
 from hyperion.torch.models import VAE
+from hyperion.torch.narchs.dc1d_decoder import DC1dDecoder as Decoder
+from hyperion.torch.narchs.dc1d_encoder import DC1dEncoder as Encoder
 from hyperion.torch.trainers.vae_trainer import VAETrainer as Trainer
-from hyperion.torch.data import SeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-dc2d-dvae.py b/hyperion/bin_deprec/torch-train-dc2d-dvae.py
index 2e32b9f9..3f7cb17d 100755
--- a/hyperion/bin_deprec/torch-train-dc2d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-dc2d-dvae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import PairedSeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import DC2dEncoder as Encoder
-from hyperion.torch.narchs import DC2dDecoder as Decoder
 from hyperion.torch.models import VAE
+from hyperion.torch.narchs import DC2dDecoder as Decoder
+from hyperion.torch.narchs import DC2dEncoder as Encoder
 from hyperion.torch.trainers import DVAETrainer as Trainer
-from hyperion.torch.data import PairedSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-dc2d-vae.py b/hyperion/bin_deprec/torch-train-dc2d-vae.py
index d8675ae9..5b97f55c 100755
--- a/hyperion/bin_deprec/torch-train-dc2d-vae.py
+++ b/hyperion/bin_deprec/torch-train-dc2d-vae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import DC2dEncoder as Encoder
-from hyperion.torch.narchs import DC2dDecoder as Decoder
 from hyperion.torch.models import VAE
+from hyperion.torch.narchs import DC2dDecoder as Decoder
+from hyperion.torch.narchs import DC2dEncoder as Encoder
 from hyperion.torch.trainers import VAETrainer as Trainer
-from hyperion.torch.data import SeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-dvae.py b/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
index 420cf7b2..ca6f6996 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
@@ -3,27 +3,26 @@
 Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import PairedSeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import ResNet1dEncoder as Encoder
-from hyperion.torch.narchs import ResNet1dDecoder as Decoder
 from hyperion.torch.models import VAE
+from hyperion.torch.narchs import ResNet1dDecoder as Decoder
+from hyperion.torch.narchs import ResNet1dEncoder as Encoder
 from hyperion.torch.trainers import DVAETrainer as Trainer
-from hyperion.torch.data import PairedSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vae.py b/hyperion/bin_deprec/torch-train-resnet1d-vae.py
index a8edb3c3..a6218567 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import ResNet1dEncoder as Encoder
-from hyperion.torch.narchs import ResNet1dDecoder as Decoder
 from hyperion.torch.models.vae.vae import VAE
+from hyperion.torch.narchs import ResNet1dDecoder as Decoder
+from hyperion.torch.narchs import ResNet1dEncoder as Encoder
 from hyperion.torch.trainers import VAETrainer as Trainer
-from hyperion.torch.data import SeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py b/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
index 9571eff8..89448754 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import PairedSeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import ResNet1dEncoder as Encoder
-from hyperion.torch.narchs import ResNet1dDecoder as Decoder
 from hyperion.torch.models import VQVAE as VAE
+from hyperion.torch.narchs import ResNet1dDecoder as Decoder
+from hyperion.torch.narchs import ResNet1dEncoder as Encoder
 from hyperion.torch.trainers import VQDVAETrainer as Trainer
-from hyperion.torch.data import PairedSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py b/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
index 373be8f3..4a84bbff 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import ResNet1dEncoder as Encoder
-from hyperion.torch.narchs import ResNet1dDecoder as Decoder
 from hyperion.torch.models import VQVAE as VAE
+from hyperion.torch.narchs import ResNet1dDecoder as Decoder
+from hyperion.torch.narchs import ResNet1dEncoder as Encoder
 from hyperion.torch.trainers import VQVAETrainer as Trainer
-from hyperion.torch.data import SeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-dvae.py b/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
index 6845750f..3f6cd6ba 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import PairedSeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import ResNet2dEncoder as Encoder
-from hyperion.torch.narchs import ResNet2dDecoder as Decoder
 from hyperion.torch.models import VAE
+from hyperion.torch.narchs import ResNet2dDecoder as Decoder
+from hyperion.torch.narchs import ResNet2dEncoder as Encoder
 from hyperion.torch.trainers import DVAETrainer as Trainer
-from hyperion.torch.data import PairedSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vae.py b/hyperion/bin_deprec/torch-train-resnet2d-vae.py
index 575c5575..4e853230 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vae.py
@@ -3,32 +3,27 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
-from pathlib import Path
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
-import time
 import logging
 import multiprocessing
+import os
+import sys
+import time
+from pathlib import Path

 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device, ddp
-from hyperion.torch.narchs import ResNet2dEncoder as Encoder
-from hyperion.torch.narchs import ResNet2dDecoder as Decoder
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import FeatSeqDataset as SD
 from hyperion.torch.models import VAE
+from hyperion.torch.narchs import ResNet2dDecoder as Decoder
+from hyperion.torch.narchs import ResNet2dEncoder as Encoder
 from hyperion.torch.trainers import VAETrainer as Trainer
-from hyperion.torch.data import FeatSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import ddp, open_device


 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py b/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
index 95eb3923..5e0add50 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import PairedSeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import ResNet2dEncoder as Encoder
-from hyperion.torch.narchs import ResNet2dDecoder as Decoder
 from hyperion.torch.models import VQVAE as VAE
+from hyperion.torch.narchs import ResNet2dDecoder as Decoder
+from hyperion.torch.narchs import ResNet2dEncoder as Encoder
 from hyperion.torch.trainers import VQDVAETrainer as Trainer
-from hyperion.torch.data import PairedSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py b/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
index 07f25d5f..6398d959 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import ResNet2dEncoder as Encoder
-from hyperion.torch.narchs import ResNet2dDecoder as Decoder
 from hyperion.torch.models import VQVAE as VAE
+from hyperion.torch.narchs import ResNet2dDecoder as Decoder
+from hyperion.torch.narchs import ResNet2dEncoder as Encoder
 from hyperion.torch.trainers import VQVAETrainer as Trainer
-from hyperion.torch.data import SeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
index 39ee2974..0137e101 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
@@ -4,27 +4,26 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import PairedSeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import TransformerEncoderV1 as Encoder
-from hyperion.torch.narchs import TransformerEncoderV1 as Decoder
 from hyperion.torch.models import VAE
+from hyperion.torch.narchs import TransformerEncoderV1 as Decoder
+from hyperion.torch.narchs import TransformerEncoderV1 as Encoder
 from hyperion.torch.trainers import DVAETrainer as Trainer
-from hyperion.torch.data import PairedSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
index 9f5cbdf8..71021825 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import TransformerEncoderV1 as Encoder
-from hyperion.torch.narchs import TransformerEncoderV1 as Decoder
 from hyperion.torch.models.vae.vae import VAE
+from hyperion.torch.narchs import TransformerEncoderV1 as Decoder
+from hyperion.torch.narchs import TransformerEncoderV1 as Encoder
 from hyperion.torch.trainers import VAETrainer as Trainer
-from hyperion.torch.data import SeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
index c6246fe3..a6908c4f 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import PairedSeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import TransformerEncoderV1 as Encoder
-from hyperion.torch.narchs import TransformerEncoderV1 as Decoder
 from hyperion.torch.models import VQVAE as VAE
+from hyperion.torch.narchs import TransformerEncoderV1 as Decoder
+from hyperion.torch.narchs import TransformerEncoderV1 as Encoder
 from hyperion.torch.trainers import VQDVAETrainer as Trainer
-from hyperion.torch.data import PairedSeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
index 4659e0d8..b3b07682 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
@@ -3,27 +3,26 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset as SD
 from hyperion.torch.helpers import OptimizerFactory as OF
 from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
-from hyperion.torch.narchs import TransformerEncoderV1 as Encoder
-from hyperion.torch.narchs import TransformerEncoderV1 as Decoder
 from hyperion.torch.models import VQVAE as VAE
+from hyperion.torch.narchs import TransformerEncoderV1 as Decoder
+from hyperion.torch.narchs import TransformerEncoderV1 as Encoder
 from hyperion.torch.trainers import VQVAETrainer as Trainer
-from hyperion.torch.data import SeqDataset as SD
-from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.utils import open_device


 def train_vae(
diff --git a/hyperion/bin_deprec/torch-train-xvector.py b/hyperion/bin_deprec/torch-train-xvector.py
index 4cc443ae..4c69eb25 100755
--- a/hyperion/bin_deprec/torch-train-xvector.py
+++ b/hyperion/bin_deprec/torch-train-xvector.py
@@ -3,26 +3,27 @@
 Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
-import torch
-from torch.utils.data import DataLoader

-from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger
-from hyperion.torch.torch_defs import float_torch
-from hyperion.torch.utils import open_device
-from hyperion.torch.data import SeqDataset, ClassWeightedSeqSampler as Sampler
-from hyperion.torch.helpers import TorchNALoader
+import torch
+from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
+from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import SeqDataset
 from hyperion.torch.helpers import OptimizerFactory as OF
-from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
+from hyperion.torch.helpers import TorchNALoader
 from hyperion.torch.layers import GlobalPool1dFactory as PF
-from hyperion.torch.seq_embed import XVector, XVectorTrainer
+from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
 from hyperion.torch.metrics import CategoricalAccuracy
+from hyperion.torch.seq_embed import XVector, XVectorTrainer
+from hyperion.torch.torch_defs import float_torch
+from hyperion.torch.utils import open_device
+from torch.utils.data import DataLoader


 def train_xvector(
diff --git a/hyperion/bin_deprec/train-q-scoring-homo-gbe.py b/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
index 69780865..8a348728 100755
--- a/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
+++ b/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
@@ -7,18 +7,18 @@
 Trains Q-scoring back-end
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np

-from hyperion.hyp_defs import config_logger
+from hyperion.classifiers import QScoringHomoGBE as GBE
 from hyperion.helpers import VectorClassReader as VCR
+from hyperion.hyp_defs import config_logger
 from hyperion.transforms import TransformList
-from hyperion.classifiers import QScoringHomoGBE as GBE


 def train_qscoring_backend(iv_file, train_list, preproc_file, output_path, **kwargs):
diff --git a/hyperion/bin_deprec/vectors2scores.py b/hyperion/bin_deprec/vectors2scores.py
index cc936115..ab4be8ac 100755
--- a/hyperion/bin_deprec/vectors2scores.py
+++ b/hyperion/bin_deprec/vectors2scores.py
@@ -4,11 +4,11 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
+import os
 import re
+import sys
+import time

 import numpy as np

diff --git a/hyperion/bin_deprec2/apply-mvn-select-frames.py b/hyperion/bin_deprec2/apply-mvn-select-frames.py
index 4f73628e..a2456dc9 100755
--- a/hyperion/bin_deprec2/apply-mvn-select-frames.py
+++ b/hyperion/bin_deprec2/apply-mvn-select-frames.py
@@ -4,27 +4,23 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)

 from hyperion.hyp_defs import config_logger
-from hyperion.utils.kaldi_matrix import compression_methods
-from hyperion.utils import Utt2Info
 from hyperion.io import DataWriterFactory as DWF
-from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.io import RandomAccessDataReaderFactory as RDRF
-from hyperion.np.feats import MeanVarianceNorm as MVN
+from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.np.feats import FrameSelector as FSel
+from hyperion.np.feats import MeanVarianceNorm as MVN
+from hyperion.utils import Utt2Info
+from hyperion.utils.kaldi_matrix import compression_methods


 def process_feats(
diff --git a/hyperion/bin_deprec2/compute-mfcc-feats.py b/hyperion/bin_deprec2/compute-mfcc-feats.py
index b7e90056..a83f95d1 100755
--- a/hyperion/bin_deprec2/compute-mfcc-feats.py
+++ b/hyperion/bin_deprec2/compute-mfcc-feats.py
@@ -3,23 +3,19 @@
 Copyright 2018 Jesus Villalba (Johns Hopkins University)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)

 from hyperion.hyp_defs import config_logger
+from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import SequentialDataReaderFactory as DRF
-from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import compression_methods
 from hyperion.np.feats import MFCC
diff --git a/hyperion/bin_deprec2/copy-feats.py b/hyperion/bin_deprec2/copy-feats.py
index 1ef044f5..0385cc55 100755
--- a/hyperion/bin_deprec2/copy-feats.py
+++ b/hyperion/bin_deprec2/copy-feats.py
@@ -5,18 +5,17 @@
 Copy features/vectors and change format
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np

 from hyperion.hyp_defs import config_logger
 from hyperion.io import CopyFeats as CF
-
 if __name__ == "__main__":

     parser = argparse.ArgumentParser(
diff --git a/hyperion/bin_deprec2/eval-cos-1vs1.py b/hyperion/bin_deprec2/eval-cos-1vs1.py
index 16c9122a..de508333 100755
--- a/hyperion/bin_deprec2/eval-cos-1vs1.py
+++ b/hyperion/bin_deprec2/eval-cos-1vs1.py
@@ -7,19 +7,19 @@
 Evals cosine scoring
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np

-from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger
+from hyperion.helpers import TrialDataReader as TDR
+from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
+from hyperion.np.transforms import LNorm, TransformList
 from hyperion.utils.trial_ndx import TrialNdx
 from hyperion.utils.trial_scores import TrialScores
-from hyperion.helpers import TrialDataReader as TDR
-from hyperion.np.transforms import TransformList, LNorm


 def eval_cos(
diff --git a/hyperion/bin_deprec2/eval-linear-gbe-up.py b/hyperion/bin_deprec2/eval-linear-gbe-up.py
index a8c3d999..d82bf967 100755
--- a/hyperion/bin_deprec2/eval-linear-gbe-up.py
+++ b/hyperion/bin_deprec2/eval-linear-gbe-up.py
@@ -7,21 +7,21 @@
 Evals linear GBE with uncertainty propagation.
""" -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger -from hyperion.utils.trial_ndx import TrialNdx -from hyperion.utils.trial_scores import TrialScores -from hyperion.io import HypDataWriter as HDW from hyperion.helpers import ClassifTrialDataReader as TDR -from hyperion.np.transforms import TransformList +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import HypDataWriter as HDW from hyperion.np.classifiers import LinearGBEUP as GBE +from hyperion.np.transforms import TransformList +from hyperion.utils.trial_ndx import TrialNdx +from hyperion.utils.trial_scores import TrialScores def eval_linear_gbe( diff --git a/hyperion/bin_deprec2/eval-linear-gbe.py b/hyperion/bin_deprec2/eval-linear-gbe.py index 0970bb5e..cf788392 100755 --- a/hyperion/bin_deprec2/eval-linear-gbe.py +++ b/hyperion/bin_deprec2/eval-linear-gbe.py @@ -7,21 +7,21 @@ Evals linear GBE """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger -from hyperion.utils.trial_ndx import TrialNdx -from hyperion.utils.trial_scores import TrialScores -from hyperion.io import HypDataWriter as HDW from hyperion.helpers import ClassifTrialDataReader as TDR -from hyperion.np.transforms import TransformList +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import HypDataWriter as HDW from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.transforms import TransformList +from hyperion.utils.trial_ndx import TrialNdx +from hyperion.utils.trial_scores import TrialScores def eval_linear_gbe( diff --git a/hyperion/bin_deprec2/eval-linear-svmc.py b/hyperion/bin_deprec2/eval-linear-svmc.py index d6c96c11..ba4c5e81 100755 --- a/hyperion/bin_deprec2/eval-linear-svmc.py +++ b/hyperion/bin_deprec2/eval-linear-svmc.py @@ -7,21 +7,21 @@ Evals SVM """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger -from hyperion.utils.trial_ndx import TrialNdx -from hyperion.utils.trial_scores import TrialScores -from hyperion.io import HypDataWriter as HDW from hyperion.helpers import ClassifTrialDataReader as TDR -from hyperion.np.transforms import TransformList +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import HypDataWriter as HDW from hyperion.np.classifiers import LinearSVMC as SVM +from hyperion.np.transforms import TransformList +from hyperion.utils.trial_ndx import TrialNdx +from hyperion.utils.trial_scores import TrialScores def eval_svm( diff --git a/hyperion/bin_deprec2/eval-logistic-regression.py b/hyperion/bin_deprec2/eval-logistic-regression.py index 91a092ea..992ca7b8 100755 --- a/hyperion/bin_deprec2/eval-logistic-regression.py +++ b/hyperion/bin_deprec2/eval-logistic-regression.py @@ -7,21 +7,21 @@ Evals logistic regression """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger -from hyperion.utils.trial_ndx import TrialNdx -from hyperion.utils.trial_scores import TrialScores -from hyperion.io import HypDataWriter as HDW from hyperion.helpers import 
 from hyperion.helpers import ClassifTrialDataReader as TDR
-from hyperion.np.transforms import TransformList
+from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
+from hyperion.io import HypDataWriter as HDW
 from hyperion.np.classifiers import LogisticRegression as LR
+from hyperion.np.transforms import TransformList
+from hyperion.utils.trial_ndx import TrialNdx
+from hyperion.utils.trial_scores import TrialScores


 def eval_lr(
diff --git a/hyperion/bin_deprec2/eval-plda-1vs1.py b/hyperion/bin_deprec2/eval-plda-1vs1.py
index eadf4a87..5a810cf7 100755
--- a/hyperion/bin_deprec2/eval-plda-1vs1.py
+++ b/hyperion/bin_deprec2/eval-plda-1vs1.py
@@ -7,20 +7,20 @@
 Evals PLDA LLR
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np

-from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger
-from hyperion.utils.trial_ndx import TrialNdx
-from hyperion.utils.trial_scores import TrialScores
-from hyperion.helpers import TrialDataReader as TDR
 from hyperion.helpers import PLDAFactory as F
+from hyperion.helpers import TrialDataReader as TDR
+from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.np.transforms import TransformList
+from hyperion.utils.trial_ndx import TrialNdx
+from hyperion.utils.trial_scores import TrialScores


 def eval_plda(
diff --git a/hyperion/bin_deprec2/eval-plda-nvs1.py b/hyperion/bin_deprec2/eval-plda-nvs1.py
index 5a63e5a5..5c5d200c 100755
--- a/hyperion/bin_deprec2/eval-plda-nvs1.py
+++ b/hyperion/bin_deprec2/eval-plda-nvs1.py
@@ -7,20 +7,20 @@
 Evals PLDA LLR
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np

-from hyperion.hyp_defs import set_float_cpu, float_cpu, config_logger
-from hyperion.utils.trial_ndx import TrialNdx
-from hyperion.utils.trial_scores import TrialScores
-from hyperion.helpers import TrialDataReader as TDR
 from hyperion.helpers import PLDAFactory as F
+from hyperion.helpers import TrialDataReader as TDR
+from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.np.transforms import TransformList
+from hyperion.utils.trial_ndx import TrialNdx
+from hyperion.utils.trial_scores import TrialScores


 def eval_plda(
diff --git a/hyperion/bin_deprec2/merge-h5-files.py b/hyperion/bin_deprec2/merge-h5-files.py
index a807c69c..aeda3bab 100755
--- a/hyperion/bin_deprec2/merge-h5-files.py
+++ b/hyperion/bin_deprec2/merge-h5-files.py
@@ -6,10 +6,11 @@
 """
 Merges multiple hdf5 files into one file
 """
-import sys
-import os
 import argparse
+import os
+import sys
 import time
+
 import numpy as np

 from hyperion.io import H5Merger
diff --git a/hyperion/bin_deprec2/pack-audio-files.py b/hyperion/bin_deprec2/pack-audio-files.py
index 4953d345..5d544df4 100755
--- a/hyperion/bin_deprec2/pack-audio-files.py
+++ b/hyperion/bin_deprec2/pack-audio-files.py
@@ -3,19 +3,19 @@
 Copyright 2020 Jesus Villalba (Johns Hopkins University)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
-
 import math
+import os
+import sys
+import time
+
 import numpy as np
-from scipy import signal, ndimage
+from scipy import ndimage, signal

 from hyperion.hyp_defs import config_logger
-from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import PackedAudioWriter as Writer
+from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.io import WSpecifier as WS
diff --git a/hyperion/bin_deprec2/plot-vector-hist.py b/hyperion/bin_deprec2/plot-vector-hist.py
index 60560a80..75236726 100755
--- a/hyperion/bin_deprec2/plot-vector-hist.py
+++ b/hyperion/bin_deprec2/plot-vector-hist.py
@@ -4,20 +4,20 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

-import numpy as np
 import matplotlib
+import numpy as np

 matplotlib.use("Agg")
 import matplotlib.pyplot as plt

-from hyperion.hyp_defs import config_logger
 from hyperion.helpers import VectorReader as VR
+from hyperion.hyp_defs import config_logger
 from hyperion.np.transforms import TransformList

diff --git a/hyperion/bin_deprec2/rttm-to-bin-vad.py b/hyperion/bin_deprec2/rttm-to-bin-vad.py
index 9c51ba2c..19e98d8f 100755
--- a/hyperion/bin_deprec2/rttm-to-bin-vad.py
+++ b/hyperion/bin_deprec2/rttm-to-bin-vad.py
@@ -3,18 +3,18 @@
 # Apache 2.0.
 #
-import sys
-import os
 import argparse
-import time
 import logging
+import os
+import sys
+import time

 import numpy as np
 import pandas as pd

 from hyperion.hyp_defs import config_logger
-from hyperion.utils import SegmentList, RTTM
 from hyperion.io import DataWriterFactory as DWF
+from hyperion.utils import RTTM, SegmentList


 def rttm_to_bin_vad(
diff --git a/hyperion/bin_deprec2/segments-to-bin-vad.py b/hyperion/bin_deprec2/segments-to-bin-vad.py
index 2b3a7d91..24021a4b 100755
--- a/hyperion/bin_deprec2/segments-to-bin-vad.py
+++ b/hyperion/bin_deprec2/segments-to-bin-vad.py
@@ -3,23 +3,19 @@
 # Apache 2.0.
 #
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

 import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)

 from hyperion.hyp_defs import config_logger
-from hyperion.utils import SegmentList
 from hyperion.io import DataWriterFactory as DWF
+from hyperion.utils import SegmentList


 def segments_to_bin_vad(
diff --git a/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py b/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
index eb118102..ad33515c 100755
--- a/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
@@ -3,35 +3,29 @@
 Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
-from pathlib import Path
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
-import time
 import logging
 import multiprocessing
+import os
+import sys
+import time
+from pathlib import Path

 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device
-from hyperion.torch.utils import ddp
-from hyperion.torch.models import XVector as XVec
-from hyperion.torch.trainers import XVectorAdvTrainerFromWav as Trainer
+from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.adv_attacks import AttackFactory
 from hyperion.torch.data import AudioDataset as AD
 from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
 from hyperion.torch.metrics import CategoricalAccuracy
+from hyperion.torch.models import XVector as XVec
 from hyperion.torch.narchs import AudioFeatsMVN as AF
-from hyperion.torch.adv_attacks import AttackFactory
-from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.trainers import XVectorAdvTrainerFromWav as Trainer
+from hyperion.torch.utils import ddp, open_device


 def init_data(
diff --git a/hyperion/bin_deprec2/torch-adv-finetune-xvec.py b/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
index ae2cb37b..850233e2 100755
--- a/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
+++ b/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
@@ -4,32 +4,27 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
-from pathlib import Path
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
-import time
 import logging
 import multiprocessing
+import os
+import sys
+import time
+from pathlib import Path

 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import open_device, ddp
-from hyperion.torch.models import XVector as XVec
-from hyperion.torch.trainers import XVectorAdvTrainer as Trainer
-from hyperion.torch.data import FeatSeqDataset as SD
+from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.adv_attacks import AttackFactory
 from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
+from hyperion.torch.data import FeatSeqDataset as SD
 from hyperion.torch.metrics import CategoricalAccuracy
-from hyperion.torch.adv_attacks import AttackFactory
-from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.models import XVector as XVec
+from hyperion.torch.trainers import XVectorAdvTrainer as Trainer
+from hyperion.torch.utils import ddp, open_device


 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec2/torch-compute-mfcc-feats.py b/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
index 4fc6bec2..07f71bfb 100755
--- a/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
+++ b/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
@@ -3,23 +3,19 @@
 Copyright 2018 Jesus Villalba (Johns Hopkins University)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

-import torch
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)

+import torch
 from hyperion.hyp_defs import config_logger
+from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import SequentialDataReaderFactory as DRF
-from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import compression_methods
 from hyperion.torch.layers import AudioFeatsFactory as AFF
diff --git a/hyperion/bin_deprec2/torch-eval-vae.py b/hyperion/bin_deprec2/torch-eval-vae.py
index 44ed0bfb..d676b0f1 100755
--- a/hyperion/bin_deprec2/torch-eval-vae.py
+++ b/hyperion/bin_deprec2/torch-eval-vae.py
@@ -3,19 +3,15 @@
 Copyright 2020 Jesus Villalba (Johns Hopkins University)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import time
 import logging
+import time
 from pathlib import Path

-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import matplotlib
 import numpy as np
 import pandas as pd
-import matplotlib
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)

 matplotlib.use("Agg")
 # matplotlib.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
@@ -23,16 +19,14 @@
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.utils import Utt2Info
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.np.feats import MeanVarianceNorm as MVN
-
-from hyperion.torch.utils import open_device
 from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.utils import open_device
+from hyperion.utils import Utt2Info


 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
index 411873ac..aaa91214 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
@@ -3,43 +3,35 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging
+# [Added Sonal May21]
+from pathlib import Path

 import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.io import RandomAccessDataReaderFactory as DRF
-from hyperion.io import RandomAccessAudioReader as AR
 from hyperion.io import AudioWriter as AW
-from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores
-from hyperion.utils.list_utils import ismember
+from hyperion.io import RandomAccessAudioReader as AR
+from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.np.classifiers import BinaryLogisticRegression as LR
-
-from hyperion.torch.utils import open_device
-from hyperion.torch.layers import LinBinCalibrator as Calibrator
-from hyperion.torch.narchs import AudioFeatsMVN as AF
-from hyperion.torch.utils.misc import l2_norm, compute_stats_adv_attack
 from hyperion.torch import TorchModelLoader as TML
-
 from hyperion.torch.adv_attacks import AttackFactory
-
-# [Added Sonal May21]
-from pathlib import Path
 from hyperion.torch.adv_defenses.wave_gan_white import WaveGANDefender
+from hyperion.torch.layers import LinBinCalibrator as Calibrator
+from hyperion.torch.narchs import AudioFeatsMVN as AF
+from hyperion.torch.utils import open_device
+from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
+from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.list_utils import ismember

 torch.backends.cudnn.enabled = False

diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
index 18d6843f..437127b2 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
@@ -3,39 +3,32 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

 import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.io import RandomAccessDataReaderFactory as DRF
-from hyperion.io import RandomAccessAudioReader as AR
 from hyperion.io import AudioWriter as AW
-from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores
-from hyperion.utils.list_utils import ismember
+from hyperion.io import RandomAccessAudioReader as AR
+from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.np.classifiers import BinaryLogisticRegression as LR
-
-from hyperion.torch.utils import open_device
-from hyperion.torch.layers import LinBinCalibrator as Calibrator
-from hyperion.torch.narchs import AudioFeatsMVN as AF
-from hyperion.torch.utils.misc import l2_norm, compute_stats_adv_attack
 from hyperion.torch import TorchModelLoader as TML
-
 from hyperion.torch.adv_attacks import AttackFactory
+from hyperion.torch.layers import LinBinCalibrator as Calibrator
+from hyperion.torch.narchs import AudioFeatsMVN as AF
+from hyperion.torch.utils import open_device
+from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
+from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.list_utils import ismember


 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
index 73da6088..8d4add76 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
@@ -4,42 +4,34 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

 import numpy as np
 import pandas as pd
+from art.classifiers import PyTorchClassifier
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.io import RandomAccessDataReaderFactory as DRF
-from hyperion.io import RandomAccessAudioReader as AR
 from hyperion.io import AudioWriter as AW
-from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores
-from hyperion.utils.list_utils import ismember
+from hyperion.io import RandomAccessAudioReader as AR
+from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.np.classifiers import BinaryLogisticRegression as LR
-
-from hyperion.torch.utils import open_device
+from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.adv_attacks.art_attack_factory import \
+    ARTAttackFactory as AttackFactory
 from hyperion.torch.layers import LinBinCalibrator as Calibrator
 from hyperion.torch.narchs import AudioFeatsMVN as AF
-from hyperion.torch.utils.misc import l2_norm, compute_stats_adv_attack
-from hyperion.torch import TorchModelLoader as TML
-
-from art.classifiers import PyTorchClassifier
-from hyperion.torch.adv_attacks.art_attack_factory import (
-    ARTAttackFactory as AttackFactory,
-)
+from hyperion.torch.utils import open_device
+from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
+from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.list_utils import ismember


 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
index a8b4b962..0e9493c0 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
@@ -4,35 +4,29 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.io import RandomAccessAudioReader as AR
-from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores
-from hyperion.utils.list_utils import ismember
+from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.np.classifiers import BinaryLogisticRegression as LR
-
-from hyperion.torch.utils import open_device
+from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.layers import LinBinCalibrator as Calibrator
 from hyperion.torch.narchs import AudioFeatsMVN as AF
+from hyperion.torch.utils import open_device
 from hyperion.torch.utils.misc import l2_norm
-from hyperion.torch import TorchModelLoader as TML
+from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.list_utils import ismember


 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
index 51a8afbb..e0754498 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
@@ -3,39 +3,32 @@
 Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

 import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.io import RandomAccessDataReaderFactory as DRF
-from hyperion.io import RandomAccessAudioReader as AR
 from hyperion.io import AudioWriter as AW
-from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores
-from hyperion.utils.list_utils import ismember
+from hyperion.io import RandomAccessAudioReader as AR
+from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.np.classifiers import BinaryLogisticRegression as LR
-
-from hyperion.torch.utils import open_device
-from hyperion.torch.layers import LinBinCalibrator as Calibrator
-from hyperion.torch.narchs import AudioFeatsMVN as AF
-from hyperion.torch.utils.misc import l2_norm, compute_stats_adv_attack
 from hyperion.torch import TorchModelLoader as TML
-
 from hyperion.torch.adv_attacks import AttackFactory
+from hyperion.torch.layers import LinBinCalibrator as Calibrator
+from hyperion.torch.narchs import AudioFeatsMVN as AF
+from hyperion.torch.utils import open_device
+from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
+from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.list_utils import ismember


 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
index 9fcc8f30..0f9f375d 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
@@ -4,42 +4,34 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging

 import numpy as np
 import pandas as pd
+from art.classifiers import PyTorchClassifier
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.io import RandomAccessDataReaderFactory as DRF
-from hyperion.io import RandomAccessAudioReader as AR
 from hyperion.io import AudioWriter as AW
-from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores
-from hyperion.utils.list_utils import ismember
+from hyperion.io import RandomAccessAudioReader as AR
+from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.np.classifiers import BinaryLogisticRegression as LR
-
-from hyperion.torch.utils import open_device
+from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.adv_attacks.art_attack_factory import \
+    ARTAttackFactory as AttackFactory
 from hyperion.torch.layers import LinBinCalibrator as Calibrator
 from hyperion.torch.narchs import AudioFeatsMVN as AF
-from hyperion.torch.utils.misc import l2_norm, compute_stats_adv_attack
-from hyperion.torch import TorchModelLoader as TML
-
-from art.classifiers import PyTorchClassifier
-from hyperion.torch.adv_attacks.art_attack_factory import (
-    ARTAttackFactory as AttackFactory,
-)
+from hyperion.torch.utils import open_device
+from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
+from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.list_utils import ismember


 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py
index 61acebd4..da6389fb 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py
@@ -4,32 +4,26 @@
(http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np import pandas as pd +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.np.augment import SpeechAugment - -from hyperion.torch.utils import open_device -from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py b/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py index 101d6a10..fc494448 100755 --- a/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py +++ b/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py @@ -4,32 +4,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np import pandas as pd +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info, RTTM from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.np.augment import SpeechAugment - -from hyperion.torch.utils import open_device -from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.utils import RTTM, Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py index ecf65037..c85fe4c9 100755 --- a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py +++ b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py @@ -4,33 +4,27 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np import pandas as pd import yaml +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.np.augment import SpeechAugment - -from hyperion.torch.utils import open_device -from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch import 
TorchModelLoader as TML +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py index 7d6d9f11..6da57e16 100755 --- a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py +++ b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py @@ -4,30 +4,24 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF from hyperion.np.feats import MeanVarianceNorm as MVN - -from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py b/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py index afa7a117..6edf60ed 100755 --- a/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py +++ b/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py @@ -4,30 +4,24 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF from hyperion.np.feats import MeanVarianceNorm as MVN - -from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin_deprec2/torch-extract-xvectors.py b/hyperion/bin_deprec2/torch-extract-xvectors.py index f36e35e2..76d941e0 100755 --- a/hyperion/bin_deprec2/torch-extract-xvectors.py +++ b/hyperion/bin_deprec2/torch-extract-xvectors.py @@ -4,30 +4,24 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.utils import Utt2Info from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF from hyperion.np.feats import MeanVarianceNorm as MVN - -from 
hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info def init_device(use_gpu): diff --git a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py index 274bdf32..88b0b1d9 100755 --- a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py +++ b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py @@ -3,37 +3,30 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging from pathlib import Path import numpy as np import pandas as pd import yaml +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.io import RandomAccessAudioReader as AR from hyperion.io import AudioWriter as AW -from hyperion.utils import Utt2Info, TrialNdx +from hyperion.io import RandomAccessAudioReader as AR from hyperion.io import VADReaderFactory as VRF - -from hyperion.torch.utils import open_device from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.narchs import AudioFeatsMVN as AF -from hyperion.torch.utils.misc import l2_norm, compute_stats_adv_attack - from hyperion.torch.adv_attacks import RandomAttackFactory +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm +from hyperion.utils import TrialNdx, Utt2Info def read_utt_list(list_file, class2int_file, part_idx, num_parts): diff --git a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py index c13bd815..a4df5091 100755 --- a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py +++ b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py @@ -3,41 +3,34 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging from pathlib import Path import numpy as np import pandas as pd import yaml +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu -from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.io import RandomAccessAudioReader as AR from hyperion.io import AudioWriter as AW -from hyperion.utils import Utt2Info, TrialNdx, TrialKey, TrialScores -from hyperion.utils.list_utils import ismember +from hyperion.io import RandomAccessAudioReader as AR +from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.io import VADReaderFactory as VRF from hyperion.np.classifiers import BinaryLogisticRegression as LR - -from hyperion.torch.utils import open_device -from hyperion.torch.layers import LinBinCalibrator as Calibrator -from hyperion.torch.narchs import AudioFeatsMVN 
as AF -from hyperion.torch.utils.misc import l2_norm, compute_stats_adv_attack from hyperion.torch import TorchModelLoader as TML - from hyperion.torch.adv_attacks import RandomAttackFactory +from hyperion.torch.layers import LinBinCalibrator as Calibrator +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm +from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info +from hyperion.utils.list_utils import ismember class MyModel(nn.Module): diff --git a/hyperion/bin_deprec2/torch-train-dc1d-ae.py b/hyperion/bin_deprec2/torch-train-dc1d-ae.py index e7547927..50ac7d42 100755 --- a/hyperion/bin_deprec2/torch-train-dc1d-ae.py +++ b/hyperion/bin_deprec2/torch-train-dc1d-ae.py @@ -3,27 +3,26 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import SeqDataset as SD from hyperion.torch.helpers import OptimizerFactory as OF from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF -from hyperion.torch.narchs.dc1d_encoder import DC1dEncoder as Encoder -from hyperion.torch.narchs.dc1d_decoder import DC1dDecoder as Decoder from hyperion.torch.models import AE +from hyperion.torch.narchs.dc1d_decoder import DC1dDecoder as Decoder +from hyperion.torch.narchs.dc1d_encoder import DC1dEncoder as Encoder from hyperion.torch.trainers import AETrainer as Trainer -from hyperion.torch.data import SeqDataset as SD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.utils import open_device def train_ae( diff --git a/hyperion/bin_deprec2/torch-train-dvae.py b/hyperion/bin_deprec2/torch-train-dvae.py index e13c26ac..808bfbba 100755 --- a/hyperion/bin_deprec2/torch-train-dvae.py +++ b/hyperion/bin_deprec2/torch-train-dvae.py @@ -3,36 +3,30 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device, ddp -from hyperion.torch.narchs import DC1dEncoder, DC1dDecoder -from hyperion.torch.narchs import DC2dEncoder, DC2dDecoder -from hyperion.torch.narchs import ResNet1dEncoder, ResNet1dDecoder -from hyperion.torch.narchs import ResNet2dEncoder, ResNet2dDecoder -from hyperion.torch.narchs import TransformerEncoderV1 -from hyperion.torch.narchs import ConformerEncoderV1 +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import PairedFeatSeqDataset as SD from hyperion.torch.models import VAE +from hyperion.torch.narchs import (ConformerEncoderV1, DC1dDecoder, + DC1dEncoder, DC2dDecoder, DC2dEncoder, 
+ ResNet1dDecoder, ResNet1dEncoder, + ResNet2dDecoder, ResNet2dEncoder, + TransformerEncoderV1) from hyperion.torch.trainers import DVAETrainer as Trainer -from hyperion.torch.data import PairedFeatSeqDataset as SD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.utils import ddp, open_device enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py index 6d7c41ee..f256f735 100755 --- a/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py +++ b/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py @@ -3,32 +3,26 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.models import EfficientNetXVector as XVec from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import EfficientNetXVector as XVec from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.utils import ddp, open_device def init_data( diff --git a/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py b/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py index c259a590..622ac62e 100755 --- a/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py +++ b/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py @@ -4,32 +4,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainer as Trainer -from hyperion.torch.models import EfficientNetXVector as XVec -from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import EfficientNetXVector as XVec +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp, open_device def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py 
b/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py index 436e4001..3d135b18 100755 --- a/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py +++ b/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py @@ -3,41 +3,36 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - -# import torch.multiprocessing as mp - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp - -# from hyperion.torch.helpers import OptimizerFactory as OF -# from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.models import ResNetXVector as XVec from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy - +from hyperion.torch.models import ResNetXVector as XVec # from hyperion.torch.layers import AudioFeatsFactory as AFF # from hyperion.torch.layers import MeanVarianceNorm as MVN from hyperion.torch.narchs import AudioFeatsMVN as AF +# from hyperion.torch.helpers import OptimizerFactory as OF +# from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.utils import ddp, open_device + +# import torch.multiprocessing as mp + + + # from torch.utils.data import dataloader # from torch.multiprocessing import reductions diff --git a/hyperion/bin_deprec2/torch-train-resnet-xvec.py b/hyperion/bin_deprec2/torch-train-resnet-xvec.py index 6e7f4242..f976cc6e 100755 --- a/hyperion/bin_deprec2/torch-train-resnet-xvec.py +++ b/hyperion/bin_deprec2/torch-train-resnet-xvec.py @@ -4,32 +4,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainer as Trainer -from hyperion.torch.models import ResNetXVector as XVec -from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import ResNetXVector as XVec +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp, open_device def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git 
a/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py index bf531745..3ee6bf18 100755 --- a/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py +++ b/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py @@ -3,34 +3,27 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.models import ResNet1dXVector as XVec from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy - +from hyperion.torch.models import ResNet1dXVector as XVec from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.utils import ddp, open_device def init_data( diff --git a/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py index 7bac503c..0857ce5c 100755 --- a/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py +++ b/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py @@ -4,32 +4,27 @@ Copyright 2020 Magdalena Rybicka Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.models import SpineNetXVector as XVec from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import SpineNetXVector as XVec from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.utils import ddp def init_data( diff --git a/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py index 0ab0cb67..7bbbff03 100755 --- a/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py +++ b/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py @@ -3,32 +3,26 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import 
logging import multiprocessing +import os +import sys +import time import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.models import TDNNXVector as XVec from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import TDNNXVector as XVec from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.utils import ddp, open_device def init_data( diff --git a/hyperion/bin_deprec2/torch-train-tdnn-xvec.py b/hyperion/bin_deprec2/torch-train-tdnn-xvec.py index 2075ca34..5614f1b9 100755 --- a/hyperion/bin_deprec2/torch-train-tdnn-xvec.py +++ b/hyperion/bin_deprec2/torch-train-tdnn-xvec.py @@ -4,32 +4,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainer as Trainer -from hyperion.torch.models import TDNNXVector as XVec -from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import TDNNXVector as XVec +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp, open_device def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py index 636fb390..6b361583 100755 --- a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py +++ b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py @@ -3,32 +3,26 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer -from hyperion.torch.models import TransformerXVectorV1 as XVec from hyperion.torch.data import AudioDataset as AD 
from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import TransformerXVectorV1 as XVec from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer +from hyperion.torch.utils import ddp, open_device def init_data( diff --git a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py index 033408b6..62164f15 100755 --- a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py +++ b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py @@ -4,32 +4,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device -from hyperion.torch.utils import ddp -from hyperion.torch.trainers import XVectorTrainer as Trainer -from hyperion.torch.models import TransformerXVectorV1 as XVec -from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import TransformerXVectorV1 as XVec +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp, open_device def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin_deprec2/torch-train-vae.py b/hyperion/bin_deprec2/torch-train-vae.py index 7ceb3014..4c41d49c 100755 --- a/hyperion/bin_deprec2/torch-train-vae.py +++ b/hyperion/bin_deprec2/torch-train-vae.py @@ -3,36 +3,30 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device, ddp -from hyperion.torch.narchs import DC1dEncoder, DC1dDecoder -from hyperion.torch.narchs import DC2dEncoder, DC2dDecoder -from hyperion.torch.narchs import ResNet1dEncoder, ResNet1dDecoder -from hyperion.torch.narchs import ResNet2dEncoder, ResNet2dDecoder -from hyperion.torch.narchs import TransformerEncoderV1 -from hyperion.torch.narchs import ConformerEncoderV1 +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.models import VAE +from hyperion.torch.narchs import (ConformerEncoderV1, DC1dDecoder, + DC1dEncoder, DC2dDecoder, DC2dEncoder, + ResNet1dDecoder, ResNet1dEncoder, + ResNet2dDecoder, ResNet2dEncoder, + TransformerEncoderV1) from 
hyperion.torch.trainers import VAETrainer as Trainer -from hyperion.torch.data import FeatSeqDataset as SD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.utils import ddp, open_device enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/torch-train-vq-dvae.py b/hyperion/bin_deprec2/torch-train-vq-dvae.py index 6e49df08..5de1bbd4 100755 --- a/hyperion/bin_deprec2/torch-train-vq-dvae.py +++ b/hyperion/bin_deprec2/torch-train-vq-dvae.py @@ -3,36 +3,30 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device, ddp -from hyperion.torch.narchs import DC1dEncoder, DC1dDecoder -from hyperion.torch.narchs import DC2dEncoder, DC2dDecoder -from hyperion.torch.narchs import ResNet1dEncoder, ResNet1dDecoder -from hyperion.torch.narchs import ResNet2dEncoder, ResNet2dDecoder -from hyperion.torch.narchs import TransformerEncoderV1 -from hyperion.torch.narchs import ConformerEncoderV1 +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import PairedFeatSeqDataset as SD from hyperion.torch.models import VQVAE as VAE +from hyperion.torch.narchs import (ConformerEncoderV1, DC1dDecoder, + DC1dEncoder, DC2dDecoder, DC2dEncoder, + ResNet1dDecoder, ResNet1dEncoder, + ResNet2dDecoder, ResNet2dEncoder, + TransformerEncoderV1) from hyperion.torch.trainers import VQDVAETrainer as Trainer -from hyperion.torch.data import PairedFeatSeqDataset as SD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.utils import ddp, open_device enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/torch-train-vq-vae.py b/hyperion/bin_deprec2/torch-train-vq-vae.py index fa8b336c..2a95f853 100755 --- a/hyperion/bin_deprec2/torch-train-vq-vae.py +++ b/hyperion/bin_deprec2/torch-train-vq-vae.py @@ -3,36 +3,30 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -from pathlib import Path -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging import multiprocessing +import os +import sys +import time +from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn - from hyperion.hyp_defs import config_logger, set_float_cpu -from hyperion.torch.utils import open_device, ddp -from hyperion.torch.narchs import DC1dEncoder, DC1dDecoder -from hyperion.torch.narchs import DC2dEncoder, DC2dDecoder -from hyperion.torch.narchs import ResNet1dEncoder, ResNet1dDecoder -from hyperion.torch.narchs import ResNet2dEncoder, ResNet2dDecoder -from hyperion.torch.narchs import TransformerEncoderV1 -from hyperion.torch.narchs import ConformerEncoderV1 +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data 
import FeatSeqDataset as SD from hyperion.torch.models import VQVAE as VAE +from hyperion.torch.narchs import (ConformerEncoderV1, DC1dDecoder, + DC1dEncoder, DC2dDecoder, DC2dEncoder, + ResNet1dDecoder, ResNet1dEncoder, + ResNet2dDecoder, ResNet2dEncoder, + TransformerEncoderV1) from hyperion.torch.trainers import VQVAETrainer as Trainer -from hyperion.torch.data import FeatSeqDataset as SD -from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.utils import ddp, open_device enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/train-cw-up.py b/hyperion/bin_deprec2/train-cw-up.py index a7392a32..c1c372ad 100755 --- a/hyperion/bin_deprec2/train-cw-up.py +++ b/hyperion/bin_deprec2/train-cw-up.py @@ -7,18 +7,18 @@ Trains Centering and whitening with uncertainty prop. """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR +from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal -from hyperion.np.transforms import TransformList, CentWhitenUP, LNormUP +from hyperion.np.transforms import CentWhitenUP, LNormUP, TransformList def load_model(input_path, with_lnorm, name, **kwargs): diff --git a/hyperion/bin_deprec2/train-cw.py b/hyperion/bin_deprec2/train-cw.py index a70485a6..cabca7c2 100755 --- a/hyperion/bin_deprec2/train-cw.py +++ b/hyperion/bin_deprec2/train-cw.py @@ -7,18 +7,18 @@ Trains Centering and whitening """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR +from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal -from hyperion.np.transforms import TransformList, CentWhiten, LNorm +from hyperion.np.transforms import CentWhiten, LNorm, TransformList def load_model(input_path, with_lnorm, name, **kwargs): diff --git a/hyperion/bin_deprec2/train-gaussianizer.py b/hyperion/bin_deprec2/train-gaussianizer.py index a265403e..aeb51e46 100755 --- a/hyperion/bin_deprec2/train-gaussianizer.py +++ b/hyperion/bin_deprec2/train-gaussianizer.py @@ -7,18 +7,18 @@ Trains Gaussianization for i-vectors. 
""" -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR +from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal -from hyperion.np.transforms import TransformList, Gaussianizer +from hyperion.np.transforms import Gaussianizer, TransformList def load_model(input_path, **kwargs): diff --git a/hyperion/bin_deprec2/train-lda.py b/hyperion/bin_deprec2/train-lda.py index 36217c8f..1887a72f 100755 --- a/hyperion/bin_deprec2/train-lda.py +++ b/hyperion/bin_deprec2/train-lda.py @@ -6,17 +6,17 @@ """ Trains LDA """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.np.transforms import TransformList, LDA, SbSw +from hyperion.hyp_defs import config_logger +from hyperion.np.transforms import LDA, SbSw, TransformList def train_lda( diff --git a/hyperion/bin_deprec2/train-linear-gbe-up.py b/hyperion/bin_deprec2/train-linear-gbe-up.py index 5accb785..9986b6bc 100755 --- a/hyperion/bin_deprec2/train-linear-gbe-up.py +++ b/hyperion/bin_deprec2/train-linear-gbe-up.py @@ -7,18 +7,18 @@ Trains linear GBE with uncertainty propagation """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.np.transforms import TransformList +from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LinearGBEUP as GBE +from hyperion.np.transforms import TransformList def train_linear_gbe(iv_file, train_list, preproc_file, output_path, **kwargs): diff --git a/hyperion/bin_deprec2/train-linear-gbe.py b/hyperion/bin_deprec2/train-linear-gbe.py index a7ac5236..e9455cb8 100755 --- a/hyperion/bin_deprec2/train-linear-gbe.py +++ b/hyperion/bin_deprec2/train-linear-gbe.py @@ -7,18 +7,18 @@ Trains linear Gaussian back-end """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.np.transforms import TransformList +from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.transforms import TransformList def train_linear_gbe(iv_file, train_list, preproc_file, output_path, **kwargs): diff --git a/hyperion/bin_deprec2/train-linear-svmc.py b/hyperion/bin_deprec2/train-linear-svmc.py index 6b589491..90ff8768 100755 --- a/hyperion/bin_deprec2/train-linear-svmc.py +++ b/hyperion/bin_deprec2/train-linear-svmc.py @@ -7,18 +7,18 @@ Trains linear SVM classifier """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.np.transforms import TransformList +from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LinearSVMC as SVM +from hyperion.np.transforms import TransformList def train_svm(iv_file, train_list, preproc_file, output_path, **kwargs): diff --git a/hyperion/bin_deprec2/train-logistic-regression.py 
b/hyperion/bin_deprec2/train-logistic-regression.py index 1d657dc4..1aa128a3 100755 --- a/hyperion/bin_deprec2/train-logistic-regression.py +++ b/hyperion/bin_deprec2/train-logistic-regression.py @@ -7,18 +7,18 @@ Trains linear logistic regression classifier """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.np.transforms import TransformList +from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LogisticRegression as LR +from hyperion.np.transforms import TransformList def train_lr(iv_file, train_list, preproc_file, output_path, **kwargs): diff --git a/hyperion/bin_deprec2/train-mvn.py b/hyperion/bin_deprec2/train-mvn.py index a0204fd5..2d10b116 100755 --- a/hyperion/bin_deprec2/train-mvn.py +++ b/hyperion/bin_deprec2/train-mvn.py @@ -7,18 +7,18 @@ Trains global mean and variance normalization of i-vectors. """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR +from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal -from hyperion.np.transforms import TransformList, MVN, SbSw +from hyperion.np.transforms import MVN, SbSw, TransformList def train_mvn( diff --git a/hyperion/bin_deprec2/train-nda.py b/hyperion/bin_deprec2/train-nda.py index 11cd7da3..946a8baa 100755 --- a/hyperion/bin_deprec2/train-nda.py +++ b/hyperion/bin_deprec2/train-nda.py @@ -7,17 +7,17 @@ Trains NDA """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorClassReader as VCR -from hyperion.np.transforms import TransformList, NDA, NSbSw +from hyperion.hyp_defs import config_logger +from hyperion.np.transforms import NDA, NSbSw, TransformList def train_nda( diff --git a/hyperion/bin_deprec2/train-pca.py b/hyperion/bin_deprec2/train-pca.py index d1ab1c7e..25dcb366 100755 --- a/hyperion/bin_deprec2/train-pca.py +++ b/hyperion/bin_deprec2/train-pca.py @@ -6,17 +6,17 @@ """ Trains PCA """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger from hyperion.helpers import VectorReader as VR -from hyperion.np.transforms import TransformList, PCA +from hyperion.hyp_defs import config_logger +from hyperion.np.transforms import PCA, TransformList def load_model(input_path, name, **kwargs): diff --git a/hyperion/bin_deprec2/train-plda.py b/hyperion/bin_deprec2/train-plda.py index 26f6e0a8..520f4cd7 100755 --- a/hyperion/bin_deprec2/train-plda.py +++ b/hyperion/bin_deprec2/train-plda.py @@ -7,17 +7,17 @@ Trains PLDA """ -import sys -import os import argparse -import time import logging +import os +import sys +import time import numpy as np -from hyperion.hyp_defs import config_logger -from hyperion.helpers import VectorClassReader as VCR from hyperion.helpers import PLDAFactory as F +from hyperion.helpers import VectorClassReader as VCR +from hyperion.hyp_defs import config_logger from hyperion.np.transforms import TransformList diff --git a/hyperion/helpers/__init__.py b/hyperion/helpers/__init__.py index 48bf1476..8b48b161 100644 --- a/hyperion/helpers/__init__.py +++ 
b/hyperion/helpers/__init__.py @@ -3,12 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .vector_reader import VectorReader -from .vector_class_reader import VectorClassReader - -from .trial_data_reader import TrialDataReader +from .classif_trial_data_reader import ClassifTrialDataReader from .multi_test_trial_data_reader import MultiTestTrialDataReader from .multi_test_trial_data_reader_v2 import MultiTestTrialDataReaderV2 -from .classif_trial_data_reader import ClassifTrialDataReader - from .plda_factory import PLDAFactory +from .trial_data_reader import TrialDataReader +from .vector_class_reader import VectorClassReader +from .vector_reader import VectorReader diff --git a/hyperion/helpers/classif_trial_data_reader.py b/hyperion/helpers/classif_trial_data_reader.py index f7aeb727..2f577621 100644 --- a/hyperion/helpers/classif_trial_data_reader.py +++ b/hyperion/helpers/classif_trial_data_reader.py @@ -3,18 +3,18 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os -import logging import argparse -import time import copy +import logging +import os +import sys +import time import numpy as np from ..io import HypDataReader -from ..utils import TrialNdx, SCPList from ..np.transforms import TransformList +from ..utils import SCPList, TrialNdx class ClassifTrialDataReader(object): diff --git a/hyperion/helpers/multi_test_trial_data_reader.py b/hyperion/helpers/multi_test_trial_data_reader.py index eeea60f2..bd2d5a35 100644 --- a/hyperion/helpers/multi_test_trial_data_reader.py +++ b/hyperion/helpers/multi_test_trial_data_reader.py @@ -3,17 +3,17 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import argparse -import time import copy +import os +import sys +import time import numpy as np from ..io import RandomAccessDataReaderFactory as DRF -from ..utils import TrialNdx, TrialKey, Utt2Info from ..np.transforms import TransformList +from ..utils import TrialKey, TrialNdx, Utt2Info class MultiTestTrialDataReader(object): diff --git a/hyperion/helpers/multi_test_trial_data_reader_v2.py b/hyperion/helpers/multi_test_trial_data_reader_v2.py index 43fd1254..226131bf 100644 --- a/hyperion/helpers/multi_test_trial_data_reader_v2.py +++ b/hyperion/helpers/multi_test_trial_data_reader_v2.py @@ -3,17 +3,17 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import argparse -import time import copy +import os +import sys +import time import numpy as np from ..io import RandomAccessDataReaderFactory as DRF -from ..utils import Utt2Info, TrialNdx, TrialKey from ..np.transforms import TransformList +from ..utils import TrialKey, TrialNdx, Utt2Info class MultiTestTrialDataReaderV2(object): diff --git a/hyperion/helpers/plda_factory.py b/hyperion/helpers/plda_factory.py index 0fdd2609..16cf01c4 100644 --- a/hyperion/helpers/plda_factory.py +++ b/hyperion/helpers/plda_factory.py @@ -5,7 +5,7 @@ import numpy as np -from ..np.pdfs.plda import FRPLDA, SPLDA, PLDA +from ..np.pdfs.plda import FRPLDA, PLDA, SPLDA class PLDAFactory(object): diff --git a/hyperion/helpers/tracking_data_reader.py b/hyperion/helpers/tracking_data_reader.py index 4bac5be2..f6741d9a 100644 --- a/hyperion/helpers/tracking_data_reader.py +++ b/hyperion/helpers/tracking_data_reader.py @@ -3,17 +3,17 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import argparse -import time import copy +import os +import sys +import time import numpy as np from ..io import 
RandomAccessDataReaderFactory as DRF -from ..utils import Utt2Info, TrialNdx, ExtSegmentList from ..np.transforms import TransformList +from ..utils import ExtSegmentList, TrialNdx, Utt2Info class TrackingDataReader(object): diff --git a/hyperion/helpers/trial_data_reader.py b/hyperion/helpers/trial_data_reader.py index 219ee6ce..4f33770b 100644 --- a/hyperion/helpers/trial_data_reader.py +++ b/hyperion/helpers/trial_data_reader.py @@ -2,18 +2,18 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import argparse -import time import copy +import os +import sys +import time import numpy as np from ..io import RandomAccessDataReaderFactory as DRF -from ..utils.utt2info import Utt2Info -from ..utils import TrialNdx, TrialKey # , SparseTrialNdx, SparseTrialKey from ..np.transforms import TransformList +from ..utils import TrialKey, TrialNdx # , SparseTrialNdx, SparseTrialKey +from ..utils.utt2info import Utt2Info class TrialDataReader(object): diff --git a/hyperion/helpers/vector_class_reader.py b/hyperion/helpers/vector_class_reader.py index 0c6f346d..c4c531ad 100644 --- a/hyperion/helpers/vector_class_reader.py +++ b/hyperion/helpers/vector_class_reader.py @@ -3,18 +3,18 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys -import os import argparse -import time import copy +import os +import sys +import time import numpy as np from ..io import RandomAccessDataReaderFactory as DRF -from ..utils.utt2info import Utt2Info -from ..utils.tensors import to3D_by_class from ..np.transforms import TransformList +from ..utils.tensors import to3D_by_class +from ..utils.utt2info import Utt2Info class VectorClassReader(object): diff --git a/hyperion/helpers/vector_reader.py b/hyperion/helpers/vector_reader.py index 0ac1b11a..4f480d6d 100644 --- a/hyperion/helpers/vector_reader.py +++ b/hyperion/helpers/vector_reader.py @@ -2,18 +2,18 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser -import sys -import os import argparse -import time import copy +import os +import sys +import time import numpy as np +from jsonargparse import ActionParser, ArgumentParser from ..io import RandomAccessDataReaderFactory as DRF -from ..utils.scp_list import SCPList from ..np.transforms import TransformList +from ..utils.scp_list import SCPList class VectorReader(object): diff --git a/hyperion/io/__init__.py b/hyperion/io/__init__.py index 5ddf131b..14b1b35f 100644 --- a/hyperion/io/__init__.py +++ b/hyperion/io/__init__.py @@ -5,29 +5,21 @@ from .ark_data_reader import * from .ark_data_writer import * -from .h5_data_reader import * -from .h5_data_writer import * -from .data_rw_factory import * -from .copy_feats import CopyFeats - - -from .bin_vad_reader import BinVADReader -from .segment_vad_reader import SegmentVADReader -from .vad_rw_factory import VADReaderFactory - from .audio_reader import * from .audio_writer import * -from .packed_audio_reader import ( - SequentialPackedAudioReader, - RandomAccessPackedAudioReader, -) -from .packed_audio_writer import PackedAudioWriter - - +from .bin_vad_reader import BinVADReader +from .copy_feats import CopyFeats +from .data_rw_factory import * +from .h5_data_reader import * +from .h5_data_writer import * +from .h5_merger import * from .hyp_data_reader import * from .hyp_data_writer import * -from .h5_merger import * from 
.kaldi_data_reader import * - +from .packed_audio_reader import (RandomAccessPackedAudioReader, + SequentialPackedAudioReader) +from .packed_audio_writer import PackedAudioWriter +from .segment_vad_reader import SegmentVADReader +from .vad_rw_factory import VADReaderFactory # from .queues import * diff --git a/hyperion/io/ark_data_reader.py b/hyperion/io/ark_data_reader.py index 7f6ec350..3919ddfa 100644 --- a/hyperion/io/ark_data_reader.py +++ b/hyperion/io/ark_data_reader.py @@ -3,15 +3,17 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import multiprocessing as threading import sys + import numpy as np -import multiprocessing as threading from ..hyp_defs import float_cpu +from ..utils.kaldi_io_funcs import (init_kaldi_input_stream, is_token, peek, + read_token) +from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix from ..utils.scp_list import SCPList -from ..utils.kaldi_matrix import KaldiMatrix, KaldiCompressedMatrix -from ..utils.kaldi_io_funcs import is_token, read_token, peek, init_kaldi_input_stream -from .data_reader import SequentialDataReader, RandomAccessDataReader +from .data_reader import RandomAccessDataReader, SequentialDataReader class SequentialArkDataReader(SequentialDataReader): diff --git a/hyperion/io/ark_data_writer.py b/hyperion/io/ark_data_writer.py index 50fdd3f6..58f5c0a1 100644 --- a/hyperion/io/ark_data_writer.py +++ b/hyperion/io/ark_data_writer.py @@ -4,12 +4,14 @@ """ import sys + import numpy as np from ..hyp_defs import float_save +from ..utils.kaldi_io_funcs import (init_kaldi_output_stream, is_token, + write_token) +from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix from ..utils.scp_list import SCPList -from ..utils.kaldi_io_funcs import is_token, write_token, init_kaldi_output_stream -from ..utils.kaldi_matrix import KaldiMatrix, KaldiCompressedMatrix from .data_writer import DataWriter diff --git a/hyperion/io/audio_reader.py b/hyperion/io/audio_reader.py index 043ae778..0c2f0446 100644 --- a/hyperion/io/audio_reader.py +++ b/hyperion/io/audio_reader.py @@ -3,14 +3,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os -import logging import io +import logging import math +import os import subprocess -import soundfile as sf import numpy as np +import soundfile as sf from ..hyp_defs import float_cpu from ..utils import SCPList, SegmentList diff --git a/hyperion/io/audio_writer.py b/hyperion/io/audio_writer.py index 2fb9ce3c..f98a3251 100644 --- a/hyperion/io/audio_writer.py +++ b/hyperion/io/audio_writer.py @@ -5,13 +5,13 @@ import os import re -import soundfile as sf import numpy as np +import soundfile as sf from ..hyp_defs import float_cpu -from ..utils.scp_list import SCPList from ..utils.kaldi_io_funcs import is_token +from ..utils.scp_list import SCPList from .audio_reader import valid_ext subtype_to_npdtype = { diff --git a/hyperion/io/bin_vad_reader.py b/hyperion/io/bin_vad_reader.py index 452eb106..e4e64777 100644 --- a/hyperion/io/bin_vad_reader.py +++ b/hyperion/io/bin_vad_reader.py @@ -4,12 +4,13 @@ """ import logging + import numpy as np from ..hyp_defs import float_cpu from ..utils.vad_utils import bin_vad_to_timestamps -from .vad_reader import VADReader from .data_rw_factory import RandomAccessDataReaderFactory as DRF +from .vad_reader import VADReader class BinVADReader(VADReader): diff --git a/hyperion/io/data_reader.py b/hyperion/io/data_reader.py index da0279e1..bbefa62d 100644 --- a/hyperion/io/data_reader.py +++ b/hyperion/io/data_reader.py @@ -4,13 +4,14 @@ """ 
import logging +import multiprocessing from abc import ABCMeta, abstractmethod + import numpy as np -import multiprocessing from ..hyp_defs import float_cpu -from ..utils.scp_list import SCPList from ..np.transforms import TransformList +from ..utils.scp_list import SCPList class DataReader(object): diff --git a/hyperion/io/data_rw_factory.py b/hyperion/io/data_rw_factory.py index 0c49cd9f..7868baae 100644 --- a/hyperion/io/data_rw_factory.py +++ b/hyperion/io/data_rw_factory.py @@ -4,19 +4,21 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser from ..utils.kaldi_matrix import compression_methods -from .rw_specifiers import ArchiveType, WSpecifier, RSpecifier, WSpecType, RSpecType -from .h5_data_writer import H5DataWriter as H5DW -from .ark_data_writer import ArkDataWriter as ADW +from .ark_data_reader import RandomAccessArkDataReader as RADR from .ark_data_reader import SequentialArkFileDataReader as SAFDR from .ark_data_reader import SequentialArkScriptDataReader as SASDR -from .ark_data_reader import RandomAccessArkDataReader as RADR -from .h5_data_reader import SequentialH5FileDataReader as SH5FDR -from .h5_data_reader import SequentialH5ScriptDataReader as SH5SDR +from .ark_data_writer import ArkDataWriter as ADW from .h5_data_reader import RandomAccessH5FileDataReader as RH5FDR from .h5_data_reader import RandomAccessH5ScriptDataReader as RH5SDR +from .h5_data_reader import SequentialH5FileDataReader as SH5FDR +from .h5_data_reader import SequentialH5ScriptDataReader as SH5SDR +from .h5_data_writer import H5DataWriter as H5DW +from .rw_specifiers import (ArchiveType, RSpecifier, RSpecType, WSpecifier, + WSpecType) class DataWriterFactory(object): diff --git a/hyperion/io/h5_data_reader.py b/hyperion/io/h5_data_reader.py index 7ade2549..dfefbec3 100644 --- a/hyperion/io/h5_data_reader.py +++ b/hyperion/io/h5_data_reader.py @@ -5,18 +5,19 @@ Classes to read data from hdf5 files. 
""" +import multiprocessing import sys import time -import numpy as np + import h5py -import multiprocessing +import numpy as np from ..hyp_defs import float_cpu +from ..utils.kaldi_io_funcs import is_token +from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix from ..utils.list_utils import split_list, split_list_group_by_key from ..utils.scp_list import SCPList -from ..utils.kaldi_matrix import KaldiMatrix, KaldiCompressedMatrix -from ..utils.kaldi_io_funcs import is_token -from .data_reader import SequentialDataReader, RandomAccessDataReader +from .data_reader import RandomAccessDataReader, SequentialDataReader def _read_h5_data(dset, row_offset=0, num_rows=0, transform=None): diff --git a/hyperion/io/h5_data_writer.py b/hyperion/io/h5_data_writer.py index 0685d9b8..fed91d1e 100644 --- a/hyperion/io/h5_data_writer.py +++ b/hyperion/io/h5_data_writer.py @@ -4,13 +4,14 @@ """ import sys -import numpy as np + import h5py +import numpy as np from ..hyp_defs import float_save -from ..utils.scp_list import SCPList -from ..utils.kaldi_matrix import KaldiMatrix, KaldiCompressedMatrix from ..utils.kaldi_io_funcs import is_token +from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix +from ..utils.scp_list import SCPList from .data_writer import DataWriter diff --git a/hyperion/io/h5_merger.py b/hyperion/io/h5_merger.py index f1b408e7..3e73608e 100644 --- a/hyperion/io/h5_merger.py +++ b/hyperion/io/h5_merger.py @@ -4,6 +4,7 @@ """ import sys + import numpy as np from .hyp_data_reader import HypDataReader as HR diff --git a/hyperion/io/hyp_data_reader.py b/hyperion/io/hyp_data_reader.py index 9219187a..575c3087 100644 --- a/hyperion/io/hyp_data_reader.py +++ b/hyperion/io/hyp_data_reader.py @@ -4,11 +4,12 @@ """ import sys -import numpy as np + import h5py +import numpy as np from ..hyp_defs import float_cpu -from ..utils.list_utils import list2ndarray, ismember +from ..utils.list_utils import ismember, list2ndarray class HypDataReader(object): diff --git a/hyperion/io/hyp_data_writer.py b/hyperion/io/hyp_data_writer.py index 9a5b5906..81ad2501 100644 --- a/hyperion/io/hyp_data_writer.py +++ b/hyperion/io/hyp_data_writer.py @@ -4,11 +4,12 @@ """ import sys -import numpy as np + import h5py +import numpy as np from ..hyp_defs import float_save -from ..utils.list_utils import list2ndarray, ismember +from ..utils.list_utils import ismember, list2ndarray class HypDataWriter(object): diff --git a/hyperion/io/kaldi_data_reader.py b/hyperion/io/kaldi_data_reader.py index 6313cb29..60b55bfd 100644 --- a/hyperion/io/kaldi_data_reader.py +++ b/hyperion/io/kaldi_data_reader.py @@ -3,9 +3,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import gzip +import re +import struct import sys -import gzip, struct, re from collections import OrderedDict + import numpy as np from ..hyp_defs import float_cpu diff --git a/hyperion/io/packed_audio_reader.py b/hyperion/io/packed_audio_reader.py index 61ebbd65..17f78bc2 100644 --- a/hyperion/io/packed_audio_reader.py +++ b/hyperion/io/packed_audio_reader.py @@ -2,15 +2,15 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import soundfile as sf - -import time -import math import logging -import numpy as np +import math import multiprocessing +import time from copy import deepcopy +import numpy as np +import soundfile as sf + from ..hyp_defs import float_cpu from ..utils import SCPList, SegmentList diff --git a/hyperion/io/packed_audio_writer.py 
b/hyperion/io/packed_audio_writer.py index 3a15227a..ceda0d90 100644 --- a/hyperion/io/packed_audio_writer.py +++ b/hyperion/io/packed_audio_writer.py @@ -4,9 +4,9 @@ """ import os import re -import soundfile as sf import numpy as np +import soundfile as sf from ..utils.kaldi_io_funcs import is_token from .audio_reader import valid_ext diff --git a/hyperion/io/segment_vad_reader.py b/hyperion/io/segment_vad_reader.py index df8d39e5..01bf413e 100644 --- a/hyperion/io/segment_vad_reader.py +++ b/hyperion/io/segment_vad_reader.py @@ -3,13 +3,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging + import numpy as np from ..hyp_defs import float_cpu from ..utils import SegmentList from ..utils.vad_utils import vad_timestamps_to_bin -from .vad_reader import VADReader from .data_reader import DataReader +from .vad_reader import VADReader class SegmentVADReader(VADReader): diff --git a/hyperion/io/vad_reader.py b/hyperion/io/vad_reader.py index c56a8ffe..40e2dda2 100644 --- a/hyperion/io/vad_reader.py +++ b/hyperion/io/vad_reader.py @@ -4,6 +4,7 @@ """ import logging + import numpy as np from ..hyp_defs import float_cpu diff --git a/hyperion/io/vad_rw_factory.py b/hyperion/io/vad_rw_factory.py index 7b855b07..32032d1d 100644 --- a/hyperion/io/vad_rw_factory.py +++ b/hyperion/io/vad_rw_factory.py @@ -5,8 +5,9 @@ import logging -from .rw_specifiers import ArchiveType, WSpecifier, RSpecifier, WSpecType, RSpecType from .bin_vad_reader import BinVADReader as BVR +from .rw_specifiers import (ArchiveType, RSpecifier, RSpecType, WSpecifier, + WSpecType) from .segment_vad_reader import SegmentVADReader as SVR diff --git a/hyperion/np/augment/__init__.py b/hyperion/np/augment/__init__.py index 210f54e7..1f99ffb0 100644 --- a/hyperion/np/augment/__init__.py +++ b/hyperion/np/augment/__init__.py @@ -3,7 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .speech_augment import SpeechAugment -from .speed_augment import SpeedAugment from .noise_augment import NoiseAugment from .reverb_augment import ReverbAugment +from .speech_augment import SpeechAugment +from .speed_augment import SpeedAugment diff --git a/hyperion/np/augment/noise_augment.py b/hyperion/np/augment/noise_augment.py index e180a292..799db930 100644 --- a/hyperion/np/augment/noise_augment.py +++ b/hyperion/np/augment/noise_augment.py @@ -6,10 +6,10 @@ import logging import math import multiprocessing -import yaml from copy import deepcopy import numpy as np +import yaml from ...hyp_defs import float_cpu from ...io import RandomAccessAudioReader as AR diff --git a/hyperion/np/augment/reverb_augment.py b/hyperion/np/augment/reverb_augment.py index ef5293d6..cf4cc6cb 100644 --- a/hyperion/np/augment/reverb_augment.py +++ b/hyperion/np/augment/reverb_augment.py @@ -3,15 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import time import logging import math import multiprocessing -import yaml +import time from copy import deepcopy from enum import Enum import numpy as np +import yaml from scipy import signal from ...hyp_defs import float_cpu diff --git a/hyperion/np/augment/speech_augment.py b/hyperion/np/augment/speech_augment.py index e3eab4ea..0b1233f1 100644 --- a/hyperion/np/augment/speech_augment.py +++ b/hyperion/np/augment/speech_augment.py @@ -5,12 +5,11 @@ import logging import math -import yaml import numpy as np +import yaml from ...hyp_defs import float_cpu - from .noise_augment import NoiseAugment from .reverb_augment import ReverbAugment from .speed_augment import 
SpeedAugment diff --git a/hyperion/np/augment/speed_augment.py b/hyperion/np/augment/speed_augment.py index 2f353ebe..4400a4b4 100644 --- a/hyperion/np/augment/speed_augment.py +++ b/hyperion/np/augment/speed_augment.py @@ -5,8 +5,9 @@ import logging from copy import deepcopy -import yaml + import numpy as np +import yaml from librosa.effects import time_stretch from ...hyp_defs import float_cpu diff --git a/hyperion/np/calibration/unsup_gauss_calibration.py b/hyperion/np/calibration/unsup_gauss_calibration.py index 5f368a71..fd440995 100644 --- a/hyperion/np/calibration/unsup_gauss_calibration.py +++ b/hyperion/np/calibration/unsup_gauss_calibration.py @@ -3,6 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import sys + import numpy as np from ..pdfs.mixtures.diag_gmm_tiedcovs import DiagGMMTiedCovs as GMM diff --git a/hyperion/np/classifiers/__init__.py b/hyperion/np/classifiers/__init__.py index 92a9305d..d9d02ed0 100644 --- a/hyperion/np/classifiers/__init__.py +++ b/hyperion/np/classifiers/__init__.py @@ -3,11 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .linear_gbe import LinearGBE -from .linear_gbe_up import LinearGBEUP -from .logistic_regression import LogisticRegression from .binary_logistic_regression import BinaryLogisticRegression from .greedy_fusion import GreedyFusionBinaryLR +from .linear_gbe import LinearGBE +from .linear_gbe_up import LinearGBEUP from .linear_svmc import LinearSVMC +from .logistic_regression import LogisticRegression from .q_scoring_homo_gbe import QScoringHomoGBE from .svmc import GaussianSVMC diff --git a/hyperion/np/classifiers/greedy_fusion.py b/hyperion/np/classifiers/greedy_fusion.py index 2102bc22..842b850e 100644 --- a/hyperion/np/classifiers/greedy_fusion.py +++ b/hyperion/np/classifiers/greedy_fusion.py @@ -4,12 +4,12 @@ """ import logging + import numpy as np from ...hyp_defs import float_cpu, float_save -from ..np_model import NPModel from ..metrics import dcf - +from ..np_model import NPModel from .binary_logistic_regression import BinaryLogisticRegression as BLR diff --git a/hyperion/np/classifiers/linear_gbe.py b/hyperion/np/classifiers/linear_gbe.py index 00a8b1bf..a6b8c7cc 100644 --- a/hyperion/np/classifiers/linear_gbe.py +++ b/hyperion/np/classifiers/linear_gbe.py @@ -4,13 +4,14 @@ """ import logging + import numpy as np -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from scipy.special import gammaln from ...hyp_defs import float_cpu +from ...utils.math import int2onehot, invert_pdmat, logdet_pdmat, softmax from ..np_model import NPModel -from ...utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax class LinearGBE(NPModel): diff --git a/hyperion/np/classifiers/linear_gbe_up.py b/hyperion/np/classifiers/linear_gbe_up.py index 4a489639..8566aeab 100644 --- a/hyperion/np/classifiers/linear_gbe_up.py +++ b/hyperion/np/classifiers/linear_gbe_up.py @@ -4,18 +4,14 @@ """ import logging + import numpy as np from scipy.special import gammaln from ...hyp_defs import float_cpu +from ...utils.math import (fullcov_varfloor, int2onehot, invert_pdmat, + logdet_pdmat, softmax) from ..np_model import NPModel -from ...utils.math import ( - int2onehot, - logdet_pdmat, - invert_pdmat, - softmax, - fullcov_varfloor, -) from .linear_gbe import LinearGBE diff --git a/hyperion/np/classifiers/linear_svmc.py b/hyperion/np/classifiers/linear_svmc.py index 607d83de..5d743a46 100644 --- 
a/hyperion/np/classifiers/linear_svmc.py +++ b/hyperion/np/classifiers/linear_svmc.py @@ -4,14 +4,14 @@ """ import logging -import numpy as np -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from sklearn.svm import LinearSVC as SVC from ...hyp_defs import float_cpu -from ..np_model import NPModel from ...utils.math import softmax +from ..np_model import NPModel class LinearSVMC(NPModel): diff --git a/hyperion/np/classifiers/logistic_regression.py b/hyperion/np/classifiers/logistic_regression.py index 932a28e3..8e3d7e2e 100644 --- a/hyperion/np/classifiers/logistic_regression.py +++ b/hyperion/np/classifiers/logistic_regression.py @@ -4,13 +4,13 @@ """ import logging -import numpy as np +import numpy as np from sklearn.linear_model import LogisticRegression as LR from ...hyp_defs import float_cpu -from ..np_model import NPModel from ...utils.math import softmax +from ..np_model import NPModel class LogisticRegression(NPModel): diff --git a/hyperion/np/classifiers/q_scoring_homo_gbe.py b/hyperion/np/classifiers/q_scoring_homo_gbe.py index 8ef42052..9e54e0f4 100644 --- a/hyperion/np/classifiers/q_scoring_homo_gbe.py +++ b/hyperion/np/classifiers/q_scoring_homo_gbe.py @@ -4,12 +4,13 @@ """ import logging + import numpy as np from scipy.special import gammaln from ...hyp_defs import float_cpu +from ...utils.math import int2onehot, invert_pdmat, logdet_pdmat, softmax from ..np_model import NPModel -from ...utils.math import int2onehot, logdet_pdmat, invert_pdmat, softmax class QScoringHomoGBE(NPModel): diff --git a/hyperion/np/classifiers/svmc.py b/hyperion/np/classifiers/svmc.py index 77a05ff9..9311b8e8 100644 --- a/hyperion/np/classifiers/svmc.py +++ b/hyperion/np/classifiers/svmc.py @@ -3,17 +3,17 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os import logging +import os import pickle -import numpy as np -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from sklearn.svm import SVC as SVC from ...hyp_defs import float_cpu -from ..np_model import NPModel from ...utils.math import softmax +from ..np_model import NPModel class GaussianSVMC(NPModel): diff --git a/hyperion/np/clustering/__init__.py b/hyperion/np/clustering/__init__.py index f22aa6f3..0841d47e 100644 --- a/hyperion/np/clustering/__init__.py +++ b/hyperion/np/clustering/__init__.py @@ -3,5 +3,5 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .kmeans import KMeans from .ahc import AHC +from .kmeans import KMeans diff --git a/hyperion/np/clustering/ahc.py b/hyperion/np/clustering/ahc.py index f2f0b93b..e6e0d81b 100644 --- a/hyperion/np/clustering/ahc.py +++ b/hyperion/np/clustering/ahc.py @@ -3,12 +3,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np -import h5py from copy import copy +import h5py +import numpy as np from scipy.cluster.hierarchy import linkage -from sklearn.metrics import homogeneity_score, completeness_score +from sklearn.metrics import completeness_score, homogeneity_score from ...hyp_defs import float_cpu from ..np_model import NPModel diff --git a/hyperion/np/clustering/kmeans.py b/hyperion/np/clustering/kmeans.py index dc5b67c0..abb88463 100644 --- a/hyperion/np/clustering/kmeans.py +++ b/hyperion/np/clustering/kmeans.py @@ -3,10 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys import logging 
-import numpy as np +import sys + import h5py +import numpy as np from ...hyp_defs import float_cpu from ..np_model import NPModel diff --git a/hyperion/np/diarization/diar_ahc_plda.py b/hyperion/np/diarization/diar_ahc_plda.py index b8fb0fa6..4bfbc06b 100644 --- a/hyperion/np/diarization/diar_ahc_plda.py +++ b/hyperion/np/diarization/diar_ahc_plda.py @@ -5,14 +5,13 @@ import logging from pathlib import Path -import numpy as np import h5py import matplotlib +import numpy as np matplotlib.use("Agg") import matplotlib.pyplot as plt - from ..clustering import AHC from ..pdfs import GMMTiedDiagCov as GMM from ..transforms import PCA, LNorm diff --git a/hyperion/np/feats/__init__.py b/hyperion/np/feats/__init__.py index 9d77e032..5173bf4b 100644 --- a/hyperion/np/feats/__init__.py +++ b/hyperion/np/feats/__init__.py @@ -4,10 +4,10 @@ """ # -from .filter_banks import FilterBankFactory -from .feature_windows import FeatureWindowFactory -from .stft import * -from .mfcc import MFCC from .energy_vad import EnergyVAD -from .frame_selector import FrameSelector from .feature_normalization import MeanVarianceNorm +from .feature_windows import FeatureWindowFactory +from .filter_banks import FilterBankFactory +from .frame_selector import FrameSelector +from .mfcc import MFCC +from .stft import * diff --git a/hyperion/np/feats/feature_normalization.py b/hyperion/np/feats/feature_normalization.py index 2a8cf6e2..27683739 100644 --- a/hyperion/np/feats/feature_normalization.py +++ b/hyperion/np/feats/feature_normalization.py @@ -4,7 +4,7 @@ """ import numpy as np -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, ArgumentParser from scipy.signal import convolve2d from ...hyp_defs import float_cpu diff --git a/hyperion/np/feats/filter_banks.py b/hyperion/np/feats/filter_banks.py index 3b0da644..0e0eaf84 100644 --- a/hyperion/np/feats/filter_banks.py +++ b/hyperion/np/feats/filter_banks.py @@ -3,10 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser import logging import numpy as np +from jsonargparse import ActionParser, ArgumentParser from librosa.filters import mel as make_mel_librosa from ...hyp_defs import float_cpu diff --git a/hyperion/np/feats/mfcc.py b/hyperion/np/feats/mfcc.py index d6b8dd3f..cd98840d 100644 --- a/hyperion/np/feats/mfcc.py +++ b/hyperion/np/feats/mfcc.py @@ -13,7 +13,7 @@ from ...utils.misc import str2bool from .feature_windows import FeatureWindowFactory as FWF from .filter_banks import FilterBankFactory as FBF -from .stft import strft, st_logE +from .stft import st_logE, strft class MFCCSteps(Enum): diff --git a/hyperion/np/metrics/__init__.py b/hyperion/np/metrics/__init__.py index 6725621a..36afdbf5 100644 --- a/hyperion/np/metrics/__init__.py +++ b/hyperion/np/metrics/__init__.py @@ -3,8 +3,9 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .utils import effective_prior from .acc import compute_accuracy from .confusion_matrix import * +from .dcf import (compute_act_dcf, compute_dcf, compute_min_dcf, + fast_eval_dcf_eer) from .eer import compute_eer, compute_prbep -from .dcf import compute_dcf, compute_min_dcf, compute_act_dcf, fast_eval_dcf_eer +from .utils import effective_prior diff --git a/hyperion/np/metrics/confusion_matrix.py b/hyperion/np/metrics/confusion_matrix.py index 084aa7a9..57f8f1ab 100644 --- a/hyperion/np/metrics/confusion_matrix.py +++ b/hyperion/np/metrics/confusion_matrix.py @@ -4,8 +4,9 @@ """ import sys -import numpy as np 
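The metrics hunks only reorder exports, but they spell out the public surface of the relocated hyperion.np.metrics package. A small usage sketch built from those names; the signature of effective_prior is the one visible in the utils.py hunk above, and the comment assumes it computes the usual NIST collapse of a (prior, costs) operating point:

from hyperion.np.metrics import effective_prior

# Fold (p_tar, c_miss, c_fa) into a single effective target prior:
# p_eff = p_tar * c_miss / (p_tar * c_miss + (1 - p_tar) * c_fa)
p_eff = effective_prior(p_tar=0.05, c_miss=10, c_fa=1)
print(p_eff)  # ~0.345 if the formula above is what utils.py implements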
+ import matplotlib.pyplot as plt +import numpy as np from sklearn.metrics import confusion_matrix from ...utils.list_utils import list2ndarray diff --git a/hyperion/np/metrics/roc.py b/hyperion/np/metrics/roc.py index 38e4fa3c..f8df8d10 100644 --- a/hyperion/np/metrics/roc.py +++ b/hyperion/np/metrics/roc.py @@ -3,9 +3,9 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import matplotlib.pyplot as plt import numpy as np import scipy.linalg as sla -import matplotlib.pyplot as plt from .utils import pavx diff --git a/hyperion/np/metrics/utils.py b/hyperion/np/metrics/utils.py index c5871dfd..0715d809 100644 --- a/hyperion/np/metrics/utils.py +++ b/hyperion/np/metrics/utils.py @@ -8,7 +8,7 @@ import numpy as np from ...hyp_defs import float_cpu -from ...utils.math import softmax, logsumexp +from ...utils.math import logsumexp, softmax def effective_prior(p_tar, c_miss, c_fa): diff --git a/hyperion/np/metrics/verification_evaluator.py b/hyperion/np/metrics/verification_evaluator.py index 9c9c3208..2adf15cf 100644 --- a/hyperion/np/metrics/verification_evaluator.py +++ b/hyperion/np/metrics/verification_evaluator.py @@ -4,13 +4,13 @@ """ +import copy import logging import re -import numpy as np -import pandas as pd -import copy import matplotlib +import numpy as np +import pandas as pd matplotlib.use("Agg") matplotlib.rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"]}) @@ -20,8 +20,8 @@ from ...hyp_defs import float_cpu from ...utils import TrialKey, TrialScores from ...utils.trial_stats import TrialStats -from .utils import effective_prior from .dcf import fast_eval_dcf_eer +from .utils import effective_prior class VerificationEvaluator(object): diff --git a/hyperion/np/np_model.py b/hyperion/np/np_model.py index db49f6d5..8ee84ee8 100644 --- a/hyperion/np/np_model.py +++ b/hyperion/np/np_model.py @@ -2,14 +2,14 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os import json +import os from copy import deepcopy -import numpy as np import h5py +import numpy as np -from ..hyp_defs import float_save, float_cpu +from ..hyp_defs import float_cpu, float_save class NPModel(object): diff --git a/hyperion/np/pdfs/__init__.py b/hyperion/np/pdfs/__init__.py index 91af5497..8a91e269 100644 --- a/hyperion/np/pdfs/__init__.py +++ b/hyperion/np/pdfs/__init__.py @@ -4,7 +4,7 @@ """ from .core import * +from .hmm import * +from .jfa import * from .mixtures import * from .plda import * -from .jfa import * -from .hmm import * diff --git a/hyperion/np/pdfs/core/__init__.py b/hyperion/np/pdfs/core/__init__.py index 2defe6d4..0f6287f2 100644 --- a/hyperion/np/pdfs/core/__init__.py +++ b/hyperion/np/pdfs/core/__init__.py @@ -4,7 +4,7 @@ """ -from .pdf import PDF from .exp_family import ExpFamily -from .normal_diag_cov import NormalDiagCov, DiagNormal from .normal import Normal +from .normal_diag_cov import DiagNormal, NormalDiagCov +from .pdf import PDF diff --git a/hyperion/np/pdfs/core/normal.py b/hyperion/np/pdfs/core/normal.py index 4c3c70cf..b8f8bb54 100644 --- a/hyperion/np/pdfs/core/normal.py +++ b/hyperion/np/pdfs/core/normal.py @@ -7,21 +7,11 @@ import scipy.linalg as la from ....hyp_defs import float_cpu -from ....utils.plotting import ( - plot_gaussian_1D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D, - plot_gaussian_3D, -) -from ....utils.math import ( - invert_pdmat, - invert_trimat, - symmat2vec, - vec2symmat, - fullcov_varfloor, - logdet_pdmat, -) - +from ....utils.math 
import (fullcov_varfloor, invert_pdmat, invert_trimat, + logdet_pdmat, symmat2vec, vec2symmat) +from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D) from .exp_family import ExpFamily diff --git a/hyperion/np/pdfs/core/normal_diag_cov.py b/hyperion/np/pdfs/core/normal_diag_cov.py index 8a896cd5..c9986f4c 100644 --- a/hyperion/np/pdfs/core/normal_diag_cov.py +++ b/hyperion/np/pdfs/core/normal_diag_cov.py @@ -7,13 +7,9 @@ from scipy.special import erf from ....hyp_defs import float_cpu -from ....utils.plotting import ( - plot_gaussian_1D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D, - plot_gaussian_3D, -) - +from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D) from .exp_family import ExpFamily diff --git a/hyperion/np/pdfs/hmm/hmm.py b/hyperion/np/pdfs/hmm/hmm.py index 704f0991..80232e36 100644 --- a/hyperion/np/pdfs/hmm/hmm.py +++ b/hyperion/np/pdfs/hmm/hmm.py @@ -6,7 +6,7 @@ import numpy as np from ....hyp_defs import float_cpu -from ....utils.math import softmax, logsumexp +from ....utils.math import logsumexp, softmax from ..core import PDF diff --git a/hyperion/np/pdfs/jfa/jfa_total.py b/hyperion/np/pdfs/jfa/jfa_total.py index 993da9d6..041431fb 100644 --- a/hyperion/np/pdfs/jfa/jfa_total.py +++ b/hyperion/np/pdfs/jfa/jfa_total.py @@ -7,13 +7,8 @@ from scipy import linalg as la from ....hyp_defs import float_cpu -from ....utils.math import ( - invert_pdmat, - invert_trimat, - logdet_pdmat, - vec2symmat, - symmat2vec, -) +from ....utils.math import (invert_pdmat, invert_trimat, logdet_pdmat, + symmat2vec, vec2symmat) from ..core.pdf import PDF diff --git a/hyperion/np/pdfs/mixtures/__init__.py b/hyperion/np/pdfs/mixtures/__init__.py index f9168905..dccad8d1 100644 --- a/hyperion/np/pdfs/mixtures/__init__.py +++ b/hyperion/np/pdfs/mixtures/__init__.py @@ -5,6 +5,6 @@ from .exp_family_mixture import ExpFamilyMixture -from .gmm_diag_cov import GMMDiagCov, DiagGMM -from .gmm_tied_diag_cov import GMMTiedDiagCov, DiagGMMTiedCov from .gmm import GMM +from .gmm_diag_cov import DiagGMM, GMMDiagCov +from .gmm_tied_diag_cov import DiagGMMTiedCov, GMMTiedDiagCov diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py index f684e453..5560882c 100644 --- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py +++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py @@ -2,12 +2,12 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np - import logging +import numpy as np + from ....hyp_defs import float_cpu -from ....utils.math import softmax, logsumexp +from ....utils.math import logsumexp, softmax from ....utils.queues import GeneratorQueue from ..core import PDF diff --git a/hyperion/np/pdfs/mixtures/gmm.py b/hyperion/np/pdfs/mixtures/gmm.py index 4f6d599e..ca197142 100644 --- a/hyperion/np/pdfs/mixtures/gmm.py +++ b/hyperion/np/pdfs/mixtures/gmm.py @@ -2,31 +2,19 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py +import numpy as np import scipy.linalg as la from scipy.special import erf - from ....hyp_defs import float_cpu -from ....utils.math import ( - softmax, - logsumexp, - invert_pdmat, - invert_trimat, - symmat2vec, - vec2symmat, - fullcov_varfloor, - logdet_pdmat, -) -from 
....utils.plotting import ( - plot_gaussian_1D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D, - plot_gaussian_3D, -) +from ....utils.math import (fullcov_varfloor, invert_pdmat, invert_trimat, + logdet_pdmat, logsumexp, softmax, symmat2vec, + vec2symmat) +from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D) from ...clustering import KMeans - from ..core import Normal from .exp_family_mixture import ExpFamilyMixture diff --git a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py index 4a0ba27d..90141573 100644 --- a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py +++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py @@ -3,20 +3,16 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py +import numpy as np from scipy.special import erf from ....hyp_defs import float_cpu -from ....utils.math import softmax, logsumexp -from ....utils.plotting import ( - plot_gaussian_1D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D, - plot_gaussian_3D, -) +from ....utils.math import logsumexp, softmax +from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D) from ...clustering import KMeans - from .exp_family_mixture import ExpFamilyMixture diff --git a/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py index ff02ec62..4dc8f46e 100644 --- a/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py +++ b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py @@ -2,20 +2,16 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py +import numpy as np from scipy.special import erf from ....hyp_defs import float_cpu -from ....utils.math import softmax, logsumexp -from ....utils.plotting import ( - plot_gaussian_1D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D, - plot_gaussian_3D, -) +from ....utils.math import logsumexp, softmax +from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D) from ...clustering import KMeans - from .gmm_diag_cov import GMMDiagCov diff --git a/hyperion/np/pdfs/plda/__init__.py b/hyperion/np/pdfs/plda/__init__.py index 9d11ad38..13bc2d81 100644 --- a/hyperion/np/pdfs/plda/__init__.py +++ b/hyperion/np/pdfs/plda/__init__.py @@ -4,7 +4,7 @@ """ -from .plda_base import PLDABase from .frplda import FRPLDA -from .splda import SPLDA from .plda import PLDA +from .plda_base import PLDABase +from .splda import SPLDA diff --git a/hyperion/np/pdfs/plda/plda_base.py b/hyperion/np/pdfs/plda/plda_base.py index 72503965..9dde58b1 100644 --- a/hyperion/np/pdfs/plda/plda_base.py +++ b/hyperion/np/pdfs/plda/plda_base.py @@ -6,8 +6,8 @@ import numpy as np from ....hyp_defs import float_cpu -from ..core.pdf import PDF from ...transforms import LNorm +from ..core.pdf import PDF class PLDABase(PDF): diff --git a/hyperion/np/score_norm/__init__.py b/hyperion/np/score_norm/__init__.py index b0eb8000..7707b669 100644 --- a/hyperion/np/score_norm/__init__.py +++ b/hyperion/np/score_norm/__init__.py @@ -4,9 +4,9 @@ """ +from .adapt_s_norm import AdaptSNorm +from .s_norm import SNorm from .t_norm import TNorm +from .tz_norm import TZNorm from .z_norm import ZNorm from .zt_norm import ZTNorm -from .tz_norm import TZNorm -from .s_norm import SNorm -from .adapt_s_norm import 
AdaptSNorm diff --git a/hyperion/np/score_norm/adapt_s_norm.py b/hyperion/np/score_norm/adapt_s_norm.py index 46d1fc14..a5ae6f13 100644 --- a/hyperion/np/score_norm/adapt_s_norm.py +++ b/hyperion/np/score_norm/adapt_s_norm.py @@ -4,8 +4,8 @@ """ -import numpy as np import h5py +import numpy as np from .score_norm import ScoreNorm diff --git a/hyperion/np/score_norm/s_norm.py b/hyperion/np/score_norm/s_norm.py index 2cf81ffc..4c991d95 100644 --- a/hyperion/np/score_norm/s_norm.py +++ b/hyperion/np/score_norm/s_norm.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py +import numpy as np from .score_norm import ScoreNorm from .t_norm import TNorm diff --git a/hyperion/np/score_norm/t_norm.py b/hyperion/np/score_norm/t_norm.py index a5a80def..bf514b3d 100644 --- a/hyperion/np/score_norm/t_norm.py +++ b/hyperion/np/score_norm/t_norm.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py +import numpy as np from .score_norm import ScoreNorm diff --git a/hyperion/np/score_norm/zt_norm.py b/hyperion/np/score_norm/zt_norm.py index 415ddca8..078dd8ce 100644 --- a/hyperion/np/score_norm/zt_norm.py +++ b/hyperion/np/score_norm/zt_norm.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py +import numpy as np from .score_norm import ScoreNorm from .t_norm import TNorm diff --git a/hyperion/np/transforms/__init__.py b/hyperion/np/transforms/__init__.py index 3f6c5f45..c963e32b 100644 --- a/hyperion/np/transforms/__init__.py +++ b/hyperion/np/transforms/__init__.py @@ -4,17 +4,16 @@ """ from .cent_whiten import CentWhiten -from .lnorm import LNorm -from .sb_sw import SbSw -from .pca import PCA -from .lda import LDA -from .nda import NDA -from .nap import NAP -from .mvn import MVN +from .cent_whiten_up import CentWhitenUP from .coral import CORAL from .gaussianizer import Gaussianizer +from .lda import LDA +from .lnorm import LNorm +from .lnorm_up import LNormUP +from .mvn import MVN +from .nap import NAP +from .nda import NDA +from .pca import PCA +from .sb_sw import SbSw from .skl_tsne import SklTSNE from .transform_list import TransformList - -from .cent_whiten_up import CentWhitenUP -from .lnorm_up import LNormUP diff --git a/hyperion/np/transforms/cent_whiten.py b/hyperion/np/transforms/cent_whiten.py index 5f71c173..35e79d80 100644 --- a/hyperion/np/transforms/cent_whiten.py +++ b/hyperion/np/transforms/cent_whiten.py @@ -2,11 +2,10 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo -import numpy as np import h5py - +import numpy as np import scipy.linalg as la +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..np_model import NPModel from ..pdfs import Normal diff --git a/hyperion/np/transforms/cent_whiten_up.py b/hyperion/np/transforms/cent_whiten_up.py index 9290eae6..7e677d16 100644 --- a/hyperion/np/transforms/cent_whiten_up.py +++ b/hyperion/np/transforms/cent_whiten_up.py @@ -3,9 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py - +import numpy as np import scipy.linalg as la from ..np_model import NPModel diff --git a/hyperion/np/transforms/coral.py b/hyperion/np/transforms/coral.py index 54bd27bc..90cc9774 100644 --- a/hyperion/np/transforms/coral.py +++ b/hyperion/np/transforms/coral.py @@ -3,9 +3,8 @@ 
Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py - +import numpy as np import scipy.linalg as la from ..np_model import NPModel diff --git a/hyperion/np/transforms/gaussianizer.py b/hyperion/np/transforms/gaussianizer.py index 393364b6..2c208e02 100644 --- a/hyperion/np/transforms/gaussianizer.py +++ b/hyperion/np/transforms/gaussianizer.py @@ -4,9 +4,9 @@ """ import logging -import numpy as np -import h5py +import h5py +import numpy as np import scipy.linalg as la from scipy.special import erfinv diff --git a/hyperion/np/transforms/lda.py b/hyperion/np/transforms/lda.py index b4f5cbc8..fc886ede 100644 --- a/hyperion/np/transforms/lda.py +++ b/hyperion/np/transforms/lda.py @@ -3,9 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py - +import numpy as np import scipy.linalg as la from ..np_model import NPModel diff --git a/hyperion/np/transforms/lnorm.py b/hyperion/np/transforms/lnorm.py index 9b4f36fe..302dedbe 100644 --- a/hyperion/np/transforms/lnorm.py +++ b/hyperion/np/transforms/lnorm.py @@ -2,8 +2,8 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py +import numpy as np from .cent_whiten import CentWhiten diff --git a/hyperion/np/transforms/lnorm_up.py b/hyperion/np/transforms/lnorm_up.py index 0814f9fe..2f3c1baf 100644 --- a/hyperion/np/transforms/lnorm_up.py +++ b/hyperion/np/transforms/lnorm_up.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py +import numpy as np from .cent_whiten_up import CentWhitenUP diff --git a/hyperion/np/transforms/mvn.py b/hyperion/np/transforms/mvn.py index 484a6913..f8154148 100644 --- a/hyperion/np/transforms/mvn.py +++ b/hyperion/np/transforms/mvn.py @@ -3,9 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py - +import numpy as np import scipy.linalg as la from ..np_model import NPModel diff --git a/hyperion/np/transforms/nap.py b/hyperion/np/transforms/nap.py index c6f8f8de..c826e887 100644 --- a/hyperion/np/transforms/nap.py +++ b/hyperion/np/transforms/nap.py @@ -3,9 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py - +import numpy as np import scipy.linalg as la from ..np_model import NPModel diff --git a/hyperion/np/transforms/nda.py b/hyperion/np/transforms/nda.py index 71910c92..13fe6aef 100644 --- a/hyperion/np/transforms/nda.py +++ b/hyperion/np/transforms/nda.py @@ -3,13 +3,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py - +import numpy as np import scipy.linalg as la -from ..np_model import NPModel from ...hyp_defs import float_cpu +from ..np_model import NPModel from .sb_sw import NSbSw diff --git a/hyperion/np/transforms/pca.py b/hyperion/np/transforms/pca.py index 36f6012b..eabb200d 100644 --- a/hyperion/np/transforms/pca.py +++ b/hyperion/np/transforms/pca.py @@ -2,12 +2,11 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo -import numpy as np import h5py - -from numpy.linalg import matrix_rank +import numpy as np import scipy.linalg as la +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from numpy.linalg import matrix_rank from ..np_model import NPModel diff --git 
a/hyperion/np/transforms/sb_sw.py b/hyperion/np/transforms/sb_sw.py index 6d013e55..e182c8e6 100644 --- a/hyperion/np/transforms/sb_sw.py +++ b/hyperion/np/transforms/sb_sw.py @@ -2,14 +2,13 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import h5py - +import numpy as np import scipy.linalg as la from sklearn.neighbors import BallTree -from ..np_model import NPModel from ...hyp_defs import float_cpu +from ..np_model import NPModel class SbSw(NPModel): diff --git a/hyperion/np/transforms/skl_tsne.py b/hyperion/np/transforms/skl_tsne.py index 71a3e084..3f60c4be 100644 --- a/hyperion/np/transforms/skl_tsne.py +++ b/hyperion/np/transforms/skl_tsne.py @@ -2,9 +2,8 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser import numpy as np - +from jsonargparse import ActionParser, ArgumentParser from sklearn.manifold import TSNE from ..np_model import NPModel diff --git a/hyperion/np/transforms/transform_list.py b/hyperion/np/transforms/transform_list.py index 1ddceeaa..58da16eb 100644 --- a/hyperion/np/transforms/transform_list.py +++ b/hyperion/np/transforms/transform_list.py @@ -5,21 +5,20 @@ import logging -import numpy as np import h5py +import numpy as np from ..np_model import NPModel - from .cent_whiten import CentWhiten from .cent_whiten_up import CentWhitenUP +from .gaussianizer import Gaussianizer +from .lda import LDA from .lnorm import LNorm from .lnorm_up import LNormUP -from .pca import PCA -from .lda import LDA -from .nda import NDA -from .nap import NAP from .mvn import MVN -from .gaussianizer import Gaussianizer +from .nap import NAP +from .nda import NDA +from .pca import PCA class TransformList(NPModel): diff --git a/hyperion/torch/adv_attacks/__init__.py b/hyperion/torch/adv_attacks/__init__.py index 906b8740..5fda4ac9 100644 --- a/hyperion/torch/adv_attacks/__init__.py +++ b/hyperion/torch/adv_attacks/__init__.py @@ -3,14 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .fgsm_attack import FGSMAttack -from .snr_fgsm_attack import SNRFGSMAttack -from .rand_fgsm_attack import RandFGSMAttack -from .iter_fgsm_attack import IterFGSMAttack +from .attack_factory import AttackFactory +from .carlini_wagner_l0 import CarliniWagnerL0 from .carlini_wagner_l2 import CarliniWagnerL2 from .carlini_wagner_linf import CarliniWagnerLInf -from .carlini_wagner_l0 import CarliniWagnerL0 +from .fgsm_attack import FGSMAttack +from .iter_fgsm_attack import IterFGSMAttack from .pgd_attack import PGDAttack - -from .attack_factory import AttackFactory +from .rand_fgsm_attack import RandFGSMAttack from .random_attack_factory import RandomAttackFactory +from .snr_fgsm_attack import SNRFGSMAttack diff --git a/hyperion/torch/adv_attacks/art_attack_factory.py b/hyperion/torch/adv_attacks/art_attack_factory.py index e09c62ff..ba103acf 100644 --- a/hyperion/torch/adv_attacks/art_attack_factory.py +++ b/hyperion/torch/adv_attacks/art_attack_factory.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser import numpy as np +from jsonargparse import ActionParser, ArgumentParser try: from art.attacks import evasion as attacks diff --git a/hyperion/torch/adv_attacks/attack_factory.py b/hyperion/torch/adv_attacks/attack_factory.py index 8ea952ad..5d53f6bc 100644 --- 
a/hyperion/torch/adv_attacks/attack_factory.py +++ b/hyperion/torch/adv_attacks/attack_factory.py @@ -2,16 +2,16 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, ArgumentParser -from .fgsm_attack import FGSMAttack -from .snr_fgsm_attack import SNRFGSMAttack -from .rand_fgsm_attack import RandFGSMAttack -from .iter_fgsm_attack import IterFGSMAttack -from .carlini_wagner_l2 import CarliniWagnerL2 from .carlini_wagner_l0 import CarliniWagnerL0 +from .carlini_wagner_l2 import CarliniWagnerL2 from .carlini_wagner_linf import CarliniWagnerLInf +from .fgsm_attack import FGSMAttack +from .iter_fgsm_attack import IterFGSMAttack from .pgd_attack import PGDAttack +from .rand_fgsm_attack import RandFGSMAttack +from .snr_fgsm_attack import SNRFGSMAttack class AttackFactory(object): diff --git a/hyperion/torch/adv_attacks/carlini_wagner_l2.py b/hyperion/torch/adv_attacks/carlini_wagner_l2.py index 27cffe97..e8b545b5 100644 --- a/hyperion/torch/adv_attacks/carlini_wagner_l2.py +++ b/hyperion/torch/adv_attacks/carlini_wagner_l2.py @@ -2,8 +2,8 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import math import logging +import math import torch import torch.nn as nn diff --git a/hyperion/torch/adv_attacks/pgd_attack.py b/hyperion/torch/adv_attacks/pgd_attack.py index 879531ed..ca496e64 100644 --- a/hyperion/torch/adv_attacks/pgd_attack.py +++ b/hyperion/torch/adv_attacks/pgd_attack.py @@ -2,10 +2,11 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import math import logging +import math import torch + from .adv_attack import AdvAttack diff --git a/hyperion/torch/adv_attacks/random_attack_factory.py b/hyperion/torch/adv_attacks/random_attack_factory.py index e333b119..0c83bc56 100644 --- a/hyperion/torch/adv_attacks/random_attack_factory.py +++ b/hyperion/torch/adv_attacks/random_attack_factory.py @@ -4,9 +4,11 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch + from .attack_factory import AttackFactory as AF diff --git a/hyperion/torch/adv_defenses/wave_gan_white.py b/hyperion/torch/adv_defenses/wave_gan_white.py index ad7f985e..5d045f08 100644 --- a/hyperion/torch/adv_defenses/wave_gan_white.py +++ b/hyperion/torch/adv_defenses/wave_gan_white.py @@ -2,15 +2,15 @@ # Added wave_gan_model_ckpt to test using different model ckpts [Sonal 24Aug20] import logging +import math from pathlib import Path from typing import Tuple -import math import librosa import numpy as np +import yaml import torch -import yaml try: # import parallel_wavegan.models @@ -21,6 +21,7 @@ pass from sklearn.preprocessing import StandardScaler + from torch import nn diff --git a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index 65608a0c..239b278d 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -4,12 +4,10 @@ """ from .audio_dataset import AudioDataset - +from .embed_sampler_factory import EmbedSamplerFactory # datasets from .feat_seq_dataset import FeatSeqDataset from .paired_feat_seq_dataset import PairedFeatSeqDataset - # samplers # from .weighted_seq_sampler import ClassWeightedSeqSampler from .seg_sampler_factory import SegSamplerFactory -from 
.embed_sampler_factory import EmbedSamplerFactory diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 02b81efa..60d4bc98 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -9,15 +9,14 @@ import numpy as np import pandas as pd -import torch -import torch.distributed as dist import torchaudio.transforms as tat - from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +import torch +import torch.distributed as dist from torch.utils.data import Dataset from ...io import RandomAccessAudioReader as AR - # from ...utils.utt2info import Utt2Info from ...np.augment import SpeechAugment from ...utils.class_info import ClassInfo diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 02497f3b..c9ba677f 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -7,9 +7,10 @@ import math import numpy as np +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse import ActionParser, ArgumentParser from .hyp_sampler import HypSampler from .seg_sampler import SegSampler diff --git a/hyperion/torch/data/class_weighted_embed_sampler.py b/hyperion/torch/data/class_weighted_embed_sampler.py index aed9105d..edf1c00d 100644 --- a/hyperion/torch/data/class_weighted_embed_sampler.py +++ b/hyperion/torch/data/class_weighted_embed_sampler.py @@ -9,9 +9,10 @@ import numpy as np import pandas as pd -import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +import torch + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 184c4ab0..81e9082f 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -9,9 +9,10 @@ import numpy as np import pandas as pd -import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +import torch + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/embed_dataset.py b/hyperion/torch/data/embed_dataset.py index 2963854d..519f498d 100644 --- a/hyperion/torch/data/embed_dataset.py +++ b/hyperion/torch/data/embed_dataset.py @@ -10,16 +10,16 @@ import numpy as np import pandas as pd +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.distributed as dist - -from jsonargparse import ActionParser, ArgumentParser, ActionYesNo from torch.utils.data import Dataset from ...io import RandomAccessDataReaderFactory as RF -from ...utils.misc import filter_func_args from ...utils.class_info import ClassInfo from ...utils.info_table import InfoTable +from ...utils.misc import filter_func_args from ..torch_defs import floatstr_torch diff --git a/hyperion/torch/data/embed_sampler.py b/hyperion/torch/data/embed_sampler.py index 8836fe2a..65adcba6 100644 --- a/hyperion/torch/data/embed_sampler.py +++ b/hyperion/torch/data/embed_sampler.py @@ -7,9 +7,10 @@ import math import numpy as np -import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +import torch + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/embed_sampler_factory.py b/hyperion/torch/data/embed_sampler_factory.py index 43d00b1d..aea35ddf 100644 --- a/hyperion/torch/data/embed_sampler_factory.py +++ b/hyperion/torch/data/embed_sampler_factory.py 
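Note that the torch/data hunks are not plain alphabetization: torch, torch.distributed, and torch.utils.data are consistently split out of the third-party block into a group of their own. A sketch of the resulting four-group layout, mirroring the sampler modules above (this is what an isort configuration with a dedicated torch section would produce, but that config is not part of these hunks):

import math

import numpy as np
from jsonargparse import ActionParser, ArgumentParser

import torch
import torch.distributed as dist
from torch.utils.data import Sampler

from hyperion.torch.data.hyp_sampler import HypSampler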
@@ -7,9 +7,9 @@ from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from .embed_dataset import EmbedDataset from .bucketing_seg_sampler import BucketingSegSampler from .class_weighted_embed_sampler import ClassWeightedEmbedSampler +from .embed_dataset import EmbedDataset from .embed_sampler import EmbedSampler sampler_dict = { diff --git a/hyperion/torch/data/feat_seq_dataset.py b/hyperion/torch/data/feat_seq_dataset.py index 1605ead3..bb487dda 100644 --- a/hyperion/torch/data/feat_seq_dataset.py +++ b/hyperion/torch/data/feat_seq_dataset.py @@ -12,15 +12,15 @@ import numpy as np import pandas as pd +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.distributed as dist - -from jsonargparse import ActionParser, ArgumentParser, ActionYesNo from torch.utils.data import Dataset from ...io import RandomAccessDataReaderFactory as RF -from ...utils.misc import filter_func_args from ...utils.class_info import ClassInfo +from ...utils.misc import filter_func_args from ...utils.segment_set import SegmentSet from ..torch_defs import floatstr_torch diff --git a/hyperion/torch/data/hyp_sampler.py b/hyperion/torch/data/hyp_sampler.py index c5097723..d1bcb0a8 100644 --- a/hyperion/torch/data/hyp_sampler.py +++ b/hyperion/torch/data/hyp_sampler.py @@ -2,9 +2,10 @@ import math import numpy as np +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse import ActionParser, ArgumentParser from torch.utils.data import Sampler diff --git a/hyperion/torch/data/paired_feat_seq_dataset.py b/hyperion/torch/data/paired_feat_seq_dataset.py index fc17593e..eff2ed58 100644 --- a/hyperion/torch/data/paired_feat_seq_dataset.py +++ b/hyperion/torch/data/paired_feat_seq_dataset.py @@ -6,6 +6,7 @@ import logging import numpy as np + import torch from ...utils.utt2info import Utt2Info diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py index 76054cd8..2933dcc6 100644 --- a/hyperion/torch/data/seg_chunk_sampler.py +++ b/hyperion/torch/data/seg_chunk_sampler.py @@ -8,9 +8,10 @@ import numpy as np import pandas as pd +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse import ActionParser, ArgumentParser from ...utils.segment_set import SegmentSet from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 1c54a021..ac66eaf6 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -7,9 +7,10 @@ import math import numpy as np -import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +import torch + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/weighted_embed_sampler.py b/hyperion/torch/data/weighted_embed_sampler.py index 22da93f9..5870512a 100644 --- a/hyperion/torch/data/weighted_embed_sampler.py +++ b/hyperion/torch/data/weighted_embed_sampler.py @@ -7,6 +7,7 @@ import math import numpy as np + import torch from torch.utils.data import Sampler diff --git a/hyperion/torch/data/weighted_seq_sampler.py b/hyperion/torch/data/weighted_seq_sampler.py index 345c2429..b6f0b670 100644 --- a/hyperion/torch/data/weighted_seq_sampler.py +++ b/hyperion/torch/data/weighted_seq_sampler.py @@ -7,9 +7,10 @@ import math import numpy as np +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse 
import ActionParser, ArgumentParser from torch.utils.data import Sampler diff --git a/hyperion/torch/layer_blocks/__init__.py b/hyperion/torch/layer_blocks/__init__.py index 2fa71766..7ec806a5 100644 --- a/hyperion/torch/layer_blocks/__init__.py +++ b/hyperion/torch/layer_blocks/__init__.py @@ -3,56 +3,34 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .fc_blocks import FCBlock -from .se_blocks import ( - SEBlock2D, - TSEBlock2D, - SEBlock2d, - TSEBlock2d, - FwSEBlock2d, - CFwSEBlock2d, - SEBlock1d, -) -from .tdnn_blocks import TDNNBlock -from .etdnn_blocks import ETDNNBlock -from .resetdnn_blocks import ResETDNNBlock -from .resnet_blocks import ResNetInputBlock, ResNetBasicBlock, ResNetBNBlock -from .resnet_blocks import ResNetEndpointBlock -from .seresnet_blocks import SEResNetBasicBlock, SEResNetBNBlock -from .res2net_blocks import Res2NetBasicBlock, Res2NetBNBlock -from .mbconv_blocks import MBConvBlock, MBConvInOutBlock -from .transformer_feedforward import PositionwiseFeedForward, Conv1dx2, Conv1dLinear -from .transformer_encoder_v1 import TransformerEncoderBlockV1 -from .transformer_conv2d_subsampler import TransformerConv2dSubsampler from .conformer_conv import ConformerConvBlock from .conformer_encoder_v1 import ConformerEncoderBlockV1 -from .dc1d_blocks import DC1dEncBlock, DC1dDecBlock -from .dc2d_blocks import DC2dEncBlock, DC2dDecBlock -from .resnet1d_blocks import ( - ResNet1dBasicBlock, - ResNet1dBasicDecBlock, - ResNet1dBNBlock, - ResNet1dBNDecBlock, - ResNet1dEndpoint, -) -from .resnet1d_blocks import ( - SEResNet1dBasicBlock, - SEResNet1dBasicDecBlock, - SEResNet1dBNBlock, - SEResNet1dBNDecBlock, -) +from .dc1d_blocks import DC1dDecBlock, DC1dEncBlock +from .dc2d_blocks import DC2dDecBlock, DC2dEncBlock +from .etdnn_blocks import ETDNNBlock +from .fc_blocks import FCBlock +from .mbconv_blocks import MBConvBlock, MBConvInOutBlock from .res2net1d_blocks import Res2Net1dBasicBlock, Res2Net1dBNBlock -from .resnet2d_blocks import ( - ResNet2dBasicBlock, - ResNet2dBasicDecBlock, - ResNet2dBNBlock, - ResNet2dBNDecBlock, -) -from .resnet2d_blocks import ( - SEResNet2dBasicBlock, - SEResNet2dBasicDecBlock, - SEResNet2dBNBlock, - SEResNet2dBNDecBlock, -) from .res2net2d_blocks import Res2Net2dBasicBlock, Res2Net2dBNBlock -from .spine_blocks import BlockSpec, SpineResample, SpineEndpoints, SpineConv +from .res2net_blocks import Res2NetBasicBlock, Res2NetBNBlock +from .resetdnn_blocks import ResETDNNBlock +from .resnet1d_blocks import (ResNet1dBasicBlock, ResNet1dBasicDecBlock, + ResNet1dBNBlock, ResNet1dBNDecBlock, + ResNet1dEndpoint, SEResNet1dBasicBlock, + SEResNet1dBasicDecBlock, SEResNet1dBNBlock, + SEResNet1dBNDecBlock) +from .resnet2d_blocks import (ResNet2dBasicBlock, ResNet2dBasicDecBlock, + ResNet2dBNBlock, ResNet2dBNDecBlock, + SEResNet2dBasicBlock, SEResNet2dBasicDecBlock, + SEResNet2dBNBlock, SEResNet2dBNDecBlock) +from .resnet_blocks import (ResNetBasicBlock, ResNetBNBlock, + ResNetEndpointBlock, ResNetInputBlock) +from .se_blocks import (CFwSEBlock2d, FwSEBlock2d, SEBlock1d, SEBlock2D, + SEBlock2d, TSEBlock2D, TSEBlock2d) +from .seresnet_blocks import SEResNetBasicBlock, SEResNetBNBlock +from .spine_blocks import BlockSpec, SpineConv, SpineEndpoints, SpineResample +from .tdnn_blocks import TDNNBlock +from .transformer_conv2d_subsampler import TransformerConv2dSubsampler +from .transformer_encoder_v1 import TransformerEncoderBlockV1 +from .transformer_feedforward import (Conv1dLinear, Conv1dx2, + PositionwiseFeedForward) diff --git 
a/hyperion/torch/layer_blocks/conformer_encoder_v1.py b/hyperion/torch/layer_blocks/conformer_encoder_v1.py index a54e3b99..b2eab352 100644 --- a/hyperion/torch/layer_blocks/conformer_encoder_v1.py +++ b/hyperion/torch/layer_blocks/conformer_encoder_v1.py @@ -8,8 +8,8 @@ import torch.nn as nn from ..layers.attention import * -from .transformer_feedforward import * from .conformer_conv import ConformerConvBlock +from .transformer_feedforward import * class ConformerEncoderBlockV1(nn.Module): diff --git a/hyperion/torch/layer_blocks/dc1d_blocks.py b/hyperion/torch/layer_blocks/dc1d_blocks.py index da643c34..780af960 100644 --- a/hyperion/torch/layer_blocks/dc1d_blocks.py +++ b/hyperion/torch/layer_blocks/dc1d_blocks.py @@ -4,7 +4,7 @@ """ import torch.nn as nn -from torch.nn import Conv1d, Linear, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d, Linear from ..layers import ActivationFactory as AF from ..layers import Dropout1d diff --git a/hyperion/torch/layer_blocks/dc2d_blocks.py b/hyperion/torch/layer_blocks/dc2d_blocks.py index bae8e203..a99f9211 100644 --- a/hyperion/torch/layer_blocks/dc2d_blocks.py +++ b/hyperion/torch/layer_blocks/dc2d_blocks.py @@ -4,7 +4,7 @@ """ import torch.nn as nn -from torch.nn import Conv2d, BatchNorm2d, Dropout2d +from torch.nn import BatchNorm2d, Conv2d, Dropout2d from ..layers import ActivationFactory as AF from ..layers.subpixel_convs import SubPixelConv2d diff --git a/hyperion/torch/layer_blocks/etdnn_blocks.py b/hyperion/torch/layer_blocks/etdnn_blocks.py index 17f3f8ef..b6afdd29 100644 --- a/hyperion/torch/layer_blocks/etdnn_blocks.py +++ b/hyperion/torch/layer_blocks/etdnn_blocks.py @@ -6,7 +6,7 @@ import numpy as np import torch.nn as nn -from torch.nn import Conv1d, Linear, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d, Linear from ..layers import ActivationFactory as AF from ..layers import Dropout1d diff --git a/hyperion/torch/layer_blocks/fc_blocks.py b/hyperion/torch/layer_blocks/fc_blocks.py index 567474bf..e56ab83e 100644 --- a/hyperion/torch/layer_blocks/fc_blocks.py +++ b/hyperion/torch/layer_blocks/fc_blocks.py @@ -7,7 +7,7 @@ # import numpy as np import torch.nn as nn -from torch.nn import Linear, BatchNorm1d, Dropout +from torch.nn import BatchNorm1d, Dropout, Linear from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/layer_blocks/mbconv_blocks.py b/hyperion/torch/layer_blocks/mbconv_blocks.py index 89c746ea..8a956b21 100644 --- a/hyperion/torch/layer_blocks/mbconv_blocks.py +++ b/hyperion/torch/layer_blocks/mbconv_blocks.py @@ -7,12 +7,13 @@ import torch import torch.nn as nn -# from torch.nn import Conv2d, BatchNorm2d - from ..layers import ActivationFactory as AF from ..layers import DropConnect2d from .se_blocks import SEBlock2D, TSEBlock2D +# from torch.nn import Conv2d, BatchNorm2d + + def _conv1x1(in_channels, out_channels, stride=1, bias=False): """1x1 convolution""" diff --git a/hyperion/torch/layer_blocks/res2net1d_blocks.py b/hyperion/torch/layer_blocks/res2net1d_blocks.py index 804dbbd3..1decc327 100644 --- a/hyperion/torch/layer_blocks/res2net1d_blocks.py +++ b/hyperion/torch/layer_blocks/res2net1d_blocks.py @@ -3,12 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import math + import torch import torch.nn as nn -from torch.nn import Conv1d, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d from ..layers import ActivationFactory as AF -from ..layers import Dropout1d, DropConnect1d +from ..layers import DropConnect1d, Dropout1d from .se_blocks import SEBlock1d diff 
--git a/hyperion/torch/layer_blocks/res2net2d_blocks.py b/hyperion/torch/layer_blocks/res2net2d_blocks.py index 26d19a9a..d833a5e3 100644 --- a/hyperion/torch/layer_blocks/res2net2d_blocks.py +++ b/hyperion/torch/layer_blocks/res2net2d_blocks.py @@ -3,9 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import math + import torch import torch.nn as nn -from torch.nn import Conv2d, BatchNorm2d, Dropout2d +from torch.nn import BatchNorm2d, Conv2d, Dropout2d from ..layers import ActivationFactory as AF from .se_blocks import SEBlock2d, TSEBlock2d diff --git a/hyperion/torch/layer_blocks/res2net_blocks.py b/hyperion/torch/layer_blocks/res2net_blocks.py index 072926c9..6a785956 100644 --- a/hyperion/torch/layer_blocks/res2net_blocks.py +++ b/hyperion/torch/layer_blocks/res2net_blocks.py @@ -3,12 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import math + import torch import torch.nn as nn -from torch.nn import Conv2d, BatchNorm2d, Dropout2d +from torch.nn import BatchNorm2d, Conv2d, Dropout2d from ..layers import ActivationFactory as AF -from .se_blocks import SEBlock2d, TSEBlock2d, FwSEBlock2d, CFwSEBlock2d +from .se_blocks import CFwSEBlock2d, FwSEBlock2d, SEBlock2d, TSEBlock2d def _conv3x3(in_channels, out_channels, stride=1, groups=1, dilation=1, bias=False): diff --git a/hyperion/torch/layer_blocks/resetdnn_blocks.py b/hyperion/torch/layer_blocks/resetdnn_blocks.py index 775118d1..dfea3720 100644 --- a/hyperion/torch/layer_blocks/resetdnn_blocks.py +++ b/hyperion/torch/layer_blocks/resetdnn_blocks.py @@ -7,7 +7,7 @@ import numpy as np import torch.nn as nn -from torch.nn import Conv1d, Linear, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d, Linear from ..layers import ActivationFactory as AF from ..layers import Dropout1d diff --git a/hyperion/torch/layer_blocks/resnet1d_blocks.py b/hyperion/torch/layer_blocks/resnet1d_blocks.py index ca99bb3d..dd914eba 100644 --- a/hyperion/torch/layer_blocks/resnet1d_blocks.py +++ b/hyperion/torch/layer_blocks/resnet1d_blocks.py @@ -3,10 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import torch.nn as nn -from torch.nn import Conv1d, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d from ..layers import ActivationFactory as AF -from ..layers import Dropout1d, DropConnect1d, Interpolate +from ..layers import DropConnect1d, Dropout1d, Interpolate from ..layers.subpixel_convs import SubPixelConv1d from .se_blocks import SEBlock1d diff --git a/hyperion/torch/layer_blocks/resnet2d_blocks.py b/hyperion/torch/layer_blocks/resnet2d_blocks.py index 65761526..7fe89b56 100644 --- a/hyperion/torch/layer_blocks/resnet2d_blocks.py +++ b/hyperion/torch/layer_blocks/resnet2d_blocks.py @@ -3,7 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import torch.nn as nn -from torch.nn import Conv2d, BatchNorm2d, Dropout2d +from torch.nn import BatchNorm2d, Conv2d, Dropout2d from ..layers import ActivationFactory as AF from ..layers.subpixel_convs import SubPixelConv2d diff --git a/hyperion/torch/layer_blocks/resnet_blocks.py b/hyperion/torch/layer_blocks/resnet_blocks.py index 83e6d174..e25c0cbb 100644 --- a/hyperion/torch/layer_blocks/resnet_blocks.py +++ b/hyperion/torch/layer_blocks/resnet_blocks.py @@ -4,8 +4,8 @@ """ import torch.nn as nn -from torch.nn import Conv2d, BatchNorm2d, Dropout2d import torch.nn.functional as nnf +from torch.nn import BatchNorm2d, Conv2d, Dropout2d from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/layer_blocks/se_blocks.py 
b/hyperion/torch/layer_blocks/se_blocks.py index e99d545e..c53d5ecc 100644 --- a/hyperion/torch/layer_blocks/se_blocks.py +++ b/hyperion/torch/layer_blocks/se_blocks.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn -from torch.nn import Conv2d, Conv1d +from torch.nn import Conv1d, Conv2d from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/layer_blocks/seresnet_blocks.py b/hyperion/torch/layer_blocks/seresnet_blocks.py index 5074f466..4807e94b 100644 --- a/hyperion/torch/layer_blocks/seresnet_blocks.py +++ b/hyperion/torch/layer_blocks/seresnet_blocks.py @@ -5,11 +5,11 @@ import torch import torch.nn as nn -from torch.nn import Conv2d, Linear, BatchNorm2d, Dropout2d +from torch.nn import BatchNorm2d, Conv2d, Dropout2d, Linear from ..layers import ActivationFactory as AF -from .se_blocks import SEBlock2d, TSEBlock2d, FwSEBlock2d, CFwSEBlock2d from .resnet_blocks import ResNetBasicBlock, ResNetBNBlock +from .se_blocks import CFwSEBlock2d, FwSEBlock2d, SEBlock2d, TSEBlock2d class SEResNetBasicBlock(ResNetBasicBlock): diff --git a/hyperion/torch/layer_blocks/spine_blocks.py b/hyperion/torch/layer_blocks/spine_blocks.py index c97cb027..bb7a454a 100644 --- a/hyperion/torch/layer_blocks/spine_blocks.py +++ b/hyperion/torch/layer_blocks/spine_blocks.py @@ -3,14 +3,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging + import torch.nn as nn -from torch.nn import Conv2d, BatchNorm2d, Dropout2d import torch.nn.functional as nnf +from torch.nn import BatchNorm2d, Conv2d, Dropout2d -from ..layers.subpixel_convs import SubPixelConv2d from ..layers import ActivationFactory as AF - -import logging +from ..layers.subpixel_convs import SubPixelConv2d class Interpolate(nn.Module): diff --git a/hyperion/torch/layer_blocks/tdnn_blocks.py b/hyperion/torch/layer_blocks/tdnn_blocks.py index e979b7db..c1a21d52 100644 --- a/hyperion/torch/layer_blocks/tdnn_blocks.py +++ b/hyperion/torch/layer_blocks/tdnn_blocks.py @@ -4,7 +4,7 @@ """ import torch.nn as nn -from torch.nn import Conv1d, Linear, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d, Linear from ..layers import ActivationFactory as AF from ..layers import Dropout1d diff --git a/hyperion/torch/layers/__init__.py b/hyperion/torch/layers/__init__.py index 45ce75f8..42b40303 100644 --- a/hyperion/torch/layers/__init__.py +++ b/hyperion/torch/layers/__init__.py @@ -3,29 +3,20 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .dropout import Dropout1d, DropConnect2d, DropConnect1d -from .global_pool import * - from .activation_factory import ActivationFactory -from .norm_layer_factory import NormLayer2dFactory, NormLayer1dFactory -from .pool_factory import GlobalPool1dFactory - -from .margin_losses import CosLossOutput, ArcLossOutput, SubCenterArcLossOutput - +from .attention import (LocalScaledDotProdAttRelPosEncV1, + LocalScaledDotProdAttV1, ScaledDotProdAttRelPosEncV1, + ScaledDotProdAttV1) from .audio_feats import * from .audio_feats_factory import AudioFeatsFactory -from .spec_augment import AxisMasker, SpecWarper, SpecAugment -from .mvn import MeanVarianceNorm - -from .attention import ( - ScaledDotProdAttV1, - LocalScaledDotProdAttV1, - ScaledDotProdAttRelPosEncV1, - LocalScaledDotProdAttRelPosEncV1, -) -from .pos_encoder import PosEncoder, RelPosEncoder, NoPosEncoder - -from .subpixel_convs import SubPixelConv1d, SubPixelConv2d, ICNR1d, ICNR2d -from .interpolate import Interpolate - from .calibrators import LinBinCalibrator +from .dropout import DropConnect1d, DropConnect2d, 
Dropout1d +from .global_pool import * +from .interpolate import Interpolate +from .margin_losses import ArcLossOutput, CosLossOutput, SubCenterArcLossOutput +from .mvn import MeanVarianceNorm +from .norm_layer_factory import NormLayer1dFactory, NormLayer2dFactory +from .pool_factory import GlobalPool1dFactory +from .pos_encoder import NoPosEncoder, PosEncoder, RelPosEncoder +from .spec_augment import AxisMasker, SpecAugment, SpecWarper +from .subpixel_convs import ICNR1d, ICNR2d, SubPixelConv1d, SubPixelConv2d diff --git a/hyperion/torch/layers/activation_factory.py b/hyperion/torch/layers/activation_factory.py index 1d3bdfd2..d07b184e 100644 --- a/hyperion/torch/layers/activation_factory.py +++ b/hyperion/torch/layers/activation_factory.py @@ -5,6 +5,7 @@ # import torch.nn as nn + from .swish import Swish act_dict = { diff --git a/hyperion/torch/layers/audio_feats.py b/hyperion/torch/layers/audio_feats.py index 34cb9aa3..3bc4add9 100644 --- a/hyperion/torch/layers/audio_feats.py +++ b/hyperion/torch/layers/audio_feats.py @@ -4,12 +4,12 @@ """ # -import math import logging +import math import torch -import torch.nn as nn import torch.cuda.amp as amp +import torch.nn as nn try: from torch.fft import rfft as torch_rfft diff --git a/hyperion/torch/layers/audio_feats_factory.py b/hyperion/torch/layers/audio_feats_factory.py index 71c3a8e8..a8398dac 100644 --- a/hyperion/torch/layers/audio_feats_factory.py +++ b/hyperion/torch/layers/audio_feats_factory.py @@ -2,11 +2,12 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser import re -from ...utils.misc import str2bool +from jsonargparse import ActionParser, ArgumentParser + from ...np.feats.filter_banks import FilterBankFactory as FBF +from ...utils.misc import str2bool from .audio_feats import * FFT = "fft" diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index 5001bfd0..5e38494f 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -4,6 +4,7 @@ """ import logging import math + import numpy as np import torch diff --git a/hyperion/torch/layers/margin_losses.py b/hyperion/torch/layers/margin_losses.py index acb7a514..3f991567 100644 --- a/hyperion/torch/layers/margin_losses.py +++ b/hyperion/torch/layers/margin_losses.py @@ -3,13 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys import logging import math +import sys import torch -import torch.nn as nn import torch.cuda.amp as amp +import torch.nn as nn def _l2_norm(x, axis=-1): diff --git a/hyperion/torch/layers/mvn.py b/hyperion/torch/layers/mvn.py index 4f569089..4b4c5927 100644 --- a/hyperion/torch/layers/mvn.py +++ b/hyperion/torch/layers/mvn.py @@ -2,7 +2,7 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn diff --git a/hyperion/torch/layers/pdf_storage.py b/hyperion/torch/layers/pdf_storage.py index f3f34b37..6a87cd0d 100644 --- a/hyperion/torch/layers/pdf_storage.py +++ b/hyperion/torch/layers/pdf_storage.py @@ -5,8 +5,8 @@ # import torch -import torch.nn as nn import torch.distributions as pdf +import torch.nn as nn class StdNormal(nn.Module): diff --git a/hyperion/torch/layers/pool_factory.py 
b/hyperion/torch/layers/pool_factory.py index 723c64a4..84d0cbf1 100644 --- a/hyperion/torch/layers/pool_factory.py +++ b/hyperion/torch/layers/pool_factory.py @@ -2,7 +2,8 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, ArgumentParser + import torch.nn as nn from .global_pool import * diff --git a/hyperion/torch/layers/spec_augment.py b/hyperion/torch/layers/spec_augment.py index a7ebcfb1..f4e03842 100644 --- a/hyperion/torch/layers/spec_augment.py +++ b/hyperion/torch/layers/spec_augment.py @@ -3,7 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn diff --git a/hyperion/torch/layers/tensor2pdf.py b/hyperion/torch/layers/tensor2pdf.py index 55c890a3..41d1bc37 100644 --- a/hyperion/torch/layers/tensor2pdf.py +++ b/hyperion/torch/layers/tensor2pdf.py @@ -5,9 +5,9 @@ # import torch +import torch.distributions as pdf import torch.nn as nn import torch.nn.functional as nnf -import torch.distributions as pdf class Tensor2PDF(nn.Module): diff --git a/hyperion/torch/layers/tensor2pdf1.py b/hyperion/torch/layers/tensor2pdf1.py index 87ba3475..45c51f17 100644 --- a/hyperion/torch/layers/tensor2pdf1.py +++ b/hyperion/torch/layers/tensor2pdf1.py @@ -4,8 +4,8 @@ """ import torch -import torch.nn as nn import torch.distributions as pdf +import torch.nn as nn class Tensor2PDF(nn.Module): diff --git a/hyperion/torch/layers/vq.py b/hyperion/torch/layers/vq.py index c56b58f6..4a59b305 100644 --- a/hyperion/torch/layers/vq.py +++ b/hyperion/torch/layers/vq.py @@ -5,9 +5,9 @@ import math import torch +import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F -import torch.distributed as dist from ..utils import seq_lengths_to_mask diff --git a/hyperion/torch/loggers/__init__.py b/hyperion/torch/loggers/__init__.py index c48b9965..8842393c 100644 --- a/hyperion/torch/loggers/__init__.py +++ b/hyperion/torch/loggers/__init__.py @@ -3,9 +3,9 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .csv_logger import CSVLogger from .logger import Logger from .logger_list import LoggerList -from .csv_logger import CSVLogger from .prog_logger import ProgLogger from .tensorboard_logger import TensorBoardLogger from .wandb_logger import WAndBLogger diff --git a/hyperion/torch/loggers/csv_logger.py b/hyperion/torch/loggers/csv_logger.py index 402ddcd5..67fdc464 100644 --- a/hyperion/torch/loggers/csv_logger.py +++ b/hyperion/torch/loggers/csv_logger.py @@ -3,9 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os import csv +import os from collections import OrderedDict as ODict + import numpy as np from .logger import Logger diff --git a/hyperion/torch/loggers/logger.py b/hyperion/torch/loggers/logger.py index 46c1130d..7e9c91f2 100644 --- a/hyperion/torch/loggers/logger.py +++ b/hyperion/torch/loggers/logger.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch.distributed as dist diff --git a/hyperion/torch/loggers/logger_list.py b/hyperion/torch/loggers/logger_list.py index 20ae58ec..0291a01f 100644 --- a/hyperion/torch/loggers/logger_list.py +++ b/hyperion/torch/loggers/logger_list.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch.distributed as dist from .tensorboard_logger import 
TensorBoardLogger as TBL diff --git a/hyperion/torch/loggers/prog_logger.py b/hyperion/torch/loggers/prog_logger.py index 26479197..8df63b15 100644 --- a/hyperion/torch/loggers/prog_logger.py +++ b/hyperion/torch/loggers/prog_logger.py @@ -3,8 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import time import logging +import time from collections import OrderedDict import numpy as np diff --git a/hyperion/torch/loggers/tensorboard_logger.py b/hyperion/torch/loggers/tensorboard_logger.py index 314757d1..a80fa175 100644 --- a/hyperion/torch/loggers/tensorboard_logger.py +++ b/hyperion/torch/loggers/tensorboard_logger.py @@ -3,6 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import re + from torch.utils.tensorboard import SummaryWriter from .logger import Logger diff --git a/hyperion/torch/loggers/wandb_logger.py b/hyperion/torch/loggers/wandb_logger.py index c864e9b1..094f619a 100644 --- a/hyperion/torch/loggers/wandb_logger.py +++ b/hyperion/torch/loggers/wandb_logger.py @@ -2,8 +2,8 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import re import os +import re try: import wandb diff --git a/hyperion/torch/lr_schedulers/__init__.py b/hyperion/torch/lr_schedulers/__init__.py index be77dc15..7d1b07db 100644 --- a/hyperion/torch/lr_schedulers/__init__.py +++ b/hyperion/torch/lr_schedulers/__init__.py @@ -4,11 +4,11 @@ """ -from .lr_scheduler import LRScheduler -from .red_lr_on_plateau import ReduceLROnPlateau +from .cos_lr import AdamCosineLR, CosineLR from .exp_lr import ExponentialLR -from .cos_lr import CosineLR, AdamCosineLR +from .factory import LRSchedulerFactory from .invpow_lr import InvPowLR +from .lr_scheduler import LRScheduler from .noam_lr import NoamLR +from .red_lr_on_plateau import ReduceLROnPlateau from .triangular_lr import TriangularLR -from .factory import LRSchedulerFactory diff --git a/hyperion/torch/lr_schedulers/cos_lr.py b/hyperion/torch/lr_schedulers/cos_lr.py index 5caf12bb..b9e7d069 100644 --- a/hyperion/torch/lr_schedulers/cos_lr.py +++ b/hyperion/torch/lr_schedulers/cos_lr.py @@ -4,8 +4,8 @@ """ -import math import logging +import math import torch diff --git a/hyperion/torch/lr_schedulers/factory.py b/hyperion/torch/lr_schedulers/factory.py index 3fef6e93..d3111140 100644 --- a/hyperion/torch/lr_schedulers/factory.py +++ b/hyperion/torch/lr_schedulers/factory.py @@ -2,15 +2,15 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, ArgumentParser import torch -from .red_lr_on_plateau import ReduceLROnPlateau +from .cos_lr import AdamCosineLR, CosineLR from .exp_lr import ExponentialLR from .invpow_lr import InvPowLR -from .cos_lr import CosineLR, AdamCosineLR from .noam_lr import NoamLR +from .red_lr_on_plateau import ReduceLROnPlateau from .triangular_lr import TriangularLR diff --git a/hyperion/torch/lr_schedulers/noam_lr.py b/hyperion/torch/lr_schedulers/noam_lr.py index 4acdc3b9..7d034f77 100644 --- a/hyperion/torch/lr_schedulers/noam_lr.py +++ b/hyperion/torch/lr_schedulers/noam_lr.py @@ -2,12 +2,13 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import math import logging +import math + +from .invpow_lr import InvPowLR # import torch -from .invpow_lr import InvPowLR class NoamLR(InvPowLR): 
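[Annotation, not part of the patch] The noam_lr.py hunk above shows that NoamLR is implemented as a subclass of InvPowLR. For readers unfamiliar with the rule, the sketch below is a minimal standalone version of the Noam schedule from Vaswani et al. (linear warmup followed by inverse-square-root decay); it is illustrative only, and the function name and default values are assumptions, not hyperion's actual API.

import math

def noam_lr(step, d_model=512, warmup_steps=4000):
    # Linear warmup for warmup_steps updates, then decay as step**-0.5,
    # i.e. an inverse-power schedule with exponent 0.5 (hence InvPowLR).
    step = max(step, 1)  # guard against step 0
    return d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

At step == warmup_steps the two branches of the min() coincide, so the warmup ramp meets the decay tail smoothly; writing the tail as a power law is what lets NoamLR reuse the inverse-power scheduler instead of duplicating it.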
diff --git a/hyperion/torch/lr_schedulers/triangular_lr.py b/hyperion/torch/lr_schedulers/triangular_lr.py index f2578e1d..10e3f83d 100644 --- a/hyperion/torch/lr_schedulers/triangular_lr.py +++ b/hyperion/torch/lr_schedulers/triangular_lr.py @@ -4,8 +4,8 @@ """ -import math import logging +import math import torch diff --git a/hyperion/torch/metrics/__init__.py b/hyperion/torch/metrics/__init__.py index b4a2eaac..33d67c21 100644 --- a/hyperion/torch/metrics/__init__.py +++ b/hyperion/torch/metrics/__init__.py @@ -4,6 +4,6 @@ """ -from .metrics import TorchMetric -from .accuracy_functional import * from .accuracy import * +from .accuracy_functional import * +from .metrics import TorchMetric diff --git a/hyperion/torch/metrics/accuracy.py b/hyperion/torch/metrics/accuracy.py index ebd02e32..93d71683 100644 --- a/hyperion/torch/metrics/accuracy.py +++ b/hyperion/torch/metrics/accuracy.py @@ -5,8 +5,8 @@ import torch -from .metrics import TorchMetric from .accuracy_functional import * +from .metrics import TorchMetric class CategoricalAccuracy(TorchMetric): diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index e953f58c..db984616 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -4,19 +4,14 @@ """ -from .xvectors.xvector import XVector -from .xvectors.tdnn_xvector import TDNNXVector -from .xvectors.resnet_xvector import ResNetXVector -from .xvectors.efficient_net_xvector import EfficientNetXVector -from .xvectors.transformer_xvector_v1 import TransformerXVectorV1 -from .xvectors.spinenet_xvector import SpineNetXVector -from .xvectors.resnet1d_xvector import ResNet1dXVector - -from .wav2xvectors import ( - HFWav2Vec2ResNet1dXVector, - HFHubert2ResNet1dXVector, - HFWavLM2ResNet1dXVector, -) - from .vae.vae import VAE from .vae.vq_vae import VQVAE +from .wav2xvectors import (HFHubert2ResNet1dXVector, HFWav2Vec2ResNet1dXVector, + HFWavLM2ResNet1dXVector) +from .xvectors.efficient_net_xvector import EfficientNetXVector +from .xvectors.resnet1d_xvector import ResNet1dXVector +from .xvectors.resnet_xvector import ResNetXVector +from .xvectors.spinenet_xvector import SpineNetXVector +from .xvectors.tdnn_xvector import TDNNXVector +from .xvectors.transformer_xvector_v1 import TransformerXVectorV1 +from .xvectors.xvector import XVector diff --git a/hyperion/torch/models/ae/ae.py b/hyperion/torch/models/ae/ae.py index 57d30edc..32cd68ea 100644 --- a/hyperion/torch/models/ae/ae.py +++ b/hyperion/torch/models/ae/ae.py @@ -8,8 +8,8 @@ import torch import torch.nn as nn -from ...torch_model import TorchModel from ...narchs import TorchNALoader +from ...torch_model import TorchModel class AE(TorchModel): diff --git a/hyperion/torch/models/plda/plda_base.py b/hyperion/torch/models/plda/plda_base.py index d6100a36..2556627d 100644 --- a/hyperion/torch/models/plda/plda_base.py +++ b/hyperion/torch/models/plda/plda_base.py @@ -2,15 +2,15 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import time import logging import math +import time import torch import torch.nn as nn from ...torch_model import TorchModel -from ...utils.misc import l2_norm, get_selfsim_tarnon +from ...utils.misc import get_selfsim_tarnon, l2_norm class PLDABase(TorchModel): diff --git a/hyperion/torch/models/plda/splda.py b/hyperion/torch/models/plda/splda.py index 0025e4e7..2272793e 100644 --- a/hyperion/torch/models/plda/splda.py +++ b/hyperion/torch/models/plda/splda.py @@ -2,8 +2,8 @@ 
Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import time import logging +import time import torch import torch.nn as nn diff --git a/hyperion/torch/models/tvector/__init__.py b/hyperion/torch/models/tvector/__init__.py index 98db2561..36999146 100644 --- a/hyperion/torch/models/tvector/__init__.py +++ b/hyperion/torch/models/tvector/__init__.py @@ -3,6 +3,6 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .resnet_tvector import ResNetTVector # t-vectors from .tvector import TVector -from .resnet_tvector import ResNetTVector diff --git a/hyperion/torch/models/tvector/resnet_tvector.py b/hyperion/torch/models/tvector/resnet_tvector.py index d74272aa..c84a38fc 100644 --- a/hyperion/torch/models/tvector/resnet_tvector.py +++ b/hyperion/torch/models/tvector/resnet_tvector.py @@ -9,8 +9,8 @@ import torch import torch.nn as nn -from .xvector import XVector from ..narchs import ResNetFactory as RNF +from .xvector import XVector class ResNetXVector(XVector): diff --git a/hyperion/torch/models/tvector/tvector.py b/hyperion/torch/models/tvector/tvector.py index 8a3758fb..a46fc324 100644 --- a/hyperion/torch/models/tvector/tvector.py +++ b/hyperion/torch/models/tvector/tvector.py @@ -4,14 +4,15 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from ..layers import GlobalPool1dFactory as PF -from ..layer_blocks import TDNNBlock from ...narchs import ClassifHead, ConformerEncoderV1, TorchNALoader +from ..layer_blocks import TDNNBlock +from ..layers import GlobalPool1dFactory as PF from ..torch_model import TorchModel from ..utils import eval_nnet_by_chunks diff --git a/hyperion/torch/models/vae/vae.py b/hyperion/torch/models/vae/vae.py index 32239718..86938bf2 100644 --- a/hyperion/torch/models/vae/vae.py +++ b/hyperion/torch/models/vae/vae.py @@ -6,13 +6,13 @@ import logging import torch -import torch.nn as nn import torch.distributions as pdf +import torch.nn as nn -from ...torch_model import TorchModel -from ...narchs import TorchNALoader -from ...layers import tensor2pdf as t2pdf from ...layers import pdf_storage +from ...layers import tensor2pdf as t2pdf +from ...narchs import TorchNALoader +from ...torch_model import TorchModel class VAE(TorchModel): diff --git a/hyperion/torch/models/vae/vq_vae.py b/hyperion/torch/models/vae/vq_vae.py index 9fcc22a0..e86cd04f 100644 --- a/hyperion/torch/models/vae/vq_vae.py +++ b/hyperion/torch/models/vae/vq_vae.py @@ -6,13 +6,13 @@ import logging import torch -import torch.nn as nn import torch.distributions as pdf +import torch.nn as nn -from ...torch_model import TorchModel -from ...narchs import TorchNALoader from ...layers import tensor2pdf as t2pdf from ...layers import vq +from ...narchs import TorchNALoader +from ...torch_model import TorchModel class VQVAE(TorchModel): diff --git a/hyperion/torch/models/wav2xvectors/__init__.py b/hyperion/torch/models/wav2xvectors/__init__.py index 015c8d0f..62123d13 100644 --- a/hyperion/torch/models/wav2xvectors/__init__.py +++ b/hyperion/torch/models/wav2xvectors/__init__.py @@ -4,14 +4,12 @@ """ -# from .wav2tdnn_xvector import Wav2TDNNXVector -from .wav2resnet_xvector import Wav2ResNetXVector - +from .hf_hubert2resnet1d_xvector import HFHubert2ResNet1dXVector +from .hf_wav2vec2resnet1d_xvector import HFWav2Vec2ResNet1dXVector +from .hf_wavlm2resnet1d_xvector import HFWavLM2ResNet1dXVector # from 
.wav2efficient_net_xvector import Wav2EfficientNetXVector # from .wav2transformer_xvector_v1 import Wav2TransformerXVectorV1 # from .wav2spinenet_xvector import Wav2SpineNetXVector from .wav2resnet1d_xvector import Wav2ResNet1dXVector - -from .hf_wav2vec2resnet1d_xvector import HFWav2Vec2ResNet1dXVector -from .hf_hubert2resnet1d_xvector import HFHubert2ResNet1dXVector -from .hf_wavlm2resnet1d_xvector import HFWavLM2ResNet1dXVector +# from .wav2tdnn_xvector import Wav2TDNNXVector +from .wav2resnet_xvector import Wav2ResNetXVector diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py index bd5c3f1b..b75ac53f 100644 --- a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py @@ -3,14 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from jsonargparse import ArgumentParser, ActionParser -from typing import Union, Dict, Optional +from typing import Dict, Optional, Union + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from ..xvectors import ResNet1dXVector from ...tpm import HFHubert +from ..xvectors import ResNet1dXVector from .hf_wav2xvector import HFWav2XVector diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py index a5166d4d..43ab2382 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py @@ -3,14 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from jsonargparse import ArgumentParser, ActionParser -from typing import Union, Dict, Optional +from typing import Dict, Optional, Union + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from ..xvectors import ResNet1dXVector from ...tpm import HFWav2Vec2 +from ..xvectors import ResNet1dXVector from .hf_wav2xvector import HFWav2XVector diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index bd1ec4cd..8a65f12e 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -2,18 +2,20 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import logging import contextlib -from jsonargparse import ArgumentParser, ActionParser +import logging + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -# import torch.nn.functional as nnf - from ...torch_model import TorchModel from ...utils import remove_silence +# import torch.nn.functional as nnf + + class HFWav2XVector(TorchModel): """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. 
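[Annotation, not part of the patch] The hf_wav2xvector.py hunk above reorders the imports of HFWav2XVector, the abstract base that couples a Hugging Face speech encoder (HFWav2Vec2, HFHubert, or HFWavLM, per the sibling files in this patch) with a ResNet1dXVector back-end. As a rough mental model only — the class name, constructor, and tensor shapes below are hypothetical, not hyperion's real signatures — the composition looks like this:

import torch
import torch.nn as nn

class HFFeats2XVector(nn.Module):
    # Hypothetical analogue of HFWav2XVector: a pretrained encoder maps
    # waveforms to frame-level features, and an x-vector network pools
    # them into a fixed-size speaker embedding.
    def __init__(self, hf_encoder: nn.Module, xvector_head: nn.Module):
        super().__init__()
        self.hf_encoder = hf_encoder
        self.xvector_head = xvector_head

    def forward(self, wav: torch.Tensor) -> torch.Tensor:
        feats = self.hf_encoder(wav)     # (batch, time, feat_dim)
        feats = feats.transpose(1, 2)    # (batch, feat_dim, time)
        return self.xvector_head(feats)  # (batch, embed_dim)

Each concrete subclass touched by this patch (HFWav2Vec2ResNet1dXVector, HFHubert2ResNet1dXVector, HFWavLM2ResNet1dXVector) pairs one such encoder with the same x-vector head, which is why they can share a single abstract base.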
diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py index 2f4b66ce..56a19130 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py @@ -3,14 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from jsonargparse import ArgumentParser, ActionParser -from typing import Union, Dict, Optional +from typing import Dict, Optional, Union + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from ..xvectors import ResNet1dXVector from ...tpm import HFWavLM +from ..xvectors import ResNet1dXVector from .hf_wav2xvector import HFWav2XVector diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py index 983fbac2..0d9f1bc4 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py @@ -4,13 +4,14 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from .wav2xvector import Wav2XVector from ..xvectors import ResNet1dXVector +from .wav2xvector import Wav2XVector class Wav2ResNet1dXVector(Wav2XVector): diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py index dea2e442..1f7283a0 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py @@ -4,13 +4,14 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from .wav2xvector import Wav2XVector from ..xvectors import ResNetXVector +from .wav2xvector import Wav2XVector class Wav2ResNetXVector(Wav2XVector): diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index c7a77f3e..824b5830 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -3,13 +3,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from ...torch_model import TorchModel from ...narchs import AudioFeatsMVN +from ...torch_model import TorchModel from ...utils import remove_silence diff --git a/hyperion/torch/models/xvectors/__init__.py b/hyperion/torch/models/xvectors/__init__.py index 408de716..57819128 100644 --- a/hyperion/torch/models/xvectors/__init__.py +++ b/hyperion/torch/models/xvectors/__init__.py @@ -4,10 +4,10 @@ """ -from .xvector import XVector -from .tdnn_xvector import TDNNXVector -from .resnet_xvector import ResNetXVector from .efficient_net_xvector import EfficientNetXVector -from .transformer_xvector_v1 import TransformerXVectorV1 -from .spinenet_xvector import SpineNetXVector from .resnet1d_xvector import ResNet1dXVector +from .resnet_xvector import ResNetXVector +from .spinenet_xvector import SpineNetXVector +from .tdnn_xvector import TDNNXVector +from .transformer_xvector_v1 import TransformerXVectorV1 +from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/efficient_net_xvector.py 
b/hyperion/torch/models/xvectors/efficient_net_xvector.py index df5965cd..a8663cd9 100644 --- a/hyperion/torch/models/xvectors/efficient_net_xvector.py +++ b/hyperion/torch/models/xvectors/efficient_net_xvector.py @@ -4,13 +4,14 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from .xvector import XVector from ...narchs import EfficientNet as EN +from .xvector import XVector class EfficientNetXVector(XVector): diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py index 5957c9f5..1bce0f87 100644 --- a/hyperion/torch/models/xvectors/resnet1d_xvector.py +++ b/hyperion/torch/models/xvectors/resnet1d_xvector.py @@ -4,13 +4,14 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from .xvector import XVector from ...narchs import ResNet1dEncoder as Encoder +from .xvector import XVector class ResNet1dXVector(XVector): diff --git a/hyperion/torch/models/xvectors/resnet_xvector.py b/hyperion/torch/models/xvectors/resnet_xvector.py index fe88ff57..c6889626 100644 --- a/hyperion/torch/models/xvectors/resnet_xvector.py +++ b/hyperion/torch/models/xvectors/resnet_xvector.py @@ -4,13 +4,14 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from .xvector import XVector from ...narchs import ResNetFactory as RNF +from .xvector import XVector class ResNetXVector(XVector): diff --git a/hyperion/torch/models/xvectors/spinenet_xvector.py b/hyperion/torch/models/xvectors/spinenet_xvector.py index 1e616570..203008be 100644 --- a/hyperion/torch/models/xvectors/spinenet_xvector.py +++ b/hyperion/torch/models/xvectors/spinenet_xvector.py @@ -3,14 +3,15 @@ Copyright 2020 Magdalena Rybicka Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from .xvector import XVector from ...narchs import SpineNetFactory as SNF +from .xvector import XVector class SpineNetXVector(XVector): diff --git a/hyperion/torch/models/xvectors/tdnn_xvector.py b/hyperion/torch/models/xvectors/tdnn_xvector.py index f28dc9b3..530ca63b 100644 --- a/hyperion/torch/models/xvectors/tdnn_xvector.py +++ b/hyperion/torch/models/xvectors/tdnn_xvector.py @@ -4,13 +4,14 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from .xvector import XVector from ...narchs import TDNNFactory as TF +from .xvector import XVector class TDNNXVector(XVector): diff --git a/hyperion/torch/models/xvectors/transformer_xvector_v1.py b/hyperion/torch/models/xvectors/transformer_xvector_v1.py index b3428783..7c55844a 100644 --- a/hyperion/torch/models/xvectors/transformer_xvector_v1.py +++ b/hyperion/torch/models/xvectors/transformer_xvector_v1.py @@ -4,13 +4,14 @@ """ import logging -from jsonargparse import ArgumentParser, ActionParser + +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from .xvector import XVector from ...narchs import TransformerEncoderV1 as TE +from .xvector import XVector class TransformerXVectorV1(XVector): diff --git 
a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 2072241d..3807bbd8 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -4,14 +4,15 @@ """ import logging from enum import Enum -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo from typing import Optional +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from ...layers import GlobalPool1dFactory as PF from ...layer_blocks import TDNNBlock +from ...layers import GlobalPool1dFactory as PF from ...narchs import ClassifHead, TorchNALoader from ...torch_model import TorchModel from ...utils import eval_nnet_by_chunks, scale_seq_lengths diff --git a/hyperion/torch/narchs/__init__.py b/hyperion/torch/narchs/__init__.py index 71cd9de4..c8504425 100644 --- a/hyperion/torch/narchs/__init__.py +++ b/hyperion/torch/narchs/__init__.py @@ -3,36 +3,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .fcnet import FCNetV1, FCNetV2 - -from .tdnn import TDNNV1 +from .audio_feats_mvn import AudioFeatsMVN +from .classif_head import ClassifHead +from .conformer_encoder_v1 import ConformerEncoderV1 +from .dc1d_decoder import DC1dDecoder +from .dc1d_encoder import DC1dEncoder +from .dc2d_decoder import DC2dDecoder +from .dc2d_encoder import DC2dEncoder +from .efficient_net import EfficientNet from .etdnn import ETDNNV1 +from .fcnet import FCNetV1, FCNetV2 from .resetdnn import ResETDNNV1 -from .tdnn_factory import TDNNFactory - from .resnet import * +from .resnet1d_decoder import ResNet1dDecoder +from .resnet1d_encoder import ResNet1dEncoder +from .resnet2d_decoder import ResNet2dDecoder +from .resnet2d_encoder import ResNet2dEncoder from .resnet_factory import ResNetFactory - from .spinenet import * from .spinenet_factory import SpineNetFactory - -from .transformer_encoder_v1 import TransformerEncoderV1 -from .conformer_encoder_v1 import ConformerEncoderV1 - -from .dc1d_encoder import DC1dEncoder -from .dc1d_decoder import DC1dDecoder -from .dc2d_encoder import DC2dEncoder -from .dc2d_decoder import DC2dDecoder - -from .resnet1d_encoder import ResNet1dEncoder -from .resnet1d_decoder import ResNet1dDecoder -from .resnet2d_encoder import ResNet2dEncoder -from .resnet2d_decoder import ResNet2dDecoder - -from .efficient_net import EfficientNet - -from .classif_head import ClassifHead - -from .audio_feats_mvn import AudioFeatsMVN - +from .tdnn import TDNNV1 +from .tdnn_factory import TDNNFactory from .torch_na_loader import TorchNALoader +from .transformer_encoder_v1 import TransformerEncoderV1 diff --git a/hyperion/torch/narchs/audio_feats_mvn.py b/hyperion/torch/narchs/audio_feats_mvn.py index 9092e9d8..160ee61b 100644 --- a/hyperion/torch/narchs/audio_feats_mvn.py +++ b/hyperion/torch/narchs/audio_feats_mvn.py @@ -2,7 +2,7 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index adfeceb3..5d179fdb 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -3,16 +3,17 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, 
ArgumentParser import torch import torch.nn as nn from torch.nn import Linear +from ..layer_blocks import FCBlock from ..layers import ActivationFactory as AF -from ..layers import CosLossOutput, ArcLossOutput, SubCenterArcLossOutput +from ..layers import ArcLossOutput, CosLossOutput from ..layers import NormLayer1dFactory as NLF -from ..layer_blocks import FCBlock +from ..layers import SubCenterArcLossOutput from .net_arch import NetArch diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index 4fabe8d2..3acd44d2 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -3,16 +3,17 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn -from ..layers import ActivationFactory as AF -from ..layers import NormLayer1dFactory as NLF -from ..layers import PosEncoder, RelPosEncoder, NoPosEncoder from ..layer_blocks import ConformerEncoderBlockV1 as EBlock from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler +from ..layers import ActivationFactory as AF +from ..layers import NoPosEncoder +from ..layers import NormLayer1dFactory as NLF +from ..layers import PosEncoder, RelPosEncoder from .net_arch import NetArch diff --git a/hyperion/torch/narchs/dc1d_decoder.py b/hyperion/torch/narchs/dc1d_decoder.py index 22f63de6..f5ab74d5 100644 --- a/hyperion/torch/narchs/dc1d_decoder.py +++ b/hyperion/torch/narchs/dc1d_decoder.py @@ -4,15 +4,17 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser import torch import torch.nn as nn +from ..layer_blocks import DC1dDecBlock from ..layers import ActivationFactory as AF +from ..layers import ICNR1d from ..layers import NormLayer1dFactory as NLF -from ..layer_blocks import DC1dDecBlock -from ..layers import SubPixelConv1d, ICNR1d +from ..layers import SubPixelConv1d from .net_arch import NetArch diff --git a/hyperion/torch/narchs/dc1d_encoder.py b/hyperion/torch/narchs/dc1d_encoder.py index 619851bb..0c331a5e 100644 --- a/hyperion/torch/narchs/dc1d_encoder.py +++ b/hyperion/torch/narchs/dc1d_encoder.py @@ -2,15 +2,16 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn +from ..layer_blocks.dc1d_blocks import DC1dEncBlock from ..layers import ActivationFactory as AF from ..layers import NormLayer1dFactory as NLF -from ..layer_blocks.dc1d_blocks import DC1dEncBlock from .net_arch import NetArch diff --git a/hyperion/torch/narchs/dc2d_decoder.py b/hyperion/torch/narchs/dc2d_decoder.py index 0166baca..4106cbfd 100644 --- a/hyperion/torch/narchs/dc2d_decoder.py +++ b/hyperion/torch/narchs/dc2d_decoder.py @@ -3,16 +3,18 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn +from ..layer_blocks import DC2dDecBlock from ..layers import ActivationFactory as AF +from ..layers import ICNR2d from ..layers import NormLayer2dFactory as NLF -from 
..layer_blocks import DC2dDecBlock -from ..layers import SubPixelConv2d, ICNR2d +from ..layers import SubPixelConv2d from .net_arch import NetArch diff --git a/hyperion/torch/narchs/dc2d_encoder.py b/hyperion/torch/narchs/dc2d_encoder.py index e847dbb6..ce7b9677 100644 --- a/hyperion/torch/narchs/dc2d_encoder.py +++ b/hyperion/torch/narchs/dc2d_encoder.py @@ -4,14 +4,15 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser import torch import torch.nn as nn +from ..layer_blocks import DC2dEncBlock from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF -from ..layer_blocks import DC2dEncBlock from .net_arch import NetArch diff --git a/hyperion/torch/narchs/efficient_net.py b/hyperion/torch/narchs/efficient_net.py index 273fa183..b9efdcef 100644 --- a/hyperion/torch/narchs/efficient_net.py +++ b/hyperion/torch/narchs/efficient_net.py @@ -4,15 +4,16 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser import torch import torch.nn as nn -from torch.nn import Linear, Dropout +from torch.nn import Dropout, Linear +from ..layer_blocks import MBConvBlock, MBConvInOutBlock from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF -from ..layer_blocks import MBConvBlock, MBConvInOutBlock from .net_arch import NetArch diff --git a/hyperion/torch/narchs/etdnn.py b/hyperion/torch/narchs/etdnn.py index ebc14534..a73439b7 100644 --- a/hyperion/torch/narchs/etdnn.py +++ b/hyperion/torch/narchs/etdnn.py @@ -9,9 +9,9 @@ import torch.nn as nn from torch.nn import Conv1d, Linear +from ..layer_blocks import ETDNNBlock from ..layers import ActivationFactory as AF from ..layers import NormLayer1dFactory as NLF -from ..layer_blocks import ETDNNBlock from .net_arch import NetArch diff --git a/hyperion/torch/narchs/fcnet.py b/hyperion/torch/narchs/fcnet.py index e0c8afd5..cdbf1940 100644 --- a/hyperion/torch/narchs/fcnet.py +++ b/hyperion/torch/narchs/fcnet.py @@ -4,12 +4,12 @@ """ import torch.nn as nn -from torch.nn import Linear, BatchNorm1d, Dropout +from torch.nn import BatchNorm1d, Dropout, Linear +from ..layer_blocks import FCBlock from ..layers import ActivationFactory as AF from ..layers import NormLayer1dFactory as NLF from .net_arch import NetArch -from ..layer_blocks import FCBlock class FCNetV1(NetArch): diff --git a/hyperion/torch/narchs/resetdnn.py b/hyperion/torch/narchs/resetdnn.py index 2c7f3e00..eb964fa5 100644 --- a/hyperion/torch/narchs/resetdnn.py +++ b/hyperion/torch/narchs/resetdnn.py @@ -7,11 +7,11 @@ import torch import torch.nn as nn -from torch.nn import Conv1d, Linear, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d, Linear +from ..layer_blocks import ETDNNBlock, ResETDNNBlock, TDNNBlock from ..layers import ActivationFactory as AF from ..layers import NormLayer1dFactory as NLF -from ..layer_blocks import ResETDNNBlock, ETDNNBlock, TDNNBlock from .net_arch import NetArch diff --git a/hyperion/torch/narchs/resnet.py b/hyperion/torch/narchs/resnet.py index 34ac9b81..e3264f33 100644 --- a/hyperion/torch/narchs/resnet.py +++ b/hyperion/torch/narchs/resnet.py @@ -8,21 +8,15 @@ import torch import torch.nn as nn -from torch.nn import Conv1d, Linear, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d, Linear -from ..utils import seq_lengths_to_mask, scale_seq_lengths +from ..layer_blocks import 
(Res2NetBasicBlock, Res2NetBNBlock, + ResNetBasicBlock, ResNetBNBlock, + ResNetEndpointBlock, ResNetInputBlock, + SEResNetBasicBlock, SEResNetBNBlock) from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF -from ..layer_blocks import ( - ResNetInputBlock, - ResNetBasicBlock, - ResNetBNBlock, - SEResNetBasicBlock, - SEResNetBNBlock, - Res2NetBasicBlock, - Res2NetBNBlock, -) -from ..layer_blocks import ResNetEndpointBlock +from ..utils import scale_seq_lengths, seq_lengths_to_mask from .net_arch import NetArch diff --git a/hyperion/torch/narchs/resnet1d_decoder.py b/hyperion/torch/narchs/resnet1d_decoder.py index 3ab454ae..0c577174 100644 --- a/hyperion/torch/narchs/resnet1d_decoder.py +++ b/hyperion/torch/narchs/resnet1d_decoder.py @@ -2,17 +2,20 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn +from ..layer_blocks import (DC1dDecBlock, ResNet1dBasicDecBlock, + ResNet1dBNDecBlock, SEResNet1dBasicDecBlock, + SEResNet1dBNDecBlock) from ..layers import ActivationFactory as AF +from ..layers import ICNR1d from ..layers import NormLayer1dFactory as NLF -from ..layer_blocks import ResNet1dBasicDecBlock, ResNet1dBNDecBlock, DC1dDecBlock -from ..layer_blocks import SEResNet1dBasicDecBlock, SEResNet1dBNDecBlock -from ..layers import SubPixelConv1d, ICNR1d +from ..layers import SubPixelConv1d from .net_arch import NetArch diff --git a/hyperion/torch/narchs/resnet1d_encoder.py b/hyperion/torch/narchs/resnet1d_encoder.py index deef9c59..5bdad186 100644 --- a/hyperion/torch/narchs/resnet1d_encoder.py +++ b/hyperion/torch/narchs/resnet1d_encoder.py @@ -3,28 +3,22 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo -import math import logging +import math import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser import torch import torch.nn as nn -from ..utils import seq_lengths_to_mask +from ..layer_blocks import (DC1dEncBlock, Res2Net1dBasicBlock, + Res2Net1dBNBlock, ResNet1dBasicBlock, + ResNet1dBNBlock, ResNet1dEndpoint, + SEResNet1dBasicBlock, SEResNet1dBNBlock) from ..layers import ActivationFactory as AF from ..layers import NormLayer1dFactory as NLF -from ..layer_blocks import ( - ResNet1dBasicBlock, - ResNet1dBNBlock, - DC1dEncBlock, - ResNet1dEndpoint, - SEResNet1dBasicBlock, - SEResNet1dBNBlock, - Res2Net1dBasicBlock, - Res2Net1dBNBlock, -) +from ..utils import seq_lengths_to_mask from .net_arch import NetArch diff --git a/hyperion/torch/narchs/resnet2d_decoder.py b/hyperion/torch/narchs/resnet2d_decoder.py index 22b1e7a7..426b37f5 100644 --- a/hyperion/torch/narchs/resnet2d_decoder.py +++ b/hyperion/torch/narchs/resnet2d_decoder.py @@ -4,16 +4,19 @@ """ import math -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser import torch import torch.nn as nn +from ..layer_blocks import (DC2dDecBlock, ResNet2dBasicDecBlock, + ResNet2dBNDecBlock, SEResNet2dBasicDecBlock, + SEResNet2dBNDecBlock) from ..layers import ActivationFactory as AF +from ..layers import ICNR2d from ..layers import NormLayer2dFactory as NLF -from ..layer_blocks import ResNet2dBasicDecBlock, ResNet2dBNDecBlock, DC2dDecBlock -from ..layer_blocks 
import SEResNet2dBasicDecBlock, SEResNet2dBNDecBlock -from ..layers import SubPixelConv2d, ICNR2d +from ..layers import SubPixelConv2d from .net_arch import NetArch diff --git a/hyperion/torch/narchs/resnet2d_encoder.py b/hyperion/torch/narchs/resnet2d_encoder.py index 3af174cf..84e6599e 100644 --- a/hyperion/torch/narchs/resnet2d_encoder.py +++ b/hyperion/torch/narchs/resnet2d_encoder.py @@ -3,19 +3,21 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import math -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import logging +import math + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser import torch import torch.nn as nn -from ..utils import seq_lengths_to_mask +from ..layer_blocks import (DC2dEncBlock, Res2Net2dBasicBlock, + Res2Net2dBNBlock, ResNet2dBasicBlock, + ResNet2dBNBlock, SEResNet2dBasicBlock, + SEResNet2dBNBlock) from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF -from ..layer_blocks import ResNet2dBasicBlock, ResNet2dBNBlock, DC2dEncBlock -from ..layer_blocks import SEResNet2dBasicBlock, SEResNet2dBNBlock -from ..layer_blocks import Res2Net2dBasicBlock, Res2Net2dBNBlock +from ..utils import seq_lengths_to_mask from .net_arch import NetArch diff --git a/hyperion/torch/narchs/resnet_factory.py b/hyperion/torch/narchs/resnet_factory.py index c9d5806e..2d17a6d7 100644 --- a/hyperion/torch/narchs/resnet_factory.py +++ b/hyperion/torch/narchs/resnet_factory.py @@ -3,7 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from .resnet import * diff --git a/hyperion/torch/narchs/spinenet.py b/hyperion/torch/narchs/spinenet.py index 4185d9c4..117c0733 100644 --- a/hyperion/torch/narchs/spinenet.py +++ b/hyperion/torch/narchs/spinenet.py @@ -3,17 +3,19 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import numpy as np import logging + +import numpy as np + import torch import torch.nn as nn -from torch.nn import Conv1d, Linear, BatchNorm1d +from torch.nn import BatchNorm1d, Conv1d, Linear +from ..layer_blocks import (BlockSpec, Res2NetBasicBlock, Res2NetBNBlock, + ResNetBasicBlock, ResNetBNBlock, ResNetInputBlock, + SpineConv, SpineEndpoints, SpineResample) from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF -from ..layer_blocks import ResNetInputBlock, ResNetBasicBlock, ResNetBNBlock -from ..layer_blocks import Res2NetBNBlock, Res2NetBasicBlock -from ..layer_blocks import BlockSpec, SpineResample, SpineEndpoints, SpineConv from .net_arch import NetArch SPINENET_BLOCK_SPECS = [ diff --git a/hyperion/torch/narchs/spinenet_factory.py b/hyperion/torch/narchs/spinenet_factory.py index 9e94a1be..092cbd0e 100644 --- a/hyperion/torch/narchs/spinenet_factory.py +++ b/hyperion/torch/narchs/spinenet_factory.py @@ -2,7 +2,7 @@ Copyright 2020 Magdalena Rybicka Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from .spinenet import * diff --git a/hyperion/torch/narchs/tdnn.py b/hyperion/torch/narchs/tdnn.py index 6cdcbf85..55e47e6a 100644 --- a/hyperion/torch/narchs/tdnn.py +++ b/hyperion/torch/narchs/tdnn.py @@ -9,9 +9,9 @@ import torch.nn as nn from torch.nn import Linear +from ..layer_blocks import TDNNBlock from ..layers import ActivationFactory as AF from 
..layers import NormLayer1dFactory as NLF -from ..layer_blocks import TDNNBlock from .net_arch import NetArch diff --git a/hyperion/torch/narchs/tdnn_factory.py b/hyperion/torch/narchs/tdnn_factory.py index 6a9e6010..901cc9d0 100644 --- a/hyperion/torch/narchs/tdnn_factory.py +++ b/hyperion/torch/narchs/tdnn_factory.py @@ -3,11 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from .tdnn import TDNNV1 from .etdnn import ETDNNV1 from .resetdnn import ResETDNNV1 +from .tdnn import TDNNV1 class TDNNFactory(object): diff --git a/hyperion/torch/narchs/torch_na_loader.py b/hyperion/torch/narchs/torch_na_loader.py index 97bf5fb9..58152fc7 100644 --- a/hyperion/torch/narchs/torch_na_loader.py +++ b/hyperion/torch/narchs/torch_na_loader.py @@ -5,32 +5,24 @@ import torch -from .fcnet import FCNetV1 - -from .tdnn import TDNNV1 -from .etdnn import ETDNNV1 -from .resetdnn import ResETDNNV1 - -from .resnet import * - -from .transformer_encoder_v1 import TransformerEncoderV1 +from .audio_feats_mvn import AudioFeatsMVN +from .classif_head import ClassifHead from .conformer_encoder_v1 import ConformerEncoderV1 - -from .dc1d_encoder import DC1dEncoder from .dc1d_decoder import DC1dDecoder -from .dc2d_encoder import DC2dEncoder +from .dc1d_encoder import DC1dEncoder from .dc2d_decoder import DC2dDecoder - -from .resnet1d_encoder import ResNet1dEncoder +from .dc2d_encoder import DC2dEncoder +from .efficient_net import EfficientNet +from .etdnn import ETDNNV1 +from .fcnet import FCNetV1 +from .resetdnn import ResETDNNV1 +from .resnet import * from .resnet1d_decoder import ResNet1dDecoder -from .resnet2d_encoder import ResNet2dEncoder +from .resnet1d_encoder import ResNet1dEncoder from .resnet2d_decoder import ResNet2dDecoder - -from .efficient_net import EfficientNet - -from .classif_head import ClassifHead - -from .audio_feats_mvn import AudioFeatsMVN +from .resnet2d_encoder import ResNet2dEncoder +from .tdnn import TDNNV1 +from .transformer_encoder_v1 import TransformerEncoderV1 class TorchNALoader(object): diff --git a/hyperion/torch/narchs/transformer_encoder_v1.py b/hyperion/torch/narchs/transformer_encoder_v1.py index d2949c12..4468185e 100644 --- a/hyperion/torch/narchs/transformer_encoder_v1.py +++ b/hyperion/torch/narchs/transformer_encoder_v1.py @@ -3,15 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser +from jsonargparse import ActionParser, ArgumentParser import torch import torch.nn as nn +from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler +from ..layer_blocks import TransformerEncoderBlockV1 as EBlock from ..layers import ActivationFactory as AF from ..layers import PosEncoder, RelPosEncoder -from ..layer_blocks import TransformerEncoderBlockV1 as EBlock -from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler from .net_arch import NetArch diff --git a/hyperion/torch/optim/__init__.py b/hyperion/torch/optim/__init__.py index cba89796..fd05c755 100644 --- a/hyperion/torch/optim/__init__.py +++ b/hyperion/torch/optim/__init__.py @@ -3,6 +3,6 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .factory import OptimizerFactory from .fgsm import FGSM from .radam import RAdam -from .factory import OptimizerFactory diff --git a/hyperion/torch/optim/factory.py b/hyperion/torch/optim/factory.py index ab350098..95117b05 
100644 --- a/hyperion/torch/optim/factory.py +++ b/hyperion/torch/optim/factory.py @@ -2,13 +2,14 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ArgumentParser, ActionParser import logging -from ...utils.misc import filter_args +from jsonargparse import ActionParser, ArgumentParser import torch import torch.optim as optim + +from ...utils.misc import filter_args from .radam import RAdam diff --git a/hyperion/torch/optim/radam.py b/hyperion/torch/optim/radam.py index 459646c1..1b7a588f 100644 --- a/hyperion/torch/optim/radam.py +++ b/hyperion/torch/optim/radam.py @@ -4,6 +4,7 @@ # import math + import torch from torch.optim.optimizer import Optimizer, required diff --git a/hyperion/torch/seq_embed/__init__.py b/hyperion/torch/seq_embed/__init__.py index 24ee9555..8ecc2cf8 100644 --- a/hyperion/torch/seq_embed/__init__.py +++ b/hyperion/torch/seq_embed/__init__.py @@ -6,9 +6,9 @@ # xvectors had been moved to models # we import them here for backwards compatibility -from ..models.xvector import XVector -from ..models.tdnn_xvector import TDNNXVector -from ..models.resnet_xvector import ResNetXVector from ..models.efficient_net_xvector import EfficientNetXVector -from ..models.transformer_xvector_v1 import TransformerXVectorV1 +from ..models.resnet_xvector import ResNetXVector from ..models.spinenet_xvector import SpineNetXVector +from ..models.tdnn_xvector import TDNNXVector +from ..models.transformer_xvector_v1 import TransformerXVectorV1 +from ..models.xvector import XVector diff --git a/hyperion/torch/torch_defs.py b/hyperion/torch/torch_defs.py index a567de50..b08beaeb 100644 --- a/hyperion/torch/torch_defs.py +++ b/hyperion/torch/torch_defs.py @@ -5,7 +5,6 @@ import torch - str2torch_dtype = { "float32": torch.float32, "float64": torch.float64, diff --git a/hyperion/torch/torch_model_loader.py b/hyperion/torch/torch_model_loader.py index c173cd50..2273bee8 100644 --- a/hyperion/torch/torch_model_loader.py +++ b/hyperion/torch/torch_model_loader.py @@ -3,13 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from collections import OrderedDict as ODict import re +from collections import OrderedDict as ODict import torch -from .narchs import * from .models import * +from .narchs import * class TorchModelLoader(object): diff --git a/hyperion/torch/tpm/__init__.py b/hyperion/torch/tpm/__init__.py index dfa5c14b..e3a17e4f 100644 --- a/hyperion/torch/tpm/__init__.py +++ b/hyperion/torch/tpm/__init__.py @@ -3,4 +3,4 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .hf import HFWav2Vec2, HFHubert, HFWavLM +from .hf import HFHubert, HFWav2Vec2, HFWavLM diff --git a/hyperion/torch/tpm/hf/__init__.py b/hyperion/torch/tpm/hf/__init__.py index 4db1c95d..d0f91785 100644 --- a/hyperion/torch/tpm/hf/__init__.py +++ b/hyperion/torch/tpm/hf/__init__.py @@ -3,6 +3,6 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .hf_wav2vec2 import HFWav2Vec2 from .hf_hubert import HFHubert +from .hf_wav2vec2 import HFWav2Vec2 from .hf_wavlm import HFWavLM diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py index ba331573..b2198924 100644 --- a/hyperion/torch/tpm/hf/hf_hubert.py +++ b/hyperion/torch/tpm/hf/hf_hubert.py @@ -2,17 +2,17 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os import logging -from jsonargparse import ArgumentParser, 
ActionParser, ActionYesNo -from typing import Optional, Tuple, Union, List, Callable +import os +from typing import Callable, List, Optional, Tuple, Union + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import HubertConfig, HubertModel import torch import torch.nn as nn -from transformers import HubertModel, HubertConfig - -from ...utils.ddp import ddp_wait_for_all_procs, ddp_get_rank +from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index 579574a1..e1f21153 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -2,17 +2,17 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os import logging -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo -from typing import Optional, Tuple, Union, List, Callable +import os +from typing import Callable, List, Optional, Tuple, Union + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import Wav2Vec2Config, Wav2Vec2Model import torch import torch.nn as nn -from transformers import Wav2Vec2Model, Wav2Vec2Config - -from ...utils.ddp import ddp_wait_for_all_procs, ddp_get_rank +from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index 1dceed1c..1c1b1030 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -3,21 +3,20 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os import logging +import os from turtle import right -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +from typing import List, Optional, Tuple, Union -from typing import Optional, Tuple, Union, List +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Processor import torch import torch.nn as nn -from transformers import Wav2Vec2Processor, Wav2Vec2FeatureExtractor - from ...torch_model import TorchModel -from ...utils import seq_lengths_to_mask, scale_seq_lengths -from ...utils.ddp import ddp_wait_for_all_procs, ddp_get_rank +from ...utils import scale_seq_lengths, seq_lengths_to_mask +from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs class HFWav2VecBase(TorchModel): diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py index 15b8248d..0d5c5ad3 100644 --- a/hyperion/torch/tpm/hf/hf_wavlm.py +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -2,17 +2,17 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os import logging -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo -from typing import Optional, Tuple, Union, List, Callable +import os +from typing import Callable, List, Optional, Tuple, Union + +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import WavLMConfig, WavLMModel import torch import torch.nn as nn -from transformers import WavLMModel, WavLMConfig - -from ...utils.ddp import ddp_wait_for_all_procs, ddp_get_rank +from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git 
a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 69e97cc6..9f5fafe6 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index 0523ad44..e2d2d1f6 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 93571acf..72058182 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -11,26 +11,23 @@ from enum import Enum from pathlib import Path +from fairscale.optim.grad_scaler import ShardedGradScaler +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.distributed as dist import torch.nn as nn -from fairscale.optim.grad_scaler import ShardedGradScaler -from jsonargparse import ActionParser, ArgumentParser from torch.optim.swa_utils import SWALR, AveragedModel from ...utils.misc import filter_func_args -from ..loggers import CSVLogger, LoggerList, ProgLogger, TensorBoardLogger, WAndBLogger +from ..loggers import (CSVLogger, LoggerList, ProgLogger, TensorBoardLogger, + WAndBLogger) from ..lr_schedulers import LRScheduler as LRS from ..lr_schedulers import LRSchedulerFactory as LRSF from ..optim import OptimizerFactory as OF -from ..utils import ( - FairFullyShardedDDP, - FairShardedDDP, - MetricAcc, - TorchDDP, - tensors_subset, -) +from ..utils import (FairFullyShardedDDP, FairShardedDDP, MetricAcc, TorchDDP, + tensors_subset) class DDPType(str, Enum): diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index ba401cb7..f4877dc6 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index 03800e0d..fc9d98f1 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/vq_vae_trainer.py 
b/hyperion/torch/trainers/vq_vae_trainer.py index 40b6b10d..35946e96 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index af915d6b..303427de 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -7,10 +7,11 @@ import time from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 1e1b1778..2a012dde 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -7,10 +7,11 @@ import time from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 4e791347..9d04af42 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -6,10 +6,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/utils/__init__.py b/hyperion/torch/utils/__init__.py index da4a3773..0fee1bdb 100644 --- a/hyperion/torch/utils/__init__.py +++ b/hyperion/torch/utils/__init__.py @@ -3,17 +3,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .devices import ( - open_device, - tensors_to_device, - tensors_to_cpu, - tensors_to_numpy, - tensors_subset, -) -from .metric_acc import MetricAcc -from .masking import seq_lengths_to_mask, scale_seq_lengths from .collation import collate_seq_1d, collate_seq_2d, collate_seq_nd +from .data_parallel import TorchDataParallel +from .ddp import FairFullyShardedDDP, FairShardedDDP, TorchDDP +from .devices import (open_device, tensors_subset, tensors_to_cpu, + tensors_to_device, tensors_to_numpy) from .eval_utils import eval_nnet_by_chunks, eval_nnet_overlap_add +from .masking import scale_seq_lengths, seq_lengths_to_mask +from .metric_acc import MetricAcc from .vad_utils import remove_silence -from .data_parallel import TorchDataParallel -from .ddp import TorchDDP, FairShardedDDP, FairFullyShardedDDP diff --git a/hyperion/torch/utils/ddp.py b/hyperion/torch/utils/ddp.py index 7038cff3..ea3d18ea 100644 --- 
a/hyperion/torch/utils/ddp.py +++ b/hyperion/torch/utils/ddp.py @@ -2,14 +2,17 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os -import logging import datetime +import logging +import os + +from fairscale.nn.data_parallel import \ + FullyShardedDataParallel as FullyShardedDDP +from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP + import torch -import torch.nn as nn import torch.distributed as dist -from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP -from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP +import torch.nn as nn from .devices import open_device diff --git a/hyperion/torch/utils/devices.py b/hyperion/torch/utils/devices.py index 19c124b2..c0736f2f 100644 --- a/hyperion/torch/utils/devices.py +++ b/hyperion/torch/utils/devices.py @@ -2,9 +2,9 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os import subprocess -import logging import torch diff --git a/hyperion/torch/utils/eval_utils.py b/hyperion/torch/utils/eval_utils.py index f1ae1edb..d74835f6 100644 --- a/hyperion/torch/utils/eval_utils.py +++ b/hyperion/torch/utils/eval_utils.py @@ -4,6 +4,7 @@ """ import math + import torch diff --git a/hyperion/torch/utils/metric_acc.py b/hyperion/torch/utils/metric_acc.py index d635310b..a82c174a 100644 --- a/hyperion/torch/utils/metric_acc.py +++ b/hyperion/torch/utils/metric_acc.py @@ -4,6 +4,7 @@ """ import logging from collections import OrderedDict as ODict + import numpy as np import torch diff --git a/hyperion/torch/utils/misc.py b/hyperion/torch/utils/misc.py index 69d209eb..b2a3810f 100644 --- a/hyperion/torch/utils/misc.py +++ b/hyperion/torch/utils/misc.py @@ -4,8 +4,8 @@ """ import torch -import torch.nn as nn import torch.cuda.amp as amp +import torch.nn as nn def l2_norm(x, dim=1, axis=None): From e26f5b93c6be144c303a5e5c2f7230dde24db9b1 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 26 Jan 2023 07:30:08 -0500 Subject: [PATCH 075/154] new hyperparams for voxceleb --- .../v1.1/conf/fbank80_specaug1_stmn_16k.yaml | 24 + ...rain_ecapatdnn2048x4_xvec_stage1_v2.0.yaml | 91 ++++ ...rain_ecapatdnn2048x4_xvec_stage2_v2.0.yaml | 66 +++ ...train_ecapatdnn512x3_xvec_stage1_v2.0.yaml | 89 ++++ ...train_ecapatdnn512x3_xvec_stage2_v2.0.yaml | 66 +++ ...onfig_fbank80_stmn_ecapatdnn2048x4.v2.0.sh | 45 ++ ...config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | 45 ++ ...fnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | 12 +- ..._eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | 12 +- ...et34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- ...et34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- ...et50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | 10 +- ...et50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- ...et50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- ...w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | 11 +- ..._resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- ...net34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | 11 +- ...4_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh | 11 +- ..._resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | 12 +- ...ine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | 12 +- ...ne2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | 12 +- ...pinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- ...inenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | 12 +- ...et50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- ...eresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- 
...ine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- ...ne2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | 11 +- egs/voxceleb/v1.1/run_001_prepare_data.sh | 27 + egs/voxceleb/v1.1/run_011_train_xvector.sh | 32 +- egs/voxceleb/v1.1/run_030_extract_xvectors.sh | 82 ++- egs/voxceleb/v1.1/run_040_eval_be.sh | 231 +++++--- egs/voxceleb/v1/local/make_vox2_trials.py | 83 +++ egs/voxceleb/v1/local/prepare_voxsrc22_dev.py | 88 ++++ .../v1/local/prepare_voxsrc22_test.py | 73 +++ egs/voxceleb/v1/local/score_voxceleb1.sh | 9 +- egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py | 205 +++++++ egs/voxceleb/v1/steps_be/eval-be-v2-snorm.py | 133 +++++ egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh | 75 +++ egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh | 64 +++ egs/voxceleb/v1/steps_be/train-qmf.py | 123 +++++ egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh | 81 +++ hyperion/bin/apply_mvn_select_frames.py | 173 ++++++ hyperion/bin/copy_feats.py | 40 ++ ...l_xvec_cosine_scoring_from_adv_test_wav.py | 423 +++++++++++++++ ...osine_scoring_from_adv_test_wav_wavegan.py | 498 ++++++++++++++++++ ...l_xvec_cosine_scoring_from_art_test_wav.py | 430 +++++++++++++++ .../eval_xvec_cosine_scoring_from_test_wav.py | 278 ++++++++++ ...sine_scoring_from_transfer_adv_test_wav.py | 439 +++++++++++++++ ...sine_scoring_from_transfer_art_test_wav.py | 464 ++++++++++++++++ hyperion/bin/finetune_xvector_from_wav.py | 22 +- hyperion/np/score_norm/adapt_s_norm.py | 318 ++++++++--- .../data/class_weighted_seg_chunk_sampler.py | 18 +- hyperion/torch/layer_blocks/fc_blocks.py | 3 + 53 files changed, 4752 insertions(+), 311 deletions(-) create mode 100644 egs/voxceleb/v1.1/conf/fbank80_specaug1_stmn_16k.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh create mode 100755 egs/voxceleb/v1/local/make_vox2_trials.py create mode 100755 egs/voxceleb/v1/local/prepare_voxsrc22_dev.py create mode 100755 egs/voxceleb/v1/local/prepare_voxsrc22_test.py create mode 100755 egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py create mode 100755 egs/voxceleb/v1/steps_be/eval-be-v2-snorm.py create mode 100755 egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh create mode 100755 egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh create mode 100755 egs/voxceleb/v1/steps_be/train-qmf.py create mode 100755 egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh create mode 100755 hyperion/bin/apply_mvn_select_frames.py create mode 100755 hyperion/bin/copy_feats.py create mode 100755 hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py create mode 100755 hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py create mode 100755 hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py create mode 100755 hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py create mode 100755 hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py create mode 100755 hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py diff --git a/egs/voxceleb/v1.1/conf/fbank80_specaug1_stmn_16k.yaml b/egs/voxceleb/v1.1/conf/fbank80_specaug1_stmn_16k.yaml new file mode 100644 index 00000000..8df42fc6 --- /dev/null +++ 
b/egs/voxceleb/v1.1/conf/fbank80_specaug1_stmn_16k.yaml @@ -0,0 +1,24 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 16000 + frame_length: 25 + low_freq: 20 + high_freq: 7600 + num_filters: 80 + snip_edges: false + use_energy: false +spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. + freq_mask_min_width: 0 + freq_mask_max_width: 8 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean +mvn: + context: 150 + norm_var: false diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml new file mode 100644 index 00000000..4b6fbc77 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml @@ -0,0 +1,91 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_enc: + in_feats: 80 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + norm_before: false + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.0 + norm_before: false +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 30 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml new file mode 100644 index 00000000..4a4a8a88 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml 
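+# The overrides below configure the large-margin fine-tuning stage (stage 2 of
+# run_011_train_xvector.sh), relative to the stage-1 config: the AAM-softmax
+# margin grows from 0.2 to 0.4 with no warm-up plus an intertop margin of 0.1,
+# chunks grow from 2 s to 6 s with class-weighted sampling and hard prototypes
+# (num_hard_prototypes: 8), SpecAugment is dropped from the features, and
+# low-LR SGD with SWA over the final epochs replaces the stage-1 Adam schedule.
+# (Editorial gloss as YAML comments; key names above are taken from this file.)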
+model: + cos_scale: 30.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + swa_start: 31 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml new file mode 100644 index 00000000..319ab3ab --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml @@ -0,0 +1,89 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.0 + norm_before: false +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 30 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml new file mode 100644 index 00000000..4a4a8a88 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + 
momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + swa_start: 31 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh new file mode 100644 index 00000000..14f2cdb4 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh @@ -0,0 +1,45 @@ +# ECAPA-TDNN large + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn2048x4.v2.0 + +nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0030.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0030.pth +nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth + +# back-end +do_plda=false +#do_snorm=true +#do_qmf=true +do_voxsrc22=false + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh new file mode 100644 index 00000000..0e7a3b52 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh @@ -0,0 +1,45 @@ +# ECAPA-TDNN small + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn512x3.v2.0 + +nnet_s1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0030.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0030.pth +nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth + +# back-end +do_plda=false +#do_snorm=true +#do_qmf=true +do_voxsrc22=false + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh index 2806a422..aae5f68e 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh @@ -26,14 +26,12 @@ eff_batch_size=512 # effective batch size lr=0.01 nnet_num_epochs=70 
-xvec_train_base_cfg=conf/train_effnetb4_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" - -nnet_name=${feat_type}_${effnet_type}_is1_mbs1122121_ser${se_r}_fixsh_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_base_cfg=conf/train_effnetb4_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr" +nnet_s1_name=${feat_type}_${effnet_type}_is1_mbs1122121_ser${se_r}_fixsh_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh index d83ca483..6ddb2b5e 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh @@ -26,14 +26,12 @@ eff_batch_size=512 # effective batch size lr=0.01 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_effnetb4_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model $PWD/conf/efficientnet_b7.yaml --trainer.optim.lr $lr" - -nnet_name=${feat_type}_${effnet_type}_is1_mbs1122121_ser${se_r}_fixsh_e${embed_dim}_eina_hln_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_base_cfg=conf/train_effnetb4_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model $PWD/conf/efficientnet_b7.yaml --trainer.optim.lr $lr" +nnet_s1_name=${feat_type}_${effnet_type}_is1_mbs1122121_ser${se_r}_fixsh_e${embed_dim}_eina_hln_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index f995fc0f..9082799e 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -28,13 +28,12 @@ scale=4 ws_tag=w16s4 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" +nnet_s1_base_cfg=conf/train_res2net50_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth 
+nnet_s1_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index a2e8cdba..f2e22b45 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -28,13 +28,12 @@ scale=4 ws_tag=w26s4 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" +nnet_s1_base_cfg=conf/train_res2net50_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh index 6ddb9e2c..bc828375 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -28,13 +28,13 @@ scale=8 ws_tag=w13s8 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" +nnet_s1_base_cfg=conf/train_res2net50_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index cfec2b09..0c2e825a 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ 
b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -28,13 +28,12 @@ scale=4 ws_tag=w26s4 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" +nnet_s1_base_cfg=conf/train_res2net50_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh index 3cf18fcf..49fd61fa 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -28,13 +28,12 @@ scale=8 ws_tag=w26s8 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" +nnet_s1_base_cfg=conf/train_res2net50_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" -nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh index a5767e50..505ed8bc 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh @@ -28,13 +28,12 @@ scale=8 ws_tag=w26s8 nnet_num_epochs=90 -xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --trainer.epochs $nnet_num_epochs --trainer.swa-start 70 --trainer.swa-lr 1e-3 --trainer.swa-anneal-epochs 5" +nnet_s1_base_cfg=conf/train_res2net50_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu 
--model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --trainer.epochs $nnet_num_epochs --trainer.swa-start 70 --trainer.swa-lr 1e-3 --trainer.swa-anneal-epochs 5" -nnet_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/swa_model_ep0091.pth +nnet_s1_name=${feat_type}_${resnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/swa_model_ep0091.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh index b10e5e86..9c787210 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -24,13 +24,12 @@ margin_warmup=20 margin=0.3 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu" +nnet_s1_base_cfg=conf/train_resnet34_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu" -nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh index 2666b93e..48dc3c90 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh @@ -25,13 +25,12 @@ margin_warmup=20 margin=0.3 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer $PWD/conf/trainer_swa_default.yaml" +nnet_s1_base_cfg=conf/train_resnet34_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer $PWD/conf/trainer_swa_default.yaml" -nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/swa_model_ep0081.pth +nnet_s1_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/swa_model_ep0081.pth # back-end diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh index 0ec34ef1..838a41ae 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh +++ 
b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_sharded_amp.v1.sh @@ -25,13 +25,12 @@ margin_warmup=20 margin=0.3 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.ddp-type oss_sharded_ddp" +nnet_s1_base_cfg=conf/train_resnet34_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.ddp-type oss_sharded_ddp" -nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_sharded_ddp_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_sharded_ddp_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh index ced8b8d6..003c8aae 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -26,14 +26,12 @@ margin_warmup=20 margin=0.3 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type" - -nnet_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_base_cfg=conf/train_resnet34_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type" +nnet_s1_name=${feat_type}_${resnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh index f3a5ef5a..3a764519 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -27,14 +27,12 @@ scale=4 ws_tag=w26s4 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" - -nnet_name=${feat_type}_${spinenet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_base_cfg=conf/train_spinenet49_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" +nnet_s1_name=${feat_type}_${spinenet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 
+nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh index 40957669..e12ab940 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -27,14 +27,12 @@ scale=4 ws_tag=w26s4 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" - -nnet_name=${feat_type}_${spinenet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_base_cfg=conf/train_spinenet49_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale" +nnet_s1_name=${feat_type}_${spinenet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh index 43f539f9..f452baae 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -24,13 +24,12 @@ margin_warmup=20 margin=0.3 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type" +nnet_s1_base_cfg=conf/train_spinenet49_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type" -nnet_name=${feat_type}_${spinenet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${spinenet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh index f834b2cb..d17e2862 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -24,14 +24,12 @@ margin_warmup=20 margin=0.3 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type" -
-nnet_name=${feat_type}_${spinenet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_base_cfg=conf/train_spinenet49_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type" +nnet_s1_name=${feat_type}_${spinenet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh index 243dab65..547020b1 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -29,13 +29,12 @@ ws_tag=w26s4 nnet_num_epochs=70 se_r=256 -xvec_train_base_cfg=conf/train_res2net50_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" +nnet_s1_base_cfg=conf/train_res2net50_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" -nnet_name=${feat_type}_${resnet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${resnet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh index 749ca557..63cde868 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -27,13 +27,12 @@ margin_warmup=20 margin=0.3 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_resnet34_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.se-r $se_r" +nnet_s1_base_cfg=conf/train_resnet34_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.resnet-type $resnet_type --model.se-r $se_r" -nnet_name=${feat_type}_${resnet_type}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${resnet_type}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh 
b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh index d3a5595c..e465c525 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -28,13 +28,12 @@ ws_tag=w26s4 se_r=256 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" +nnet_s1_base_cfg=conf/train_spinenet49_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" -nnet_name=${feat_type}_${spinenet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${spinenet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh index 4ffdd48b..975e2aba 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tsespine2net49s_arcs30m0.3_adam_lr0.05_amp.v1.sh @@ -28,13 +28,12 @@ ws_tag=w26s4 se_r=256 nnet_num_epochs=70 -xvec_train_base_cfg=conf/train_spinenet49_xvec_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" +nnet_s1_base_cfg=conf/train_spinenet49_xvec_default.yaml +nnet_s1_args="--data.train.sampler.batch-size $batch_size_1gpu --model.spinenet-type $spinenet_type --model.res2net-width-factor $width_factor --model.res2net-scale $scale --model.se-r $se_r" -nnet_name=${feat_type}_${spinenet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth +nnet_s1_name=${feat_type}_${spinenet_type}${ws_tag}_r${se_r}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0070.pth # back-end diff --git a/egs/voxceleb/v1.1/run_001_prepare_data.sh b/egs/voxceleb/v1.1/run_001_prepare_data.sh index 7bf15448..037efda1 100755 --- a/egs/voxceleb/v1.1/run_001_prepare_data.sh +++ b/egs/voxceleb/v1.1/run_001_prepare_data.sh @@ -26,3 +26,30 @@ if [ $stage -le 2 ];then # Use this for the newer version of voxceleb1: local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data fi + +if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then + local/prepare_voxsrc22_dev.py \ + --vox1-corpus-dir $voxceleb1_root \ + --voxsrc22-corpus-dir $voxsrc22_root \ + --output-dir data/voxsrc22_dev +fi + +if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then + local/prepare_voxsrc22_test.py \ + --corpus-dir 
$voxsrc22_root \ + --output-dir data/voxsrc22_test +fi + +if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then + # # split vox2 into 2 parts, for cohort and qmf training + # utils/copy_data_dir.sh data/voxceleb2cat_train data/voxceleb2cat_train_odd + # utils/copy_data_dir.sh data/voxceleb2cat_train data/voxceleb2cat_train_even + # awk 'int(substr($2,3)) % 2 == 1' data/voxceleb2cat_train/utt2spk > data/voxceleb2cat_train_odd/utt2spk + # utils/fix_data_dir.sh data/voxceleb2cat_train_odd + # awk 'int(substr($2,3)) % 2 == 0' data/voxceleb2cat_train/utt2spk > data/voxceleb2cat_train_even/utt2spk + # utils/fix_data_dir.sh data/voxceleb2cat_train_even + # # we keep 3 utts per speaker + # utils/subset_data_dir.sh --per-spk data/voxceleb2cat_train_odd 3 data/voxceleb2cat_train_subset_cohort + # utils/subset_data_dir.sh --per-spk data/voxceleb2cat_train_even 3 data/voxceleb2cat_train_subset_qmf + local/make_vox2_trials.py --data-dir data/voxceleb2cat_train +fi diff --git a/egs/voxceleb/v1.1/run_011_train_xvector.sh b/egs/voxceleb/v1.1/run_011_train_xvector.sh index 883c729b..a051c136 100755 --- a/egs/voxceleb/v1.1/run_011_train_xvector.sh +++ b/egs/voxceleb/v1.1/run_011_train_xvector.sh @@ -38,13 +38,12 @@ fi # Network Training if [ $stage -le 1 ]; then - - mkdir -p $nnet_dir/log + mkdir -p $nnet_s1_dir/log $cuda_cmd \ - --gpu $ngpu $nnet_dir/log/train.log \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - train_xvector_from_wav.py $nnet_type --cfg $xvec_train_base_cfg $xvec_train_args $extra_args \ + train_xvector_from_wav.py $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.audio-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ @@ -52,8 +51,31 @@ if [ $stage -le 1 ]; then --data.val.dataset.audio-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ - --trainer.exp-path $nnet_dir $args \ + --trainer.exp-path $nnet_s1_dir \ --num-gpus $ngpu \ fi + +# Large Margin Fine-tuning +if [ $stage -le 2 ]; then + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" + fi + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ + --in-model-file $nnet_s1 \ + --trainer.exp-path $nnet_s2_dir \ + --num-gpus $ngpu \ + +fi diff --git a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh index 3abf2ff6..c42f96bb 100755 --- a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh +++ b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh @@ -8,6 +8,7 @@ set -e stage=1 +nnet_stage=1 config_file=default_config.sh use_gpu=false xvec_chunk_length=12800 @@ -21,41 +22,64 @@ else xvec_cmd="$train_cmd --mem 12G" fi +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + 
nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + xvector_dir=exp/xvectors/$nnet_name -if [ $stage -le 1 ]; then - # Extract xvectors for training LDA/PLDA - for name in voxceleb2cat_train - do - if [ $plda_num_augs -eq 0 ]; then - steps_xvec/extract_xvectors_from_wav.sh --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ - --random-utt-length true --min-utt-length 400 --max-utt-length 14000 \ - --feat-config $feat_config \ - $nnet data/${name} \ - $xvector_dir/${name} - else - steps_xvec/extract_xvectors_from_wav.sh --cmd "$xvec_cmd" --nj 300 ${xvec_args} \ - --random-utt-length true --min-utt-length 400 --max-utt-length 14000 \ - --feat-config $feat_config --aug-config $plda_aug_config --num-augs $plda_num_augs \ - $nnet data/${name} \ - $xvector_dir/${name}_augx${plda_num_augs} \ - data/${name}_augx${plda_num_augs} - fi - done +if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qmf" == "true" ) ]]; then + # Extract xvectors for training LDA/PLDA + for name in voxceleb2cat_train + do + if [ $plda_num_augs -eq 0 ]; then + steps_xvec/extract_xvectors_from_wav.sh \ + --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ + --random-utt-length true --min-utt-length 200 --max-utt-length 14000 \ + --feat-config $feat_config \ + $nnet data/${name} \ + $xvector_dir/${name} + else + steps_xvec/extract_xvectors_from_wav.sh \ + --cmd "$xvec_cmd" --nj 300 ${xvec_args} \ + --random-utt-length true --min-utt-length 200 --max-utt-length 14000 \ + --feat-config $feat_config --aug-config $plda_aug_config --num-augs $plda_num_augs \ + $nnet data/${name} \ + $xvector_dir/${name}_augx${plda_num_augs} \ + data/${name}_augx${plda_num_augs} + fi + done fi if [ $stage -le 2 ]; then - # Extracts x-vectors for evaluation - for name in voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 100 ? $num_spk:100)) - steps_xvec/extract_xvectors_from_wav.sh --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ - --feat-config $feat_config \ - $nnet data/$name \ - $xvector_dir/$name - done + # Extracts x-vectors for evaluation + for name in voxceleb1_test + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 100 ? $num_spk:100)) + steps_xvec/extract_xvectors_from_wav.sh \ + --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ + --feat-config $feat_config \ + $nnet data/$name \ + $xvector_dir/$name + done fi exit diff --git a/egs/voxceleb/v1.1/run_040_eval_be.sh b/egs/voxceleb/v1.1/run_040_eval_be.sh index cd168180..49fa68e7 100755 --- a/egs/voxceleb/v1.1/run_040_eval_be.sh +++ b/egs/voxceleb/v1.1/run_040_eval_be.sh @@ -8,12 +8,34 @@ set -e stage=1 +nnet_stage=1 config_file=default_config.sh + . parse_options.sh || exit 1; . $config_file . 
datapath.sh +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + plda_label=${plda_type}y${plda_y_dim}_v1 be_name=lda${lda_dim}_${plda_label}_${plda_data} @@ -22,104 +44,179 @@ be_dir=exp/be/$nnet_name/$be_name score_dir=exp/scores/$nnet_name/${be_name} score_plda_dir=$score_dir/plda score_cosine_dir=exp/scores/$nnet_name/cosine +score_cosine_snorm_dir=exp/scores/$nnet_name/cosine_snorm +score_cosine_qmf_dir=exp/scores/$nnet_name/cosine_qmf -if [ $stage -le 1 ]; then +if [ "$do_plda" == "true" ];then + if [ $stage -le 1 ]; then echo "Train PLDA on Voxceleb2" - steps_be/train_be_v1.sh --cmd "$train_cmd" \ - --lda_dim $lda_dim \ - --plda_type $plda_type \ - --y_dim $plda_y_dim --z_dim $plda_z_dim \ - $xvector_dir/$plda_data/xvector.scp \ - data/$plda_data \ - $be_dir & - - - wait - -fi - - -if [ $stage -le 2 ];then - + steps_be/train_be_v1.sh \ + --cmd "$train_cmd" \ + --lda_dim $lda_dim \ + --plda_type $plda_type \ + --y_dim $plda_y_dim --z_dim $plda_z_dim \ + $xvector_dir/$plda_data/xvector.scp \ + data/$plda_data \ + $be_dir + + fi + + + if [ $stage -le 2 ];then echo "Eval Voxceleb 1 with LDA+CentWhiten+LNorm+PLDA" - steps_be/eval_be_v1.sh --cmd "$train_cmd" --plda_type $plda_type \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $be_dir/lda_lnorm.h5 \ - $be_dir/plda.h5 \ - $score_plda_dir/voxceleb1_scores + steps_be/eval_be_v1.sh \ + --cmd "$train_cmd" --plda_type $plda_type \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $be_dir/lda_lnorm.h5 \ + $be_dir/plda.h5 \ + $score_plda_dir/voxceleb1_scores $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir - + local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + for f in $(ls $score_plda_dir/*_results); do - echo $f - cat $f - echo "" + echo $f + cat $f + echo "" done - + fi fi -score_plda_dir=$score_cosine_dir if [ $stage -le 3 ];then - echo "Eval Voxceleb 1 with Cosine scoring" - steps_be/eval_be_cos.sh --cmd "$train_cmd" \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $score_plda_dir/voxceleb1_scores + echo "Eval Voxceleb 1 with Cosine scoring" + steps_be/eval_be_cos.sh \ + --cmd "$train_cmd" \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $score_cosine_dir/voxceleb1_scores - $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + $train_cmd --mem 10G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_dir - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done + for f in $(ls $score_cosine_dir/*_results); + do + echo $f + cat $f + echo "" + done fi -be_dir=exp/be/$nnet_name/cw -score_plda_dir=$score_dir/cw_cosine -if [ $stage -le 4 ]; then - echo "Train centering+whitening on Voxceleb2" - steps_be/train_be_v2.sh 
--cmd "$train_cmd" \ - $xvector_dir/$plda_data/xvector.scp \ - data/$plda_data \ - $be_dir +if [ "$do_snorm" == "true" ];then + if [ $stage -le 4 ];then + echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm" + steps_be/eval_be_cos_snorm.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_snorm_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_cosine_snorm_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_snorm_dir + + for f in $(ls $score_cosine_snorm_dir/*_results); + do + echo $f + cat $f + echo "" + done + fi fi -if [ $stage -le 5 ];then +if [ "$do_qmf" == "true" ];then + if [ $stage -le 5 ];then + echo "Train QMF in Vox2" + steps_be/train_be_cos_qmf.sh \ + --cmd "$train_cmd" --coh-nbest 1000 \ + data/voxceleb2cat_train/trials \ + data/voxceleb2cat_train/utt2model \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $xvector_dir/voxceleb2cat_train/utt2num_frames \ + data/voxceleb2cat_train/snorm_utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/voxceleb2_qmf_scores - echo "Eval Voxceleb 1 with CentWhiten + Cosine scoring" - steps_be/eval_be_v2.sh --cmd "$train_cmd" \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $be_dir/cw.h5 \ - $score_plda_dir/voxceleb1_scores + fi - $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir + if [ $stage -le 6 ];then - for f in $(ls $score_plda_dir/*_results); + echo "Eval Voxceleb 1 with Cosine scoring" + steps_be/eval_be_cos_qmf.sh \ + --cmd "$train_cmd --mem 15G" --coh-nbest 1000 \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $xvector_dir/voxceleb1_test/utt2num_frames \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/qmf.h5 \ + $score_cosine_qmf_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1_snorm.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir _snorm + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1_qmf.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir _qmf + + for f in $(ls $score_cosine_qmf_dir/voxceleb1{,_snorm,_qmf}_[oeh]_clean_results); do - echo $f - cat $f - echo "" + echo $f + cat $f + echo "" done + fi fi + exit +# be_dir=exp/be/$nnet_name/cw +# score_plda_dir=$score_dir/cw_cosine + +# if [ $stage -le 4 ]; then +# echo "Train centering+whitening on Voxceleb2" +# steps_be/train_be_v2.sh --cmd "$train_cmd" \ +# $xvector_dir/$plda_data/xvector.scp \ +# data/$plda_data \ +# $be_dir +# fi + + +# if [ $stage -le 5 ];then + +# echo "Eval Voxceleb 1 with CentWhiten + Cosine scoring" +# steps_be/eval_be_v2.sh --cmd "$train_cmd" \ +# data/voxceleb1_test/trials \ +# data/voxceleb1_test/utt2model \ +# $xvector_dir/voxceleb1_test/xvector.scp \ +# $be_dir/cw.h5 \ +# $score_plda_dir/voxceleb1_scores + +# $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ +# 
local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir
+
+#     for f in $(ls $score_plda_dir/*_results);
+#     do
+# 	echo $f
+# 	cat $f
+# 	echo ""
+#     done
+
+# fi
+
+# exit
diff --git a/egs/voxceleb/v1/local/make_vox2_trials.py b/egs/voxceleb/v1/local/make_vox2_trials.py
new file mode 100755
index 00000000..95a69cf1
--- /dev/null
+++ b/egs/voxceleb/v1/local/make_vox2_trials.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+"""
+ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+from jsonargparse import ArgumentParser, namespace_to_dict
+import logging
+from pathlib import Path
+import math
+import numpy as np
+import pandas as pd
+
+from hyperion.hyp_defs import config_logger
+from hyperion.utils.segment_set import SegmentSet
+
+
+def make_trials_single_gender(ft, fm, fs, segments, num_tar_trials, num_spks):
+
+    # select spks
+    rng = np.random.RandomState(seed=1123)
+    spks = segments["class_id"].unique()
+    spks = rng.choice(spks, size=(num_spks,), replace=False)
+    snorm_segments = segments[~segments["class_id"].isin(spks)]
+    for seg, spk in zip(snorm_segments["id"], snorm_segments["class_id"]):
+        fs.write("%s %s\n" % (seg, spk))
+
+    segments = segments[segments["class_id"].isin(spks)]
+    # pick k segments per speaker so that the k*(k-1)/2 same-speaker pairs,
+    # summed over all S speakers, give about num_tar_trials targets:
+    # k = ceil((1 + sqrt(1 + 8*T/S)) / 2) with T=num_tar_trials, S=num_spks
+    num_segs_per_spk = int(
+        math.ceil((1 + math.sqrt(1 + 8 * num_tar_trials // num_spks)) / 2)
+    )
+
+    n = num_spks * num_segs_per_spk
+    logging.info(
+        "num_segs_per_spk=%d n=%d tar-trials-per-spk=%d num_spks=%d",
+        num_segs_per_spk, n, num_tar_trials // num_spks, num_spks,
+    )
+    seg_ids = rng.choice(segments["id"], size=(n,), replace=False)
+    segments = segments[segments["id"].isin(seg_ids)]
+    seg_ids = segments["id"].values
+    class_ids = segments["class_id"].values
+    ntar = 0
+    nnon = 0
+    for i in range(n - 1):
+        for j in range(i + 1, n):
+            t = "target" if class_ids[i] == class_ids[j] else "nontarget"
+            ft.write("%s %s %s\n" % (seg_ids[i], seg_ids[j], t))
+            if t == "target":
+                ntar += 1
+            else:
+                nnon += 1
+
+    logging.info("Got ntar=%d and nnon=%d", ntar, nnon)
+    for i in range(n - 1):
+        fm.write("%s %s\n" % (seg_ids[i], seg_ids[i]))
+
+
+def make_trials(data_dir, num_1k_tar_trials, num_spks):
+    config_logger(1)
+    logging.info("Making trial list for %s", data_dir)
+    data_dir = Path(data_dir)
+    segments = SegmentSet.load(data_dir / "utt2spk")
+    gender = SegmentSet.load(data_dir / "spk2gender")
+    segments["gender"] = gender.loc[segments["class_id"], "class_id"].values
+
+    num_tar_trials = num_1k_tar_trials * 1000 // 2
+    num_spks = num_spks // 2
+    with open(data_dir / "trials", "w") as ft, open(
+        data_dir / "utt2model", "w"
+    ) as fm, open(data_dir / "snorm_utt2spk", "w") as fs:
+        segs_m = SegmentSet(segments.loc[segments["gender"] == "m"])
+        make_trials_single_gender(ft, fm, fs, segs_m, num_tar_trials, num_spks)
+        segs_f = SegmentSet(segments.loc[segments["gender"] == "f"])
+        make_trials_single_gender(ft, fm, fs, segs_f, num_tar_trials, num_spks)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="makes a trial list for vox2 dev")
+
+    parser.add_argument("--data-dir", required=True, help="Path to dataset")
+    parser.add_argument(
+        "--num-1k-tar-trials", type=int, default=30, help="thousands of target trials"
+    )
+    parser.add_argument("--num-spks", type=int, default=1000, help="number of speakers")
+    args = parser.parse_args()
+    make_trials(**namespace_to_dict(args))
diff --git a/egs/voxceleb/v1/local/prepare_voxsrc22_dev.py b/egs/voxceleb/v1/local/prepare_voxsrc22_dev.py
new file mode 100755
index 00000000..915de676
--- /dev/null
+++ b/egs/voxceleb/v1/local/prepare_voxsrc22_dev.py
@@ 
-0,0 +1,88 @@
+#!/usr/bin/env python
+"""
+ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+from jsonargparse import ArgumentParser, namespace_to_dict
+import logging
+from pathlib import Path
+import numpy as np
+import pandas as pd
+
+from hyperion.hyp_defs import config_logger
+
+
+def prepare_voxsrc22_dev(vox1_corpus_dir, voxsrc22_corpus_dir, output_dir, verbose):
+    config_logger(verbose)
+    logging.info(
+        "Preparing corpus %s + %s -> %s",
+        vox1_corpus_dir,
+        voxsrc22_corpus_dir,
+        output_dir,
+    )
+    vox1_corpus_dir = Path(vox1_corpus_dir)
+    voxsrc22_corpus_dir = Path(voxsrc22_corpus_dir)
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    trials_file = voxsrc22_corpus_dir / "voxsrc2022_dev.txt"
+    df_trials = pd.read_csv(
+        trials_file, header=None, names=["target", "enroll", "test"], sep=" ",
+    )
+
+    trials_file = output_dir / "trials"
+    logging.info("creating trials file %s", trials_file)
+    with open(trials_file, "w") as f:
+        for _, row in df_trials.iterrows():
+            t = "target" if row["target"] == 1 else "nontarget"
+            f.write("%s %s %s\n" % (row["enroll"], row["test"], t))
+
+    enroll_file = output_dir / "utt2model"
+    logging.info("creating enrollment file %s", enroll_file)
+    file_ids = df_trials["enroll"].unique()
+    with open(enroll_file, "w") as f:
+        for file_id in file_ids:
+            f.write("%s %s\n" % (file_id, file_id))
+
+    u2s_file = output_dir / "utt2spk"
+    logging.info("creating utt2spk file %s", u2s_file)
+    file_ids = np.unique(np.concatenate((df_trials["enroll"], df_trials["test"])))
+    with open(u2s_file, "w") as f:
+        for file_id in file_ids:
+            f.write("%s %s\n" % (file_id, file_id))
+
+    s2u_file = output_dir / "spk2utt"
+    logging.info("creating spk2utt file %s", s2u_file)
+    with open(s2u_file, "w") as f:
+        for file_id in file_ids:
+            f.write("%s %s\n" % (file_id, file_id))
+
+    wav_file = output_dir / "wav.scp"
+    logging.info("creating wav.scp file %s", wav_file)
+    with open(wav_file, "w") as f:
+        for file_id in file_ids:
+            # use a separate variable so the wav.scp path is not shadowed
+            if "VoxSRC2022_dev" in file_id:
+                wav_path = voxsrc22_corpus_dir / file_id
+            else:
+                wav_path = vox1_corpus_dir / "wav" / file_id
+
+            f.write("%s %s\n" % (file_id, wav_path))
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Prepares VoxSRC22 Track1/2 validation data")
+
+    parser.add_argument(
+        "--vox1-corpus-dir", required=True, help="Path to voxceleb1 v2 dataset"
+    )
+    parser.add_argument(
+        "--voxsrc22-corpus-dir", required=True, help="Path to voxsrc22 dataset"
+    )
+
+    parser.add_argument("--output-dir", required=True, help="Output data path prefix")
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+    args = parser.parse_args()
+    prepare_voxsrc22_dev(**namespace_to_dict(args))
diff --git a/egs/voxceleb/v1/local/prepare_voxsrc22_test.py b/egs/voxceleb/v1/local/prepare_voxsrc22_test.py
new file mode 100755
index 00000000..e3421fe1
--- /dev/null
+++ b/egs/voxceleb/v1/local/prepare_voxsrc22_test.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+"""
+ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+from jsonargparse import ArgumentParser, namespace_to_dict
+import logging
+from pathlib import Path
+import numpy as np
+import pandas as pd
+
+from hyperion.hyp_defs import config_logger
+
+
+def prepare_voxsrc22_test(corpus_dir, output_dir, verbose):
+    config_logger(verbose)
+    logging.info(
+        "Preparing corpus %s -> 
%s", corpus_dir, output_dir, + ) + corpus_dir = Path(corpus_dir) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + trials_file = corpus_dir / "Track12_blind.txt" + df_trials = pd.read_csv( + trials_file, header=None, names=["enroll", "test"], sep=" ", + ) + trials_file = output_dir / "trials" + logging.info("creating trials file %s", trials_file) + with open(trials_file, "w") as f: + for _, row in df_trials.iterrows(): + f.write("%s %s\n" % (row["enroll"], row["test"])) + + enroll_file = output_dir / "utt2model" + logging.info("creating enrollment file %s", enroll_file) + file_ids = df_trials["enroll"].unique() + with open(enroll_file, "w") as f: + for file_id in file_ids: + f.write("%s %s\n" % (file_id, file_id)) + + u2s_file = output_dir / "utt2spk" + logging.info("creating utt2spk file %s", u2s_file) + file_ids = np.unique(np.concatenate((df_trials["enroll"], df_trials["test"]))) + with open(u2s_file, "w") as f: + for file_id in file_ids: + f.write("%s %s\n" % (file_id, file_id)) + + s2u_file = output_dir / "spk2utt" + logging.info("creating spk2utt file %s", s2u_file) + with open(s2u_file, "w") as f: + for file_id in file_ids: + f.write("%s %s\n" % (file_id, file_id)) + + wav_file = output_dir / "wav.scp" + logging.info("creating wav.scp file %s", wav_file) + with open(wav_file, "w") as f: + for file_id in file_ids: + wav_file = corpus_dir / "Track12_test_data" / file_id + f.write("%s %s\n" % (file_id, wav_file)) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares VoxSRC22 Track1/2 test data") + + parser.add_argument("--corpus-dir", required=True, help="Path to voxsrc22 dataset") + + parser.add_argument("--output-dir", required=True, help="Ouput data path prefix") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + prepare_voxsrc22_test(**namespace_to_dict(args)) diff --git a/egs/voxceleb/v1/local/score_voxceleb1.sh b/egs/voxceleb/v1/local/score_voxceleb1.sh index 5d11848d..f12b18eb 100755 --- a/egs/voxceleb/v1/local/score_voxceleb1.sh +++ b/egs/voxceleb/v1/local/score_voxceleb1.sh @@ -2,8 +2,8 @@ # Copyright 2020 Johns Hopkins University (Jesus Villalba) # Apache 2.0. 
 #
-if [ $# -ne 2 ]; then
-  echo "Usage: $0 <data-dir> <score-dir>"
+if [ $# -ne 2 ] && [ $# -ne 3 ]; then
+  echo "Usage: $0 <data-dir> <score-dir> [suffix]"
   exit 1;
 fi
@@ -11,13 +11,14 @@ set -e
 
 data_dir=$1
 score_dir=$2
+suffix=$3
 
 for cond in o o_clean e e_clean h h_clean
 do
-  echo "Voxceleb $cond"
+  echo "Voxceleb1 $cond"
   key=$data_dir/trials_$cond
   #Compute performance
-  python local/score_dcf.py --key-file $key --score-file $score_dir/voxceleb1_scores --output-path $score_dir/voxceleb1_${cond} &
+  python local/score_dcf.py --key-file $key --score-file $score_dir/voxceleb1_scores$suffix --output-path $score_dir/voxceleb1${suffix}_${cond} &
 done
 wait
diff --git a/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py b/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py
new file mode 100755
index 00000000..78526277
--- /dev/null
+++ b/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+"""
+ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+
+"""
+import sys
+import os
+from jsonargparse import (
+    ArgumentParser,
+    ActionConfigFile,
+    ActionParser,
+    namespace_to_dict,
+)
+import time
+import logging
+
+import numpy as np
+
+from hyperion.hyp_defs import float_cpu, config_logger
+from hyperion.utils import TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.math import cosine_scoring
+from hyperion.np.pdfs import PLDA
+from hyperion.utils.list_utils import ismember
+from hyperion.helpers import TrialDataReader as TDR
+from hyperion.helpers import VectorClassReader as VCR
+from hyperion.np.transforms import TransformList
+from hyperion.np.score_norm import AdaptSNorm as SNorm
+from hyperion.np.classifiers import BinaryLogisticRegression as LR
+
+
+def get_score_filename(score_file, q_name, i, j, p):
+    if q_name is not None:
+        score_file = "%s_%s" % (score_file, q_name)
+
+    if p:
+        score_file = "%s-%03d-%03d" % (score_file, i, j)
+
+    return score_file
+
+
+def save_empty(score_file, q_name, i, j, p):
+    score_file = get_score_filename(score_file, q_name, i, j, p)
+    logging.info("saving scores to %s", score_file)
+    with open(score_file, "w") as f:
+        pass
+
+
+def save_scores(s, score_file, q_name, i, j, p):
+    score_file = get_score_filename(score_file, q_name, i, j, p)
+    logging.info("saving scores to %s", score_file)
+    s.save_txt(score_file)
+
+
+def eval_plda(
+    v_file,
+    ndx_file,
+    enroll_file,
+    num_frames_file,
+    coh_file,
+    coh_v_file,
+    score_file,
+    qmf_file,
+    model_part_idx,
+    num_model_parts,
+    seg_part_idx,
+    num_seg_parts,
+    coh_nbest,
+    **kwargs
+):
+
+    logging.info("loading data")
+    tdr = TDR(
+        v_file,
+        ndx_file,
+        enroll_file,
+        None,
+        None,
+        model_part_idx,
+        num_model_parts,
+        seg_part_idx,
+        num_seg_parts,
+    )
+    logging.info("read x-vectors and ndx")
+    x_e, x_t, enroll, ndx = tdr.read()
+    enroll_segs = tdr.enroll.key
+
+    parallel = num_model_parts > 1 or num_seg_parts > 1
+
+    if not np.any(ndx.trial_mask):
+        save_empty(score_file, None, model_part_idx, seg_part_idx, parallel)
+        if qmf_file is None:
+            for q_name in ["snorm", "maxnf", "minnf", "maxcohmu", "mincohmu"]:
+                save_empty(score_file, q_name, model_part_idx, seg_part_idx, parallel)
+        return
+
+    logging.info("read num_frames")
+    u2nf = Utt2Info.load(num_frames_file)
+    # convert num-frames to approximate duration in seconds (100 frames/s),
+    # shift by 2 s and clip to [0.1, 6] s before taking the log; these
+    # log-durations are the QMF quality measures
+    enroll_nf = np.log(
+        np.clip(
+            u2nf.filter(enroll_segs).info.astype(float) / 100 - 2.0,
+            a_min=0.1,
+            a_max=6.0,
+        )
+    )
+    test_nf = np.log(
+        np.clip(
+            u2nf.filter(ndx.seg_set).info.astype(float) / 100 - 2.0,
+            a_min=0.1,
+            a_max=6.0,
+        )
+    )
+    t1 = time.time()
+    logging.info("computing llr")
+    scores = 
cosine_scoring(x_e, x_t) + + logging.info("read cohort x-vectors") + vcr = VCR(coh_v_file, coh_file) + x_coh, ids_coh = vcr.read() + D_coh = PLDA.compute_stats_hard(x_coh, class_ids=ids_coh) + x_coh = D_coh[1] / np.expand_dims(D_coh[0], axis=-1) + + t2 = time.time() + logging.info("score cohort vs test") + scores_coh_test = cosine_scoring(x_coh, x_t) + logging.info("score enroll vs cohort") + scores_enr_coh = cosine_scoring(x_e, x_coh) + + dt = time.time() - t2 + logging.info("cohort-scoring elapsed time: %.2f s.", dt) + + t2 = time.time() + logging.info("apply s-norm") + snorm = SNorm(nbest=coh_nbest, nbest_sel_method="highest-other-side") + scores_norm, mu_z, _, mu_t, _ = snorm( + scores, scores_coh_test, scores_enr_coh, return_stats=True + ) + + dt = time.time() - t1 + num_trials = len(enroll) * x_t.shape[0] + logging.info( + "scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms." + % (dt, dt / num_trials * 1000) + ) + + q_measures = { + "maxnf": np.maximum(enroll_nf[:, None], test_nf[None, :]), + "minnf": np.minimum(enroll_nf[:, None], test_nf[None, :]), + "maxcohmu": np.maximum(mu_z, mu_t), + "mincohmu": np.minimum(mu_z, mu_t), + } + + f, loc = ismember(enroll, ndx.model_set) + trial_mask = ndx.trial_mask[loc] + s = TrialScores(enroll, ndx.seg_set, scores, score_mask=trial_mask) + save_scores(s, score_file, None, model_part_idx, seg_part_idx, parallel) + s.scores = scores_norm + save_scores(s, score_file, "snorm", model_part_idx, seg_part_idx, parallel) + if qmf_file is None: + for q_name in ["maxnf", "minnf", "maxcohmu", "mincohmu"]: + s.scores = q_measures[q_name] + save_scores(s, score_file, q_name, model_part_idx, seg_part_idx, parallel) + + return + + logging.info("applying qmf") + scores_fus = [scores.ravel()] + for q_name in ["maxnf", "minnf", "maxcohmu", "mincohmu"]: + scores_fus.append(q_measures[q_name].ravel()) + + scores_fus = np.vstack(scores_fus).T + lr = LR.load(qmf_file) + scores_fus = lr.predict(scores_fus) + scores_fus = np.reshape(scores_fus, (s.num_models, s.num_tests)) + s.scores = scores_fus + save_scores(s, score_file, "qmf", model_part_idx, seg_part_idx, parallel) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Eval cosine-scoring with QMF") + + parser.add_argument("--v-file", required=True) + parser.add_argument("--ndx-file", default=None) + parser.add_argument("--enroll-file", required=True) + parser.add_argument("--num-frames-file", required=True) + parser.add_argument("--coh-v-file", required=True) + parser.add_argument("--coh-file", required=True) + parser.add_argument("--coh-nbest", type=int, default=400) + parser.add_argument("--qmf-file", default=None) + # parser.add_argument("--preproc-file", dest="preproc_file", default=None) + + TDR.add_argparse_args(parser) + + parser.add_argument("--score-file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_plda(**namespace_to_dict(args)) diff --git a/egs/voxceleb/v1/steps_be/eval-be-v2-snorm.py b/egs/voxceleb/v1/steps_be/eval-be-v2-snorm.py new file mode 100755 index 00000000..4ad0a869 --- /dev/null +++ b/egs/voxceleb/v1/steps_be/eval-be-v2-snorm.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +""" +import sys +import os +from jsonargparse import ( + ArgumentParser, + 
ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np + +from hyperion.hyp_defs import float_cpu, config_logger +from hyperion.utils.list_utils import ismember +from hyperion.utils import TrialNdx, TrialScores +from hyperion.utils.math import cosine_scoring +from hyperion.helpers import TrialDataReader as TDR +from hyperion.helpers import PLDAFactory as F +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm +from hyperion.utils import SegmentSet +from hyperion.io import RandomAccessDataReaderFactory as DRF + + +def eval_plda( + iv_file, + ndx_file, + enroll_file, + test_file, + preproc_file, + score_file, + coh_iv_file, + coh_file, + coh_nbest, + model_part_idx, + num_model_parts, + seg_part_idx, + num_seg_parts, + **kwargs +): + + logging.info("loading data") + if preproc_file is not None: + preproc = TransformList.load(preproc_file) + else: + preproc = None + + tdr = TDR( + iv_file, + ndx_file, + enroll_file, + test_file, + preproc, + model_part_idx, + num_model_parts, + seg_part_idx, + num_seg_parts, + ) + x_e, x_t, enroll, ndx = tdr.read() + + coh_segs = SegmentSet.load(coh_file) + r = DRF.create(coh_iv_file) + x_coh = r.read(coh_segs["id"], squeeze=True) + _, spk_ids = np.unique(coh_segs["class_id"], return_inverse=True) + num_coh_spks = np.max(spk_ids) + 1 + x_coh_spk = np.zeros((num_coh_spks, x_coh.shape[1])) + for i in range(num_coh_spks): + idx = spk_ids == i + x_coh_spk[i] = np.mean(x_coh[idx], axis=0) + + t1 = time.time() + logging.info("computing llr") + scores = cosine_scoring(x_e, x_t) + + logging.info("computing enroll vs cohort") + scores_enr_coh = cosine_scoring(x_e, x_coh_spk) + logging.info("computing cohort vs test") + scores_coh_test = cosine_scoring(x_coh_spk, x_t) + + snorm = AdaptSNorm(coh_nbest) + scores = snorm(scores, scores_coh_test, scores_enr_coh) + + dt = time.time() - t1 + num_trials = len(enroll) * x_t.shape[0] + logging.info( + "scoring elapsed time: %.2f s. 
elapsed time per trial: %.2f ms.",
+        dt,
+        dt / num_trials * 1000,
+    )
+
+    if num_model_parts > 1 or num_seg_parts > 1:
+        score_file = "%s-%03d-%03d" % (score_file, model_part_idx, seg_part_idx)
+    logging.info("saving scores to %s", score_file)
+    f, loc = ismember(enroll, ndx.model_set)
+    s = TrialScores(enroll, ndx.seg_set, scores, score_mask=ndx.trial_mask[loc])
+    s.save_txt(score_file)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Eval cosine-scoring with adaptive s-norm")
+
+    parser.add_argument("--iv-file", required=True)
+    parser.add_argument("--ndx-file", default=None)
+    parser.add_argument("--enroll-file", required=True)
+    parser.add_argument("--test-file", default=None)
+    parser.add_argument("--preproc-file", default=None)
+
+    TDR.add_argparse_args(parser)
+
+    parser.add_argument("--coh-iv-file", required=True)
+    parser.add_argument("--coh-file", required=True)
+    parser.add_argument("--coh-nbest", type=int, default=1000)
+
+    parser.add_argument("--score-file", dest="score_file", required=True)
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    assert args.test_file is not None or args.ndx_file is not None
+    eval_plda(**namespace_to_dict(args))
diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh
new file mode 100755
index 00000000..8b69b0d6
--- /dev/null
+++ b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Copyright 2020 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0.
+#
+set -e
+cmd=run.pl
+stage=1
+num_parts=8
+coh_nbest=400
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+set -e
+
+if [ $# -ne 8 ]; then
+  echo "Usage: $0 <ndx-file> <enroll-file> <vector-file> <num-frames-file> <cohort-list> <cohort-vector-file> <qmf-file> <output-scores>"
+  exit 1;
+fi
+
+ndx_file=$1
+enroll_file=$2
+vector_file=$3
+nf_file=$4
+coh_file=$5
+coh_v_file=$6
+qmf_file=$7
+output_file=$8
+
+output_dir=$(dirname $output_file)
+
+mkdir -p $output_dir/log
+name=$(basename $output_file)
+
+echo "$0 score $ndx_file"
+
+if [ $stage -le 1 ];then
+  for((i=1;i<=$num_parts;i++));
+  do
+    for((j=1;j<=$num_parts;j++));
+    do
+      $cmd $output_dir/log/${name}_${i}_${j}.log \
+        hyp_utils/conda_env.sh \
+        steps_be/eval-be-cos-qmf.py \
+        --v-file scp:$vector_file \
+        --ndx-file $ndx_file \
+        --enroll-file $enroll_file \
+        --score-file $output_file \
+        --num-frames-file $nf_file \
+        --coh-v-file scp:$coh_v_file \
+        --coh-file $coh_file \
+        --coh-nbest $coh_nbest \
+        --qmf-file $qmf_file \
+        --model-part-idx $i --num-model-parts $num_parts \
+        --seg-part-idx $j --num-seg-parts $num_parts &
+    done
+  done
+  wait
+fi
+
+
+if [ $stage -le 2 ];then
+  for suffix in "" _snorm _qmf
+  do
+    output_file_k=${output_file}${suffix}
+    for((i=1;i<=$num_parts;i++));
+    do
+      for((j=1;j<=$num_parts;j++));
+      do
+        cat $output_file_k-$(printf "%03d" $i)-$(printf "%03d" $j)
+      done
+    done | sort -u > $output_file_k
+  done
+fi
+
+
diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh b/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh
new file mode 100755
index 00000000..4f5e3e76
--- /dev/null
+++ b/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# Copyright 2020 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0.
+#
+
+cmd=run.pl
+num_parts=16
+coh_nbest=1000
+if [ -f path.sh ]; then . ./path.sh; fi
+. 
parse_options.sh || exit 1;
+set -e
+
+if [ $# -ne 6 ]; then
+  echo "Usage: $0 <ndx-file> <enroll-file> <vector-file> <cohort-list> <cohort-vector-file> <output-scores>"
+  exit 1;
+fi
+
+ndx_file=$1
+enroll_file=$2
+vector_file=$3
+coh_file=$4
+coh_vector_file=$5
+output_file=$6
+
+output_dir=$(dirname $output_file)
+
+mkdir -p $output_dir/log
+name=$(basename $output_file)
+
+echo "$0 score $ndx_file"
+
+
+for((i=1;i<=$num_parts;i++));
+do
+  for((j=1;j<=$num_parts;j++));
+  do
+    $cmd $output_dir/log/${name}_${i}_${j}.log \
+      hyp_utils/conda_env.sh \
+      steps_be/eval-be-v2-snorm.py \
+      --iv-file scp:$vector_file \
+      --ndx-file $ndx_file \
+      --enroll-file $enroll_file \
+      --coh-file $coh_file \
+      --coh-iv-file scp:$coh_vector_file \
+      --score-file $output_file \
+      --coh-nbest $coh_nbest \
+      --model-part-idx $i --num-model-parts $num_parts \
+      --seg-part-idx $j --num-seg-parts $num_parts &
+    sleep 1s
+  done
+done
+wait
+
+
+for((i=1;i<=$num_parts;i++));
+do
+  for((j=1;j<=$num_parts;j++));
+  do
+    cat $output_file-$(printf "%03d" $i)-$(printf "%03d" $j)
+  done
+done | sort -u > $output_file
+
+
+
diff --git a/egs/voxceleb/v1/steps_be/train-qmf.py b/egs/voxceleb/v1/steps_be/train-qmf.py
new file mode 100755
index 00000000..07712221
--- /dev/null
+++ b/egs/voxceleb/v1/steps_be/train-qmf.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""
+ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+
+ Trains QMF calibration fusing cosine scores with quality measures
+"""
+
+import sys
+import os
+from jsonargparse import (
+    ArgumentParser,
+    ActionConfigFile,
+    ActionParser,
+    namespace_to_dict,
+)
+import time
+import logging
+
+import numpy as np
+
+from hyperion.hyp_defs import float_cpu, config_logger
+from hyperion.utils.trial_scores import TrialScores
+from hyperion.utils.trial_key import TrialKey
+from hyperion.np.metrics import compute_act_dcf, compute_min_dcf
+from hyperion.np.classifiers import BinaryLogisticRegression as LR
+
+
+def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbose):
+
+    logging.info("load key: %s", key_file)
+    key = TrialKey.load_txt(key_file)
+    logging.info("load scores: %s", score_file)
+    scr = TrialScores.load_txt(score_file)
+    tar, non = scr.get_tar_non(key)
+    ntar = len(tar)
+    nnon = len(non)
+
+    q_file = f"{score_file}_maxnf"
+    logging.info("load max num-frames: %s", q_file)
+    q = TrialScores.load_txt(q_file)
+    maxnf_tar, maxnf_non = q.get_tar_non(key)
+
+    q_file = f"{score_file}_minnf"
+    logging.info("load min num-frames: %s", q_file)
+    q = TrialScores.load_txt(q_file)
+    minnf_tar, minnf_non = q.get_tar_non(key)
+
+    q_file = f"{score_file}_maxcohmu"
+    logging.info("load max cohort mean: %s", q_file)
+    q = TrialScores.load_txt(q_file)
+    maxcohmu_tar, maxcohmu_non = q.get_tar_non(key)
+
+    q_file = f"{score_file}_mincohmu"
+    logging.info("load min cohort mean: %s", q_file)
+    q = TrialScores.load_txt(q_file)
+    mincohmu_tar, mincohmu_non = q.get_tar_non(key)
+
+    min_dcf, p_miss, p_fa = compute_min_dcf(tar, non, prior)
+    n_miss = p_miss * ntar
+    n_fa = p_fa * nnon
+    logging.info(
+        "min_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f"
+        % (min_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa)
+    )
+
+    logging.info("train calibration")
+    tar = np.vstack((tar, maxnf_tar, minnf_tar, maxcohmu_tar, mincohmu_tar)).T
+    non = np.vstack((non, maxnf_non, minnf_non, maxcohmu_non, mincohmu_non)).T
+
+    x = np.vstack((tar, non))
+    y = np.concatenate(
+        (np.ones((ntar,), dtype="int32"), np.zeros((nnon,), dtype="int32"))
+    )
+    lr = LR(
+        prior=prior,
+        lambda_reg=lambda_reg,
+        bias_scaling=1,
+        
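+        # prior: effective target prior the calibration is optimized for;
+        # lambda_reg: L2 regularization weight of the logistic regression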
solver="liblinear", + verbose=verbose, + ) + lr.fit(x, y) + logging.info(f"A={lr.A} b={lr.b}") + logging.info("save calibration at %s", model_file) + lr.save(model_file) + + logging.info("calibrate scores") + tar_cal = lr.predict(tar) + non_cal = lr.predict(non) + act_dcf, p_miss, p_fa = compute_act_dcf(tar_cal, non_cal, prior) + n_miss = p_miss * ntar + n_fa = p_fa * nnon + logging.info( + "act_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f" + % (act_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa) + ) + + output_file = f"{score_file}_qmf" + scr_out = TrialScores(key.model_set, key.seg_set) + scr_out.scores[key.tar] = tar_cal + scr_out.scores[key.non] = non_cal + scr_out.score_mask = np.logical_or(key.tar, key.non) + scr_out.save(output_file) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Trains QMF calibration") + + parser.add_argument("--score-file", required=True) + parser.add_argument("--key-file", required=True) + parser.add_argument("--model-file", required=True) + parser.add_argument("--prior", type=float, default=0.01) + parser.add_argument("--lambda-reg", type=float, default=1e-5) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + logging.debug(args) + + train_calibration(**namespace_to_dict(args)) diff --git a/egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh b/egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh new file mode 100755 index 00000000..7dbfcfb9 --- /dev/null +++ b/egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# Copyright 2020 Johns Hopkins University (Jesus Villalba) +# Apache 2.0. +# +set -e +cmd=run.pl +stage=1 +num_parts=8 +coh_nbest=400 + +if [ -f path.sh ]; then . ./path.sh; fi +. 
parse_options.sh || exit 1;
+set -e
+
+if [ $# -ne 7 ]; then
+  echo "Usage: $0 <ndx-file> <enroll-file> <vector-file> <num-frames-file> <cohort-list> <cohort-vector-file> <output-scores>"
+  exit 1;
+fi
+
+ndx_file=$1
+enroll_file=$2
+vector_file=$3
+nf_file=$4
+coh_file=$5
+coh_v_file=$6
+output_file=$7
+
+output_dir=$(dirname $output_file)
+
+mkdir -p $output_dir/log
+name=$(basename $output_file)
+
+echo "$0 score $ndx_file"
+
+if [ $stage -le 1 ];then
+  for((i=1;i<=$num_parts;i++));
+  do
+    for((j=1;j<=$num_parts;j++));
+    do
+      $cmd $output_dir/log/${name}_${i}_${j}.log \
+        hyp_utils/conda_env.sh \
+        steps_be/eval-be-cos-qmf.py \
+        --v-file scp:$vector_file \
+        --ndx-file $ndx_file \
+        --enroll-file $enroll_file \
+        --score-file $output_file \
+        --num-frames-file $nf_file \
+        --coh-v-file scp:$coh_v_file \
+        --coh-file $coh_file \
+        --coh-nbest $coh_nbest \
+        --model-part-idx $i --num-model-parts $num_parts \
+        --seg-part-idx $j --num-seg-parts $num_parts &
+    done
+  done
+  wait
+fi
+
+if [ $stage -le 2 ];then
+  for suffix in "" _maxnf _minnf _maxcohmu _mincohmu _snorm
+  do
+    output_file_k=${output_file}${suffix}
+    for((i=1;i<=$num_parts;i++));
+    do
+      for((j=1;j<=$num_parts;j++));
+      do
+        cat $output_file_k-$(printf "%03d" $i)-$(printf "%03d" $j)
+      done
+    done | sort -u > $output_file_k
+  done
+fi
+
+if [ $stage -le 3 ];then
+  $cmd $output_dir/log/train_qmf_${name}.log \
+    hyp_utils/conda_env.sh \
+    steps_be/train-qmf.py \
+    --score-file $output_file \
+    --key-file $ndx_file \
+    --model-file $output_dir/qmf.h5
+fi
+
+
diff --git a/hyperion/bin/apply_mvn_select_frames.py b/hyperion/bin/apply_mvn_select_frames.py
new file mode 100755
index 00000000..a2456dc9
--- /dev/null
+++ b/hyperion/bin/apply_mvn_select_frames.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+"""
+ Copyright 2019 Jesus Villalba (Johns Hopkins University)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import logging
+import os
+import sys
+import time
+
+import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
+from hyperion.hyp_defs import config_logger
+from hyperion.io import DataWriterFactory as DWF
+from hyperion.io import RandomAccessDataReaderFactory as RDRF
+from hyperion.io import SequentialDataReaderFactory as DRF
+from hyperion.np.feats import FrameSelector as FSel
+from hyperion.np.feats import MeanVarianceNorm as MVN
+from hyperion.utils import Utt2Info
+from hyperion.utils.kaldi_matrix import compression_methods
+
+
+def process_feats(
+    input_spec,
+    output_spec,
+    vad_spec,
+    write_num_frames_spec,
+    scp_sep,
+    path_prefix,
+    vad_path_prefix,
+    part_idx,
+    num_parts,
+    compress,
+    compression_method,
+    **kwargs
+):
+
+    logging.info("initializing")
+    mvn_args = MVN.filter_args(**kwargs)
+    mvn = MVN(**mvn_args)
+    if vad_spec is not None:
+        fs_args = FSel.filter_args(**kwargs)
+        fs = FSel(**fs_args)
+
+    if write_num_frames_spec is not None:
+        keys = []
+        info = []
+
+    logging.info("opening output stream: %s" % (output_spec))
+    with DWF.create(
+        output_spec,
+        compress=compress,
+        compression_method=compression_method,
+        scp_sep=scp_sep,
+    ) as writer:
+
+        logging.info("opening input stream: %s" % (input_spec))
+        with DRF.create(
+            input_spec,
+            path_prefix=path_prefix,
+            scp_sep=scp_sep,
+            part_idx=part_idx,
+            num_parts=num_parts,
+        ) as reader:
+            if vad_spec is not None:
+                logging.info("opening VAD stream: %s" % (vad_spec))
+                v_reader = RDRF.create(
+                    vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep
+                )
+
+            while not reader.eof():
+                key, data = reader.read(1)
+                if len(key) == 0:
+                    break
+                logging.info("processing feats at %s" % 
(key[0])) + x = mvn.normalize(data[0]) + if vad_spec is not None: + vad = v_reader.read(key)[0].astype("bool") + tot_frames = x.shape[0] + x = fs.select(x, vad) + logging.info( + "for %s detected %d/%d (%.2f %%) speech frames" + % ( + key[0], + x.shape[0], + tot_frames, + x.shape[0] / tot_frames * 100, + ) + ) + if x.shape[0] > 0: + writer.write(key, [x]) + if write_num_frames_spec is not None: + keys += key + info.append(x.shape[0]) + + if write_num_frames_spec is not None: + logging.info("writing num-frames to %s" % (write_num_frames_spec)) + u2nf = Utt2Info.create(keys, info) + u2nf.save(write_num_frames_spec) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Apply CMVN and remove silence") + + parser.add_argument("--input", dest="input_spec", required=True) + parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--write-num-frames", dest="write_num_frames_spec", default=None + ) + parser.add_argument( + "--scp-sep", dest="scp_sep", default=" ", help=("scp file field separator") + ) + parser.add_argument( + "--path-prefix", dest="path_prefix", default=None, help=("scp file_path prefix") + ) + parser.add_argument( + "--vad-path-prefix", + dest="vad_path_prefix", + default=None, + help=("scp file_path prefix for vad"), + ) + parser.add_argument( + "--part-idx", + dest="part_idx", + type=int, + default=1, + help=("splits the list of files in num-parts and process part_idx"), + ) + parser.add_argument( + "--num-parts", + dest="num_parts", + type=int, + default=1, + help=("splits the list of files in num-parts and process part_idx"), + ) + + parser.add_argument( + "--compress", + dest="compress", + default=False, + action="store_true", + help="Lossy compress the features", + ) + parser.add_argument( + "--compression-method", + dest="compression_method", + default="auto", + choices=compression_methods, + help=( + "Kaldi compression method: " + "{auto (default), speech_feat, " + "2byte-auto, 2byte-signed-integer, " + "1byte-auto, 1byte-unsigned-integer, 1byte-0-1}." 
+ ), + ) + MVN.add_argparse_args(parser) + FSel.add_argparse_args(parser) + + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + process_feats(**namespace_to_dict(args)) diff --git a/hyperion/bin/copy_feats.py b/hyperion/bin/copy_feats.py new file mode 100755 index 00000000..0385cc55 --- /dev/null +++ b/hyperion/bin/copy_feats.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +""" + Copyright 2018 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + Copy features/vectors and change format +""" + +import argparse +import logging +import os +import sys +import time + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.io import CopyFeats as CF + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + fromfile_prefix_chars="@", + description="Copy features and change format", + ) + + parser.add_argument("--input", dest="input_spec", nargs="+", required=True) + parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument("--write-num-frames", dest="write_num_frames", default=None) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + CF.add_argparse_args(parser) + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + CF(**vars(args)) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py new file mode 100755 index 00000000..437127b2 --- /dev/null +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py @@ -0,0 +1,423 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import os +import sys +import time + +import numpy as np +import pandas as pd +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + +import torch +import torch.nn as nn +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import AudioWriter as AW +from hyperion.io import RandomAccessAudioReader as AR +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.classifiers import BinaryLogisticRegression as LR +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.adv_attacks import AttackFactory +from hyperion.torch.layers import LinBinCalibrator as Calibrator +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm +from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info +from hyperion.utils.list_utils import ismember + + +class MyModel(nn.Module): + def __init__( + self, feat_extractor, xvector_model, embed_layer=None, calibrator=None, sigma=0 + ): + super().__init__() + self.feat_extractor = feat_extractor + self.xvector_model = xvector_model + self.x_e = None + self.vad_t = None + self.embed_layer = embed_layer + self.calibrator = calibrator + self.sigma = sigma + + def forward(self, s_t): + # print('sigma0=', self.sigma) + if self.sigma > 0: + s_t = s_t + self.sigma * torch.randn_like(s_t) + # 
print('sigma1=', self.sigma)
+        f_t = self.feat_extractor(s_t)
+        if self.vad_t is not None:
+            n_vad_frames = len(self.vad_t)
+            n_feat_frames = f_t.shape[1]
+            if n_vad_frames > n_feat_frames:
+                self.vad_t = self.vad_t[:n_feat_frames]
+            elif n_vad_frames < n_feat_frames:
+                f_t = f_t[:, :n_vad_frames]
+
+            f_t = f_t[:, self.vad_t]
+
+        f_t = f_t.transpose(1, 2).contiguous()
+        x_t = self.xvector_model.extract_embed(f_t, embed_layer=self.embed_layer)
+        x_t = l2_norm(x_t)
+        x_e = l2_norm(self.x_e)
+        score = torch.sum(x_e * x_t, dim=-1)
+        if self.calibrator is not None:
+            score = self.calibrator(score)
+
+        return score
+
+
+def init_device(use_gpu):
+    set_float_cpu("float32")
+    num_gpus = 1 if use_gpu else 0
+    logging.info("initializing devices num_gpus={}".format(num_gpus))
+    device = open_device(num_gpus=num_gpus)
+    return device
+
+
+def init_feats(**kwargs):
+    feat_args = AF.filter_args(**kwargs["feats"])
+    logging.info("feat args={}".format(feat_args))
+    logging.info("initializing feature extractor")
+    feat_extractor = AF(trans=False, **feat_args)
+    logging.info("feat-extractor={}".format(feat_extractor))
+    feat_extractor.eval()
+    return feat_extractor
+
+
+def load_model(model_path):
+    logging.info("loading model {}".format(model_path))
+    model = TML.load(model_path)
+    logging.info("xvector-model={}".format(model))
+    model.eval()
+    return model
+
+
+def load_calibrator(cal_file, threshold):
+    logging.info("loading calibration params {}".format(cal_file))
+    lr = LR.load(cal_file)
+    # subtracting the threshold here puts the decision threshold at 0;
+    # some attacks use thr=0 to decide if the attack is successful
+    calibrator = Calibrator(lr.A[0, 0], lr.b[0] - threshold)
+    calibrator.eval()
+    return calibrator
+
+
+def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts):
+
+    r = DRF.create(v_file)
+    enroll = Utt2Info.load(enroll_file)
+    key = TrialKey.load(key_file)
+    if num_seg_parts > 1:
+        key = key.split(1, 1, seg_part_idx, num_seg_parts)
+
+    x_e = r.read(enroll.key, squeeze=True)
+    f, idx = ismember(key.model_set, enroll.info)
+    assert np.all(f)
+    x_e = x_e[idx]
+    return key, x_e
+
+
+def eval_cosine_scoring(
+    v_file,
+    key_file,
+    enroll_file,
+    test_wav_file,
+    vad_spec,
+    vad_path_prefix,
+    model_path,
+    embed_layer,
+    score_file,
+    stats_file,
+    cal_file,
+    threshold,
+    smooth_sigma,
+    max_test_length,
+    save_adv_wav,
+    save_adv_wav_path,
+    use_gpu,
+    seg_part_idx,
+    num_seg_parts,
+    **kwargs
+):
+
+    device = init_device(use_gpu)
+    feat_extractor = init_feats(**kwargs)
+    xvector_model = load_model(model_path)
+
+    calibrator = None
+    if cal_file is not None:
+        calibrator = load_calibrator(cal_file, threshold)
+
+    tar = torch.as_tensor([1], dtype=torch.float).to(device)
+    non = torch.as_tensor([0], dtype=torch.float).to(device)
+
+    logging.info("loading key and enrollment x-vectors")
+    key, x_e = read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts)
+    x_e = torch.as_tensor(x_e, dtype=torch.get_default_dtype())
+
+    audio_args = AR.filter_args(**kwargs)
+    audio_reader = AR(test_wav_file, **audio_args)
+    wav_scale = audio_reader.wav_scale
+
+    if save_adv_wav:
+        tar_audio_writer = AW(save_adv_wav_path + "/tar2non")
+        non_audio_writer = AW(save_adv_wav_path + "/non2tar")
+
+    smooth_sigma *= wav_scale
+    model = MyModel(
+        feat_extractor, xvector_model, embed_layer, calibrator, smooth_sigma
+    )
+    model.to(device)
+    model.eval()
+
+    attack_args = AttackFactory.filter_args(**kwargs["attack"])
+    extra_args = {
+        "eps_scale": wav_scale,
+        "range_min": -wav_scale,
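+        # the attack operates on raw waveforms, so epsilon and the clipping
+        # range are expressed in units of wav_scale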
"range_max": wav_scale, + "loss": nn.functional.binary_cross_entropy_with_logits, + "time_dim": 1, + } + attack_args.update(extra_args) + logging.info("attacks args={}".format(attack_args)) + attack = AttackFactory.create(model, **attack_args) + if vad_spec is not None: + logging.info("opening VAD stream: %s", vad_spec) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + + scores = np.zeros((key.num_models, key.num_tests), dtype="float32") + attack_stats = pd.DataFrame( + columns=[ + "modelid", + "segmentid", + "snr", + "px", + "pn", + "x_l2", + "x_linf", + "n_l0", + "n_l2", + "n_linf", + "num_frames", + ] + ) + for j in range(key.num_tests): + t1 = time.time() + logging.info("scoring test utt %s", key.seg_set[j]) + s, fs = audio_reader.read([key.seg_set[j]]) + s = s[0] + fs = fs[0] + if max_test_length is not None: + max_samples = int(fs * max_test_length) + if len(s) > max_samples: + s = s[:max_samples] + + s = torch.as_tensor(s[None, :], dtype=torch.get_default_dtype()).to(device) + + if vad_spec is not None: + vad = v_reader.read([key.seg_set[j]])[0] + tot_frames = len(vad) + speech_frames = np.sum(vad) + vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( + device + ) + model.vad_t = vad + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames" + % ( + key.seg_set[j], + speech_frames, + tot_frames, + speech_frames / tot_frames * 100, + ) + ) + + t2 = time.time() + + trial_time = 0 + num_trials = 0 + for i in range(key.num_models): + if key.tar[i, j] or key.non[i, j]: + t3 = time.time() + model.x_e = x_e[i].to(device) + if key.tar[i, j]: + if attack.targeted: + t = non + else: + t = tar + else: + if attack.targeted: + t = tar + else: + t = non + + s_adv = attack.generate(s, t) + with torch.no_grad(): + # we add the threshold back here to make sure the scores are well calibrated + scores[i, j] = model(s_adv) + threshold + + t4 = time.time() + trial_time += t4 - t3 + num_trials += 1 + + s_adv = s_adv.detach() + stats_ij = compute_stats_adv_attack(s, s_adv) + stats_ij = [stat.detach().cpu().numpy()[0] for stat in stats_ij] + attack_stats = attack_stats.append( + { + "modelid": key.model_set[i], + "segmentid": key.seg_set[j], + "snr": stats_ij[0], + "px": stats_ij[1], + "pn": stats_ij[2], + "x_l2": stats_ij[3], + "x_linf": stats_ij[4], + "n_l0": stats_ij[5], + "n_l2": stats_ij[6], + "n_linf": stats_ij[7], + "num_samples": s.shape[-1], + }, + ignore_index=True, + ) + + # logging.info('min-max %f %f %f %f' % (torch.min(s), torch.max(s), torch.min(s_adv-s), torch.max(s_adv-s))) + if save_adv_wav: + s_adv = s_adv.cpu().numpy()[0] + trial_name = "%s-%s" % (key.model_set[i], key.seg_set[j]) + if key.tar[i, j] and scores[i, j] < threshold: + tar_audio_writer.write(trial_name, s_adv, fs) + elif key.non[i, j] and scores[i, j] > threshold: + non_audio_writer.write(trial_name, s_adv, fs) + + trial_time /= num_trials + t7 = time.time() + logging.info( + ( + "utt %s total-time=%.3f read-time=%.4f trial-time=%.4f n_trials=%d " + "rt-factor=%.5f" + ), + key.seg_set[j], + t7 - t1, + t2 - t1, + trial_time, + num_trials, + (t7 - t1) / (num_trials * s.shape[1] / fs), + ) + + if num_seg_parts > 1: + score_file = "%s-%03d-%03d" % (score_file, 1, seg_part_idx) + stats_file = "%s-%03d-%03d" % (stats_file, 1, seg_part_idx) + logging.info("saving scores to %s", score_file) + s = TrialScores( + key.model_set, key.seg_set, scores, score_mask=np.logical_or(key.tar, key.non) + ) + s.save_txt(score_file) + + logging.info("saving stats to %s" % (stats_file)) + 
attack_stats.to_csv(stats_file) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Eval cosine-scoring given enroll x-vector and test wave" + ) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--v-file", dest="v_file", required=True) + parser.add_argument("--key-file", dest="key_file", default=None) + parser.add_argument("--enroll-file", dest="enroll_file", required=True) + parser.add_argument("--test-wav-file", required=True) + + AR.add_class_args(parser) + AF.add_class_args(parser, prefix="feats") + + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--vad-path-prefix", + dest="vad_path_prefix", + default=None, + help=("scp file_path prefix for vad"), + ) + + parser.add_argument("--model-path", required=True) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from," + "if None the layer set in training phase is used" + ), + ) + + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + + AttackFactory.add_class_args(parser, prefix="attack") + + parser.add_argument("--seg-part-idx", default=1, type=int, help=("test part index")) + parser.add_argument( + "--num-seg-parts", + default=1, + type=int, + help=( + "number of parts in which we divide the test list " + "to run evaluation in parallel" + ), + ) + + parser.add_argument("--score-file", dest="score_file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + parser.add_argument( + "--save-adv-wav", + default=False, + action="store_true", + help="save adversarial signals to disk", + ) + parser.add_argument( + "--save-adv-wav-path", default=None, help="output path of adv signals" + ) + + # parser.add_argument('--save-adv-wav-tar-thr', + # default=0.75, type=float, + # help='min score to save signal from attack that makes non-tar into tar') + + # parser.add_argument('--save-adv-wav-non-thr', + # default=-0.75, type=float, + # help='max score to save signal from attack that makes tar into non-tar') + + parser.add_argument( + "--stats-file", default=None, help="output path of to save stats of adv signals" + ) + + parser.add_argument("--cal-file", default=None, help="score calibration file") + parser.add_argument("--threshold", default=0, type=float, help="decision threshold") + parser.add_argument( + "--smooth-sigma", default=0, type=float, help="sigma for smoothing" + ) + parser.add_argument( + "--max-test-length", + default=None, + type=float, + help=( + "maximum length (secs) for the test side, " + "this is to avoid GPU memory errors" + ), + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_cosine_scoring(**namespace_to_dict(args)) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py new file mode 100755 index 00000000..aaa91214 --- /dev/null +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import os +import sys +import time +# [Added Sonal May21] +from pathlib import Path + +import numpy as np +import pandas as pd +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, 
+                          namespace_to_dict)
+
+import torch
+import torch.nn as nn
+from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
+from hyperion.io import AudioWriter as AW
+from hyperion.io import RandomAccessAudioReader as AR
+from hyperion.io import RandomAccessDataReaderFactory as DRF
+from hyperion.io import VADReaderFactory as VRF
+from hyperion.np.classifiers import BinaryLogisticRegression as LR
+from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.adv_attacks import AttackFactory
+from hyperion.torch.adv_defenses.wave_gan_white import WaveGANDefender
+from hyperion.torch.layers import LinBinCalibrator as Calibrator
+from hyperion.torch.narchs import AudioFeatsMVN as AF
+from hyperion.torch.utils import open_device
+from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
+from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.list_utils import ismember
+
+torch.backends.cudnn.enabled = False
+
+
+class MyModel(nn.Module):
+    def __init__(
+        self,
+        feat_extractor,
+        xvector_model,
+        embed_layer=None,
+        calibrator=None,
+        sigma=0,
+        smoothing_after_wavegan=None,
+        wave_gan_defender=None,
+        wav_scale=2 ** 15 - 1,
+    ):
+        super().__init__()
+        self.feat_extractor = feat_extractor
+        self.xvector_model = xvector_model
+        self.x_e = None
+        self.vad_t = None
+        self.embed_layer = embed_layer
+        self.calibrator = calibrator
+        self.sigma = sigma
+        self.smoothing_after_wavegan = smoothing_after_wavegan
+        self.wave_gan_defender = wave_gan_defender
+        self.wav_scale = wav_scale
+        self.apply_wavegan = wave_gan_defender is not None
+
+    def forward(self, s_t):
+
+        # Pre-processing defense: WaveGAN + smoothing [Added Sonal May21]
+        s_t = s_t / self.wav_scale
+        if self.smoothing_after_wavegan:
+            if self.apply_wavegan:
+                s_t = self.wave_gan_defender(s_t)
+            if self.sigma > 0:
+                s_t = s_t + self.sigma * torch.randn_like(s_t)
+        else:
+            if self.sigma > 0:
+                s_t = s_t + self.sigma * torch.randn_like(s_t)
+            if self.apply_wavegan:
+                s_t = self.wave_gan_defender(s_t)
+
+        s_t = self.wav_scale * s_t
+        # End of pre-processing defense
+
+        f_t = self.feat_extractor(s_t)
+        if self.vad_t is not None:
+            n_vad_frames = len(self.vad_t)
+            n_feat_frames = f_t.shape[1]
+            if n_vad_frames > n_feat_frames:
+                self.vad_t = self.vad_t[:n_feat_frames]
+            elif n_vad_frames < n_feat_frames:
+                f_t = f_t[:, :n_vad_frames]
+
+            f_t = f_t[:, self.vad_t]
+
+        f_t = f_t.transpose(1, 2).contiguous()
+        x_t = self.xvector_model.extract_embed(f_t, embed_layer=self.embed_layer)
+        x_t = l2_norm(x_t)
+        x_e = l2_norm(self.x_e)
+        score = torch.sum(x_e * x_t, dim=-1)
+        if self.calibrator is not None:
+            score = self.calibrator(score)
+
+        return score
+
+
+def fix_out_of_memory(model, tensors):
+    for p in model.parameters():
+        if p.grad is not None:
+            del p.grad  # free some memory
+
+    for tensor in tensors:
+        if tensor.grad is not None:
+            del tensor.grad
+
+    torch.cuda.empty_cache()
+
+
+def init_device(use_gpu):
+    set_float_cpu("float32")
+    num_gpus = 1 if use_gpu else 0
+    logging.info("initializing devices num_gpus={}".format(num_gpus))
+    device = open_device(num_gpus=num_gpus)
+    return device
+
+
+def init_feats(**kwargs):
+    feat_args = AF.filter_args(**kwargs["feats"])
+    logging.info("feat args={}".format(feat_args))
+    logging.info("initializing feature extractor")
+    feat_extractor = AF(trans=False, **feat_args)
+    logging.info("feat-extractor={}".format(feat_extractor))
+    feat_extractor.eval()
+    return feat_extractor
+
+
+def load_model(model_path):
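+    # TML (TorchModelLoader) restores the serialized x-vector network; it is
+    # put in eval mode so no dropout or batch-norm updates run during attacks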
logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("xvector-model={}".format(model)) + model.eval() + return model + + +def load_calibrator(cal_file, threshold): + logging.info("loading calibration params {}".format(cal_file)) + lr = LR.load(cal_file) + # subting the threshold here will put the decision threshold in 0 + # some attacks use thr=0 to decide if the attack is succesful + calibrator = Calibrator(lr.A[0, 0], lr.b[0] - threshold) + calibrator.eval() + return calibrator + + +def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): + + r = DRF.create(v_file) + enroll = Utt2Info.load(enroll_file) + key = TrialKey.load(key_file) + if num_seg_parts > 1: + key = key.split(1, 1, seg_part_idx, num_seg_parts) + + x_e = r.read(enroll.key, squeeze=True) + f, idx = ismember(key.model_set, enroll.info) + assert np.all(f) + x_e = x_e[idx] + return key, x_e + + +def eval_cosine_scoring_wavegan( + v_file, + key_file, + enroll_file, + test_wav_file, + vad_spec, + vad_path_prefix, + model_path, + embed_layer, + score_file, + stats_file, + cal_file, + threshold, + smooth_sigma, + max_test_length, + save_adv_wav, + save_adv_wav_path, + use_gpu, + seg_part_idx, + num_seg_parts, + smoothing_after_wavegan, + wave_gan_root_dir, + wave_gan_model_ckpt, + **kwargs +): + + device = init_device(use_gpu) + feat_extractor = init_feats(**kwargs) + + wave_gan_defender = WaveGANDefender( + Path(wave_gan_root_dir), Path(wave_gan_model_ckpt) + ) + xvector_model = load_model(model_path) + + calibrator = None + if cal_file is not None: + calibrator = load_calibrator(cal_file, threshold) + + tar = torch.as_tensor([1], dtype=torch.float).to(device) + non = torch.as_tensor([0], dtype=torch.float).to(device) + + logging.info("loading key and enrollment x-vectors") + key, x_e = read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts) + x_e = torch.as_tensor(x_e, dtype=torch.get_default_dtype()) + + audio_args = AR.filter_args(**kwargs) + audio_reader = AR(test_wav_file, **audio_args) + wav_scale = audio_reader.wav_scale + + if save_adv_wav: + tar_audio_writer = AW(save_adv_wav_path + "/tar2non") + non_audio_writer = AW(save_adv_wav_path + "/non2tar") + + model = MyModel( + feat_extractor, + xvector_model, + embed_layer, + calibrator, + smooth_sigma, + smoothing_after_wavegan, + wave_gan_defender, + wav_scale, + ) + model.to(device) + model.eval() + + attack_args = AttackFactory.filter_args(**kwargs["attack"]) + extra_args = { + "eps_scale": wav_scale, + "range_min": -wav_scale, + "range_max": wav_scale, + "loss": nn.functional.binary_cross_entropy_with_logits, + "time_dim": 1, + } + attack_args.update(extra_args) + logging.info("attacks args={}".format(attack_args)) + attack = AttackFactory.create(model, **attack_args) + if vad_spec is not None: + logging.info("opening VAD stream: %s" % (vad_spec)) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + + scores = np.zeros((key.num_models, key.num_tests), dtype="float32") + attack_stats = pd.DataFrame( + columns=[ + "modelid", + "segmentid", + "snr", + "px", + "pn", + "x_l2", + "x_linf", + "n_l0", + "n_l2", + "n_linf", + "num_frames", + ] + ) + + for j in range(key.num_tests): + t1 = time.time() + logging.info("scoring test utt %s" % (key.seg_set[j])) + s, fs = audio_reader.read([key.seg_set[j]]) + s = s[0] + fs = fs[0] + + if max_test_length is not None: + max_samples = int(fs * max_test_length) + if len(s) > max_samples: + s = s[:max_samples] + + s_cpu = s[None, :] + s = 
torch.as_tensor(s_cpu, dtype=torch.get_default_dtype(), device=device) + + if vad_spec is not None: + vad = v_reader.read([key.seg_set[j]])[0] + tot_frames = len(vad) + speech_frames = np.sum(vad) + vad = torch.as_tensor( + vad.astype(np.bool, copy=False), dtype=torch.bool, device=device + ) + model.vad_t = vad + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames" + % ( + key.seg_set[j], + speech_frames, + tot_frames, + speech_frames / tot_frames * 100, + ) + ) + + t2 = time.time() + + trial_time = 0 + num_trials = 0 + for i in range(key.num_models): + if key.tar[i, j] or key.non[i, j]: + t3 = time.time() + model.x_e = x_e[i].to(device) + if key.tar[i, j]: + if attack.targeted: + t = non + else: + t = tar + else: + if attack.targeted: + t = tar + else: + t = non + + s_adv = attack.generate(s, t) + with torch.no_grad(): + scores[i, j] = model(s_adv) + threshold + + t4 = time.time() + trial_time += t4 - t3 + num_trials += 1 + + s_adv = s_adv.detach() + stats_ij = compute_stats_adv_attack(s, s_adv) + stats_ij = [stat.detach().cpu().numpy()[0] for stat in stats_ij] + attack_stats = attack_stats.append( + { + "modelid": key.model_set[i], + "segmentid": key.seg_set[j], + "snr": stats_ij[0], + "px": stats_ij[1], + "pn": stats_ij[2], + "x_l2": stats_ij[3], + "x_linf": stats_ij[4], + "n_l0": stats_ij[5], + "n_l2": stats_ij[6], + "n_linf": stats_ij[7], + "num_samples": s.shape[-1], + }, + ignore_index=True, + ) + + # logging.info('min-max %f %f %f %f' % (torch.min(s), torch.max(s), torch.min(s_adv-s), torch.max(s_adv-s))) + if save_adv_wav: + s_adv = s_adv.cpu().numpy()[0] + trial_name = "%s-%s" % (key.model_set[i], key.seg_set[j]) + if key.tar[i, j] and scores[i, j] < threshold: + tar_audio_writer.write(trial_name, s_adv, fs) + elif key.non[i, j] and scores[i, j] > threshold: + non_audio_writer.write(trial_name, s_adv, fs) + + trial_time /= num_trials + t7 = time.time() + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f trial-time=%.3f n_trials=%d " + "rt-factor=%.5f" + ), + key.seg_set[j], + t7 - t1, + t2 - t1, + trial_time, + num_trials, + (t7 - t1) / (num_trials * s.shape[1] / fs), + ) + + if num_seg_parts > 1: + score_file = "%s-%03d-%03d" % (score_file, 1, seg_part_idx) + stats_file = "%s-%03d-%03d" % (stats_file, 1, seg_part_idx) + logging.info("saving scores to %s" % (score_file)) + s = TrialScores( + key.model_set, key.seg_set, scores, score_mask=np.logical_or(key.tar, key.non) + ) + s.save_txt(score_file) + + logging.info("saving stats to %s" % (stats_file)) + attack_stats.to_csv(stats_file) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Eval cosine-scoring given enroll x-vector and test wave" + ) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--v-file", dest="v_file", required=True) + parser.add_argument("--key-file", dest="key_file", default=None) + parser.add_argument("--enroll-file", dest="enroll_file", required=True) + parser.add_argument("--test-wav-file", required=True) + + AR.add_class_args(parser) + AF.add_class_args(parser, prefix="feats") + + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--vad-path-prefix", + dest="vad_path_prefix", + default=None, + help=("scp file_path prefix for vad"), + ) + + parser.add_argument("--model-path", required=True) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from," + "if None the layer set in training phase is used" + ), + ) + + parser.add_argument( 
+ "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + + AttackFactory.add_class_args(parser, prefix="attack") + + parser.add_argument("--seg-part-idx", default=1, type=int, help=("test part index")) + parser.add_argument( + "--num-seg-parts", + default=1, + type=int, + help=( + "number of parts in which we divide the test list " + "to run evaluation in parallel" + ), + ) + + parser.add_argument("--score-file", dest="score_file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + parser.add_argument( + "--save-adv-wav", + default=False, + action="store_true", + help="save adversarial signals to disk", + ) + parser.add_argument( + "--save-adv-wav-path", default=None, help="output path of adv signals" + ) + + # parser.add_argument('--save-adv-wav-tar-thr', + # default=0.75, type=float, + # help='min score to save signal from attack that makes non-tar into tar') + + # parser.add_argument('--save-adv-wav-non-thr', + # default=-0.75, type=float, + # help='max score to save signal from attack that makes tar into non-tar') + + parser.add_argument( + "--stats-file", default=None, help="output path of to save stats of adv signals" + ) + + parser.add_argument("--cal-file", default=None, help="score calibration file") + parser.add_argument("--threshold", default=0, type=float, help="decision threshold") + parser.add_argument( + "--smooth-sigma", default=0, type=float, help="sigma for smoothing" + ) + parser.add_argument( + "--max-test-length", + default=5, + type=float, + help=( + "maximum length (secs) for the test side, " + "this is to avoid GPU memory errors" + ), + ) + + # Defense: WaveGAN specific arguments [Added Sonal May21] + parser.add_argument( + "--smoothing-after-wavegan", + default=False, + action="store_true", + help=( + "Smoothing before or after wavegan, if true: " + "smoothing is done after wavegan" + ), + ) + + parser.add_argument( + "--wave-gan-root-dir", default=None, help="WaveGAN model root directory" + ) + parser.add_argument( + "--wave-gan-model-ckpt", default=None, help="WaveGAN model checkpoint" + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_cosine_scoring_wavegan(**namespace_to_dict(args)) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py new file mode 100755 index 00000000..8d4add76 --- /dev/null +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py @@ -0,0 +1,430 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import os +import sys +import time + +import numpy as np +import pandas as pd +from art.classifiers import PyTorchClassifier +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + +import torch +import torch.nn as nn +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import AudioWriter as AW +from hyperion.io import RandomAccessAudioReader as AR +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.classifiers import BinaryLogisticRegression as LR +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.adv_attacks.art_attack_factory import \ + ARTAttackFactory as AttackFactory +from 
hyperion.torch.layers import LinBinCalibrator as Calibrator
+from hyperion.torch.narchs import AudioFeatsMVN as AF
+from hyperion.torch.utils import open_device
+from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
+from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
+from hyperion.utils.list_utils import ismember
+
+
+def init_device(use_gpu):
+    set_float_cpu("float32")
+    num_gpus = 1 if use_gpu else 0
+    logging.info("initializing devices num_gpus={}".format(num_gpus))
+    device = open_device(num_gpus=num_gpus)
+    return device
+
+
+def init_feats(**kwargs):
+    feat_args = AF.filter_args(**kwargs["feats"])
+    logging.info("feat args={}".format(feat_args))
+    logging.info("initializing feature extractor")
+    feat_extractor = AF(trans=False, **feat_args)
+    logging.info("feat-extractor={}".format(feat_extractor))
+    feat_extractor.eval()
+    return feat_extractor
+
+
+def load_model(model_path):
+    logging.info("loading model {}".format(model_path))
+    model = TML.load(model_path)
+    logging.info("xvector-model={}".format(model))
+    model.eval()
+    return model
+
+
+def load_calibrator(cal_file):
+    logging.info("loading calibration params {}".format(cal_file))
+    lr = LR.load(cal_file)
+    calibrator = Calibrator(lr.A[0, 0], lr.b[0])
+    calibrator.eval()
+    return calibrator
+
+
+def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts):
+
+    r = DRF.create(v_file)
+    enroll = Utt2Info.load(enroll_file)
+    key = TrialKey.load(key_file)
+    if num_seg_parts > 1:
+        key = key.split(1, 1, seg_part_idx, num_seg_parts)
+
+    x_e = r.read(enroll.key, squeeze=True)
+    f, idx = ismember(key.model_set, enroll.info)
+    assert np.all(f)
+    x_e = x_e[idx]
+    return key, x_e
+
+
+class MyModel(nn.Module):
+    def __init__(
+        self,
+        feat_extractor,
+        xvector_model,
+        embed_layer=None,
+        calibrator=None,
+        threshold=0,
+    ):
+        super().__init__()
+        self.feat_extractor = feat_extractor
+        self.xvector_model = xvector_model
+        self.x_e = None
+        self.vad_t = None
+        self.embed_layer = embed_layer
+        self.calibrator = calibrator
+        self.threshold = threshold
+
+    def forward(self, s_t):
+        f_t = self.feat_extractor(s_t)
+        if self.vad_t is not None:
+            n_vad_frames = len(self.vad_t)
+            n_feat_frames = f_t.shape[1]
+            if n_vad_frames > n_feat_frames:
+                self.vad_t = self.vad_t[:n_feat_frames]
+            elif n_vad_frames < n_feat_frames:
+                f_t = f_t[:, :n_vad_frames]
+
+            f_t = f_t[:, self.vad_t]
+
+        f_t = f_t.transpose(1, 2).contiguous()
+        x_t = self.xvector_model.extract_embed(f_t, embed_layer=self.embed_layer)
+        x_t = l2_norm(x_t)
+        x_e = l2_norm(self.x_e)
+        tar_score = torch.sum(x_e * x_t, dim=-1, keepdim=True)
+        if self.calibrator is not None:
+            # keep the calibrated value; assigning it to a throwaway variable
+            # would silently discard the calibration
+            tar_score = self.calibrator(tar_score)
+
+        non_score = self.threshold + 0 * tar_score
+        score = torch.cat((non_score, tar_score), dim=-1)  # .unsqueeze(0)
+        return score
+
+
+def eval_cosine_scoring(
+    v_file,
+    key_file,
+    enroll_file,
+    test_wav_file,
+    vad_spec,
+    vad_path_prefix,
+    model_path,
+    embed_layer,
+    score_file,
+    stats_file,
+    cal_file,
+    threshold,
+    save_adv_wav,
+    save_adv_wav_path,
+    max_test_length,
+    use_gpu,
+    seg_part_idx,
+    num_seg_parts,
+    **kwargs
+):
+
+    device_type = "gpu" if use_gpu else "cpu"
+    device = init_device(use_gpu)
+    feat_extractor = init_feats(**kwargs)
+    xvector_model = load_model(model_path)
+
+    calibrator = None
+    if cal_file is not None:
+        calibrator = load_calibrator(cal_file)
+
+    model = MyModel(
+        feat_extractor, xvector_model, embed_layer, calibrator, threshold=threshold
+    )
+    model.to(device)
+    model.eval()
+
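+    # The wrapper above recasts verification as a 2-class problem so generic
+    # ART attacks can drive it: forward() returns the pair [threshold, score],
+    # hence class 1 means "trial accepted" and class 0 "trial rejected".
+    # Illustrative sketch (hypothetical tensors):
+    #
+    #   logits = model(s_t)           # shape (1, 2) = [[thr, cos_score]]
+    #   decision = logits.argmax(-1)  # 1 -> accept (score > thr), 0 -> reject
+    #
+    # An attack therefore only needs to flip the argmax, which is why the
+    # tar/non labels below are swapped when attack.targeted is set.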
+    tar = np.asarray([1], dtype=int)
+    non = np.asarray([0], dtype=int)
+
+    logging.info("loading key and enrollment x-vectors")
+    key, x_e = read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts)
+    x_e = torch.as_tensor(x_e, dtype=torch.get_default_dtype())
+
+    audio_args = AR.filter_args(**kwargs)
+    audio_reader = AR(test_wav_file, **audio_args)
+    wav_scale = audio_reader.wav_scale
+
+    if save_adv_wav:
+        tar_audio_writer = AW(save_adv_wav_path + "/tar2non")
+        non_audio_writer = AW(save_adv_wav_path + "/non2tar")
+
+    attack_args = AttackFactory.filter_args(**kwargs["attack"])
+    extra_args = {"eps_scale": wav_scale}
+    attack_args.update(extra_args)
+    logging.info("attack-args={}".format(attack_args))
+
+    if vad_spec is not None:
+        logging.info("opening VAD stream: %s" % (vad_spec))
+        v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ")
+
+    scores = np.zeros((key.num_models, key.num_tests), dtype="float32")
+    attack_stats = pd.DataFrame(
+        columns=[
+            "modelid",
+            "segmentid",
+            "snr",
+            "px",
+            "pn",
+            "x_l2",
+            "x_linf",
+            "n_l0",
+            "n_l2",
+            "n_linf",
+            "num_samples",
+        ]
+    )
+
+    for j in range(key.num_tests):
+        t1 = time.time()
+        logging.info("scoring test utt %s" % (key.seg_set[j]))
+        s, fs = audio_reader.read([key.seg_set[j]])
+        s = s[0]
+        fs = fs[0]
+
+        if max_test_length is not None:
+            max_samples = int(fs * max_test_length)
+            if len(s) > max_samples:
+                s = s[:max_samples]
+
+        s = s[None, :].astype("float32", copy=False)
+        s_tensor = torch.as_tensor(s, dtype=torch.get_default_dtype()).to(device)
+
+        if vad_spec is not None:
+            vad = v_reader.read([key.seg_set[j]])[0]
+            tot_frames = len(vad)
+            speech_frames = np.sum(vad)
+            vad = torch.as_tensor(vad.astype(bool, copy=False), dtype=torch.bool).to(
+                device
+            )
+            model.vad_t = vad
+            logging.info(
+                "utt %s detected %d/%d (%.2f %%) speech frames"
+                % (
+                    key.seg_set[j],
+                    speech_frames,
+                    tot_frames,
+                    speech_frames / tot_frames * 100,
+                )
+            )
+
+        t2 = time.time()
+
+        trial_time = 0
+        num_trials = 0
+        model_art = PyTorchClassifier(
+            model=model,
+            loss=nn.CrossEntropyLoss(),
+            optimizer=None,
+            input_shape=[1, s.shape[1]],
+            nb_classes=2,
+            clip_values=(-wav_scale, wav_scale),
+            device_type=device_type,
+        )
+
+        attack_args["num_samples"] = s.shape[-1]
+        attack = AttackFactory.create(model_art, **attack_args)
+        for i in range(key.num_models):
+            if key.tar[i, j] or key.non[i, j]:
+                t3 = time.time()
+                model.x_e = x_e[i].to(device)
+                if key.tar[i, j]:
+                    if attack.targeted:
+                        t = non
+                    else:
+                        t = tar
+                else:
+                    if attack.targeted:
+                        t = tar
+                    else:
+                        t = non
+
+                s_adv = attack.generate(s, t)
+                s_adv = torch.from_numpy(s_adv).to(device)
+                with torch.no_grad():
+                    scores[i, j] = model(s_adv).cpu().numpy()[0, 1]
+
+                t4 = time.time()
+                trial_time += t4 - t3
+                num_trials += 1
+
+                s_adv = s_adv.detach()
+                stats_ij = compute_stats_adv_attack(s_tensor, s_adv)
+                stats_ij = [stat.detach().cpu().numpy()[0] for stat in stats_ij]
+                attack_stats = attack_stats.append(
+                    {
+                        "modelid": key.model_set[i],
+                        "segmentid": key.seg_set[j],
+                        "snr": stats_ij[0],
+                        "px": stats_ij[1],
+                        "pn": stats_ij[2],
+                        "x_l2": stats_ij[3],
+                        "x_linf": stats_ij[4],
+                        "n_l0": stats_ij[5],
+                        "n_l2": stats_ij[6],
+                        "n_linf": stats_ij[7],
+                        "num_samples": s.shape[-1],
+                    },
+                    ignore_index=True,
+                )
+
+                # logging.info('min-max %f %f %f %f' % (torch.min(s), torch.max(s), torch.min(s_adv-s), torch.max(s_adv-s)))
+                if save_adv_wav:
+                    s_adv = s_adv.cpu().numpy()[0]
+                    trial_name = "%s-%s" % (key.model_set[i], key.seg_set[j])
+                    if key.tar[i, j] and scores[i, j] < 
threshold: + tar_audio_writer.write(trial_name, s_adv, fs) + elif key.non[i, j] and scores[i, j] > threshold: + non_audio_writer.write(trial_name, s_adv, fs) + + del attack + del model_art + trial_time /= num_trials + t7 = time.time() + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f trial-time=%.3f n_trials=%d " + "rt-factor=%.5f" + ), + key.seg_set[j], + t7 - t1, + t2 - t1, + trial_time, + num_trials, + (t7 - t1) / (num_trials * s.shape[1] / fs), + ) + + if num_seg_parts > 1: + score_file = "%s-%03d-%03d" % (score_file, 1, seg_part_idx) + stats_file = "%s-%03d-%03d" % (stats_file, 1, seg_part_idx) + logging.info("saving scores to %s" % (score_file)) + s = TrialScores( + key.model_set, key.seg_set, scores, score_mask=np.logical_or(key.tar, key.non) + ) + s.save_txt(score_file) + + logging.info("saving stats to %s" % (stats_file)) + attack_stats.to_csv(stats_file) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description=( + "Eval cosine-scoring given enroll x-vector " + "and adversarial test wave from ART" + ) + ) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--v-file", dest="v_file", required=True) + parser.add_argument("--key-file", dest="key_file", default=None) + parser.add_argument("--enroll-file", dest="enroll_file", required=True) + parser.add_argument("--test-wav-file", required=True) + + AR.add_class_args(parser) + AF.add_class_args(parser, prefix="feats") + + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--vad-path-prefix", + dest="vad_path_prefix", + default=None, + help=("scp file_path prefix for vad"), + ) + + parser.add_argument("--model-path", required=True) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from," + "if None the layer set in training phase is used" + ), + ) + + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + + AttackFactory.add_class_args(parser, prefix="attack") + + parser.add_argument("--seg-part-idx", default=1, type=int, help=("test part index")) + parser.add_argument( + "--num-seg-parts", + default=1, + type=int, + help=( + "number of parts in which we divide the test list " + "to run evaluation in parallel" + ), + ) + + parser.add_argument("--score-file", dest="score_file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + parser.add_argument( + "--save-adv-wav", + default=False, + action="store_true", + help="save adversarial signals to disk", + ) + parser.add_argument( + "--save-adv-wav-path", default=None, help="output path of adv signals" + ) + + parser.add_argument( + "--stats-file", default=None, help="output path of to save stats of adv signals" + ) + + parser.add_argument("--cal-file", default=None, help="score calibration file") + parser.add_argument("--threshold", default=0, type=float, help="decision threshold") + parser.add_argument( + "--max-test-length", + default=None, + type=float, + help=( + "maximum length (secs) for the test side, " + "this is to avoid GPU memory errors" + ), + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_cosine_scoring(**namespace_to_dict(args)) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py new file mode 100755 index 00000000..0e9493c0 --- /dev/null +++ 
b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import os +import sys +import time + +import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + +import torch +import torch.nn as nn +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import RandomAccessAudioReader as AR +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.classifiers import BinaryLogisticRegression as LR +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.layers import LinBinCalibrator as Calibrator +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.torch.utils.misc import l2_norm +from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info +from hyperion.utils.list_utils import ismember + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def init_feats(device, **kwargs): + feat_args = AF.filter_args(**kwargs["feats"]) + logging.info("feat args={}".format(feat_args)) + logging.info("initializing feature extractor") + feat_extractor = AF(trans=False, **feat_args) + logging.info("feat-extractor={}".format(feat_extractor)) + feat_extractor.eval() + feat_extractor.to(device) + return feat_extractor + + +def load_model(model_path, device): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("xvector-model={}".format(model)) + model.to(device) + model.eval() + return model + + +def load_calibrator(cal_file, device): + logging.info("loading calibration params {}".format(cal_file)) + lr = LR.load(cal_file) + calibrator = Calibrator(lr.A[0, 0], lr.b[0]) + calibrator.to(device) + calibrator.eval() + return calibrator + + +def read_data(v_file, ndx_file, enroll_file, seg_part_idx, num_seg_parts): + + r = DRF.create(v_file) + enroll = Utt2Info.load(enroll_file) + try: + ndx = TrialNdx.load(ndx_file) + except: + ndx = TrialKey.load(ndx_file).to_ndx() + + if num_seg_parts > 1: + ndx = ndx.split(1, 1, seg_part_idx, num_seg_parts) + + x_e = r.read(enroll.key, squeeze=True) + + f, idx = ismember(ndx.model_set, enroll.info) + + assert np.all(f) + x_e = x_e[idx] + + return ndx, x_e + + +def eval_cosine_scoring( + v_file, + ndx_file, + enroll_file, + test_wav_file, + vad_spec, + vad_path_prefix, + model_path, + embed_layer, + score_file, + cal_file, + max_test_length, + use_gpu, + seg_part_idx, + num_seg_parts, + **kwargs +): + + device = init_device(use_gpu) + feat_extractor = init_feats(device, **kwargs) + model = load_model(model_path, device) + + calibrator = None + if cal_file is not None: + calibrator = load_calibrator(cal_file, device) + + logging.info("loading ndx and enrollment x-vectors") + ndx, y_e = read_data(v_file, ndx_file, enroll_file, seg_part_idx, num_seg_parts) + + audio_args = AR.filter_args(**kwargs) + audio_reader = AR(test_wav_file, **audio_args) + + if vad_spec is not None: + logging.info("opening VAD stream: %s" % (vad_spec)) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + + scores = np.zeros((ndx.num_models, 
ndx.num_tests), dtype="float32") + with torch.no_grad(): + for j in range(ndx.num_tests): + t1 = time.time() + logging.info("scoring test utt %s" % (ndx.seg_set[j])) + s, fs = audio_reader.read([ndx.seg_set[j]]) + s = s[0] + fs = fs[0] + + if max_test_length is not None: + max_samples = int(fs * max_test_length) + if len(s) > max_samples: + s = s[:max_samples] + + t2 = time.time() + s = torch.as_tensor(s[None, :], dtype=torch.get_default_dtype()).to(device) + x_t = feat_extractor(s) + t4 = time.time() + tot_frames = x_t.shape[1] + if vad_spec is not None: + vad = torch.as_tensor( + v_reader.read([ndx.seg_set[j]], num_frames=x_t.shape[1])[0].astype( + np.uint8, copy=False + ), + dtype=torch.uint8, + ).to(device) + x_t = x_t[:, vad] + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames" + % ( + ndx.seg_set[j], + x_t.shape[1], + tot_frames, + x_t.shape[1] / tot_frames * 100, + ) + ) + + t5 = time.time() + x_t = x_t.transpose(1, 2).contiguous() + y_t = model.extract_embed(x_t, embed_layer=embed_layer) + y_t = l2_norm(y_t) + t6 = time.time() + + for i in range(ndx.num_models): + if ndx.trial_mask[i, j]: + y_e_i = torch.as_tensor(y_e[i], dtype=torch.get_default_dtype()).to( + device + ) + y_e_i = l2_norm(y_e_i) + scores_ij = torch.sum(y_e_i * y_t, dim=-1) + if calibrator is None: + scores[i, j] = scores_ij + else: + scores[i, j] = calibrator(scores_ij) + + t7 = time.time() + num_trials = np.sum(ndx.trial_mask[:, j]) + trial_time = (t7 - t6) / num_trials + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f feat-time=%.3f " + "vad-time=%.3f embed-time=%.3f trial-time=%.3f n_trials=%d " + "rt-factor=%.2f" + ), + ndx.seg_set[j], + t7 - t1, + t2 - t1, + t4 - t2, + t5 - t4, + t6 - t5, + trial_time, + num_trials, + (t7 - t1) / (num_trials * s.shape[1] / fs), + ) + + if num_seg_parts > 1: + score_file = "%s-%03d-%03d" % (score_file, 1, seg_part_idx) + logging.info("saving scores to %s", score_file) + s = TrialScores(ndx.model_set, ndx.seg_set, scores, score_mask=ndx.trial_mask) + s.save_txt(score_file) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Eval cosine-scoring given enroll x-vector and test wave" + ) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--v-file", dest="v_file", required=True) + parser.add_argument("--ndx-file", dest="ndx_file", default=None) + parser.add_argument("--enroll-file", dest="enroll_file", required=True) + parser.add_argument("--test-wav-file", required=True) + + AR.add_class_args(parser) + AF.add_class_args(parser, prefix="feats") + + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--vad-path-prefix", + dest="vad_path_prefix", + default=None, + help=("scp file_path prefix for vad"), + ) + + parser.add_argument("--model-path", required=True) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from," + "if None the layer set in training phase is used" + ), + ) + + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + + parser.add_argument("--seg-part-idx", default=1, type=int, help=("test part index")) + parser.add_argument( + "--num-seg-parts", + default=1, + type=int, + help=( + "number of parts in which we divide the test list " + "to run evaluation in parallel" + ), + ) + + parser.add_argument("--score-file", required=True) + parser.add_argument("--cal-file", default=None) + parser.add_argument( + "-v", "--verbose", dest="verbose", 
default=1, choices=[0, 1, 2, 3], type=int + ) + parser.add_argument( + "--max-test-length", + default=None, + type=float, + help=( + "maximum length (secs) for the test side, " + "this is to avoid GPU memory errors" + ), + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_cosine_scoring(**namespace_to_dict(args)) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py new file mode 100755 index 00000000..e0754498 --- /dev/null +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py @@ -0,0 +1,439 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import os +import sys +import time + +import numpy as np +import pandas as pd +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + +import torch +import torch.nn as nn +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import AudioWriter as AW +from hyperion.io import RandomAccessAudioReader as AR +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.classifiers import BinaryLogisticRegression as LR +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.adv_attacks import AttackFactory +from hyperion.torch.layers import LinBinCalibrator as Calibrator +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm +from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info +from hyperion.utils.list_utils import ismember + + +class MyModel(nn.Module): + def __init__( + self, feat_extractor, xvector_model, embed_layer=None, calibrator=None + ): + super().__init__() + self.feat_extractor = feat_extractor + self.xvector_model = xvector_model + self.x_e = None + self.vad_t = None + self.embed_layer = embed_layer + self.calibrator = calibrator + + def forward(self, s_t): + f_t = s_t + f_t = self.feat_extractor(s_t) + if self.vad_t is not None: + n_vad_frames = len(self.vad_t) + n_feat_frames = f_t.shape[1] + if n_vad_frames > n_feat_frames: + self.vad_t = self.vad_t[:n_feat_frames] + elif n_vad_frames < n_feat_frames: + f_t = f_t[:, :n_vad_frames] + + f_t = f_t[:, self.vad_t] + + f_t = f_t.transpose(1, 2).contiguous() + x_t = self.xvector_model.extract_embed(f_t, embed_layer=self.embed_layer) + x_t = l2_norm(x_t) + x_e = l2_norm(self.x_e) + score = torch.sum(x_e * x_t, dim=-1) + if self.calibrator is not None: + score = self.calibrator(score) + + return score + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def init_feats(**kwargs): + feat_args = AF.filter_args(**kwargs) + logging.info("feat args={}".format(feat_args)) + logging.info("initializing feature extractor") + feat_extractor = AF(trans=False, **feat_args) + logging.info("feat-extractor={}".format(feat_extractor)) + feat_extractor.eval() + return feat_extractor + + +def load_model(model_path): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("xvector-model={}".format(model)) + 
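+    # freeze() is deliberate in the transfer setting: the attack only needs
+    # gradients w.r.t. the input waveform, so parameter gradients of the
+    # loaded x-vector models are disabled, which also saves memory.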
model.freeze() + model.eval() + return model + + +def load_calibrator(cal_file, threshold): + logging.info("loading calibration params {}".format(cal_file)) + lr = LR.load(cal_file) + # subting the threshold here will put the decision threshold in 0 + # some attacks use thr=0 to decide if the attack is succesful + calibrator = Calibrator(lr.A[0, 0], lr.b[0] - threshold) + calibrator.eval() + return calibrator + + +def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): + + r = DRF.create(v_file) + enroll = Utt2Info.load(enroll_file) + key = TrialKey.load(key_file) + + if num_seg_parts > 1: + key = key.split(1, 1, seg_part_idx, num_seg_parts) + + x_e = r.read(enroll.key, squeeze=True) + + f, idx = ismember(key.model_set, enroll.info) + + assert np.all(f) + x_e = x_e[idx] + + return key, x_e + + +def eval_cosine_scoring( + v_file, + key_file, + enroll_file, + test_wav_file, + vad_spec, + vad_path_prefix, + transfer_v_file, + model_path, + transfer_model_path, + embed_layer, + score_file, + stats_file, + cal_file, + transfer_cal_file, + threshold, + max_test_length, + save_adv_wav, + save_adv_wav_path, + use_gpu, + seg_part_idx, + num_seg_parts, + **kwargs +): + + device = init_device(use_gpu) + # load victim model + feat_extractor = init_feats(**kwargs["feats"]) + xvector_model = load_model(model_path) + calibrator = None + if cal_file is not None: + calibrator = load_calibrator(cal_file, 0) + + model = MyModel(feat_extractor, xvector_model, embed_layer, calibrator) + model.to(device) + model.eval() + + # load white-box model + tfeat_extractor = init_feats(**kwargs["transfer_feats"]) + xvector_tmodel = load_model(transfer_model_path) + tcalibrator = None + if transfer_cal_file is not None: + tcalibrator = load_calibrator(transfer_cal_file, threshold) + + tmodel = MyModel(tfeat_extractor, xvector_tmodel, embed_layer, tcalibrator) + tmodel.to(device) + tmodel.eval() + + tar = torch.as_tensor([1], dtype=torch.float).to(device) + non = torch.as_tensor([0], dtype=torch.float).to(device) + + logging.info("loading key and enrollment x-vectors") + key, x_e = read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts) + x_e = torch.as_tensor(x_e, dtype=torch.get_default_dtype()) + + _, t_x_e = read_data( + transfer_v_file, key_file, enroll_file, seg_part_idx, num_seg_parts + ) + t_x_e = torch.as_tensor(t_x_e, dtype=torch.get_default_dtype()) + + audio_args = AR.filter_args(**kwargs) + audio_reader = AR(test_wav_file) + wav_scale = audio_reader.wav_scale + + if save_adv_wav: + tar_audio_writer = AW(save_adv_wav_path + "/tar2non") + non_audio_writer = AW(save_adv_wav_path + "/non2tar") + + attack_args = AttackFactory.filter_args(**kwargs["attack"]) + extra_args = { + "eps_scale": wav_scale, + "range_min": -wav_scale, + "range_max": wav_scale, + "loss": nn.functional.binary_cross_entropy_with_logits, + "time_dim": 1, + } + attack_args.update(extra_args) + logging.info("attacks args={}".format(attack_args)) + attack = AttackFactory.create(model, **attack_args) + + if vad_spec is not None: + logging.info("opening VAD stream: %s", vad_spec) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + + scores = np.zeros((key.num_models, key.num_tests), dtype="float32") + attack_stats = pd.DataFrame( + columns=[ + "modelid", + "segmentid", + "snr", + "px", + "pn", + "x_l2", + "x_linf", + "n_l0", + "n_l2", + "n_linf", + "num_frames", + ] + ) + for j in range(key.num_tests): + t1 = time.time() + logging.info("scoring test utt %s", key.seg_set[j]) + s, fs = 
audio_reader.read([key.seg_set[j]]) + s = s[0] + fs = fs[0] + + if max_test_length is not None: + max_samples = int(fs * max_test_length) + if len(s) > max_samples: + s = s[:max_samples] + + s = torch.as_tensor(s[None, :], dtype=torch.get_default_dtype()).to(device) + + if vad_spec is not None: + vad = v_reader.read([key.seg_set[j]])[0] + tot_frames = len(vad) + speech_frames = np.sum(vad) + vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( + device + ) + model.vad_t = vad + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames", + key.seg_set[j], + speech_frames, + tot_frames, + speech_frames / tot_frames * 100, + ) + + t2 = time.time() + + trial_time = 0 + num_trials = 0 + for i in range(key.num_models): + if key.tar[i, j] or key.non[i, j]: + t3 = time.time() + model.x_e = x_e[i].to(device) + tmodel.x_e = t_x_e[i].to(device) + if key.tar[i, j]: + if attack.targeted: + t = non + else: + t = tar + else: + if attack.targeted: + t = tar + else: + t = non + + s_adv = attack.generate(s, t) + with torch.no_grad(): + scores[i, j] = model(s_adv) + + t4 = time.time() + trial_time += t4 - t3 + num_trials += 1 + + s_adv = s_adv.detach() + stats_ij = compute_stats_adv_attack(s, s_adv) + stats_ij = [stat.detach().cpu().numpy()[0] for stat in stats_ij] + attack_stats = attack_stats.append( + { + "modelid": key.model_set[i], + "segmentid": key.seg_set[j], + "snr": stats_ij[0], + "px": stats_ij[1], + "pn": stats_ij[2], + "x_l2": stats_ij[3], + "x_linf": stats_ij[4], + "n_l0": stats_ij[5], + "n_l2": stats_ij[6], + "n_linf": stats_ij[7], + "num_samples": s.shape[-1], + }, + ignore_index=True, + ) + + # logging.info('min-max %f %f %f %f' % (torch.min(s), torch.max(s), torch.min(s_adv-s), torch.max(s_adv-s))) + if save_adv_wav: + s_adv = s_adv.cpu().numpy()[0] + trial_name = "%s-%s" % (key.model_set[i], key.seg_set[j]) + if key.tar[i, j] and scores[i, j] < threshold: + tar_audio_writer.write(trial_name, s_adv, fs) + elif key.non[i, j] and scores[i, j] > threshold: + non_audio_writer.write(trial_name, s_adv, fs) + + trial_time /= num_trials + t7 = time.time() + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f trial-time=%.3f n_trials=%d " + "rt-factor=%.2f" + ), + key.seg_set[j], + t7 - t1, + t2 - t1, + trial_time, + num_trials, + (t7 - t1) / (num_trials * s.shape[1] / fs), + ) + + if num_seg_parts > 1: + score_file = "%s-%03d-%03d" % (score_file, 1, seg_part_idx) + stats_file = "%s-%03d-%03d" % (stats_file, 1, seg_part_idx) + logging.info("saving scores to %s", score_file) + s = TrialScores( + key.model_set, key.seg_set, scores, score_mask=np.logical_or(key.tar, key.non) + ) + s.save_txt(score_file) + + logging.info("saving stats to %s", stats_file) + attack_stats.to_csv(stats_file) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description=( + "Eval cosine-scoring given enroll x-vector and " + "adversarial test wave obtained from a different model" + ) + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--key-file", default=None) + parser.add_argument("--enroll-file", required=True) + parser.add_argument("--test-wav-file", required=True) + + parser.add_argument("--transfer-v-file", required=True) + + AR.add_class_args(parser) + AF.add_class_args(parser, prefix="feats") + AF.add_class_args(parser, prefix="transfer_feats") + + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--vad-path-prefix", + dest="vad_path_prefix", + default=None, + help=("scp file_path prefix for vad"), + ) + + 
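+    # Example invocation sketch (hypothetical paths; the attack.* option names
+    # depend on what AttackFactory registers, so they are illustrative only):
+    #
+    #   eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py \
+    #     --v-file scp:exp/xvectors/victim/xvector.scp \
+    #     --transfer-v-file scp:exp/xvectors/surrogate/xvector.scp \
+    #     --key-file data/test/trials --enroll-file data/test/utt2enroll \
+    #     --test-wav-file data/test/wav.scp \
+    #     --model-path exp/models/victim/model.pth \
+    #     --transfer-model-path exp/models/surrogate/model.pth \
+    #     --attack.attack-type fgsm --attack.eps 0.0001 \
+    #     --score-file exp/attacks/transfer_fgsm/scores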
parser.add_argument("--model-path", required=True) + parser.add_argument("--transfer-model-path", required=True) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from," + "if None the layer set in training phase is used" + ), + ) + + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + + AttackFactory.add_class_args(parser, prefix="attack") + + parser.add_argument("--seg-part-idx", default=1, type=int, help=("test part index")) + parser.add_argument( + "--num-seg-parts", + default=1, + type=int, + help=( + "number of parts in which we divide the test list " + "to run evaluation in parallel" + ), + ) + + parser.add_argument("--score-file", dest="score_file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + parser.add_argument( + "--save-adv-wav", + default=False, + action="store_true", + help="save adversarial signals to disk", + ) + + parser.add_argument( + "--save-adv-wav-path", default=None, help="output path of adv signals" + ) + + parser.add_argument( + "--stats-file", default=None, help="output path of to save stats of adv signals" + ) + parser.add_argument("--cal-file", default=None, help="score calibration file") + parser.add_argument( + "--transfer-cal-file", + default=None, + help="score calibration file for transfer model", + ) + parser.add_argument("--threshold", default=0, type=float, help="decision threshold") + parser.add_argument( + "--max-test-length", + default=None, + type=float, + help=( + "maximum length (secs) for the test side, " + "this is to avoid GPU memory errors" + ), + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_cosine_scoring(**namespace_to_dict(args)) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py new file mode 100755 index 00000000..0f9f375d --- /dev/null +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py @@ -0,0 +1,464 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import os +import sys +import time + +import numpy as np +import pandas as pd +from art.classifiers import PyTorchClassifier +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + +import torch +import torch.nn as nn +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import AudioWriter as AW +from hyperion.io import RandomAccessAudioReader as AR +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.classifiers import BinaryLogisticRegression as LR +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.adv_attacks.art_attack_factory import \ + ARTAttackFactory as AttackFactory +from hyperion.torch.layers import LinBinCalibrator as Calibrator +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.utils import open_device +from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm +from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info +from hyperion.utils.list_utils import ismember + + +class MyModel(nn.Module): + def __init__( + self, + feat_extractor, + 
xvector_model, + embed_layer=None, + calibrator=None, + threshold=0, + ): + super().__init__() + self.feat_extractor = feat_extractor + self.xvector_model = xvector_model + self.x_e = None + self.vad_t = None + self.embed_layer = embed_layer + self.calibrator = calibrator + self.threshold = threshold + + def forward(self, s_t): + f_t = s_t + f_t = self.feat_extractor(s_t) + if self.vad_t is not None: + n_vad_frames = len(self.vad_t) + n_feat_frames = f_t.shape[1] + if n_vad_frames > n_feat_frames: + self.vad_t = self.vad_t[:n_feat_frames] + elif n_vad_frames < n_feat_frames: + f_t = f_t[:, :n_vad_frames] + + f_t = f_t[:, self.vad_t] + + f_t = f_t.transpose(1, 2).contiguous() + x_t = self.xvector_model.extract_embed(f_t, embed_layer=self.embed_layer) + x_t = l2_norm(x_t) + x_e = l2_norm(self.x_e) + tar_score = torch.sum(x_e * x_t, dim=-1, keepdim=True) + if self.calibrator is not None: + score = self.calibrator(tar_score) + + non_score = self.threshold + 0 * tar_score + score = torch.cat((non_score, tar_score), dim=-1) # .unsqueeze(0) + return score + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def init_feats(**kwargs): + feat_args = AF.filter_args(**kwargs) + logging.info("feat args={}".format(feat_args)) + logging.info("initializing feature extractor") + feat_extractor = AF(trans=False, **feat_args) + logging.info("feat-extractor={}".format(feat_extractor)) + feat_extractor.eval() + return feat_extractor + + +def load_model(model_path): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("xvector-model={}".format(model)) + model.freeze() + model.eval() + return model + + +def load_calibrator(cal_file): + logging.info("loading calibration params {}".format(cal_file)) + lr = LR.load(cal_file) + calibrator = Calibrator(lr.A[0, 0], lr.b[0]) + calibrator.eval() + return calibrator + + +def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): + + r = DRF.create(v_file) + enroll = Utt2Info.load(enroll_file) + key = TrialKey.load(key_file) + + if num_seg_parts > 1: + key = key.split(1, 1, seg_part_idx, num_seg_parts) + + x_e = r.read(enroll.key, squeeze=True) + + f, idx = ismember(key.model_set, enroll.info) + + assert np.all(f) + x_e = x_e[idx] + + return key, x_e + + +def eval_cosine_scoring( + v_file, + key_file, + enroll_file, + test_wav_file, + vad_spec, + vad_path_prefix, + transfer_v_file, + model_path, + transfer_model_path, + embed_layer, + score_file, + stats_file, + cal_file, + transfer_cal_file, + threshold, + max_test_length, + save_adv_wav, + save_adv_wav_path, + use_gpu, + seg_part_idx, + num_seg_parts, + **kwargs +): + + device_type = "gpu" if use_gpu else "cpu" + device = init_device(use_gpu) + # load victim model + feat_extractor = init_feats(**kwargs["feats"]) + xvector_model = load_model(model_path) + calibrator = None + if cal_file is not None: + calibrator = load_calibrator(cal_file) + + model = MyModel( + feat_extractor, xvector_model, embed_layer, calibrator, threshold=threshold + ) + model.to(device) + model.eval() + + # load white-box model + tfeat_extractor = init_feats(**kwargs["transfer_feats"]) + xvector_tmodel = load_model(transfer_model_path) + tcalibrator = None + if transfer_cal_file is not None: + tcalibrator = load_calibrator(transfer_cal_file) + + tmodel = MyModel( + tfeat_extractor, xvector_tmodel, embed_layer, 
tcalibrator, threshold=threshold + ) + tmodel.to(device) + tmodel.eval() + + tar = np.asarray([1], dtype=np.int) + non = np.asarray([0], dtype=np.int) + + logging.info("loading key and enrollment x-vectors") + key, x_e = read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts) + x_e = torch.as_tensor(x_e, dtype=torch.get_default_dtype()) + + _, t_x_e = read_data( + transfer_v_file, key_file, enroll_file, seg_part_idx, num_seg_parts + ) + t_x_e = torch.as_tensor(t_x_e, dtype=torch.get_default_dtype()) + + audio_args = AR.filter_args(**kwargs) + audio_reader = AR(test_wav_file) + wav_scale = audio_reader.wav_scale + + if save_adv_wav: + tar_audio_writer = AW(save_adv_wav_path + "/tar2non") + non_audio_writer = AW(save_adv_wav_path + "/non2tar") + + attack_args = AttackFactory.filter_args(**kwargs["attack"]) + extra_args = {"eps_scale": wav_scale} + attack_args.update(extra_args) + logging.info("attack-args={}".format(attack_args)) + + if vad_spec is not None: + logging.info("opening VAD stream: %s" % (vad_spec)) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + + scores = np.zeros((key.num_models, key.num_tests), dtype="float32") + attack_stats = pd.DataFrame( + columns=[ + "modelid", + "segmentid", + "snr", + "px", + "pn", + "x_l2", + "x_linf", + "n_l0", + "n_l2", + "n_linf", + "num_frames", + ] + ) + for j in range(key.num_tests): + t1 = time.time() + logging.info("scoring test utt %s" % (key.seg_set[j])) + s, fs = audio_reader.read([key.seg_set[j]]) + s = s[0] + fs = fs[0] + + if max_test_length is not None: + max_samples = int(fs * max_test_length) + if len(s) > max_samples: + s = s[:max_samples] + + s = s[None, :].astype("float32", copy=False) + s_tensor = torch.as_tensor(s, dtype=torch.get_default_dtype()).to(device) + + if vad_spec is not None: + vad = v_reader.read([key.seg_set[j]])[0] + tot_frames = len(vad) + speech_frames = np.sum(vad) + vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( + device + ) + model.vad_t = vad + tmodel.vad_t = vad + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames" + % ( + key.seg_set[j], + speech_frames, + tot_frames, + speech_frames / tot_frames * 100, + ) + ) + + t2 = time.time() + + trial_time = 0 + num_trials = 0 + model_art = PyTorchClassifier( + model=tmodel, + loss=nn.CrossEntropyLoss(), + optimizer=None, + input_shape=[1, s.shape[1]], + nb_classes=2, + clip_values=(-wav_scale, wav_scale), + device_type=device_type, + ) + + attack_args["num_samples"] = s.shape[-1] + attack = AttackFactory.create(model_art, **attack_args) + for i in range(key.num_models): + if key.tar[i, j] or key.non[i, j]: + t3 = time.time() + model.x_e = x_e[i].to(device) + tmodel.x_e = t_x_e[i].to(device) + if key.tar[i, j]: + if attack.targeted: + t = non + else: + t = tar + else: + if attack.targeted: + t = tar + else: + t = non + + s_adv = attack.generate(s, t) + s_adv = torch.from_numpy(s_adv).to(device) + with torch.no_grad(): + scores[i, j] = model(s_adv).cpu().numpy()[0, 1] + + t4 = time.time() + trial_time += t4 - t3 + num_trials += 1 + + s_adv = s_adv.detach() + stats_ij = compute_stats_adv_attack(s_tensor, s_adv) + stats_ij = [stat.detach().cpu().numpy()[0] for stat in stats_ij] + attack_stats = attack_stats.append( + { + "modelid": key.model_set[i], + "segmentid": key.seg_set[j], + "snr": stats_ij[0], + "px": stats_ij[1], + "pn": stats_ij[2], + "x_l2": stats_ij[3], + "x_linf": stats_ij[4], + "n_l0": stats_ij[5], + "n_l2": stats_ij[6], + "n_linf": stats_ij[7], + "num_samples": 
s.shape[-1], + }, + ignore_index=True, + ) + + # logging.info('min-max %f %f %f %f' % (torch.min(s), torch.max(s), torch.min(s_adv-s), torch.max(s_adv-s))) + if save_adv_wav: + s_adv = s_adv.cpu().numpy()[0] + trial_name = "%s-%s" % (key.model_set[i], key.seg_set[j]) + if key.tar[i, j] and scores[i, j] < threshold: + tar_audio_writer.write(trial_name, s_adv, fs) + elif key.non[i, j] and scores[i, j] > threshold: + non_audio_writer.write(trial_name, s_adv, fs) + + del attack + del model_art + trial_time /= num_trials + t7 = time.time() + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f trial-time=%.3f n_trials=%d " + "rt-factor=%.2f" + ), + key.seg_set[j], + t7 - t1, + t2 - t1, + trial_time, + num_trials, + (t7 - t1) / (num_trials * s.shape[1] / fs), + ) + + if num_seg_parts > 1: + score_file = "%s-%03d-%03d" % (score_file, 1, seg_part_idx) + stats_file = "%s-%03d-%03d" % (stats_file, 1, seg_part_idx) + logging.info("saving scores to %s" % (score_file)) + s = TrialScores( + key.model_set, key.seg_set, scores, score_mask=np.logical_or(key.tar, key.non) + ) + s.save_txt(score_file) + + logging.info("saving stats to %s" % (stats_file)) + attack_stats.to_csv(stats_file) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description=( + "Eval cosine-scoring given enroll x-vector and " + "adversarial test wave obtained from a different model" + "using ART" + ) + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--key-file", default=None) + parser.add_argument("--enroll-file", required=True) + parser.add_argument("--test-wav-file", required=True) + + parser.add_argument("--transfer-v-file", required=True) + + AR.add_class_args(parser) + AF.add_class_args(parser, prefix="feats") + AF.add_class_args(parser, prefix="transfer_feats") + + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument( + "--vad-path-prefix", + dest="vad_path_prefix", + default=None, + help=("scp file_path prefix for vad"), + ) + + parser.add_argument("--model-path", required=True) + parser.add_argument("--transfer-model-path", required=True) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from," + "if None the layer set in training phase is used" + ), + ) + + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + + AttackFactory.add_class_args(parser, prefix="attack") + + parser.add_argument("--seg-part-idx", default=1, type=int, help=("test part index")) + parser.add_argument( + "--num-seg-parts", + default=1, + type=int, + help=( + "number of parts in which we divide the test list " + "to run evaluation in parallel" + ), + ) + + parser.add_argument("--score-file", dest="score_file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + parser.add_argument( + "--save-adv-wav", + default=False, + action="store_true", + help="save adversarial signals to disk", + ) + parser.add_argument( + "--save-adv-wav-path", default=None, help="output path of adv signals" + ) + parser.add_argument( + "--stats-file", default=None, help="output path of to save stats of adv signals" + ) + parser.add_argument("--cal-file", default=None, help="score calibration file") + parser.add_argument( + "--transfer-cal-file", + default=None, + help="score calibration file for transfer model", + ) + parser.add_argument("--threshold", default=0, type=float, help="decision threshold") + 
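+    # Note: --threshold plays two roles in this script: it is baked into the
+    # 2-class wrapper as the class-0 logit, so ART attacks push scores across
+    # the actual operating point, and it is reused above to decide which
+    # adversarial wavs are logged as successful tar2non / non2tar flips.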
parser.add_argument( + "--max-test-length", + default=None, + type=float, + help=( + "maximum length (secs) for the test side, " + "this is to avoid GPU memory errors" + ), + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_cosine_scoring(**namespace_to_dict(args)) diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py index 7b68b9dd..d7b1f17d 100755 --- a/hyperion/bin/finetune_xvector_from_wav.py +++ b/hyperion/bin/finetune_xvector_from_wav.py @@ -10,8 +10,12 @@ import time from pathlib import Path -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch from hyperion.hyp_defs import config_logger, set_float_cpu @@ -95,7 +99,12 @@ def init_xvector(num_classes, in_model_file, rank, xvec_class, **kwargs): def init_hard_prototype_mining(model, train_loader, val_loader, rank): - if not train_loader.batch_sampler.hard_prototype_mining: + try: + hard_prototype_mining = train_loader.batch_sampler.hard_prototype_mining + except: + hard_prototype_mining = False + + if not hard_prototype_mining: return if rank == 0: @@ -104,7 +113,12 @@ def init_hard_prototype_mining(model, train_loader, val_loader, rank): affinity_matrix = model.compute_prototype_affinity() train_loader.batch_sampler.set_hard_prototypes(affinity_matrix) - if not val_loader.batch_sampler.hard_prototype_mining: + try: + hard_prototype_mining = val_loader.batch_sampler.hard_prototype_mining + except: + hard_prototype_mining = False + + if not hard_prototype_mining: return val_loader.batch_sampler.set_hard_prototypes(affinity_matrix) diff --git a/hyperion/np/score_norm/adapt_s_norm.py b/hyperion/np/score_norm/adapt_s_norm.py index a5ae6f13..944fcad5 100644 --- a/hyperion/np/score_norm/adapt_s_norm.py +++ b/hyperion/np/score_norm/adapt_s_norm.py @@ -3,7 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - +import math import h5py import numpy as np @@ -33,6 +33,24 @@ def __init__( self.nbest_discard = nbest_discard self.nbest_sel_method = nbest_sel_method + def __call__( + self, + scores, + scores_coh_test, + scores_enr_coh, + mask_coh_test=None, + mask_enr_coh=None, + return_stats=False, + ): + return self.predict( + scores, + scores_coh_test, + scores_enr_coh, + mask_coh_test, + mask_enr_coh, + return_stats, + ) + def predict( self, scores, @@ -63,9 +81,9 @@ def predict( nbest = self.nbest if mask_coh_test is not None: - scores_coh_test[mask_coh_test == False] = 0 + scores_coh_test[~mask_coh_test] = 0 if mask_enr_coh is not None: - scores_enr_coh[mask_enr_coh == False] = 0 + scores_enr_coh[~mask_enr_coh] = 0 if self.nbest_sel_method == "highest-other-side": return self._norm_highest_other_side( @@ -87,81 +105,10 @@ def predict( return_stats, nbest, ) - # best_idx = np.flipud(np.argsort(scores_coh_test, axis=0))[ - # self.nbest_discard : self.nbest_discard + nbest - # ] - # elif self.nbest_sel_method == "highest-same-side": - # best_idx = np.fliplr(np.argsort(scores_enr_coh, axis=1))[ - # :, self.nbest_discard : self.nbest_discard + nbest - # ].T else: raise Exception(f"invalid cohort selection method {self.nbest_sel_method}") - # scores_z_norm = np.zeros_like(scores) - # for i in range(scores.shape[1]): - # best_idx_i = best_idx[:, i] - - # best_scores_i = scores_enr_coh[:, best_idx_i] - # mu_z = np.mean(best_scores_i, axis=1, keepdims=True) - - # if 
mask_enr_coh is None: - # s_z = np.std(best_scores_i, axis=1, keepdims=True) - # else: - # norm = np.mean(mask_enr_coh[:, best_idx_i], axis=1, keepdims=True) - # mu_z /= norm - # s_z = np.sqrt( - # np.mean(best_scores_i ** 2, axis=1, keepdims=True) / norm - # - mu_z ** 2 - # ) - - # s_z = np.clip(s_z, a_min=1e-5, a_max=None) - # if not self.norm_var: - # s_z = 1.0 - - # scores_z_norm[:, i] = (scores[:, i] - mu_z.T) / s_z.T - - # if self.nbest_sel_method == "highest-other-side": - # best_idx = np.fliplr(np.argsort(scores_enr_coh, axis=1))[ - # :, self.nbest_discard : self.nbest_discard + nbest - # ] - # elif self.nbest_sel_method == "highest-same-side": - # best_idx = np.flipud(np.argsort(scores_coh_test, axis=0))[ - # self.nbest_discard : self.nbest_discard + nbest - # ].T - # else: - # raise Exception(f"invalid cohort selection method {self.nbest_sel_method}") - - # scores_t_norm = np.zeros_like(scores) - # for i in range(scores.shape[0]): - # best_idx_i = best_idx[i] - # best_scores_i = scores_coh_test[best_idx_i, :] - # mu_t = np.mean(best_scores_i, axis=0, keepdims=True) - - # if mask_coh_test is None: - # s_t = np.std(best_scores_i[best_idx_i, :], axis=0, keepdims=True) - # else: - # norm = np.mean(mask_coh_test[best_idx_i, :], axis=0, keepdims=True) - # mu_t /= norm - # s_t = np.sqrt( - # np.mean(best_scores_i[best_idx_i, :] ** 2, axis=0, keepdims=True) - # / norm - # - mu_z ** 2 - # ) - - # s_t = np.clip(s_t, a_min=1e-5, a_max=None) - # if not self.norm_var: - # s_t = 1.0 - - # scores_t_norm[i, :] = (scores[i, :] - mu_t) / s_t - - # scores_norm = (scores_z_norm + scores_t_norm) / np.sqrt(2) - - # if return_stats: - # return scores_norm, mu_z, s_z, mu_t, s_t - # else: - # return scores_norm - - def _norm_highest_other_side( + def _norm_highest_other_side0( self, scores, scores_coh_test, @@ -246,7 +193,117 @@ def _norm_highest_other_side( else: return scores_norm - def _norm_highest_same_side( + def _norm_highest_other_side( + self, + scores, + scores_coh_test, + scores_enr_coh, + mask_coh_test, + mask_enr_coh, + return_stats, + nbest, + ): + + # this is very memory intensive, so we pass to f32 + scores_coh_test = scores_coh_test.astype("float32", copy=False) + scores_enr_coh = scores_enr_coh.astype("float32", copy=False) + + best_idx = np.argsort(-scores_coh_test, axis=0)[ + self.nbest_discard : self.nbest_discard + nbest + ].T # (n_test, n_best) + + mem = nbest * scores_enr_coh.shape[0] * scores.shape[1] * 4 / 2 ** 30 + # limit mem to 10 GB + num_groups = math.ceil(mem / 10) + num_el_group = int(math.ceil(scores.shape[1] / num_groups)) + scores_enr_coh = np.expand_dims(scores_enr_coh, 0) + if mask_enr_coh is not None: + mask_enr_coh = np.expand_dims(scores_enr_coh, 0) + + mu_z = [] + s_z = [] + for start in range(0, scores.shape[1], num_el_group): + stop = min(start + num_el_group, scores.shape[1]) + best_idx_i = np.expand_dims(best_idx[start:stop], 1) + best_scores_i = np.take_along_axis(scores_enr_coh, best_idx_i, axis=-1) + mu_z_i = best_scores_i.mean(axis=-1) + + if mask_enr_coh is None: + s_z_i = np.std(best_scores_i, axis=-1) + else: + mask_i = np.take_along_axis(mask_enr_coh, best_idx_i, axis=-1) + norm = mask_i.mean(axis=-1) + mu_z_i /= norm + s_z_i = np.sqrt( + np.mean(best_scores_i ** 2, axis=-1) / norm - mu_z_i ** 2 + ) + + del best_scores_i + mu_z.append(mu_z_i.T) + s_z.append(s_z_i.T) + + mu_z = np.concatenate(mu_z, axis=-1) + s_z = np.concatenate(s_z, axis=-1) + + s_z = np.clip(s_z, a_min=1e-5, a_max=None) + if not self.norm_var: + s_z = 1.0 + + scores_z_norm = (scores 
+
+    def _norm_highest_same_side0(
         self,
         scores,
         scores_coh_test,
         scores_enr_coh,
         mask_coh_test,
         mask_enr_coh,
         return_stats,
         nbest,
     ):
@@ -331,3 +388,112 @@ def _norm_highest_same_side(
             return scores_norm, mu_z, s_z, mu_t, s_t
         else:
             return scores_norm
+
+    def _norm_highest_same_side(
+        self,
+        scores,
+        scores_coh_test,
+        scores_enr_coh,
+        mask_coh_test,
+        mask_enr_coh,
+        return_stats,
+        nbest,
+    ):
+
+        # this is very memory intensive, so we pass to f32
+        scores_coh_test = scores_coh_test.astype("float32", copy=False)
+        scores_enr_coh = scores_enr_coh.astype("float32", copy=False)
+
+        best_idx = np.argsort(-scores_enr_coh, axis=1)[
+            :, self.nbest_discard : self.nbest_discard + nbest
+        ]
+
+        mem = nbest * scores_enr_coh.shape[0] * scores.shape[0] * 4 / 2 ** 30
+        # limit mem to 10 GB
+        num_groups = math.ceil(mem / 10)
+        num_el_group = int(math.ceil(scores.shape[0] / num_groups))
+        scores_enr_coh = np.expand_dims(scores_enr_coh, 0)
+        if mask_enr_coh is not None:
+            mask_enr_coh = np.expand_dims(mask_enr_coh, 0)
+
+        mu_z = []
+        s_z = []
+        for start in range(0, scores.shape[0], num_el_group):
+            stop = min(start + num_el_group, scores.shape[0])
+            best_idx_i = np.expand_dims(best_idx[start:stop], 1)
+            best_scores_i = np.take_along_axis(scores_enr_coh, best_idx_i, axis=-1)
+            mu_z_i = best_scores_i.mean(axis=-1)
+
+            if mask_enr_coh is None:
+                s_z_i = np.std(best_scores_i, axis=-1)
+            else:
+                mask_i = np.take_along_axis(mask_enr_coh, best_idx_i, axis=-1)
+                norm = mask_i.mean(axis=-1)
+                mu_z_i /= norm
+                s_z_i = np.sqrt(
+                    np.mean(best_scores_i ** 2, axis=-1) / norm - mu_z_i ** 2
+                )
+
+            del best_scores_i
+            mu_z.append(mu_z_i.T)
+            s_z.append(s_z_i.T)
+
+        mu_z = np.concatenate(mu_z, axis=-1)
+        s_z = np.concatenate(s_z, axis=-1)
+
+        s_z = np.clip(s_z, a_min=1e-5, a_max=None)
+        if not self.norm_var:
+            s_z = 1.0
+
+        scores_z_norm = (scores - mu_z) / s_z
+
+        best_idx = np.argsort(-scores_coh_test, axis=0)[
+            self.nbest_discard : self.nbest_discard + nbest
+        ]  # (n_best, n_test)
+
+        mem = nbest * scores.shape[1] * scores_coh_test.shape[1] * 4 / 2 ** 30
+        # limit mem to 10 GB
+        num_groups = math.ceil(mem / 10)
+        num_el_group = int(math.ceil(scores.shape[1] / num_groups))
+        scores_coh_test = np.expand_dims(scores_coh_test, -1)
+        if mask_coh_test is not None:
+            mask_coh_test = np.expand_dims(mask_coh_test, -1)
+
+        mu_t = []
+        s_t = []
+        for start in range(0, scores.shape[1], num_el_group):
+            stop = min(start + num_el_group, scores.shape[1])
+            best_idx_i = np.expand_dims(best_idx[:, start:stop], 1)
+            best_scores_i = np.take_along_axis(scores_coh_test, best_idx_i, axis=0)
+            mu_t_i = best_scores_i.mean(axis=0)
+            if mask_coh_test is None:
+                s_t_i = np.std(best_scores_i, axis=0)
+            else:
+                mask_i = np.take_along_axis(mask_coh_test, best_idx_i, axis=0)
+                norm = mask_i.mean(axis=0)
+                mu_t_i /= norm
+                s_t_i = np.sqrt(
+                    np.mean(best_scores_i ** 2, axis=0) / norm - mu_t_i ** 2
+                )
+
+            del best_scores_i
+            mu_t.append(mu_t_i.T)
+            s_t.append(s_t_i.T)
+
+        mu_t = np.concatenate(mu_t, axis=0)
+        s_t = np.concatenate(s_t, axis=0)
+
+        s_t = np.clip(s_t, a_min=1e-5, a_max=None)
+        if not self.norm_var:
+            s_t = 1.0
+
+        scores_t_norm = (scores - mu_t) / s_t
+
+        scores_norm = (scores_z_norm + scores_t_norm) / np.sqrt(2)
+        if return_stats:
+            return scores_norm, mu_z, s_z, mu_t, s_t
+        else:
+            return scores_norm
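Once the cohort statistics are in hand, both selection methods reduce to the standard S-norm combination of a z-normed and a t-normed score. A minimal self-contained illustration, without the n-best selection, chunking, or masking above:

    import numpy as np

    rng = np.random.default_rng(0)
    scores = rng.normal(size=(4, 6))           # (n_enr, n_test) trial scores
    scores_enr_coh = rng.normal(size=(4, 50))  # enrollment vs. cohort
    scores_coh_test = rng.normal(size=(50, 6)) # cohort vs. test

    # z-norm: normalize each enrollment row by its cohort statistics.
    mu_z = scores_enr_coh.mean(axis=1, keepdims=True)
    s_z = scores_enr_coh.std(axis=1, keepdims=True)
    # t-norm: normalize each test column by its cohort statistics.
    mu_t = scores_coh_test.mean(axis=0, keepdims=True)
    s_t = scores_coh_test.std(axis=0, keepdims=True)

    # Symmetric S-norm combination, as in the methods above.
    scores_norm = ((scores - mu_z) / s_z + (scores - mu_t) / s_t) / np.sqrt(2)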
diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py
index 81e9082f..7fbfbd71 100644
--- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py
+++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py
@@ -116,6 +116,8 @@ def __init__(
             self.num_chunks_per_seg_epoch,
         )
 
+        self.counts = {}
+
     def _set_seed(self):
         if self.shuffle:
             self.rng.manual_seed(self.seed + 10 * self.epoch + 100 * self.rank)
@@ -208,7 +210,7 @@ def _set_class_weights(self):
         if self.weight_exponent != 1.0:
             self.class_info.exp_weights(self.weight_exponent)
 
-        zero_weight = self.class_info["min_seg_duration"] < self.min_chunk_length
+        zero_weight = self.class_info["max_seg_duration"] < self.min_chunk_length
         if np.any(zero_weight):
             self.class_info.set_zero_weight(zero_weight)
 
@@ -374,6 +376,20 @@ def __next__(self):
         num_classes = self._compute_num_classes_per_batch(batch_size)
         # t4 = time.time()
         class_ids = self._sample_classes(num_classes, chunk_length)
+        # for i in class_ids:
+        #     if i in self.counts:
+        #         self.counts[i] += 1
+        #     else:
+        #         self.counts[i] = 1
+
+        # mx = 0
+        # mn = 1000000000
+        # for k, v in self.counts.items():
+        #     if v > mx:
+        #         mx = v
+        #     if v < mn:
+        #         mn = v
+
         # t5 = time.time()
         seg_ids = self._sample_segs(class_ids, chunk_length)
         # t6 = time.time()
diff --git a/hyperion/torch/layer_blocks/fc_blocks.py b/hyperion/torch/layer_blocks/fc_blocks.py
index e56ab83e..49bf12db 100644
--- a/hyperion/torch/layer_blocks/fc_blocks.py
+++ b/hyperion/torch/layer_blocks/fc_blocks.py
@@ -84,4 +84,7 @@ def forward_linear(self, x):
         if self.norm_before:
             x = self.bn1(x)
 
+        if self.activation is None and self.norm_after:
+            x = self.bn1(x)
+
         return x

From 5f0ac9936784f3581304152911be0093b25b44c2 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Fri, 27 Jan 2023 14:02:21 -0500
Subject: [PATCH 076/154] updated qmf script

---
 .../conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml     | 2 +-
 .../v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml | 2 +-
 .../config_fbank80_stmn_ecapatdnn512x3.v2.0.sh           | 2 +-
egs/voxceleb/v1.1/run_040_eval_be.sh | 2 +- egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py | 9 ++++++--- egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh | 4 ++-- egs/voxceleb/v1/steps_be/train-qmf.py | 5 +++-- 7 files changed, 15 insertions(+), 11 deletions(-) diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml index 4b6fbc77..c4de614e 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml @@ -87,5 +87,5 @@ trainer: update_lr_on_opt_step: true use_amp: true log_interval: 1000 - epochs: 30 + epochs: 40 eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml index 319ab3ab..f5a7dcb1 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml @@ -85,5 +85,5 @@ trainer: update_lr_on_opt_step: true use_amp: true log_interval: 1000 - epochs: 30 + epochs: 40 eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh index 0e7a3b52..68990732 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh @@ -17,7 +17,7 @@ nnet_name=${feat_type}_ecapatdnn512x3.v2.0 nnet_s1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1_v2.0.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0030.pth +nnet_s1=$nnet_s1_dir/model_ep0040.pth nnet_s2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml nnet_s2_name=${nnet_name}.s2 diff --git a/egs/voxceleb/v1.1/run_040_eval_be.sh b/egs/voxceleb/v1.1/run_040_eval_be.sh index 49fa68e7..18c5eeeb 100755 --- a/egs/voxceleb/v1.1/run_040_eval_be.sh +++ b/egs/voxceleb/v1.1/run_040_eval_be.sh @@ -155,7 +155,7 @@ if [ "$do_qmf" == "true" ];then echo "Eval Voxceleb 1 with Cosine scoring" steps_be/eval_be_cos_qmf.sh \ - --cmd "$train_cmd --mem 15G" --coh-nbest 1000 \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ data/voxceleb1_test/trials \ data/voxceleb1_test/utt2model \ $xvector_dir/voxceleb1_test/xvector.scp \ diff --git a/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py b/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py index 78526277..90650941 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py +++ b/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py @@ -100,14 +100,14 @@ def eval_plda( np.clip( u2nf.filter(enroll_segs).info.astype(float) / 100 - 2.0, a_min=0.1, - a_max=6.0, + a_max=12.0, # 6.0, ) ) test_nf = np.log( np.clip( u2nf.filter(ndx.seg_set).info.astype(float) / 100 - 2.0, a_min=0.1, - a_max=6.0, + a_max=12.0, # 6.0, ) ) t1 = time.time() @@ -132,9 +132,11 @@ def eval_plda( t2 = time.time() logging.info("apply s-norm") snorm = SNorm(nbest=coh_nbest, nbest_sel_method="highest-other-side") - scores_norm, mu_z, _, mu_t, _ = snorm( + scores_norm, mu_z, s_z, mu_t, s_t = snorm( scores, scores_coh_test, scores_enr_coh, return_stats=True ) + mu_z = mu_z / s_z + mu_t = mu_t / s_t dt = time.time() - t1 num_trials = len(enroll) * x_t.shape[0] @@ -165,6 +167,7 @@ def eval_plda( logging.info("applying qmf") scores_fus = [scores.ravel()] + scores_fus = [scores_norm.ravel()] for q_name in ["maxnf", "minnf", "maxcohmu", "mincohmu"]: 
scores_fus.append(q_measures[q_name].ravel()) diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh index 8b69b0d6..a8ad0178 100755 --- a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh +++ b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh @@ -5,8 +5,8 @@ set -e cmd=run.pl stage=1 -num_parts=8 -coh_nbest=400 +num_parts=16 +coh_nbest=1000 if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; diff --git a/egs/voxceleb/v1/steps_be/train-qmf.py b/egs/voxceleb/v1/steps_be/train-qmf.py index 07712221..afd9d218 100755 --- a/egs/voxceleb/v1/steps_be/train-qmf.py +++ b/egs/voxceleb/v1/steps_be/train-qmf.py @@ -30,8 +30,9 @@ def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbo logging.info("load key: %s", key_file) key = TrialKey.load_txt(key_file) - logging.info("load scores: %s", score_file) - scr = TrialScores.load_txt(score_file) + score_snorm_file = f"{score_file}_snorm" + logging.info("load scores: %s", score_snorm_file) + scr = TrialScores.load_txt(score_snorm_file) tar, non = scr.get_tar_non(key) ntar = len(tar) nnon = len(non) From b3647987d1cfeca2f419e422ea3b8a45b90bddb0 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 27 Jan 2023 14:19:26 -0500 Subject: [PATCH 077/154] xxx --- .../v1/global_conf/config_transducer_v4.3.sh | 2 +- .../v1/global_conf/config_transducer_v4.4.sh | 4 +++- egs/librispeech/v1/run_030_inference.sh | 13 +++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/egs/librispeech/v1/global_conf/config_transducer_v4.3.sh b/egs/librispeech/v1/global_conf/config_transducer_v4.3.sh index de00c55a..f51f1213 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v4.3.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v4.3.sh @@ -23,7 +23,7 @@ nnet_name=${hf_model_name}_transducer_v4.3 nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0060.pth +nnet_s1=$nnet_s1_dir/model_ep0030.pth nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml nnet_s2_args="" diff --git a/egs/librispeech/v1/global_conf/config_transducer_v4.4.sh b/egs/librispeech/v1/global_conf/config_transducer_v4.4.sh index 3114af61..d09c197b 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v4.4.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v4.4.sh @@ -23,7 +23,9 @@ nnet_name=${hf_model_name}_transducer_v4.4 nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0060.pth +nnet_s1=$nnet_s1_dir/model_ep0030.pth +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0075.pth nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml nnet_s2_args="" diff --git a/egs/librispeech/v1/run_030_inference.sh b/egs/librispeech/v1/run_030_inference.sh index 73ac2b8f..02b97001 100755 --- a/egs/librispeech/v1/run_030_inference.sh +++ b/egs/librispeech/v1/run_030_inference.sh @@ -38,11 +38,12 @@ test_data=test_clean # Extracts x-vectors for evaluation -for name in $test_data - do - nj=16 - steps_transducer/decode_wav2vec2transducer.sh --cmd "$transducer_cmd --mem 12G" --nj $nj ${transducer_args} \ +for name in dev_clean dev_other test_clean test_other #$test_data +do + nj=40 + steps_transducer/decode_wav2vec2transducer.sh \ + --cmd "$transducer_cmd --mem 12G" --nj $nj ${transducer_args} \ $nnet data/$name \ $transducer_dir/$name $bpe_model - done -exit +done + From f15905ba2e0e0e43b76190728885ca9d648d6c8f Mon Sep 17 00:00:00 2001 From: 
Jesus Villalba Date: Mon, 30 Jan 2023 04:38:56 -0500 Subject: [PATCH 078/154] new results voxceleb --- egs/voxceleb/v1.1/README.md | 34 ++++++ ...train_ecapatdnn512x3_xvec_stage2_v2.0.yaml | 4 +- ...rain_idrnd_resnet100_xvec_stage1_v2.0.yaml | 70 ++++++++++++ ...rain_idrnd_resnet100_xvec_stage2_v2.0.yaml | 66 +++++++++++ ...rain_idrnd_resnet202_xvec_stage1_v2.0.yaml | 70 ++++++++++++ ...rain_idrnd_resnet202_xvec_stage2_v2.0.yaml | 66 +++++++++++ egs/voxceleb/v1.1/datapath.sh | 1 + ...onfig_fbank80_stmn_ecapatdnn2048x4.v2.0.sh | 8 +- ...config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | 6 +- ...onfig_fbank80_stmn_idrnd_resnet100.v2.0.sh | 45 ++++++++ ...onfig_fbank80_stmn_idrnd_resnet202.v2.0.sh | 45 ++++++++ egs/voxceleb/v1.1/run_001_prepare_data.sh | 21 +--- egs/voxceleb/v1.1/run_002_compute_evad.sh | 62 ++++++----- egs/voxceleb/v1.1/run_030_extract_xvectors.sh | 5 +- egs/voxceleb/v1.1/run_040_eval_be.sh | 105 +++++++++++++++++- egs/voxceleb/v1/local/score_voxsrc22_dev.sh | 21 ++++ hyperion/torch/layer_blocks/se_blocks.py | 2 +- hyperion/torch/narchs/resnet.py | 28 +++-- 18 files changed, 592 insertions(+), 67 deletions(-) create mode 100644 egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v2.0.sh create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet202.v2.0.sh create mode 100755 egs/voxceleb/v1/local/score_voxsrc22_dev.sh diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index 5b5b93e5..83027e16 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -87,6 +87,40 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr ### VoxCeleb 1 Original-Clean trial list +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.09 | 0.068 | 0.121 | +| | | | Cosine + AS-Norm | 1.0 | 0.064 | 0.110 | +| | | | Cosine + QMF | 0.87 | 0.059 | 0.076 | + +### VoxCeleb 1 Entire-Clean trial list + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.21 | 0.075 | 0.129 | +| | | | Cosine + AS-Norm | 1.15 | 0.069 | 0.113 | +| | | | Cosine + QMF | 1.12 | 0.067 | 0.111 | + +### VoxCeleb 1 Hard-Clean trial list + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.17 | 0.129 | 0.212 | +| | | | Cosine + AS-Norm | 1.98 | 0.116 | 0.190 | +| | | | Cosine + QMF | 1.88 | 0.112 | 0.181 | + +### VoxSRC2022 dev + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | 
------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.85 | 0.187 | 0.310 | +| | | | Cosine + AS-Norm | 2.69 | 0.182 | 0.310 | +| | | | Cosine + QMF | 2.80 | 0.196 | 0.338 | + +## Results before 2023 + +### VoxCeleb 1 Original-Clean trial list + | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | | config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.00 | 0.129 | 0.216 | diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml index 4a4a8a88..b6163f14 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml @@ -14,7 +14,7 @@ data: min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 class_name: class_id - seg_weight_mode: uniform + seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: num_workers: 8 @@ -33,7 +33,7 @@ data: min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 class_name: class_id - seg_weight_mode: uniform + seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: num_workers: 8 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml new file mode 100644 index 00000000..fba4ce80 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml @@ -0,0 +1,70 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: fwseidrndresnet100 + in_channels: 1 + in_feats: 80 + conv_channels: 128 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.0 + se_r: 4 + norm_before: false +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 40 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml new file mode 100644 index 00000000..6c209a9f --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + 
min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + swa_start: 31 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage1_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage1_v2.0.yaml new file mode 100644 index 00000000..bff34263 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage1_v2.0.yaml @@ -0,0 +1,70 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: fwseidrndresnet100 + in_channels: 1 + in_feats: 80 + conv_channels: 128 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.0 + se_r: 4 + norm_before: false +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 40 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml new file mode 100644 index 00000000..e4e6d97a --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + 
max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + swa_start: 31 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/datapath.sh b/egs/voxceleb/v1.1/datapath.sh index 9a2f7529..a7eb575c 100644 --- a/egs/voxceleb/v1.1/datapath.sh +++ b/egs/voxceleb/v1.1/datapath.sh @@ -13,6 +13,7 @@ elif [ "$(hostname --domain)" == "cm.gemini" ];then # voxceleb1_root=/expscratch/dsnyder/VoxCeleb1 #voxceleb1 v1 voxceleb1_root=/exp/jvillalba/corpora/voxceleb1 #voxceleb1 v2 voxceleb2_root=/expscratch/dgromero/corpora-open/vox2 + voxsrc22_root=/exp/jvillalba/corpora/voxsrc22 musan_root=/expscratch/dgromero/corpora-open/musan else echo "Put your database paths here" diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh index 14f2cdb4..e9c634a3 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh @@ -17,7 +17,7 @@ nnet_name=${feat_type}_ecapatdnn2048x4.v2.0 nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0030.pth +nnet_s1=$nnet_s1_dir/model_ep0040.pth nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml nnet_s2_name=${nnet_name}.s2 @@ -27,9 +27,9 @@ nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth # back-end do_plda=false -#do_snorm=true -#do_qmf=true -do_voxsrc22=false +do_snorm=true +do_qmf=true +do_voxsrc22=true plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=0 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh index 68990732..1f6eb371 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v2.0.sh @@ -27,9 +27,9 @@ nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth # back-end do_plda=false -#do_snorm=true -#do_qmf=true -do_voxsrc22=false +do_snorm=true +do_qmf=true +do_voxsrc22=true plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=0 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v2.0.sh new file mode 100644 index 00000000..b9363c3f --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v2.0.sh @@ -0,0 +1,45 @@ +# ECAPA-TDNN large + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_idrnd_resnet100.v2.0 + +nnet_s1_base_cfg=conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name 
+nnet_s1=$nnet_s1_dir/model_ep0040.pth + +nnet_s2_base_cfg=conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0030.pth +nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet202.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet202.v2.0.sh new file mode 100644 index 00000000..3de2f432 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet202.v2.0.sh @@ -0,0 +1,45 @@ +# Voxsrc22 Ravana ResNet202 network + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_idrnd_resnet202.v2.0 + +nnet_s1_base_cfg=conf/train_idrnd_resnet202_xvec_stage1_v2.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0040.pth + +nnet_s2_base_cfg=conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0030.pth +nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/run_001_prepare_data.sh b/egs/voxceleb/v1.1/run_001_prepare_data.sh index 037efda1..44385610 100755 --- a/egs/voxceleb/v1.1/run_001_prepare_data.sh +++ b/egs/voxceleb/v1.1/run_001_prepare_data.sh @@ -12,7 +12,7 @@ config_file=default_config.sh . parse_options.sh || exit 1; . datapath.sh - +. $config_file if [ $stage -le 1 ];then # Prepare the VoxCeleb2 dataset for training. 
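The next hunk below drops the commented-out odd/even speaker split in favor of local/make_vox2_trials.py. For reference, a minimal Python equivalent of that removed split; this is a hypothetical helper, assuming Kaldi-style utt2spk files and VoxCeleb2 speaker ids of the form idNNNNN:

    # Split utterances by speaker-id parity: odd speakers for the cohort,
    # even speakers for QMF training, as the removed shell code did.
    def split_by_spk_parity(utt2spk_path):
        odd, even = [], []
        with open(utt2spk_path) as f:
            for line in f:
                utt, spk = line.split()
                (odd if int(spk[2:]) % 2 == 1 else even).append((utt, spk))
        return odd, even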
@@ -34,22 +34,13 @@ if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then --output-dir data/voxsrc22_dev fi -if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then - local/prepare_voxsrc22_test.py \ - --corpus-dir $voxsrc22_root \ - --output-dir data/voxsrc22_test -fi +# if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then +# local/prepare_voxsrc22_test.py \ +# --corpus-dir $voxsrc22_root \ +# --output-dir data/voxsrc22_test +# fi if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then # # split vox2 into 2 parts, for cohort and qmf training - # utils/copy_data_dir.sh data/voxceleb2cat_train data/voxceleb2cat_train_odd - # utils/copy_data_dir.sh data/voxceleb2cat_train data/voxceleb2cat_train_even - # awk 'int(substr($2,3)) % 2 == 1' data/voxceleb2cat_train/utt2spk > data/voxceleb2cat_train_odd/utt2spk - # utils/fix_data_dir.sh data/voxceleb2cat_train_odd - # awk 'int(substr($2,3)) % 2 == 0' data/voxceleb2cat_train/utt2spk > data/voxceleb2cat_train_even/utt2spk - # utils/fix_data_dir.sh data/voxceleb2cat_train_even - # # we keep 3 utts per speaker - # utils/subset_data_dir.sh --per-spk data/voxceleb2cat_train_odd 3 data/voxceleb2cat_train_subset_cohort - # utils/subset_data_dir.sh --per-spk data/voxceleb2cat_train_even 3 data/voxceleb2cat_train_subset_qmf local/make_vox2_trials.py --data-dir data/voxceleb2cat_train fi diff --git a/egs/voxceleb/v1.1/run_002_compute_evad.sh b/egs/voxceleb/v1.1/run_002_compute_evad.sh index eeae00ac..7a2a9be5 100755 --- a/egs/voxceleb/v1.1/run_002_compute_evad.sh +++ b/egs/voxceleb/v1.1/run_002_compute_evad.sh @@ -19,39 +19,43 @@ config_file=default_config.sh if [ $stage -le 1 ]; then - # Prepare to distribute data over multiple machines - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $vaddir/storage ]; then - dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage - if [ "$nodes" == "b0" ];then - utils/create_split_dir.pl \ - utils/create_split_dir.pl \ - /export/b{04,05,06,07}/$dir_name $vaddir/storage - elif [ "$nodes" == "b1" ];then - utils/create_split_dir.pl \ - /export/b{14,15,16,17}/$dir_name $vaddir/storage - elif [ "$nodes" == "c0" ];then - utils/create_split_dir.pl \ - /export/c{06,07,08,09}/$dir_name $vaddir/storage - elif [ "$nodes" == "fs01" ];then - utils/create_split_dir.pl \ - /export/fs01/$dir_name $vaddir/storage - else - echo "we don't distribute data between multiple machines" - fi + # Prepare to distribute data over multiple machines + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $vaddir/storage ]; then + dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage + if [ "$nodes" == "b0" ];then + utils/create_split_dir.pl \ + utils/create_split_dir.pl \ + /export/b{04,05,06,07}/$dir_name $vaddir/storage + elif [ "$nodes" == "b1" ];then + utils/create_split_dir.pl \ + /export/b{14,15,16,17}/$dir_name $vaddir/storage + elif [ "$nodes" == "c0" ];then + utils/create_split_dir.pl \ + /export/c{06,07,08,09}/$dir_name $vaddir/storage + elif [ "$nodes" == "fs01" ];then + utils/create_split_dir.pl \ + /export/fs01/$dir_name $vaddir/storage + else + echo "we don't distribute data between multiple machines" fi + fi fi #Train datasets -if [ $stage -le 2 ];then - for name in voxceleb2cat_train voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 40 ? 
$num_spk:40)) - hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ - --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ - data/${name} exp/make_vad/$name $vaddir - utils/fix_data_dir.sh data/${name} - done +if [ $stage -le 2 ];then + if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" + fi + for name in voxceleb2cat_train voxceleb1_test $extra_data + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 40 ? $num_spk:40)) + hyp_utils/feats/make_evad.sh \ + --write-utt2num-frames true \ + --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ + data/${name} exp/make_vad/$name $vaddir + utils/fix_data_dir.sh data/${name} + done fi diff --git a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh index c42f96bb..4e6a8790 100755 --- a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh +++ b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh @@ -70,7 +70,10 @@ fi if [ $stage -le 2 ]; then # Extracts x-vectors for evaluation - for name in voxceleb1_test + if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" + fi + for name in voxceleb1_test $extra_data do num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') nj=$(($num_spk < 100 ? $num_spk:100)) diff --git a/egs/voxceleb/v1.1/run_040_eval_be.sh b/egs/voxceleb/v1.1/run_040_eval_be.sh index 18c5eeeb..abbdb20c 100755 --- a/egs/voxceleb/v1.1/run_040_eval_be.sh +++ b/egs/voxceleb/v1.1/run_040_eval_be.sh @@ -110,9 +110,37 @@ if [ $stage -le 3 ];then fi +if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then + + echo "Eval voxsrc2 with Cosine scoring" + steps_be/eval_be_cos.sh --cmd "$train_cmd" \ + data/voxsrc22_dev/trials \ + data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + $score_cosine_dir/voxsrc22_dev_scores & + + # steps_be/eval_be_cos.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # $score_cosine_dir/voxsrc22_test_scores + + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_dir + + for f in $(ls $score_cosine_dir/voxsrc22_dev_results); + do + echo $f + cat $f + echo "" + done + +fi + if [ "$do_snorm" == "true" ];then - if [ $stage -le 4 ];then + if [ $stage -le 5 ];then echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm" steps_be/eval_be_cos_snorm.sh \ --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ @@ -133,11 +161,42 @@ if [ "$do_snorm" == "true" ];then echo "" done fi + + if [ $stage -le 6 ];then + echo "Eval voxsrc2 with Cosine scoring" + steps_be/eval_be_cos_snorm.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxsrc22_dev/trials \ + data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_snorm_dir/voxsrc22_dev_scores & + + # steps_be/eval_be_cos_snorm.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # data/voxceleb2cat_train/utt2spk \ + # $xvector_dir/voxceleb2cat_train/xvector.scp \ + # $score_cosine_snorm_dir/voxsrc22_test_scores + + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_snorm_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_snorm_dir + + for f in $(ls $score_cosine_snorm_dir/voxsrc22_dev_results); + do + echo $f + cat $f + echo "" + done 
+ fi fi if [ "$do_qmf" == "true" ];then - if [ $stage -le 5 ];then + if [ $stage -le 7 ];then echo "Train QMF in Vox2" steps_be/train_be_cos_qmf.sh \ --cmd "$train_cmd" --coh-nbest 1000 \ @@ -151,7 +210,7 @@ if [ "$do_qmf" == "true" ];then fi - if [ $stage -le 6 ];then + if [ $stage -le 8 ];then echo "Eval Voxceleb 1 with Cosine scoring" steps_be/eval_be_cos_qmf.sh \ @@ -180,6 +239,46 @@ if [ "$do_qmf" == "true" ];then done fi + + if [ $stage -le 9 ];then + echo "Eval voxsrc2 with Cosine scoring" + # steps_be/eval_be_cos_qmf.sh \ + # --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + # data/voxsrc22_dev/trials \ + # data/voxsrc22_dev/utt2model \ + # $xvector_dir/voxsrc22_dev/xvector.scp \ + # $xvector_dir/voxsrc22_dev/utt2num_frames \ + # data/voxceleb2cat_train/utt2spk \ + # $xvector_dir/voxceleb2cat_train/xvector.scp \ + # $score_cosine_qmf_dir/qmf.h5 \ + # $score_cosine_qmf_dir/voxsrc22_dev_scores & + + # steps_be/eval_be_cos_qmf.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # $xvector_dir/voxsrc22_test/utt2num_frames \ + # data/voxceleb2cat_train/utt2spk \ + # $xvector_dir/voxceleb2cat_train/xvector.scp \ + # $score_cosine_qmf_dir/qmf.h5 \ + # $score_cosine_qmf_dir/voxsrc22_test_scores + + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev_snorm.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir _snorm + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev_qmf.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir _qmf + + for f in $(ls $score_cosine_qmf_dir/voxsrc22_dev{,_snorm,_qmf}_results); + do + echo $f + cat $f + echo "" + done + fi + fi diff --git a/egs/voxceleb/v1/local/score_voxsrc22_dev.sh b/egs/voxceleb/v1/local/score_voxsrc22_dev.sh new file mode 100755 index 00000000..f4649fb7 --- /dev/null +++ b/egs/voxceleb/v1/local/score_voxsrc22_dev.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Copyright 2020 Johns Hopkins University (Jesus Villalba) +# Apache 2.0. 
+#
+if [ $# -ne 2 ] && [ $# -ne 3 ]; then
+    echo "Usage: $0 <data-dir> <score-dir> [suffix]"
+    exit 1;
+fi
+
+set -e
+
+data_dir=$1
+score_dir=$2
+suffix=$3
+
+echo "Score voxsrc22 dev"
+key=$data_dir/trials
+#Compute performance
+python local/score_dcf.py --key-file $key --score-file $score_dir/voxsrc22_dev_scores$suffix --output-path $score_dir/voxsrc22_dev$suffix
+
+
diff --git a/hyperion/torch/layer_blocks/se_blocks.py b/hyperion/torch/layer_blocks/se_blocks.py
index c53d5ecc..b14c2b60 100644
--- a/hyperion/torch/layer_blocks/se_blocks.py
+++ b/hyperion/torch/layer_blocks/se_blocks.py
@@ -177,7 +177,7 @@ def forward(self, x, x_mask=None):
         """
         x = x.transpose(1, 2)
         y = super().forward(x, x_mask)
-        y = y.tranpose(1, 2).continous()
+        y = y.transpose(1, 2).contiguous()
         return y
 
diff --git a/hyperion/torch/narchs/resnet.py b/hyperion/torch/narchs/resnet.py
index e3264f33..59143c2e 100644
--- a/hyperion/torch/narchs/resnet.py
+++ b/hyperion/torch/narchs/resnet.py
@@ -10,10 +10,16 @@
 import torch.nn as nn
 from torch.nn import BatchNorm1d, Conv1d, Linear
 
-from ..layer_blocks import (Res2NetBasicBlock, Res2NetBNBlock,
-                            ResNetBasicBlock, ResNetBNBlock,
-                            ResNetEndpointBlock, ResNetInputBlock,
-                            SEResNetBasicBlock, SEResNetBNBlock)
+from ..layer_blocks import (
+    Res2NetBasicBlock,
+    Res2NetBNBlock,
+    ResNetBasicBlock,
+    ResNetBNBlock,
+    ResNetEndpointBlock,
+    ResNetInputBlock,
+    SEResNetBasicBlock,
+    SEResNetBNBlock,
+)
 from ..layers import ActivationFactory as AF
 from ..layers import NormLayer2dFactory as NLF
 from ..utils import scale_seq_lengths, seq_lengths_to_mask
@@ -296,7 +302,11 @@ def _make_layer(self, block, channels, num_blocks, stride=1, dilate=False):
                 kwargs = {"se_r": self.se_r}
             else:
                 num_feats = int(self.in_feats / (self._downsample_factor * stride))
-                kwargs = {"se_r": self.se_r, "time_se": True, "num_feats": num_feats}
+                kwargs = {
+                    "se_r": self.se_r,
+                    "se_type": self.se_type,
+                    "num_feats": num_feats,
+                }
 
         if self.is_res2net:
             kwargs["scale"] = self.res2net_scale
@@ -972,7 +982,7 @@ def __init__(self, in_channels, **kwargs):
         kwargs["base_channels"] = 128
         kwargs["resb_channels"] = [128, 128, 256, 256]
         kwargs["se_type"] = "fw-se"
-        super().__init__("basic", [6, 16, 24, 3], in_channels, **kwargs)
+        super().__init__("sebasic", [6, 16, 24, 3], in_channels, **kwargs)
 
 
 class FwSEIdRndResNet202(ResNet):
@@ -980,7 +990,7 @@ def __init__(self, in_channels, **kwargs):
         kwargs["base_channels"] = 128
         kwargs["resb_channels"] = [128, 128, 256, 256]
         kwargs["se_type"] = "fw-se"
-        super().__init__("basic", [6, 16, 75, 3], in_channels, **kwargs)
+        super().__init__("sebasic", [6, 16, 75, 3], in_channels, **kwargs)
 
 
 # Channel-Freq-wise Squezee-Excitation ResNets
@@ -1083,7 +1093,7 @@ def __init__(self, in_channels, **kwargs):
         kwargs["base_channels"] = 128
         kwargs["resb_channels"] = [128, 128, 256, 256]
         kwargs["se_type"] = "cfw-se"
-        super().__init__("basic", [6, 16, 24, 3], in_channels, **kwargs)
+        super().__init__("sebasic", [6, 16, 24, 3], in_channels, **kwargs)
 
 
 class CFwSEIdRndResNet202(ResNet):
@@ -1091,7 +1101,7 @@ def __init__(self, in_channels, **kwargs):
         kwargs["base_channels"] = 128
         kwargs["resb_channels"] = [128, 128, 256, 256]
         kwargs["se_type"] = "cfw-se"
-        super().__init__("basic", [6, 16, 75, 3], in_channels, **kwargs)
+        super().__init__("sebasic", [6, 16, 75, 3], in_channels, **kwargs)
 
 
 #################### Res2Net variants ########################

From 21a47643c17d5ec3e7f739c1371470a0c2df1db7 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Mon, 30 Jan 2023 07:16:55 -0500
Subject: [PATCH 079/154] uncomment script

---
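Note: this patch re-enables the voxsrc22 QMF evaluation prepared in PATCH 076. As a rough sketch of what the QMF stage computes at eval time, assuming the trained model is a linear fusion over the s-normed score and the quality measures built in eval-be-cos-qmf.py (w and b stand for the trained fusion weights and bias; all names here are illustrative, not the hyperion API):

    import numpy as np

    # Stack the s-normed scores and the four quality measures into a feature
    # matrix with one row per trial, then apply the linear fusion.
    def apply_qmf(scores_norm, maxnf, minnf, maxcohmu, mincohmu, w, b):
        feats = np.stack(
            [scores_norm.ravel(), maxnf.ravel(), minnf.ravel(),
             maxcohmu.ravel(), mincohmu.ravel()], axis=1)
        return (feats @ w + b).reshape(scores_norm.shape)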
egs/voxceleb/v1.1/run_040_eval_be.sh | 58 +++++----------------------- 1 file changed, 10 insertions(+), 48 deletions(-) diff --git a/egs/voxceleb/v1.1/run_040_eval_be.sh b/egs/voxceleb/v1.1/run_040_eval_be.sh index abbdb20c..358e2acf 100755 --- a/egs/voxceleb/v1.1/run_040_eval_be.sh +++ b/egs/voxceleb/v1.1/run_040_eval_be.sh @@ -242,16 +242,16 @@ if [ "$do_qmf" == "true" ];then if [ $stage -le 9 ];then echo "Eval voxsrc2 with Cosine scoring" - # steps_be/eval_be_cos_qmf.sh \ - # --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ - # data/voxsrc22_dev/trials \ - # data/voxsrc22_dev/utt2model \ - # $xvector_dir/voxsrc22_dev/xvector.scp \ - # $xvector_dir/voxsrc22_dev/utt2num_frames \ - # data/voxceleb2cat_train/utt2spk \ - # $xvector_dir/voxceleb2cat_train/xvector.scp \ - # $score_cosine_qmf_dir/qmf.h5 \ - # $score_cosine_qmf_dir/voxsrc22_dev_scores & + steps_be/eval_be_cos_qmf.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxsrc22_dev/trials \ + data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + $xvector_dir/voxsrc22_dev/utt2num_frames \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/qmf.h5 \ + $score_cosine_qmf_dir/voxsrc22_dev_scores & # steps_be/eval_be_cos_qmf.sh --cmd "$train_cmd" \ # data/voxsrc22_test/trials \ @@ -281,41 +281,3 @@ if [ "$do_qmf" == "true" ];then fi - -exit -# be_dir=exp/be/$nnet_name/cw -# score_plda_dir=$score_dir/cw_cosine - -# if [ $stage -le 4 ]; then -# echo "Train centering+whitening on Voxceleb2" -# steps_be/train_be_v2.sh --cmd "$train_cmd" \ -# $xvector_dir/$plda_data/xvector.scp \ -# data/$plda_data \ -# $be_dir -# fi - - -# if [ $stage -le 5 ];then - -# echo "Eval Voxceleb 1 with CentWhiten + Cosine scoring" -# steps_be/eval_be_v2.sh --cmd "$train_cmd" \ -# data/voxceleb1_test/trials \ -# data/voxceleb1_test/utt2model \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $be_dir/cw.h5 \ -# $score_plda_dir/voxceleb1_scores - -# $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# fi - -# exit - From 16b1316ab478a0001d21f57389ed8af30107ce2f Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 30 Jan 2023 10:45:01 -0500 Subject: [PATCH 080/154] fix bug --- egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py b/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py index 90650941..82050ed1 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py +++ b/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py @@ -89,9 +89,12 @@ def eval_plda( if not np.any(ndx.trial_mask): save_empty(score_file, None, model_part_idx, seg_part_idx, parallel) + save_empty(score_file, "snorm", model_part_idx, seg_part_idx, parallel) if qmf_file is None: for q_name in ["snorm", "maxnf", "minnf", "maxcohmu", "mincohmu"]: save_empty(score_file, q_name, model_part_idx, seg_part_idx, parallel) + else: + save_empty(score_file, "qmf", model_part_idx, seg_part_idx, parallel) return logging.info("read num_frames") From 76ac6f3c2363bcd6583bbc3058bb3e8e9fc6dd5c Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 9 Feb 2023 12:28:27 -0500 Subject: [PATCH 081/154] audio dataset with dictionary for asr --- ...n_wav2vec2base_transducer_stage1_v5.0.yaml | 53 +++++++ .../conf/wav2vec2base_transducer_do0.4.yaml | 13 ++ 
.../v1/global_conf/config_transducer_v5.0.sh | 32 +++++ hyperion/bin/train_wav2vec2transducer.py | 11 +- hyperion/torch/data/audio_dataset.py | 2 +- hyperion/torch/trainers/transducer_trainer.py | 136 +++++++++++------- 6 files changed, 195 insertions(+), 52 deletions(-) create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml create mode 100644 egs/librispeech/v1/conf/wav2vec2base_transducer_do0.4.yaml create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v5.0.sh diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml new file mode 100644 index 00000000..c23a4f11 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml @@ -0,0 +1,53 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 75. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 75. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2base_transducer_do0.4.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 42000 + hold_steps: 15000 + min_lr: 4e-5 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 1200 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/wav2vec2base_transducer_do0.4.yaml b/egs/librispeech/v1/conf/wav2vec2base_transducer_do0.4.yaml new file mode 100644 index 00000000..3707672a --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2base_transducer_do0.4.yaml @@ -0,0 +1,13 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h +transducer: + decoder: + embedding_dim: 1024 + num_layers: 2 + hidden_dim: 512 + embedding_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + joiner: + num_layers: 1 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/librispeech/v1/global_conf/config_transducer_v5.0.sh b/egs/librispeech/v1/global_conf/config_transducer_v5.0.sh new file mode 100644 index 00000000..b1da75b7 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v5.0.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_transducer_stage1_v5.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v5.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0030.pth +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0075.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git 
a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index ee60080a..8b945217 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -41,14 +41,19 @@ def transducer_collate(batch): audio_length = [] target = [] for record in batch: - wav = torch.as_tensor(record[0]) + wav = torch.as_tensor(record["x"]) audio.append(wav) audio_length.append(wav.shape[0]) - target.append(record[1]) + target.append(record["text"]) audio = pad_sequence(audio) audio_length = torch.as_tensor(audio_length) target = k2.RaggedTensor(target) - return torch.transpose(audio, 0, 1), audio_length, target + batch = { + "x": torch.transpose(audio, 0, 1), + "x_lengths": audio_length, + "text": target, + } + return batch def init_data(partition, rank, num_gpus, **kwargs): diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 20deb039..b352f94d 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -259,7 +259,7 @@ def _get_segment_info(self, seg_id): seg_info_i = class_info.loc[seg_info_i, "class_idx"] if info_name == "text": - seg_info = self.sp.encode(seg_info, out_type=int) + seg_info_i = self.sp.encode(seg_info_i, out_type=int) seg_info[info_name] = seg_info_i diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 932c3ed4..cbf94bc0 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -7,11 +7,14 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torchaudio import torch.nn as nn -from ..utils import MetricAcc +from ...utils.misc import filter_func_args +from ..utils import MetricAcc, tensors_subset from .torch_trainer import TorchTrainer from torch.distributed.elastic.multiprocessing.errors import record @@ -47,6 +50,7 @@ class TransducerTrainer(TorchTrainer): swa_anneal_epochs: SWA learning rate anneal epochs cpu_offload: CPU offload of gradients when using fully sharded ddp """ + def __init__( self, model, @@ -75,39 +79,42 @@ def __init__( swa_lr=1e-3, swa_anneal_epochs=10, cpu_offload=False, + input_key="x", + target_key="text", ): - if loss is None: - # TODO: Check and Modify loss - loss = nn.CrossEntropyLoss() - super().__init__( - model, - loss, - optim, - epochs, - exp_path, - cur_epoch=cur_epoch, - grad_acc_steps=grad_acc_steps, - eff_batch_size=eff_batch_size, - device=device, - metrics=metrics, - lrsched=lrsched, - loggers=loggers, - ddp=ddp, - ddp_type=ddp_type, - train_mode=train_mode, - use_amp=use_amp, - log_interval=log_interval, - use_tensorboard=use_tensorboard, - use_wandb=use_wandb, - wandb=wandb, - grad_clip=grad_clip, - grad_clip_norm=grad_clip_norm, - swa_start=swa_start, - swa_lr=swa_lr, - swa_anneal_epochs=swa_anneal_epochs, - cpu_offload=cpu_offload, - ) + loss = None + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + + # super().__init__( + # model, + # None, + # optim, + # epochs, + # exp_path, + # cur_epoch=cur_epoch, + # grad_acc_steps=grad_acc_steps, + # eff_batch_size=eff_batch_size, + # device=device, + # metrics=metrics, + # lrsched=lrsched, + # loggers=loggers, + # ddp=ddp, + # ddp_type=ddp_type, + # train_mode=train_mode, + # use_amp=use_amp, + # log_interval=log_interval, + # use_tensorboard=use_tensorboard, + # use_wandb=use_wandb, + # wandb=wandb, + # grad_clip=grad_clip, + # grad_clip_norm=grad_clip_norm, + # 
swa_start=swa_start, + # swa_lr=swa_lr, + # swa_anneal_epochs=swa_anneal_epochs, + # cpu_offload=cpu_offload, + # ) @record def train_epoch(self, data_loader): @@ -116,29 +123,35 @@ def train_epoch(self, data_loader): Args: data_loader: pytorch data loader returning features and class labels. """ - + batch_keys = [ + self.input_key, f"{self.input_key}_lengths", self.target_key + ] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() self.sp = data_loader.dataset.sp - for batch, (data, audio_length, target) in enumerate(data_loader): + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - # TODO: Check and Modify data, target - data, audio_length, target = data.to(self.device), audio_length.to( - self.device), target.to(self.device) - batch_size = data.shape[0] + + # # TODO: Check and Modify data, target + # data, audio_length, target = data.to(self.device), audio_length.to( + # self.device), target.to(self.device) + #print(data.keys(), batch_keys, flush=True) + input_data, input_lengths, target = tensors_subset( + data, batch_keys, self.device) + batch_size = input_data.shape[0] with self.amp_autocast(): # print("xx", data.shape, data.shape[0] * data.shape[1] / 16000, # torch.sum(audio_length).item() / 16000, # torch.min(audio_length).item() / 16000, # torch.max(audio_length).item() / 16000) - output, loss = self.model(data, - x_lengths=audio_length, + output, loss = self.model(input_data, + x_lengths=input_lengths, y=target) loss = loss.mean() / self.grad_acc_steps @@ -173,7 +186,9 @@ def validation_epoch(self, data_loader, swa_update_bn=False): data_loader: PyTorch data loader return input/output pairs. sw_update_bn: wheter or not, update batch-norm layers in SWA. """ - + batch_keys = [ + self.input_key, f"{self.input_key}_lengths", self.target_key + ] metric_acc = MetricAcc(self.device) batch_metrics = ODict() with torch.no_grad(): @@ -184,17 +199,22 @@ def validation_epoch(self, data_loader, swa_update_bn=False): log_tag = "val_" self.model.eval() - for batch, (data, audio_length, target) in enumerate(data_loader): - data, audio_length, target = data.to( - self.device), audio_length.to(self.device), target.to( - self.device) - batch_size = data.shape[0] + for batch, data in enumerate(data_loader): + + input_data, input_lengths, target = tensors_subset( + data, batch_keys, self.device) + batch_size = input_data.shape[0] + + # data, audio_length, target = data.to( + # self.device), audio_length.to(self.device), target.to( + # self.device) + # batch_size = data.shape[0] # data, target = data.to(self.device), target.to(self.device) # batch_size = data.shape[0] with self.amp_autocast(): - output, loss = self.model(data, - x_lengths=audio_length, + output, loss = self.model(input_data, + x_lengths=input_lengths, y=target) # output = self.model(data) # loss = self.loss(output, target) @@ -208,3 +228,23 @@ def validation_epoch(self, data_loader, swa_update_bn=False): logs = metric_acc.metrics logs = ODict((log_tag + k, v) for k, v in logs.items()) return logs + + @staticmethod + def add_class_args(parser, prefix=None, train_modes=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + super_skip = skip.copy() + super_skip.add("target_key") + TorchTrainer.add_class_args(parser, + train_modes=train_modes, + skip=super_skip) + if "target_key" not in skip: + parser.add_argument("--target-key", + default="text", + help="dict. 
key for nnet targets") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) From aabbef1ae71e3580f8582058b523041a3108474a Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 10 Feb 2023 14:54:47 -0500 Subject: [PATCH 082/154] start refactorizing rnn-t --- .../torch/layer_blocks/transducer_joiner.py | 99 +++++++ .../layer_blocks/transducer_predictor.py | 224 ++++++++++++++++ .../torch/models/transducer/rnn_transducer.py | 250 ++++++++++++++++++ .../torch/models/transducer/transducer.py | 24 +- .../torch/narchs/rnn_transducer_decoder.py | 95 +++++++ 5 files changed, 679 insertions(+), 13 deletions(-) create mode 100644 hyperion/torch/layer_blocks/transducer_joiner.py create mode 100644 hyperion/torch/layer_blocks/transducer_predictor.py create mode 100644 hyperion/torch/models/transducer/rnn_transducer.py create mode 100644 hyperion/torch/narchs/rnn_transducer_decoder.py diff --git a/hyperion/torch/layer_blocks/transducer_joiner.py b/hyperion/torch/layer_blocks/transducer_joiner.py new file mode 100644 index 00000000..ee7a667b --- /dev/null +++ b/hyperion/torch/layer_blocks/transducer_joiner.py @@ -0,0 +1,99 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba, Yen-Ju Lu) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +import logging +from typing import Optional, Tuple + +import torch +import torch.nn as nn + + +class TransducerJoiner(nn.Module): + """ RNN-T Joiner network. + Implementation based on + https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/transducer/joiner.py + + Attributes: + in_feats: input feature dimension. + vocab_size: vocabulary size + """ + + def __init__(self, in_feats: int, vocab_size: int): + super().__init__() + self.in_feats = in_feats + self.vocab_size = vocab_size + + self.output = nn.Linear(in_feats, out_dims) + + def forward(self, encoder_out: torch.Tensor, + pred_out: torch.Tensor) -> torch.Tensor: + """ + Args: + encoder_out: Output from the encoder with shape = (N, T, C). + pred_out: Output from the predictor with shape = (N, U, C). + Returns: + Return a tensor of shape (N, T, U, C). 
+ """ + assert encoder_out.ndim == pred_out.ndim == 3 + assert encoder_out.size(0) == pred_out.size(0) + assert encoder_out.size(2) == pred_out.size(2) + + encoder_out = encoder_out.unsqueeze(2) + # Now encoder_out is (N, T, 1, C) + pred_out = pred_out.unsqueeze(1) + # Now decoder_out is (N, 1, U, C) + x = torch.tanh(encoder_out + pred_out) + + logits = self.output(x) + return logits + + def get_config(self): + config = { + "in_feats": self.in_feats, + "out_dims": self.out_dims, + "num_layers": self.num_layers, + } + + # base_config = super().get_config() + return dict(list(config.items())) + + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "in_feats", + "out_dims", + "num_layers", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + return args + + @staticmethod + def add_class_args(parser, + prefix=None, + skip=set(["in_feats", "out_dims"])): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + if "in_feats" not in skip: + parser.add_argument("--in-feats", + type=int, + required=True, + help=("input feature dimension")) + + if "out_dims" not in skip: + parser.add_argument("--out-dims", + type=int, + required=True, + help=("output feature dimension (vocab size)")) + parser.add_argument("--num-layers", + default=1, + type=int, + help=("layers of the joiner")) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/layer_blocks/transducer_predictor.py b/hyperion/torch/layer_blocks/transducer_predictor.py new file mode 100644 index 00000000..178c423a --- /dev/null +++ b/hyperion/torch/layer_blocks/transducer_predictor.py @@ -0,0 +1,224 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba, Yen-Ju Lu) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +import logging +from typing import Optional, Tuple + +import torch +import torch.nn as nn + + +class TransducerPredictor(nn.Module): + """ RNN-T prediction network. + Implmentation based on: + https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/transducer/decoder.py + + Attributes: + vocab_size: Number of tokens of the modeling unit including blank. + embed_dim: Dimension of the input embedding. + blank_id: The ID of the blank symbol. + num_layers: Number of LSTM layers. + hid_feats: Hidden dimension of LSTM layers. + out_feats: Output dimension of the predictor. + embed_dropout_rate: Dropout rate for the embedding layer. + rnn_dropout_rate: Dropout for LSTM layers. 
+ + """ + + def __init__(self, + vocab_size: int, + embed_dim: int, + num_layers: int, + hid_feats: int, + out_feats: int, + embed_dropout_rate: float = 0.0, + rnn_dropout_rate: float = 0.0, + rnn_type: str = "lstm", + blank_id: int = 0): + super().__init__() + self.embedding = nn.Embedding( + num_embeddings=vocab_size, + embed_dim=embed_dim, + padding_idx=blank_id, + ) + self.embed_dropout = nn.Dropout(embed_dropout_rate) + if rnn_type == "lstm": + self.rnn = nn.LSTM( + input_size=embed_dim, + hidden_size=hid_feats, + num_layers=num_layers, + batch_first=True, + dropout=rnn_dropout_rate, + ) + elif rnn_type == "gru": + self.rnn = nn.GRU( + input_size=embed_dim, + hidden_size=hid_feats, + num_layers=num_layers, + batch_first=True, + dropout=rnn_dropout_rate, + ) + else: + raise Exception(f"Unknown RNN type {rnn_type}") + + self.out_feats = out_feats + self.blank_id = blank_id + self.vocab_size = vocab_size + self.embed_dim = embed_dim + self.num_layers = num_layers + self.hid_feats = hid_feats + self.embed_dropout_rate = embed_dropout_rate + self.rnn_dropout_rate = rnn_dropout_rate + self.output = nn.Linear(hid_feats, in_feats) + + def forward( + self, + y: torch.Tensor, + states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + """ + Args: + y: previous y_{ prepended. + states: tuple of tensors containing RNN layers states + Returns: + - rnn_output, a tensor of shape (N, U, C) + - (h, c), containing the states i for RNN layers with shape (num_layers, N, C). + """ + embed = self.embedding(y) + embed = self.embed_dropout(embed) + rnn_out, (h, c) = self.rnn(embed, states) + out = self.output(rnn_out) + + return out, (h, c) + + def get_config(self): + config = { + "in_feats": self.in_feats, + "blank_id": self.blank_id, + "vocab_size": self.vocab_size, + "embed_dim": self.embed_dim, + "num_layers": self.num_layers, + "hid_feats": self.hid_feats, + "embed_dropout_rate": self.embed_dropout_rate, + "rnn_dropout_rate": self.rnn_dropout_rate, + } + + # base_config = super().get_config() + return dict(list(config.items())) + + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "in_feats", + "blank_id", + "vocab_size", + "embed_dim", + "num_layers", + "hid_feats", + "embed_dropout_rate", + "rnn_dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + return args + + @staticmethod + def filter_finetune_args(**kwargs): + valid_args = ( + "embed_dropout_rate", + "rnn_dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + return args + + @staticmethod + def add_class_args(parser, + prefix=None, + skip=set(["in_feats", "blank_id", "vocab_size"])): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + if "in_feats" not in skip: + parser.add_argument("--in-feats", + type=int, + required=True, + help=("input feature dimension")) + if "blank_id" not in skip: + parser.add_argument("--blank-id", + type=int, + required=True, + help=("blank id from sp model")) + if "vocab_size" not in skip: + parser.add_argument("--vocab-size", + type=int, + required=True, + help=("output prediction dimension")) + parser.add_argument("--embedding-dim", + default=1024, + type=int, + help=("feature dimension")) + parser.add_argument("--embedding-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder input embeddings")) + parser.add_argument("--rnn-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder RNN ")) + 
+ parser.add_argument("--num-layers", default=2, type=int, help=("")) + + parser.add_argument("--hidden-dim", default=512, type=int, help=("")) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) + + def change_config( + self, + override_dropouts=False, + embed_dropout_rate: float = 0.0, + rnn_dropout_rate: float = 0.0, + ): + logging.info("changing decoder config") + + if override_dropouts: + logging.info("overriding decoder dropouts") + self.rnn_dropout_rate = rnn_dropout_rate + self.rnn.p = self.rnn_dropout_rate + self.embed_dropout_rate = embed_dropout_rate + self.embed_dropout = nn.Dropout(self.embed_dropout_rate) + + @staticmethod + def add_finetune_args(parser, + prefix=None, + skip=set(["in_feats", "blank_id", "vocab_size"])): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model.")) + parser.add_argument("--embedding-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder input embeddings")) + parser.add_argument("--rnn-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder RNN ")) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/transducer/rnn_transducer.py b/hyperion/torch/models/transducer/rnn_transducer.py new file mode 100644 index 00000000..dd91da5f --- /dev/null +++ b/hyperion/torch/models/transducer/rnn_transducer.py @@ -0,0 +1,250 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Note we use `rnnt_loss` from torchaudio, which exists only in +torchaudio >= v0.10.0. It also means you have to use torch >= v1.10.0 +""" +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +try: + import k2 +except ModuleNotFoundError: + from ...utils import dummy_k2 as k2 + +import logging +import torch +import torch.nn as nn +import torchaudio +import torchaudio.functional +#from .encoder_interface import EncoderInterface + +from ...torch_model import TorchModel +from hyperion.utils.text import add_sos +# from .conformer import Conformer +from .decoder import Decoder +from .joiner import Joiner + + +class Transducer(TorchModel): + """It implements https://arxiv.org/pdf/1211.3711.pdf + "Sequence Transduction with Recurrent Neural Networks" + """ + + def __init__( + self, + encoder, + # conformer_enc, + decoder, + joiner, + vocab_size, + blank_id, + ): + """ + Args: + encoder: + It is the transcription network in the paper. Its accepts + two inputs: `x` of (N, T, C) and `x_lengths` of shape (N,). + It returns two tensors: `logits` of shape (N, T, C) and + `logit_lens` of shape (N,). 
+ decoder: + It is the prediction network in the paper. Its input shape + is (N, U) and its output shape is (N, U, C). It should contain + one attribute: `blank_id`. + joiner: + It has two inputs with shapes: (N, T, C) and (N, U, C). Its + output shape is (N, T, U, C). Note that its output contains + unnormalized probs, i.e., not processed by log-softmax. + """ + super().__init__() + decoder["blank_id"] = blank_id + decoder["vocab_size"] = vocab_size + joiner["out_dims"] = vocab_size + + self.vocab_size = vocab_size + self.blank_id = blank_id + self.encoder = encoder + self.decoder = Decoder(**decoder) + self.joiner = Joiner(**joiner) + + def forward( + self, + x: torch.Tensor, + x_lengths: torch.Tensor, + y: k2.RaggedTensor, + ) -> torch.Tensor: + """ + Args: + x: + A 3-D tensor of shape (N, T, C). + x_lengths: + A 1-D tensor of shape (N,). It contains the number of frames in `x` + before padding. + y: + A ragged tensor with 2 axes [utt][label]. It contains labels of each + utterance. + Returns: + Return the transducer loss. + """ + assert x.ndim == 3, x.shape + assert x_lengths.ndim == 1, x_lengths.shape + assert y.num_axes == 2, y.num_axes + + assert x.size(0) == x_lengths.size(0) == y.dim0 + + # wav2vec2 works as encoder + # encoder_out, x_lengths = self.encoder(x, x_lengths) + assert torch.all(x_lengths > 0) + + encoder_out = x + # Now for the decoder, i.e., the prediction network + row_splits = y.shape.row_splits(1) + y_lens = row_splits[1:] - row_splits[:-1] + + blank_id = self.decoder.blank_id + sos_y = add_sos(y, sos_id=blank_id) + + sos_y_padded = sos_y.pad(mode="constant", padding_value=blank_id) + sos_y_padded = sos_y_padded.to(torch.int64) + + decoder_out, _ = self.decoder(sos_y_padded) + + logits = self.joiner(encoder_out, decoder_out) + + # rnnt_loss requires 0 padded targets + # Note: y does not start with SOS + y_padded = y.pad(mode="constant", padding_value=0) + + assert hasattr(torchaudio.functional, "rnnt_loss"), ( + f"Current torchaudio version: {torchaudio.__version__}\n" + "Please install a version >= 0.10.0") + + x_lengths = x_lengths.to(torch.int32) + + loss = torchaudio.functional.rnnt_loss( + logits=logits, + targets=y_padded.to(torch.int32), + logit_lengths=x_lengths, + target_lengths=y_lens, + blank=blank_id, + reduction="sum", + ) + + return logits, loss + + def set_train_mode(self, mode): + if mode == self._train_mode: + return + + if mode == "full": + self.unfreeze() + elif mode == "frozen": + self.freeze() + elif mode == "ft-embed-affine": + self.unfreeze() + self.freeze_preembed_layers() + else: + raise ValueError(f"invalid train_mode={mode}") + + self._train_mode = mode + + def _train(self, train_mode: str): + if train_mode in ["full", "frozen"]: + super()._train(train_mode) + else: + raise ValueError(f"invalid train_mode={train_mode}") + + @staticmethod + def valid_train_modes(): + return ["full", "frozen", "ft-embed-affine"] + + def get_config(self): + dec_cfg = self.decoder.get_config() + join_cfg = self.joiner.get_config() + + config = { + "blank_id": self.blank_id, + "vocab_size": self.vocab_size, + "decoder": dec_cfg, + "joiner": join_cfg, + } + + # base_config = super().get_config() + return dict(list(config.items())) + + @staticmethod + def filter_args(**kwargs): + + # get arguments for pooling + decoder_args = Decoder.filter_args(**kwargs["decoder"]) + joiner_args = Joiner.filter_args(**kwargs["joiner"]) + + valid_args = () + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + args["decoder"] = decoder_args + args["joiner"] = 
joiner_args + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + Decoder.add_class_args(parser, prefix="decoder") + Joiner.add_class_args(parser, prefix="joiner") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) + + def change_config( + self, + decoder, + # joiner, + ): + logging.info("changing transducer config") + self.decoder.change_config(**decoder) + # self.joiner.change_config(**joiner) + + @staticmethod + def filter_finetune_args(**kwargs): + # get arguments for pooling + decoder_args = Decoder.filter_finetune_args(**kwargs["decoder"]) + # joiner_args = Joiner.filter_finetune_args(**kwargs["joiner"]) + + valid_args = () + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + args["decoder"] = decoder_args + # args["joiner"] = joiner_args + return args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + Decoder.add_finetune_args(parser, prefix="decoder") + # Joiner.add_finetune_args(parser, prefix="joiner") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) + + add_argparse_args = add_class_args + add_argparse_finetune_args = add_finetune_args diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index 8d2a09e8..855e1590 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -28,7 +28,7 @@ import torch.nn as nn import torchaudio import torchaudio.functional -from .encoder_interface import EncoderInterface +#from .encoder_interface import EncoderInterface from ...torch_model import TorchModel from hyperion.utils.text import add_sos @@ -41,13 +41,15 @@ class Transducer(TorchModel): """It implements https://arxiv.org/pdf/1211.3711.pdf "Sequence Transduction with Recurrent Neural Networks" """ + def __init__( self, - vocab_size, - blank_id, + encoder_net, # conformer_enc, decoder, joiner, + vocab_size, + blank_id, ): """ Args: @@ -66,9 +68,6 @@ def __init__( unnormalized probs, i.e., not processed by log-softmax. 
""" super().__init__() - # assert isinstance(encoder, EncoderInterface) - # assert hasattr(decoder, "blank_id") - decoder["blank_id"] = blank_id decoder["vocab_size"] = vocab_size joiner["out_dims"] = vocab_size @@ -211,10 +210,11 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - def change_config(self, + def change_config( + self, decoder, # joiner, - ): + ): logging.info("changing transducer config") self.decoder.change_config(**decoder) # self.joiner.change_config(**joiner) @@ -225,8 +225,7 @@ def filter_finetune_args(**kwargs): decoder_args = Decoder.filter_finetune_args(**kwargs["decoder"]) # joiner_args = Joiner.filter_finetune_args(**kwargs["joiner"]) - valid_args = ( - ) + valid_args = () args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) args["decoder"] = decoder_args @@ -243,9 +242,8 @@ def add_finetune_args(parser, prefix=None): # Joiner.add_finetune_args(parser, prefix="joiner") if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) add_argparse_args = add_class_args add_argparse_finetune_args = add_finetune_args - - diff --git a/hyperion/torch/narchs/rnn_transducer_decoder.py b/hyperion/torch/narchs/rnn_transducer_decoder.py new file mode 100644 index 00000000..ef153776 --- /dev/null +++ b/hyperion/torch/narchs/rnn_transducer_decoder.py @@ -0,0 +1,95 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from jsonargparse import ActionParser, ArgumentParser + +import torch +import torch.nn as nn + +try: + import k2 +except ModuleNotFoundError: + from ...utils import dummy_k2 as k2 + +from ...utils import filter_func_args +from ..layer_blocks import TransducerPredictor as Predictor, TransducerJoiner as Joiner +from .net_arch import NetArch + + +class RNNTransducerDecoder(NetArch): + """ RNN-T Decoder composed of Predictor and Joiner networks + Implementation based on + https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/transducer/transducer.py + + Attributes: + in_feats: input features dimension (encoder output) + vocab_size: Number of tokens of the modeling unit including blank. + embed_dim: Dimension of the predictor input embedding. + blank_id: The ID of the blank symbol. + num_layers: Number of LSTM layers. + hid_feats: Hidden dimension for predictor layers. + embed_dropout_rate: Dropout rate for the embedding layer. + rnn_dropout_rate: Dropout for LSTM layers. 
+ + """ + + def __init__(self, + in_feats: int, + vocab_size: int, + embed_dim: int, + num_pred_layers: int, + pred_hid_feats: int, + embed_dropout_rate: float = 0.0, + rnn_dropout_rate: float = 0.0, + rnn_type: str = "lstm", + blank_id: int = 0): + + super().__init__() + self.in_feats = in_feats + self.vocab_size = vocab_size + self.embed_dim = embed_dim + self.num_pred_layers = num_pred_layers + self.pred_hid_feats = pred_hid_feats + self.embed_dropout_rate = embed_dropout_rate + self.rnn_dropout_rate = rnn_dropout_rate + self.rnn_type = rnn_type + self.blank_id = blank_id + + pred_args = filter_func_args(Predictor.__init__, locals()) + pred_args["num_layers"] = num_pred_layers + pred_args["hid_feats"] = pred_hid_feats + pred_args["out_feats"] = in_feats + self.predictor = Predictor(**pred_args) + self.joiner = Joiner(in_feats, vocab_size) + + def forward(self, x: torch.Tensor, x_lengths: torch.Tensor, + y: k2.RaggedTensor) -> torch.Tensor: + + # get y_lengths + row_splits = y.shape.row_splits(1) + y_lengths = row_splits[1:] - row_splits[:-1] + + # shift y adding token + sos_y = add_sos(y, sos_id=self.blank_id) + sos_y_padded = sos_y.pad(mode="constant", padding_value=self.blank_id) + sos_y_padded = sos_y_padded.to(torch.int64) + + # apply predictor and joiner + pred_out, _ = self.predictor(sos_y_padded) + logits = self.joiner(x, pred_out) + + # rnnt_loss requires 0 padded targets + # Note: y does not start with SOS + y_padded = y.pad(mode="constant", padding_value=0) + x_lengths = x_lengths.to(torch.int32) + loss = torchaudio.functional.rnnt_loss( + logits=logits, + targets=y_padded.to(torch.int32), + logit_lengths=x_lengths, + target_lengths=y_lengths, + blank=blank_id, + reduction="sum", + ) + return loss From 9a619a5f4808d318cb488504011dfa700ad44423 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 16 Feb 2023 08:58:02 -0500 Subject: [PATCH 083/154] got results with ravana resnet100 --- egs/voxceleb/v1.1/README.md | 12 ++++++++++++ .../train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml | 15 +++++++++++---- .../train_idrnd_resnet100_xvec_stage1_v2.0.yaml | 2 +- .../train_idrnd_resnet100_xvec_stage2_v2.0.yaml | 2 +- .../v1.1/conf/train_res2net50_xvec_default.yaml | 2 +- .../config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh | 12 ++++++++++-- .../config_fbank80_stmn_idrnd_resnet100.v2.0.sh | 5 ++--- 7 files changed, 38 insertions(+), 12 deletions(-) diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index 83027e16..57a09ad8 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -92,6 +92,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.09 | 0.068 | 0.121 | | | | | Cosine + AS-Norm | 1.0 | 0.064 | 0.110 | | | | | Cosine + QMF | 0.87 | 0.059 | 0.076 | +| config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.84 | 0.053 | 0.083 | +| | | | Cosine + AS-Norm | 0.78 | 0.046 | 0.078 | +| | | | Cosine + QMF | 0.74 | 0.046 | 0.077 | ### VoxCeleb 1 Entire-Clean trial list @@ -100,6 +103,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.21 | 0.075 | 0.129 | | | | | Cosine + AS-Norm | 1.15 | 0.069 | 0.113 | | | | | Cosine + QMF | 1.12 | 0.067 | 0.111 | +| config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.92 | 0.058 | 0.104 | +| | | | Cosine + AS-Norm | 0.87 | 0.053 | 0.089 | +| | | | Cosine + QMF | 0.88 | 0.054 | 0.092 | ### VoxCeleb 1 Hard-Clean trial list @@ -108,6 +114,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.17 | 0.129 | 0.212 | | | | | Cosine + AS-Norm | 1.98 | 0.116 | 0.190 | | | | | Cosine + QMF | 1.88 | 0.112 | 0.181 | +| config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.80 | 0.106 | 0.171 | +| | | | Cosine + AS-Norm | 1.59 | 0.091 | 0.146 | +| | | | Cosine + QMF | 1.59 | 0.092 | 0.151 | ### VoxSRC2022 dev @@ -116,6 +125,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.85 | 0.187 | 0.310 | | | | | Cosine + AS-Norm | 2.69 | 0.182 | 0.310 | | | | | Cosine + QMF | 2.80 | 0.196 | 0.338 | +| config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.50 | 0.160 | 0.270 | +| | | | Cosine + AS-Norm | 2.31 | 0.139 | 0.240 | +| | | | Cosine + QMF | 2.54 | 0.153 | 0.248 | ## Results before 2023 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml index 4a4a8a88..e7a94225 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml @@ -10,11 +10,13 @@ data: sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 - max_chunk_length: 6.0 - min_chunk_length: 6.0 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + # max_chunk_length: 6.0 + # min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 class_name: class_id - seg_weight_mode: uniform + seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: num_workers: 8 @@ -33,7 +35,7 @@ data: min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 class_name: class_id - seg_weight_mode: uniform + seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: num_workers: 8 @@ -43,6 +45,11 @@ model: margin: 0.4 margin_warmup_epochs: 0 intertop_margin: 0.1 + # override_dropouts: false + # dropout_rate: 0.1 + # resnet_enc: + # override_dropouts: true + # dropout_rate: 0.1 trainer: optim: opt_type: sgd diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml index fba4ce80..b7f02a47 100644 --- a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml @@ -66,5 +66,5 @@ trainer: update_lr_on_opt_step: true use_amp: true log_interval: 1000 - epochs: 40 + epochs: 20 eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml index 6c209a9f..7e62ec72 100644 --- a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml @@ -59,7 +59,7 @@ trainer: update_lr_on_opt_step: true use_amp: true log_interval: 1000 - epochs: 35 + epochs: 5 eff_batch_size: 256 swa_start: 31 swa_lr: 1e-4 diff --git a/egs/voxceleb/v1.1/conf/train_res2net50_xvec_default.yaml b/egs/voxceleb/v1.1/conf/train_res2net50_xvec_default.yaml index 1d387790..c7eb6ee1 100644 --- a/egs/voxceleb/v1.1/conf/train_res2net50_xvec_default.yaml +++ b/egs/voxceleb/v1.1/conf/train_res2net50_xvec_default.yaml @@ -2,6 +2,6 @@ data: train: train_data_default.yaml val: val_data_default.yaml feats: fbank80_stmn_16k.yaml -model: resnet34.yaml +model: res2net50.yaml trainer: trainer_default.yaml \ No newline at end of file diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh index e9c634a3..0532754f 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v2.0.sh @@ -18,12 +18,20 @@ nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v2.0.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name nnet_s1=$nnet_s1_dir/model_ep0040.pth +nnet_s1=$nnet_s1_dir/model_ep0030.pth +nnet_s1=$nnet_s1_dir/model_ep0020.pth +#nnet_s1=$nnet_s1_dir/model_ep0010.pth nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml 
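+# NOTE: consecutive assignments to the same nnet_s1/nnet_s2 variable override
+# each other; bash keeps the last uncommented value, so stage 1 resumes from
+# model_ep0020.pth here.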
nnet_s2_name=${nnet_name}.s2 nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name -nnet_s2=$nnet_s2_dir/model_ep0030.pth -nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth +nnet_s2=$nnet_s2_dir/model_ep0010.pth +#nnet_s2=$nnet_s2_dir/model_ep0020.pth +#nnet_s2=$nnet_s2_dir/model_ep0010.pth +#nnet_s2=$nnet_s2_dir/model_ep0005.pth +#nnet_s2=$nnet_s2_dir/model_ep0002.pth +#nnet_s2=$nnet_s2_dir/model_ep0001.pth +#nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth # back-end do_plda=false diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v2.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v2.0.sh index b9363c3f..f71545b7 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v2.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v2.0.sh @@ -17,13 +17,12 @@ nnet_name=${feat_type}_idrnd_resnet100.v2.0 nnet_s1_base_cfg=conf/train_idrnd_resnet100_xvec_stage1_v2.0.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0040.pth +nnet_s1=$nnet_s1_dir/model_ep0020.pth nnet_s2_base_cfg=conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml nnet_s2_name=${nnet_name}.s2 nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name -nnet_s2=$nnet_s2_dir/model_ep0030.pth -nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth +nnet_s2=$nnet_s2_dir/model_ep0005.pth # back-end do_plda=false From 42f1ebdc8167a20238a82e7be0ee1cf19d89c5bd Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 16 Feb 2023 17:53:58 -0500 Subject: [PATCH 084/154] xxx --- .../v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml index 7e62ec72..2311b07b 100644 --- a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml @@ -10,8 +10,8 @@ data: sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 16 - max_chunk_length: 6.0 - min_chunk_length: 6.0 + max_chunk_length: 4.0 + min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 class_name: class_id seg_weight_mode: data-prior @@ -29,8 +29,8 @@ data: sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 16 - max_chunk_length: 6.0 - min_chunk_length: 6.0 + max_chunk_length: 4.0 + min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 class_name: class_id seg_weight_mode: data-prior From c2bd3dbbb79b8ce66a45c13935d63520be952c75 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 2 Mar 2023 10:56:05 -0500 Subject: [PATCH 085/154] ecapa v3 recipe --- egs/voxceleb/v1.1/README.md | 16 ++ ...rain_ecapatdnn2048x4_xvec_stage1_v3.0.yaml | 93 ++++++++ ...rain_ecapatdnn2048x4_xvec_stage2_v3.0.yaml | 71 ++++++ ...onfig_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | 44 ++++ egs/voxceleb/v1.1/run_030_extract_xvectors.sh | 2 +- egs/voxceleb/v1.1/run_040_eval_be.sh | 225 ++++++++++++++++++ .../{eval-be-v2.py => eval_be_cos.py} | 22 +- egs/voxceleb/v1/steps_be/eval_be_cos.sh | 11 +- ...{eval-be-cos-qmf.py => eval_be_cos_qmf.py} | 12 +- egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh | 7 +- ...al-be-v2-snorm.py => eval_be_cos_snorm.py} | 16 +- egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh | 11 +- .../{eval-be-v1.py => eval_be_plda_v1.py} | 0 .../{eval_be_v1.sh => eval_be_plda_v1.sh} | 4 +- egs/voxceleb/v1/steps_be/eval_be_v2.sh | 2 +- egs/voxceleb/v1/steps_be/train-be-v2.py | 82 ------- 
egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh | 4 +- .../{train-be-v1.py => train_be_plda_v1.py} | 0 .../{train_be_v1.sh => train_be_plda_v1.sh} | 2 +- egs/voxceleb/v1/steps_be/train_be_proj_v1.py | 95 ++++++++ .../{train_be_v2.sh => train_be_proj_v1.sh} | 3 +- .../steps_be/{train-qmf.py => train_qmf.py} | 0 hyperion/np/np_model.py | 2 + hyperion/np/transforms/pca.py | 6 + 24 files changed, 611 insertions(+), 119 deletions(-) create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh rename egs/voxceleb/v1/steps_be/{eval-be-v2.py => eval_be_cos.py} (80%) rename egs/voxceleb/v1/steps_be/{eval-be-cos-qmf.py => eval_be_cos_qmf.py} (96%) rename egs/voxceleb/v1/steps_be/{eval-be-v2-snorm.py => eval_be_cos_snorm.py} (92%) rename egs/voxceleb/v1/steps_be/{eval-be-v1.py => eval_be_plda_v1.py} (100%) rename egs/voxceleb/v1/steps_be/{eval_be_v1.sh => eval_be_plda_v1.sh} (94%) delete mode 100755 egs/voxceleb/v1/steps_be/train-be-v2.py rename egs/voxceleb/v1/steps_be/{train-be-v1.py => train_be_plda_v1.py} (100%) rename egs/voxceleb/v1/steps_be/{train_be_v1.sh => train_be_plda_v1.sh} (96%) create mode 100755 egs/voxceleb/v1/steps_be/train_be_proj_v1.py rename egs/voxceleb/v1/steps_be/{train_be_v2.sh => train_be_proj_v1.sh} (94%) rename egs/voxceleb/v1/steps_be/{train-qmf.py => train_qmf.py} (100%) diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index 57a09ad8..7b6b278f 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -85,6 +85,8 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr ## Results + + ### VoxCeleb 1 Original-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -95,6 +97,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.84 | 0.053 | 0.083 | | | | | Cosine + AS-Norm | 0.78 | 0.046 | 0.078 | | | | | Cosine + QMF | 0.74 | 0.046 | 0.077 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.78 | 0.061 | 0.110 | +| | | | Cosine + AS-Norm | 0.70 | 0.054 | 0.102 | +| | | | Cosine + QMF | 0.66 | 0.047 | 0.090 | ### VoxCeleb 1 Entire-Clean trial list @@ -106,6 +111,10 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.92 | 0.058 | 0.104 | | | | | Cosine + AS-Norm | 0.87 | 0.053 | 0.089 | | | | | Cosine + QMF | 0.88 | 0.054 | 0.092 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.93 | 0.058 | 0.103 | +| | | | Cosine + AS-Norm | 0.88 | 0.052 | 0.092 | +| | | | Cosine + QMF | 0.90 | 0.053 | 0.090 | + ### VoxCeleb 1 Hard-Clean trial list @@ -117,6 +126,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.80 | 0.106 | 0.171 | | | | | Cosine + AS-Norm | 1.59 | 0.091 | 0.146 | | | | | Cosine + QMF | 1.59 | 0.092 | 0.151 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.78 | 0.110 | 0.180 | +| | | | Cosine + AS-Norm | 1.61 | 0.097 | 0.159 | +| | | | Cosine + QMF | 1.62 | 0.096 | 0.158 | ### VoxSRC2022 dev @@ -128,6 +140,10 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.50 | 0.160 | 0.270 | | | | | Cosine + AS-Norm | 2.31 | 0.139 | 0.240 | | | | | Cosine + QMF | 2.54 | 0.153 | 0.248 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.42 | 0.160 | 0.265 | +| | | | Cosine + AS-Norm | 2.32 | 0.152 | 0.273 | +| | | | Cosine + QMF | 2.54 | 0.179 | 0.304 | + ## Results before 2023 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..408bad1a --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml @@ -0,0 +1,93 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_enc: + in_feats: 80 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + norm_before: false + dropout_rate: 0.2 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.2 + norm_before: false +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + #min_lr: 1.0e-05 + min_lr: 1.0e-06 + warmup_steps: 15000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 40 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..91a7d0b8 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml @@ -0,0 +1,71 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + # max_chunk_length: 6.0 + # 
min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + resnet_enc: + override_dropouts: true + dropout_rate: 0.25 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh new file mode 100644 index 00000000..5f7ed094 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh @@ -0,0 +1,44 @@ +# ECAPA-TDNN large + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn2048x4.v3.0 + +nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0030.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh index 4e6a8790..5bd2c17d 100755 --- a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh +++ b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh @@ -44,7 +44,7 @@ fi xvector_dir=exp/xvectors/$nnet_name -if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qmf" == "true" ) ]]; then +if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qmf" == "true" || "$do_pca" == "true") ]]; then # Extract xvectors for training LDA/PLDA for name in voxceleb2cat_train do diff --git a/egs/voxceleb/v1.1/run_040_eval_be.sh b/egs/voxceleb/v1.1/run_040_eval_be.sh index 358e2acf..37a344b6 100755 --- a/egs/voxceleb/v1.1/run_040_eval_be.sh +++ b/egs/voxceleb/v1.1/run_040_eval_be.sh @@ -281,3 +281,228 @@ if [ "$do_qmf" == "true" ];then fi +if [ "$do_pca" != "true" ];then + exit 0 +fi + + +be_name=pca_r${pca_var_r} + +xvector_dir=exp/xvectors/$nnet_name +be_dir=exp/be/$nnet_name/$be_name +score_dir=exp/scores/$nnet_name/${be_name} +score_cosine_dir=exp/scores/$nnet_name/$be_name/cosine 
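+# The PCA back-end below (stages 10-11) first trains the projection with
+# train_be_proj_v1.py and then scores with plain cosine similarity; the
+# S-Norm/QMF variants reuse the same preprocessor via --preproc-file.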
+score_cosine_snorm_dir=exp/scores/$nnet_name/$be_name/cosine_snorm +score_cosine_qmf_dir=exp/scores/$nnet_name/$be_name/cosine_qmf + +be_dir=exp/be/$nnet_name/ +score_be_dir=$score_dir/pca_r${pca_var_r} + +if [ $stage -le 10 ]; then + echo "Train projection on Voxceleb2" + $train_cmd $be_dir/log/train_be.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_proj_v1.py \ + --v-file scp:$xvector_dir/$plda_data/xvector.scp \ + --train-list data/$plda_data/utt2spk \ + --output-path $be_dir \ + --pca.pca-var-r $pca_var_r + +fi + + +if [ $stage -le 11 ];then + + echo "Eval Voxceleb 1 with Cosine scoring" + steps_be/eval_be_cos.sh \ + --cmd "$train_cmd" \ + --preproc-file $be_dir/preproc.h5 \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $score_cosine_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_dir + + for f in $(ls $score_cosine_dir/*_results); + do + echo $f + cat $f + echo "" + done + +fi +exit +if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then + + echo "Eval voxsrc2 with Cosine scoring" + steps_be/eval_be_cos.sh --cmd "$train_cmd" \ + data/voxsrc22_dev/trials \ + data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + $score_cosine_dir/voxsrc22_dev_scores & + + # steps_be/eval_be_cos.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # $score_cosine_dir/voxsrc22_test_scores + + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_dir + + for f in $(ls $score_cosine_dir/voxsrc22_dev_results); + do + echo $f + cat $f + echo "" + done + +fi + + +if [ "$do_snorm" == "true" ];then + if [ $stage -le 5 ];then + echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm" + steps_be/eval_be_cos_snorm.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_snorm_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_cosine_snorm_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_snorm_dir + + for f in $(ls $score_cosine_snorm_dir/*_results); + do + echo $f + cat $f + echo "" + done + fi + + if [ $stage -le 6 ];then + echo "Eval voxsrc2 with Cosine scoring" + steps_be/eval_be_cos_snorm.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxsrc22_dev/trials \ + data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_snorm_dir/voxsrc22_dev_scores & + + # steps_be/eval_be_cos_snorm.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # data/voxceleb2cat_train/utt2spk \ + # $xvector_dir/voxceleb2cat_train/xvector.scp \ + # $score_cosine_snorm_dir/voxsrc22_test_scores + + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_snorm_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_snorm_dir + + for f in $(ls $score_cosine_snorm_dir/voxsrc22_dev_results); + do + echo $f + cat $f + echo "" + done + fi +fi + 
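+# A sketch of the adaptive S-Norm applied above: each raw cosine score s(e,t)
+# is z-normalized with statistics of the coh_nbest best-scoring cohort
+# speakers,
+#   s'(e,t) = 0.5 * ((s(e,t) - mu_e) / sigma_e + (s(e,t) - mu_t) / sigma_t),
+# where (mu_e, sigma_e) come from scoring enrollment e against the cohort
+# and (mu_t, sigma_t) from scoring test t against it.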
+ +if [ "$do_qmf" == "true" ];then + if [ $stage -le 7 ];then + echo "Train QMF in Vox2" + steps_be/train_be_cos_qmf.sh \ + --cmd "$train_cmd" --coh-nbest 1000 \ + data/voxceleb2cat_train/trials \ + data/voxceleb2cat_train/utt2model \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $xvector_dir/voxceleb2cat_train/utt2num_frames \ + data/voxceleb2cat_train/snorm_utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/voxceleb2_qmf_scores + + fi + + if [ $stage -le 8 ];then + + echo "Eval Voxceleb 1 with Cosine scoring" + steps_be/eval_be_cos_qmf.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $xvector_dir/voxceleb1_test/utt2num_frames \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/qmf.h5 \ + $score_cosine_qmf_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1_snorm.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir _snorm + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1_qmf.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir _qmf + + for f in $(ls $score_cosine_qmf_dir/voxceleb1{,_snorm,_qmf}_[oeh]_clean_results); + do + echo $f + cat $f + echo "" + done + + fi + + if [ $stage -le 9 ];then + echo "Eval voxsrc2 with Cosine scoring" + steps_be/eval_be_cos_qmf.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxsrc22_dev/trials \ + data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + $xvector_dir/voxsrc22_dev/utt2num_frames \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/qmf.h5 \ + $score_cosine_qmf_dir/voxsrc22_dev_scores & + + # steps_be/eval_be_cos_qmf.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # $xvector_dir/voxsrc22_test/utt2num_frames \ + # data/voxceleb2cat_train/utt2spk \ + # $xvector_dir/voxceleb2cat_train/xvector.scp \ + # $score_cosine_qmf_dir/qmf.h5 \ + # $score_cosine_qmf_dir/voxsrc22_test_scores + + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev_snorm.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir _snorm + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev_qmf.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir _qmf + + for f in $(ls $score_cosine_qmf_dir/voxsrc22_dev{,_snorm,_qmf}_results); + do + echo $f + cat $f + echo "" + done + fi + +fi + + diff --git a/egs/voxceleb/v1/steps_be/eval-be-v2.py b/egs/voxceleb/v1/steps_be/eval_be_cos.py similarity index 80% rename from egs/voxceleb/v1/steps_be/eval-be-v2.py rename to egs/voxceleb/v1/steps_be/eval_be_cos.py index 413ca313..1f9978ee 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-v2.py +++ b/egs/voxceleb/v1/steps_be/eval_be_cos.py @@ -26,8 +26,8 @@ from hyperion.np.transforms import TransformList -def eval_plda( - iv_file, +def eval_cos( + v_file, ndx_file, enroll_file, 
test_file, @@ -47,7 +47,7 @@ def eval_plda( preproc = None tdr = TDR( - iv_file, + v_file, ndx_file, enroll_file, test_file, @@ -60,7 +60,7 @@ def eval_plda( x_e, x_t, enroll, ndx = tdr.read() t1 = time.time() - logging.info("computing llr") + logging.info("computing llr %d", x_e.shape[1]) scores = cosine_scoring(x_e, x_t) dt = time.time() - t1 @@ -82,15 +82,15 @@ def eval_plda( parser = ArgumentParser(description="Eval cosine-scoring") - parser.add_argument("--iv-file", dest="iv_file", required=True) - parser.add_argument("--ndx-file", dest="ndx_file", default=None) - parser.add_argument("--enroll-file", dest="enroll_file", required=True) - parser.add_argument("--test-file", dest="test_file", default=None) - parser.add_argument("--preproc-file", dest="preproc_file", default=None) + parser.add_argument("--v-file", required=True) + parser.add_argument("--ndx-file", default=None) + parser.add_argument("--enroll-file", required=True) + parser.add_argument("--test-file", default=None) + parser.add_argument("--preproc-file", default=None) TDR.add_argparse_args(parser) - parser.add_argument("--score-file", dest="score_file", required=True) + parser.add_argument("--score-file", required=True) parser.add_argument( "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int ) @@ -101,4 +101,4 @@ def eval_plda( logging.debug(args) assert args.test_file is not None or args.ndx_file is not None - eval_plda(**namespace_to_dict(args)) + eval_cos(**namespace_to_dict(args)) diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos.sh b/egs/voxceleb/v1/steps_be/eval_be_cos.sh index 90f118af..434732d6 100755 --- a/egs/voxceleb/v1/steps_be/eval_be_cos.sh +++ b/egs/voxceleb/v1/steps_be/eval_be_cos.sh @@ -2,13 +2,13 @@ # Copyright 2020 Johns Hopkins University (Jesus Villalba) # Apache 2.0. # - +set -e cmd=run.pl num_parts=8 +preproc_file="" if [ -f path.sh ]; then . ./path.sh; fi . 
parse_options.sh || exit 1; -set -e if [ $# -ne 4 ]; then echo "Usage: $0 " @@ -27,6 +27,9 @@ name=$(basename $output_file) echo "$0 score $ndx_file" +if [ -n "$preproc_file" ];then + extra_args="--preproc-file $preproc_file" +fi for((i=1;i<=$num_parts;i++)); do @@ -34,8 +37,8 @@ do do $cmd $output_dir/log/${name}_${i}_${j}.log \ hyp_utils/conda_env.sh \ - steps_be/eval-be-v2.py \ - --iv-file scp:$vector_file \ + steps_be/eval_be_cos.py $extra_args \ + --v-file scp:$vector_file \ --ndx-file $ndx_file \ --enroll-file $enroll_file \ --score-file $output_file \ diff --git a/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py similarity index 96% rename from egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py rename to egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py index 82050ed1..e0e1c2da 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-cos-qmf.py +++ b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py @@ -61,6 +61,7 @@ def eval_plda( coh_v_file, score_file, qmf_file, + preproc_file, model_part_idx, num_model_parts, seg_part_idx, @@ -69,13 +70,18 @@ def eval_plda( **kwargs ): + if preproc_file is not None: + preproc = TransformList.load(preproc_file) + else: + preproc = None + logging.info("loading data") tdr = TDR( v_file, ndx_file, enroll_file, None, - None, + preproc, model_part_idx, num_model_parts, seg_part_idx, @@ -118,7 +124,7 @@ def eval_plda( scores = cosine_scoring(x_e, x_t) logging.info("read cohort x-vectors") - vcr = VCR(coh_v_file, coh_file) + vcr = VCR(coh_v_file, coh_file, preproc=preproc) x_coh, ids_coh = vcr.read() D_coh = PLDA.compute_stats_hard(x_coh, class_ids=ids_coh) x_coh = D_coh[1] / np.expand_dims(D_coh[0], axis=-1) @@ -194,7 +200,7 @@ def eval_plda( parser.add_argument("--coh-file", required=True) parser.add_argument("--coh-nbest", type=int, default=400) parser.add_argument("--qmf-file", default=None) - # parser.add_argument("--preproc-file", dest="preproc_file", default=None) + parser.add_argument("--preproc-file", default=None) TDR.add_argparse_args(parser) diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh index a8ad0178..a0712304 100755 --- a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh +++ b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.sh @@ -7,6 +7,7 @@ cmd=run.pl stage=1 num_parts=16 coh_nbest=1000 +preproc_file="" if [ -f path.sh ]; then . ./path.sh; fi . 
parse_options.sh || exit 1; @@ -33,6 +34,10 @@ name=$(basename $output_file) echo "$0 score $ndx_file" +if [ -n "$preproc_file" ];then + extra_args="--preproc-file $preproc_file" +fi + if [ $stage -le 1 ];then for((i=1;i<=$num_parts;i++)); do @@ -40,7 +45,7 @@ if [ $stage -le 1 ];then do $cmd $output_dir/log/${name}_${i}_${j}.log \ hyp_utils/conda_env.sh \ - steps_be/eval-be-cos-qmf.py \ + steps_be/eval_be_cos_qmf.py $extra_args \ --v-file scp:$vector_file \ --ndx-file $ndx_file \ --enroll-file $enroll_file \ diff --git a/egs/voxceleb/v1/steps_be/eval-be-v2-snorm.py b/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.py similarity index 92% rename from egs/voxceleb/v1/steps_be/eval-be-v2-snorm.py rename to egs/voxceleb/v1/steps_be/eval_be_cos_snorm.py index 4ad0a869..dad89ced 100755 --- a/egs/voxceleb/v1/steps_be/eval-be-v2-snorm.py +++ b/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.py @@ -30,13 +30,13 @@ def eval_plda( - iv_file, + v_file, ndx_file, enroll_file, test_file, preproc_file, score_file, - coh_iv_file, + coh_v_file, coh_file, coh_nbest, model_part_idx, @@ -53,7 +53,7 @@ def eval_plda( preproc = None tdr = TDR( - iv_file, + v_file, ndx_file, enroll_file, test_file, @@ -66,8 +66,10 @@ def eval_plda( x_e, x_t, enroll, ndx = tdr.read() coh_segs = SegmentSet.load(coh_file) - r = DRF.create(coh_iv_file) + r = DRF.create(coh_v_file) x_coh = r.read(coh_segs["id"], squeeze=True) + if preproc is not None: + x_coh = preproc(x_coh) _, spk_ids = np.unique(coh_segs["class_id"], return_inverse=True) num_coh_spks = np.max(spk_ids) + 1 x_coh_spk = np.zeros((num_coh_spks, x_coh.shape[1])) @@ -107,7 +109,7 @@ def eval_plda( parser = ArgumentParser(description="Eval cosine-scoring with adaptive s-norm") - parser.add_argument("--iv-file", required=True) + parser.add_argument("--v-file", required=True) parser.add_argument("--ndx-file", default=None) parser.add_argument("--enroll-file", required=True) parser.add_argument("--test-file", default=None) @@ -115,11 +117,11 @@ def eval_plda( TDR.add_argparse_args(parser) - parser.add_argument("--coh-iv-file", required=True) + parser.add_argument("--coh-v-file", required=True) parser.add_argument("--coh-file", required=True) parser.add_argument("--coh-nbest", type=int, default=1000) - parser.add_argument("--score-file", dest="score_file", required=True) + parser.add_argument("--score-file", required=True) parser.add_argument( "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int ) diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh b/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh index 4f5e3e76..b64d80a3 100755 --- a/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh +++ b/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.sh @@ -6,6 +6,8 @@ cmd=run.pl num_parts=16 coh_nbest=1000 +preproc_file="" + if [ -f path.sh ]; then . ./path.sh; fi . 
parse_options.sh || exit 1; set -e @@ -29,6 +31,9 @@ name=$(basename $output_file) echo "$0 score $ndx_file" +if [ -n "$preproc_file" ];then + extra_args="--preproc-file $preproc_file" +fi for((i=1;i<=$num_parts;i++)); do @@ -36,12 +41,12 @@ do do $cmd $output_dir/log/${name}_${i}_${j}.log \ hyp_utils/conda_env.sh \ - steps_be/eval-be-v2-snorm.py \ - --iv-file scp:$vector_file \ + steps_be/eval_be_cos_snorm.py $extra_args \ + --v-file scp:$vector_file \ --ndx-file $ndx_file \ --enroll-file $enroll_file \ --coh-file $coh_file \ - --coh-iv-file scp:$coh_vector_file \ + --coh-v-file scp:$coh_vector_file \ --score-file $output_file \ --coh-nbest $coh_nbest \ --model-part-idx $i --num-model-parts $num_parts \ diff --git a/egs/voxceleb/v1/steps_be/eval-be-v1.py b/egs/voxceleb/v1/steps_be/eval_be_plda_v1.py similarity index 100% rename from egs/voxceleb/v1/steps_be/eval-be-v1.py rename to egs/voxceleb/v1/steps_be/eval_be_plda_v1.py diff --git a/egs/voxceleb/v1/steps_be/eval_be_v1.sh b/egs/voxceleb/v1/steps_be/eval_be_plda_v1.sh similarity index 94% rename from egs/voxceleb/v1/steps_be/eval_be_v1.sh rename to egs/voxceleb/v1/steps_be/eval_be_plda_v1.sh index eefc989f..69d6ace1 100755 --- a/egs/voxceleb/v1/steps_be/eval_be_v1.sh +++ b/egs/voxceleb/v1/steps_be/eval_be_plda_v1.sh @@ -36,8 +36,8 @@ do do $cmd $output_dir/log/${name}_${i}_${j}.log \ hyp_utils/conda_env.sh \ - steps_be/eval-be-v1.py \ - --iv-file scp:$vector_file \ + steps_be/eval_be_plda_v1.py \ + --v-file scp:$vector_file \ --ndx-file $ndx_file \ --enroll-file $enroll_file \ --preproc-file $preproc_file \ diff --git a/egs/voxceleb/v1/steps_be/eval_be_v2.sh b/egs/voxceleb/v1/steps_be/eval_be_v2.sh index 7389bf2c..bb58872e 100755 --- a/egs/voxceleb/v1/steps_be/eval_be_v2.sh +++ b/egs/voxceleb/v1/steps_be/eval_be_v2.sh @@ -36,7 +36,7 @@ do $cmd $output_dir/log/${name}_${i}_${j}.log \ hyp_utils/conda_env.sh \ steps_be/eval-be-v2.py \ - --iv-file scp:$vector_file \ + --v-file scp:$vector_file \ --ndx-file $ndx_file \ --enroll-file $enroll_file \ --preproc-file $preproc_file \ diff --git a/egs/voxceleb/v1/steps_be/train-be-v2.py b/egs/voxceleb/v1/steps_be/train-be-v2.py deleted file mode 100755 index 4e3d7542..00000000 --- a/egs/voxceleb/v1/steps_be/train-be-v2.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python -""" - Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" -import logging -import sys -import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time - -import numpy as np - -from hyperion.hyp_defs import config_logger -from hyperion.helpers import VectorReader as VR -from hyperion.np.transforms import TransformList, CentWhiten, PCA - -from numpy.linalg import matrix_rank - - -def train_be(iv_file, train_list, output_path, **kwargs): - - # Read data - vr_args = VR.filter_args(**kwargs) - vr_train = VR(iv_file, train_list, None, **vr_args) - x = vr_train.read() - del vr_train - - t1 = time.time() - rank = matrix_rank(x) - pca = None - if rank < x.shape[1]: - # do PCA if rank of x is smaller than its dimension - pca = PCA(pca_dim=rank, name="pca") - pca.fit(x) - x = pca.predict(x) - logging.info("PCA rank=%d" % (rank)) - - # Train centering and whitening - t1 = time.time() - cw = CentWhiten(name="cw") - cw.fit(x) - - logging.info("PCA-CW Elapsed time: %.2f s." 
% (time.time() - t1)) - - # Save models - if pca is None: - preproc = TransformList([cw]) - else: - preproc = TransformList([pca, cw]) - - if not os.path.exists(output_path): - os.makedirs(ouput_path) - - preproc.save(output_path + "/cw.h5") - - -if __name__ == "__main__": - - parser = ArgumentParser(description="Train Back-end") - - parser.add_argument("--iv-file", dest="iv_file", required=True) - parser.add_argument("--train-list", dest="train_list", required=True) - - VR.add_argparse_args(parser) - - parser.add_argument("--output-path", dest="output_path", required=True) - parser.add_argument( - "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int - ) - - args = parser.parse_args() - config_logger(args.verbose) - del args.verbose - logging.debug(args) - - train_be(**namespace_to_dict(args)) diff --git a/egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh b/egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh index 7dbfcfb9..267466ae 100755 --- a/egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh +++ b/egs/voxceleb/v1/steps_be/train_be_cos_qmf.sh @@ -39,7 +39,7 @@ if [ $stage -le 1 ];then do $cmd $output_dir/log/${name}_${i}_${j}.log \ hyp_utils/conda_env.sh \ - steps_be/eval-be-cos-qmf.py \ + steps_be/eval_be_cos_qmf.py \ --v-file scp:$vector_file \ --ndx-file $ndx_file \ --enroll-file $enroll_file \ @@ -72,7 +72,7 @@ fi if [ $stage -le 3 ];then $cmd $output_dir/log/train_qmf_${name}.log \ hyp_utils/conda_env.sh \ - steps_be/train-qmf.py \ + steps_be/train_qmf.py \ --score-file $output_file \ --key-file $ndx_file \ --model-file $output_dir/qmf.h5 diff --git a/egs/voxceleb/v1/steps_be/train-be-v1.py b/egs/voxceleb/v1/steps_be/train_be_plda_v1.py similarity index 100% rename from egs/voxceleb/v1/steps_be/train-be-v1.py rename to egs/voxceleb/v1/steps_be/train_be_plda_v1.py diff --git a/egs/voxceleb/v1/steps_be/train_be_v1.sh b/egs/voxceleb/v1/steps_be/train_be_plda_v1.sh similarity index 96% rename from egs/voxceleb/v1/steps_be/train_be_v1.sh rename to egs/voxceleb/v1/steps_be/train_be_plda_v1.sh index 68e470ff..ee5f8163 100755 --- a/egs/voxceleb/v1/steps_be/train_be_v1.sh +++ b/egs/voxceleb/v1/steps_be/train_be_plda_v1.sh @@ -44,7 +44,7 @@ while(getline < fv) $cmd $output_dir/log/train_be.log \ hyp_utils/conda_env.sh \ - steps_be/train-be-v1.py \ + steps_be/train_be_plda_v1.py \ --iv-file scp:$vector_file \ --train-list $train_list \ --lda-dim $lda_dim \ diff --git a/egs/voxceleb/v1/steps_be/train_be_proj_v1.py b/egs/voxceleb/v1/steps_be/train_be_proj_v1.py new file mode 100755 index 00000000..24a2a33b --- /dev/null +++ b/egs/voxceleb/v1/steps_be/train_be_proj_v1.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.helpers import VectorReader as VR +from hyperion.np.transforms import TransformList, CentWhiten, PCA, LNorm + +# from numpy.linalg import matrix_rank + + +def train_be_lda(v_file, train_list, output_path, pca, **kwargs): + from hyperion.helpers import VectorClassReader as VCR + from hyperion.np.transforms import LDA, LNorm + from sklearn.discriminant_analysis import LinearDiscriminantAnalysis + + # Read data + vr_args = VCR.filter_args(**kwargs) + vr_train = VCR(v_file, train_list, None, **vr_args) + x, ids = 
vr_train.read() + del vr_train + + t1 = time.time() + lnorm = LNorm() + x = lnorm(x) + _, ids = np.unique(ids, return_inverse=True) + pca = LDA(lda_dim=90) + pca.fit(x, ids) + logging.info("LDA elapsed time: %.2f s." % (time.time() - t1)) + + # Save models + preproc = TransformList([lnorm, pca]) + + if not os.path.exists(output_path): + os.makedirs(output_path) + + preproc.save(output_path + "/preproc.h5") + + +def train_be(v_file, train_list, output_path, pca, **kwargs): + + # Read data + vr_args = VR.filter_args(**kwargs) + vr_train = VR(v_file, train_list, None, **vr_args) + x = vr_train.read() + del vr_train + + t1 = time.time() + pca = PCA(**pca) + pca.fit(x) + logging.info("PCA dimension=%d", pca.pca_dim) + logging.info("PCA elapsed time: %.2f s." % (time.time() - t1)) + + # Save models + preproc = TransformList([pca]) + if not os.path.exists(output_path): + os.makedirs(output_path) + + preproc.save(output_path + "/preproc.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train Back-end") + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + + VR.add_argparse_args(parser) + PCA.add_class_args(parser, prefix="pca") + parser.add_argument("--output-path", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + train_be(**namespace_to_dict(args)) diff --git a/egs/voxceleb/v1/steps_be/train_be_v2.sh b/egs/voxceleb/v1/steps_be/train_be_proj_v1.sh similarity index 94% rename from egs/voxceleb/v1/steps_be/train_be_v2.sh rename to egs/voxceleb/v1/steps_be/train_be_proj_v1.sh index 487c9b1b..7d1be89d 100755 --- a/egs/voxceleb/v1/steps_be/train_be_v2.sh +++ b/egs/voxceleb/v1/steps_be/train_be_proj_v1.sh @@ -3,6 +3,7 @@ # Apache 2.0. # cmd=run.pl +pca_var_r=0.90 if [ -f path.sh ]; then . ./path.sh; fi . 
parse_options.sh || exit 1; @@ -40,7 +41,7 @@ while(getline < fv) $cmd $output_dir/log/train_be.log \ hyp_utils/conda_env.sh \ - steps_be/train-be-v2.py \ + steps_be/train_be_proj_v1.py \ --iv-file scp:$vector_file \ --train-list $train_list \ --output-path $output_dir diff --git a/egs/voxceleb/v1/steps_be/train-qmf.py b/egs/voxceleb/v1/steps_be/train_qmf.py similarity index 100% rename from egs/voxceleb/v1/steps_be/train-qmf.py rename to egs/voxceleb/v1/steps_be/train_qmf.py diff --git a/hyperion/np/np_model.py b/hyperion/np/np_model.py index 8ee84ee8..ee464161 100644 --- a/hyperion/np/np_model.py +++ b/hyperion/np/np_model.py @@ -20,6 +20,8 @@ class NPModel(object): """ def __init__(self, name=None, **kwargs): + if name is None: + name = self.__class__.__name__ self.name = name self._is_init = False diff --git a/hyperion/np/transforms/pca.py b/hyperion/np/transforms/pca.py index eabb200d..aa25d8e9 100644 --- a/hyperion/np/transforms/pca.py +++ b/hyperion/np/transforms/pca.py @@ -224,6 +224,12 @@ def add_class_args(parser, prefix=None): action=ActionYesNo, help=("updates whitening parameter"), ) + parser.add_argument( + "--whiten", + default=False, + action=ActionYesNo, + help=("whitens the data after projection"), + ) parser.add_argument( "--pca-dim", default=None, type=int, help=("output dimension of PCA") From 1d78ea3596bd1c0c88623b1e3571c2af39c162ea Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 10 Mar 2023 11:23:58 -0500 Subject: [PATCH 086/154] refactored beam search --- egs/librispeech/v1/conf/infer.yaml | 1 + ...n_wav2vec2base_transducer_stage1_v6.1.yaml | 53 ++ .../wav2vec2base_rnn_transducer_do0.4.yaml | 12 + .../v1/global_conf/config_transducer_v5.0.sh | 1 + .../v1/global_conf/config_transducer_v6.1.sh | 34 ++ egs/librispeech/v1/run_011_train_asr2.sh | 119 +++++ egs/librispeech/v1/run_030_inference2.sh | 49 ++ .../decode_wav2vec2rnn_transducer.sh | 69 +++ .../decode_wav2vec2transducer.sh | 5 +- hyperion/bin/decode_wav2vec2rnn_transducer.py | 231 +++++++++ hyperion/bin/train_wav2rnn_transducer.py | 240 +++++++++ hyperion/bin/train_wav2vec2rnn_transducer.py | 259 ++++++++++ hyperion/torch/layer_blocks/__init__.py | 2 + .../torch/layer_blocks/transducer_joiner.py | 100 ++-- .../layer_blocks/transducer_predictor.py | 218 ++++----- .../transformer_conv2d_subsampler.py | 15 +- hyperion/torch/models/__init__.py | 1 + hyperion/torch/models/transducer/__init__.py | 9 +- .../models/transducer/lstm_rnn_transducer.py | 149 ++++++ .../models/transducer/rnn_rnn_transducer.py | 84 ++++ .../torch/models/transducer/rnn_transducer.py | 258 +++++----- .../torch/models/transducer/transducer.py | 10 +- .../torch/models/wav2transducer/__init__.py | 3 + .../wav2transducer/hf_wav2rnn_transducer.py | 375 +++++++++++++++ .../hf_wav2vec2rnn_rnn_transducer.py | 103 ++++ .../hf_wav2vec2rnn_transducer.py | 103 ++++ .../wav2transducer/wav2rnn_transducer.py | 103 ++++ .../hf_wav2vec2resnet1d_xvector.py | 10 +- .../models/wav2xvectors/hf_wav2xvector.py | 86 ++-- .../torch/models/wav2xvectors/wav2xvector.py | 17 +- .../torch/models/xvectors/resnet1d_xvector.py | 28 +- hyperion/torch/narchs/__init__.py | 2 + hyperion/torch/narchs/rnn_encoder.py | 281 +++++++++++ .../torch/narchs/rnn_transducer_decoder.py | 454 +++++++++++++++++- hyperion/torch/torch_model.py | 28 +- 35 files changed, 3109 insertions(+), 403 deletions(-) create mode 100644 egs/librispeech/v1/conf/infer.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml create mode 100644 
egs/librispeech/v1/conf/wav2vec2base_rnn_transducer_do0.4.yaml create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v6.1.sh create mode 100755 egs/librispeech/v1/run_011_train_asr2.sh create mode 100755 egs/librispeech/v1/run_030_inference2.sh create mode 100755 egs/librispeech/v1/steps_transducer/decode_wav2vec2rnn_transducer.sh create mode 100755 hyperion/bin/decode_wav2vec2rnn_transducer.py create mode 100755 hyperion/bin/train_wav2rnn_transducer.py create mode 100755 hyperion/bin/train_wav2vec2rnn_transducer.py create mode 100644 hyperion/torch/models/transducer/lstm_rnn_transducer.py create mode 100644 hyperion/torch/models/transducer/rnn_rnn_transducer.py create mode 100644 hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py create mode 100644 hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py create mode 100644 hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py create mode 100644 hyperion/torch/models/wav2transducer/wav2rnn_transducer.py create mode 100644 hyperion/torch/narchs/rnn_encoder.py diff --git a/egs/librispeech/v1/conf/infer.yaml b/egs/librispeech/v1/conf/infer.yaml new file mode 100644 index 00000000..ddfd25e2 --- /dev/null +++ b/egs/librispeech/v1/conf/infer.yaml @@ -0,0 +1 @@ +beam_width: 5 diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml new file mode 100644 index 00000000..c1490295 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml @@ -0,0 +1,53 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 75. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: 'bucketing_seg_sampler' + max_batch_length: 75. 
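Both sampler blocks in this config use bucketing_seg_sampler with max_batch_length: 75., which reads as a cap on the total seconds of audio per batch rather than a fixed batch size. A sketch of that batching rule under this assumption:

    def bucket_batches(durations, max_batch_length=75.0):
        # sort utterances by duration so batches have similar lengths, then
        # cut a new batch whenever the duration budget would be exceeded
        order = sorted(range(len(durations)), key=durations.__getitem__)
        batch, total = [], 0.0
        for i in order:
            if batch and total + durations[i] > max_batch_length:
                yield batch
                batch, total = [], 0.0
            batch.append(i)
            total += durations[i]
        if batch:
            yield batch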
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2base_rnn_transducer_do0.4.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 1200 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/wav2vec2base_rnn_transducer_do0.4.yaml b/egs/librispeech/v1/conf/wav2vec2base_rnn_transducer_do0.4.yaml new file mode 100644 index 00000000..6ddc7259 --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2base_rnn_transducer_do0.4.yaml @@ -0,0 +1,12 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h +transducer: + decoder: + embed_dim: 1024 + num_pred_layers: 2 + pred_hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/librispeech/v1/global_conf/config_transducer_v5.0.sh b/egs/librispeech/v1/global_conf/config_transducer_v5.0.sh index b1da75b7..2aaeed2b 100644 --- a/egs/librispeech/v1/global_conf/config_transducer_v5.0.sh +++ b/egs/librispeech/v1/global_conf/config_transducer_v5.0.sh @@ -24,6 +24,7 @@ nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name nnet_s1=$nnet_s1_dir/model_ep0030.pth nnet_s1=$nnet_s1_dir/model_ep0050.pth nnet_s1=$nnet_s1_dir/model_ep0075.pth +nnet_s1=$nnet_s1_dir/model_ep0106.pth nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml nnet_s2_args="" diff --git a/egs/librispeech/v1/global_conf/config_transducer_v6.1.sh b/egs/librispeech/v1/global_conf/config_transducer_v6.1.sh new file mode 100644 index 00000000..f67b0a88 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v6.1.sh @@ -0,0 +1,34 @@ +# Wav2Vec2 base pretrained on LibriSpeech 960h + RNN-T transducer + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# asr training data +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# network cfg + +nnet_type=hf_wav2vec2rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_transducer_stage1_v6.1.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v6.1 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0030.pth +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0075.pth +nnet_s1=$nnet_s1_dir/model_ep0106.pth +nnet_s1=$nnet_s1_dir/model_ep0646.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/run_011_train_asr2.sh b/egs/librispeech/v1/run_011_train_asr2.sh new file mode 100755 index 00000000..99b0065e --- /dev/null +++ b/egs/librispeech/v1/run_011_train_asr2.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# Copyright +# 2022 Johns Hopkins University (Author: Yen-Ju Lu) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=2 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh + +train_dir=data/${nnet_data}/ +val_dir=data/${dev_data}/ + +#add extra args from the command line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" + extra_args="$extra_args --data.val.data_loader.num-workers $num_workers" +fi +if [ "$use_tb" == "true" ];then + extra_args="$extra_args --trainer.use-tensorboard" +fi + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v2 --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" +fi + + +# Network Training +if [ $stage -le 1 ]; then + + mkdir -p $nnet_s1_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu --max-split-size-mb 512 \ + train_wav2vec2rnn_transducer.py $nnet_type \ + --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.segments-file $train_dir/utt2spk \ + --data.train.dataset.bpe-model $bpe_model \ + --data.train.dataset.text-file $train_dir/text \ + --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.segments-file $val_dir/utt2spk \ + --data.val.dataset.text-file $val_dir/text \ + --trainer.exp-path $nnet_s1_dir $args \ + --data.train.dataset.time-durs-file $train_dir/utt2dur \ + --data.val.dataset.time-durs-file $val_dir/utt2dur \ + --num-gpus $ngpu + +fi + +if [ $stage -le 2 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" + fi + + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2transducer.py $nnet_type \ + --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.segments-file $train_dir/utt2spk \ + --data.train.dataset.bpe-model $bpe_model \ + --data.train.dataset.text-file $train_dir/text \ + --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.segments-file $val_dir/utt2spk \ + --data.val.dataset.text-file $val_dir/text \ + --trainer.exp-path $nnet_s2_dir $args \ + --in-model-file $nnet_s1 \ + --data.train.dataset.time-durs-file $train_dir/utt2dur \ + --data.val.dataset.time-durs-file $val_dir/utt2dur \ + --num-gpus $ngpu + +fi + +if [ $stage -le 3 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)" + fi + + + mkdir -p $nnet_s3_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s3_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2transducer.py $nnet_type \ + --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ + --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.segments-file $train_dir/utt2spk \ + --data.train.dataset.bpe-model $bpe_model \ + --data.train.dataset.text-file $train_dir/text \ + --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.segments-file $val_dir/utt2spk \ + --data.val.dataset.text-file $val_dir/text \ + --trainer.exp-path $nnet_s3_dir $args \ + --in-model-file $nnet_s2 \ + --data.train.dataset.time-durs-file $train_dir/utt2dur \ + --data.val.dataset.time-durs-file $val_dir/utt2dur \ + --num-gpus $ngpu +fi + diff --git a/egs/librispeech/v1/run_030_inference2.sh b/egs/librispeech/v1/run_030_inference2.sh new file 
mode 100755 index 00000000..7ed9567a --- /dev/null +++ b/egs/librispeech/v1/run_030_inference2.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Copyright +# 2022 Johns Hopkins University (Author: Yen-Ju Lu) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +config_file=default_config.sh +use_gpu=false +nnet_stage=1 +. parse_options.sh || exit 1; +. $config_file + +if [ "$use_gpu" == "true" ];then + transducer_args="--use-gpu true" + transducer_cmd="$cuda_eval_cmd --mem 6G" +else + transducer_cmd="$train_cmd --mem 12G" +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +fi + +transducer_dir=exp/transducer/$nnet_name + + +test_data=test_clean + + +# Decode dev and test sets with the transducer +for name in dev_clean dev_other test_clean test_other +do + nj=40 + steps_transducer/decode_wav2vec2rnn_transducer.sh \ + --cmd "$transducer_cmd --mem 12G" --nj $nj ${transducer_args} \ + $nnet data/$name \ + $transducer_dir/$name $bpe_model +done + diff --git a/egs/librispeech/v1/steps_transducer/decode_wav2vec2rnn_transducer.sh b/egs/librispeech/v1/steps_transducer/decode_wav2vec2rnn_transducer.sh new file mode 100755 index 00000000..470b92b1 --- /dev/null +++ b/egs/librispeech/v1/steps_transducer/decode_wav2vec2rnn_transducer.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# 2022 Johns Hopkins University (Author: Yen-Ju Lu) +# Apache 2.0. +nj=30 +cmd="run.pl" +set -e +use_gpu=false +#write_utt2num_frames=true # If true writes utt2num_frames. +stage=0 +extra_args="" +infer_cfg=conf/infer.yaml +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 3 ] && [ $# != 4 ]; then + echo "Usage: $0 [options] <nnet-file> <data-dir> <output-dir> [<bpe-model>]" + echo " e.g.: $0 --infer-cfg conf/infer.yaml exp/transducer_nnets/model.pth data/test_clean exp/transducer/test_clean data/lang_bpe_1000/bpe.model" + echo "main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --infer-cfg # decoding configuration" + echo " --use-gpu # If true, use GPU." + echo " --nj # Number of jobs" + echo " --stage # To control partial reruns" + + exit 1 +fi + +nnet_file=$1 +data_dir=$2 +output_dir=$3 +bpe_model=$4 + +for f in $data_dir/wav.scp ; do + [ ! 
-f $f ] && echo "No such file $f" && exit 1; +done + +log_dir=$output_dir/log +mkdir -p $log_dir + +num_gpus=0 +if [ "$use_gpu" == "true" ];then + cmd="$cmd --gpu 1" + num_gpus=1 + extra_args="${extra_args} --use-gpu" +fi + +# if [ "$write_utt2num_frames" == "true" ];then +# write_num_frames_opt="--write-num-frames $output_dir/utt2num_frames.JOB" +# fi + +if [ $stage -le 0 ];then + $cmd JOB=1:$nj $output_dir/log/decode_transducer.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + decode_wav2vec2rnn_transducer.py \ + --infer-args $infer_cfg \ + --part-idx JOB --num-parts $nj \ + --input $data_dir/wav.scp \ + --model-path $nnet_file \ + --bpe-model $bpe_model \ + --output $output_dir/transducer.JOB.text $extra_args +fi + +if [ $stage -le 1 ];then + echo "compute wer" + cat $output_dir/transducer.*.text > $output_dir/transducer.text + compute-wer --text --mode=present ark:$data_dir/text ark:$output_dir/transducer.text +fi diff --git a/egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh b/egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh index 143087a5..67fc7081 100755 --- a/egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh +++ b/egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh @@ -1,6 +1,7 @@ #!/bin/bash # 2022 Johns Hopkins University (Author: Yen-Ju Lu) # Apache 2.0. +set -e nj=30 cmd="run.pl" @@ -61,7 +62,7 @@ if [ "$write_utt2num_frames" == "true" ];then fi if [ $stage -le 0 ];then - set +e + #set +e $cmd JOB=1:$nj $output_dir/log/decode_transducer.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ decode_wav2transducer.py \ @@ -70,7 +71,7 @@ if [ $stage -le 0 ];then --model-path $nnet_file \ --bpe-model $bpe_model \ --output $output_dir/transducer.JOB.text - set -e + # set -e fi if [ $stage -le 1 ];then diff --git a/hyperion/bin/decode_wav2vec2rnn_transducer.py b/hyperion/bin/decode_wav2vec2rnn_transducer.py new file mode 100755 index 00000000..cc612628 --- /dev/null +++ b/hyperion/bin/decode_wav2vec2rnn_transducer.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python +""" + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from typing import Dict, List, Tuple + +import sentencepiece as spm +import torch.nn as nn + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np +import pandas as pd + +import torch + +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.utils import Utt2Info +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR +from hyperion.np.augment import SpeechAugment + +from hyperion.torch.utils import open_device +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.models import HFWav2Vec2RNNTransducer +from hyperion.torch import TorchModelLoader as TML + +from hyperion.torch.models.wav2transducer.beam_search import greedy_search, beam_search + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus={}".format(num_gpus)) + device = open_device(num_gpus=num_gpus) + return device + + +def load_model(model_path, device): + logging.info("loading model {}".format(model_path)) + model = TML.load(model_path) + logging.info("transducer-model={}".format(model)) + model.to(device) + model.eval() + return model + + +def 
decode_one_batch( + model: nn.Module, + sp: spm.SentencePieceProcessor, + x: torch.Tensor, + decoding_method="beam_search") -> List[str]: + """Decode one utterance and return the hypothesis. + Args: + model: + The transducer model. + sp: + The BPE model. + x: + Waveform tensor for a single utterance with shape = (1, num_samples). + decoding_method: + Either "greedy_search" or "beam_search". + Returns: + The decoding result for the utterance as a list of word strings. + """ + device = model.device + feature = x + assert x.shape[0] == 1 + assert feature.ndim == 2 + + feature = feature.to(device) + # at entry, feature is (1, num_samples) + + feature_lens = torch.Tensor([x.shape[1]]).int() + + encoder_out, hid_feats, encoder_out_lens = model.forward_feats( + x=feature, x_lengths=feature_lens) + + hyps = [] + batch_size = encoder_out.size(0) + + encoder_out = encoder_out.permute(0, 2, 1) # (N, C, T) ->(N, T, C) + + for i in range(batch_size): + # fmt: off + encoder_out_i = encoder_out[i:i + 1, :encoder_out_lens[i]] + # fmt: on + if decoding_method == "greedy_search": + hyp = greedy_search(model=model, encoder_out=encoder_out_i) + elif decoding_method == "beam_search": + hyp = beam_search(model=model, encoder_out=encoder_out_i, beam=5) + else: + raise ValueError(f"Unsupported decoding method: {decoding_method}") + hyps.append(sp.decode(hyp).split()) + + logging.info("hyps:{}".format(" ".join(hyps[0]))) + + return hyps[0] + + +def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, + infer_args, use_gpu, **kwargs): + + device = init_device(use_gpu) + model = load_model(model_path, device) + + logging.info("bpe-model=%s", bpe_model) + sp = spm.SentencePieceProcessor() + sp.load(bpe_model) + + infer_args = HFWav2Vec2RNNTransducer.filter_infer_args(**infer_args) + logging.info(f"infer-args={infer_args}") + + ar_args = AR.filter_args(**kwargs) + logging.info("opening output: %s", output_spec) + with open(output_spec, "w") as writer: + logging.info(f"opening input stream: {input_spec} with args={ar_args}") + with AR(input_spec, **ar_args) as reader: + while not reader.eof(): + t1 = time.time() + key, x, fs = reader.read(1) + if len(key) == 0: + break + + x, key, fs = x[0], key[0], fs[0] + t2 = time.time() + logging.info("processing utt %s", key) + with torch.no_grad(): + x = torch.tensor( + x[None, :], dtype=torch.get_default_dtype()).to(device) + + tot_frames = x.shape[1] + logging.info( + "utt %s detected %d/%d (%.2f %%) speech frames", + key, + x.shape[1], + tot_frames, + x.shape[1] / tot_frames * 100, + ) + + if x.shape[1] == 0: + y = [""] + else: + #y = decode_one_batch(model=model, sp=sp, x=x) + x_lengths = torch.tensor((x.shape[1], ), + dtype=torch.long, + device=device) + y = model.infer(x, x_lengths, **infer_args) + + y = sp.decode(y[0]) + logging.info(f"utt: {key} hyps: {y}") + t3 = time.time() + writer.write(f"{key} {y}\n") + + t4 = time.time() + tot_time = t4 - t1 + infer_time = t3 - t2 + logging.info( 
("utt %s total-time=%.3f read-time=%.3f " + "infer-time=%.3f " + "write-time=%.3f " + "infer-rt-factor=%.2f tot-rt-factor=%.2f"), + key, + tot_time, + t2 - t1, + infer_time, + t4 - t3, + x.shape[1] / fs / infer_time, + x.shape[1] / fs / tot_time, + ) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description=("ASR decoding for RNN-T with Wav2vec features")) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--input", dest="input_spec", required=True) + parser.add_argument("--scp-sep", + default=" ", + help=("scp file field separator")) + + AR.add_class_args(parser) + parser.add_argument("--model-path", required=True) + parser.add_argument("--bpe-model", required=True) + + HFWav2Vec2RNNTransducer.add_infer_args(parser, "infer-args") + parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument("--use-gpu", + default=False, + action="store_true", + help="extract xvectors in gpu") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + decode_transducer(**namespace_to_dict(args)) diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py new file mode 100755 index 00000000..026c9330 --- /dev/null +++ b/hyperion/bin/train_wav2rnn_transducer.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python +""" + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from pathlib import Path +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import k2 +import time +import logging +import multiprocessing + +import numpy as np + +import torch +import torch.nn as nn + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.utils import ddp +from hyperion.torch.trainers import TransducerTrainer as Trainer +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import SegSamplerFactory +from hyperion.torch.models import Wav2RNNRNNTransducer +from torch.nn.utils.rnn import pad_sequence + +model_dict = { + "rnn_rnn_transducer": Wav2RNNRNNTransducer, +} + + +def transducer_collate(batch): + audio = [] + audio_length = [] + target = [] + for record in batch: + wav = torch.as_tensor(record["x"]) + audio.append(wav) + audio_length.append(wav.shape[0]) + target.append(record["text"]) + audio = pad_sequence(audio) + audio_length = torch.as_tensor(audio_length) + target = k2.RaggedTensor(target) + batch = { + "x": torch.transpose(audio, 0, 1), + "x_lengths": audio_length, + "text": target, + } + return batch + + +def init_data(partition, rank, num_gpus, **kwargs): + data_kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**data_kwargs["dataset"]) + sampler_args = data_kwargs["sampler"] + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + sampler = SegSamplerFactory.create(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = 
data_kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ({ + "num_workers": num_workers_per_gpu, + "pin_memory": True + } if num_gpus > 0 else {}) + data_loader = torch.utils.data.DataLoader(dataset, + batch_sampler=sampler, + **largs, + collate_fn=transducer_collate) + return data_loader + + +def init_model(blank_id, vocab_size, rank, model_class, **kwargs): + model_args = model_class.filter_args(**kwargs["model"]) + if rank == 0: + logging.info("model network args={}".format(model_args)) + # TODO: check model_args + model_args["transducer"]["decoder"]["blank_id"] = blank_id + model_args["transducer"]["decoder"]["vocab_size"] = vocab_size + model = model_class(**model_args) + if rank == 0: + logging.info("model={}".format(model)) + return model + + +def train_model(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + #torch.backends.cudnn.deterministic = True + #torch.backends.cudnn.benchmark = False + torch.backends.cudnn.enabled = False + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + model = init_model(train_loader.dataset.sp.piece_to_id(""), + train_loader.dataset.sp.get_piece_size(), **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {} + trainer = Trainer( + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(model_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + train_parser = ArgumentParser(prog="") + AD.add_class_args(train_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", + action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + + parser.add_argument( + "--data.train.dataset.text_file", + type=str, + ) + + parser.add_argument("--data.val.dataset.text_file", type=str) + + parser.add_argument( + "--data.train.dataset.bpe_model", + type=str, + ) + + parser.link_arguments("data.train.data_loader.num_workers", + "data.val.data_loader.num_workers") + + parser.link_arguments("data.train.dataset.bpe_model", + "data.val.dataset.bpe_model") + + model_class.add_class_args(parser, prefix="model") + Trainer.add_class_args(parser, + prefix="trainer", + train_modes=model_class.valid_train_modes()) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", + type=int, + default=1123581321, + help="random seed") + parser.add_argument("-v", + "--verbose", + 
dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + + return parser + + +if __name__ == "__main__": + parser = ArgumentParser( + description="Train RNN Transducer model from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + + for k, v in model_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + model_type = args.subcommand + args_sc = vars(args)[model_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.model_class = model_dict[model_type] + # torch docs recommend using forkserver + # multiprocessing.set_start_method("forkserver") + train_model(gpu_id, args_sc) diff --git a/hyperion/bin/train_wav2vec2rnn_transducer.py b/hyperion/bin/train_wav2vec2rnn_transducer.py new file mode 100755 index 00000000..a2d75ba9 --- /dev/null +++ b/hyperion/bin/train_wav2vec2rnn_transducer.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python +""" + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import sys +import os +from pathlib import Path +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import k2 +import time +import logging +import multiprocessing + +import numpy as np + +import torch +import torch.nn as nn + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.utils import ddp +from hyperion.torch.trainers import TransducerTrainer as Trainer +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import SegSamplerFactory +from hyperion.torch.models import HFWav2Vec2RNNTransducer +from hyperion.torch.models import HFWav2Vec2RNNRNNTransducer +from torch.nn.utils.rnn import pad_sequence + +model_dict = { + "hf_wav2vec2rnn_transducer": HFWav2Vec2RNNTransducer, + "hf_wav2vec2rnn_rnn_transducer": HFWav2Vec2RNNRNNTransducer, + # "hf_hubert2rnn_transducer": HFWav2Vec2RNNTransducer, + # "hf_hubert2rnn_rnn_transducer": Hubert2RNNRNNTransducer, + # "hf_wavlm2rnn_transducer": HFHubert2RNNTransducer, + # "hf_wavlm2rnn_rnn_transducer": HFWavLM2RNNRNNTransducer, +} + + +def transducer_collate(batch): + audio = [] + audio_length = [] + target = [] + for record in batch: + wav = torch.as_tensor(record["x"]) + audio.append(wav) + audio_length.append(wav.shape[0]) + target.append(record["text"]) + audio = pad_sequence(audio).transpose(0, 1) + audio_length = torch.as_tensor(audio_length) + + # sort audios by length + sort_idx = torch.argsort(audio_length, descending=True) + audio = audio[sort_idx] + audio_length = audio_length[sort_idx] + target = [target[k] for k in sort_idx] + target = k2.RaggedTensor(target) + + batch = { + "x": audio, + "x_lengths": audio_length, + "text": target, + } + return batch + + +def init_data(partition, rank, num_gpus, **kwargs): + data_kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**data_kwargs["dataset"]) + sampler_args = data_kwargs["sampler"] + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not 
is_val + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + sampler = SegSamplerFactory.create(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = data_kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ({ + "num_workers": num_workers_per_gpu, + "pin_memory": True + } if num_gpus > 0 else {}) + data_loader = torch.utils.data.DataLoader(dataset, + batch_sampler=sampler, + **largs, + collate_fn=transducer_collate) + return data_loader + + +def init_model(blank_id, vocab_size, rank, model_class, **kwargs): + model_args = model_class.filter_args(**kwargs["model"]) + if rank == 0: + logging.info("model network args={}".format(model_args)) + # TODO: check model_args + model_args["transducer"]["decoder"]["blank_id"] = blank_id + model_args["transducer"]["decoder"]["vocab_size"] = vocab_size + model = model_class(**model_args) + if rank == 0: + logging.info("model={}".format(model)) + return model + + +def train_model(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + #torch.backends.cudnn.deterministic = True + #torch.backends.cudnn.benchmark = False + torch.backends.cudnn.enabled = False + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + # # for Debug + # rank = 0 + # kwargs["rank"] = 0 + # device = "cpu" + # world_size=1 + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + model = init_model(train_loader.dataset.sp.piece_to_id("<blk>"), + train_loader.dataset.sp.get_piece_size(), **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {} #{"acc": CategoricalAccuracy()} + trainer = Trainer( + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(model_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + train_parser = ArgumentParser(prog="") + AD.add_class_args(train_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", + action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + + parser.add_argument( + "--data.train.dataset.text_file", + type=str, + ) + + parser.add_argument("--data.val.dataset.text_file", type=str) + + parser.add_argument( + "--data.train.dataset.bpe_model", + type=str, + ) + + parser.link_arguments("data.train.data_loader.num_workers", + "data.val.data_loader.num_workers") + + 
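Both training entry points derive the decoder's blank id and vocabulary size from the BPE model given by data.train.dataset.bpe_model (shared with the val set via the link_arguments call below). Roughly, assuming the standard sentencepiece API and a <blk> blank symbol:

    import sentencepiece as spm

    sp = spm.SentencePieceProcessor()
    sp.load("data/lang_bpe_1000/bpe.model")  # path used in the global configs
    blank_id = sp.piece_to_id("<blk>")       # assumed blank-symbol name
    vocab_size = sp.get_piece_size()
    # init_model() injects both into model_args["transducer"]["decoder"]
    # before constructing the model, as in the scripts above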
parser.link_arguments("data.train.dataset.bpe_model", + "data.val.dataset.bpe_model") + + model_class.add_class_args(parser, prefix="model") + Trainer.add_class_args(parser, + prefix="trainer", + train_modes=model_class.valid_train_modes()) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", + type=int, + default=1123581321, + help="random seed") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + + return parser + + +if __name__ == "__main__": + parser = ArgumentParser( + description="Train Wav2Vec2Transducer model from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + + for k, v in model_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + model_type = args.subcommand + args_sc = vars(args)[model_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.model_class = model_dict[model_type] + # torch docs recommend using forkserver + # multiprocessing.set_start_method("forkserver") + train_model(gpu_id, args_sc) diff --git a/hyperion/torch/layer_blocks/__init__.py b/hyperion/torch/layer_blocks/__init__.py index 7ec806a5..7a738bca 100644 --- a/hyperion/torch/layer_blocks/__init__.py +++ b/hyperion/torch/layer_blocks/__init__.py @@ -34,3 +34,5 @@ from .transformer_encoder_v1 import TransformerEncoderBlockV1 from .transformer_feedforward import (Conv1dLinear, Conv1dx2, PositionwiseFeedForward) +from .transducer_predictor import TransducerPredictor +from .transducer_joiner import TransducerJoiner diff --git a/hyperion/torch/layer_blocks/transducer_joiner.py b/hyperion/torch/layer_blocks/transducer_joiner.py index ee7a667b..482b5aa6 100644 --- a/hyperion/torch/layer_blocks/transducer_joiner.py +++ b/hyperion/torch/layer_blocks/transducer_joiner.py @@ -25,7 +25,7 @@ def __init__(self, in_feats: int, vocab_size: int): self.in_feats = in_feats self.vocab_size = vocab_size - self.output = nn.Linear(in_feats, out_dims) + self.output = nn.Linear(in_feats, vocab_size) def forward(self, encoder_out: torch.Tensor, pred_out: torch.Tensor) -> torch.Tensor: @@ -43,57 +43,57 @@ def forward(self, encoder_out: torch.Tensor, encoder_out = encoder_out.unsqueeze(2) # Now encoder_out is (N, T, 1, C) pred_out = pred_out.unsqueeze(1) - # Now decoder_out is (N, 1, U, C) + # Now pred_out is (N, 1, U, C) x = torch.tanh(encoder_out + pred_out) logits = self.output(x) return logits - def get_config(self): - config = { - "in_feats": self.in_feats, - "out_dims": self.out_dims, - "num_layers": self.num_layers, - } - - # base_config = super().get_config() - return dict(list(config.items())) - - @staticmethod - def filter_args(**kwargs): - valid_args = ( - "in_feats", - "out_dims", - "num_layers", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - return args - - @staticmethod - def add_class_args(parser, - prefix=None, - skip=set(["in_feats", "out_dims"])): - if prefix is not None: - outer_parser = parser - parser = ArgumentParser(prog="") - - if "in_feats" not in skip: - parser.add_argument("--in-feats", - type=int, - required=True, - help=("input feature dimension")) - - if "out_dims" not in skip: - parser.add_argument("--out-dims", - type=int, - required=True, - help=("output feature dimension (vocab 
size)")) - parser.add_argument("--num-layers", - default=1, - type=int, - help=("layers of the joiner")) - - if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + # def get_config(self): + # config = { + # "in_feats": self.in_feats, + # "out_dims": self.out_dims, + # "num_layers": self.num_layers, + # } + + # # base_config = super().get_config() + # return dict(list(config.items())) + + # @staticmethod + # def filter_args(**kwargs): + # valid_args = ( + # "in_feats", + # "out_dims", + # "num_layers", + # ) + # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + # return args + + # @staticmethod + # def add_class_args(parser, + # prefix=None, + # skip=set(["in_feats", "out_dims"])): + # if prefix is not None: + # outer_parser = parser + # parser = ArgumentParser(prog="") + + # if "in_feats" not in skip: + # parser.add_argument("--in-feats", + # type=int, + # required=True, + # help=("input feature dimension")) + + # if "out_dims" not in skip: + # parser.add_argument("--out-dims", + # type=int, + # required=True, + # help=("output feature dimension (vocab size)")) + # parser.add_argument("--num-layers", + # default=1, + # type=int, + # help=("layers of the joiner")) + + # if prefix is not None: + # outer_parser.add_argument("--" + prefix, + # action=ActionParser(parser=parser)) diff --git a/hyperion/torch/layer_blocks/transducer_predictor.py b/hyperion/torch/layer_blocks/transducer_predictor.py index 178c423a..ae354359 100644 --- a/hyperion/torch/layer_blocks/transducer_predictor.py +++ b/hyperion/torch/layer_blocks/transducer_predictor.py @@ -9,6 +9,8 @@ import torch import torch.nn as nn +from ...utils.misc import filter_func_args + class TransducerPredictor(nn.Module): """ RNN-T prediction network. 
@@ -40,7 +42,7 @@ def __init__(self, super().__init__() self.embedding = nn.Embedding( num_embeddings=vocab_size, - embed_dim=embed_dim, + embedding_dim=embed_dim, padding_idx=blank_id, ) self.embed_dropout = nn.Dropout(embed_dropout_rate) @@ -71,7 +73,7 @@ def __init__(self, self.hid_feats = hid_feats self.embed_dropout_rate = embed_dropout_rate self.rnn_dropout_rate = rnn_dropout_rate - self.output = nn.Linear(hid_feats, in_feats) + self.output = nn.Linear(hid_feats, out_feats) def forward( self, @@ -93,92 +95,6 @@ def forward( return out, (h, c) - def get_config(self): - config = { - "in_feats": self.in_feats, - "blank_id": self.blank_id, - "vocab_size": self.vocab_size, - "embed_dim": self.embed_dim, - "num_layers": self.num_layers, - "hid_feats": self.hid_feats, - "embed_dropout_rate": self.embed_dropout_rate, - "rnn_dropout_rate": self.rnn_dropout_rate, - } - - # base_config = super().get_config() - return dict(list(config.items())) - - @staticmethod - def filter_args(**kwargs): - valid_args = ( - "in_feats", - "blank_id", - "vocab_size", - "embed_dim", - "num_layers", - "hid_feats", - "embed_dropout_rate", - "rnn_dropout_rate", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - return args - - @staticmethod - def filter_finetune_args(**kwargs): - valid_args = ( - "embed_dropout_rate", - "rnn_dropout_rate", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - return args - - @staticmethod - def add_class_args(parser, - prefix=None, - skip=set(["in_feats", "blank_id", "vocab_size"])): - - if prefix is not None: - outer_parser = parser - parser = ArgumentParser(prog="") - - if "in_feats" not in skip: - parser.add_argument("--in-feats", - type=int, - required=True, - help=("input feature dimension")) - if "blank_id" not in skip: - parser.add_argument("--blank-id", - type=int, - required=True, - help=("blank id from sp model")) - if "vocab_size" not in skip: - parser.add_argument("--vocab-size", - type=int, - required=True, - help=("output prediction dimension")) - parser.add_argument("--embedding-dim", - default=1024, - type=int, - help=("feature dimension")) - parser.add_argument("--embedding-dropout-rate", - default=0.0, - type=float, - help=("dropout prob for decoder input embeddings")) - parser.add_argument("--rnn-dropout-rate", - default=0.0, - type=float, - help=("dropout prob for decoder RNN ")) - - parser.add_argument("--num-layers", default=2, type=int, help=("")) - - parser.add_argument("--hidden-dim", default=512, type=int, help=("")) - - if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) - def change_config( self, override_dropouts=False, @@ -194,31 +110,101 @@ def change_config( self.embed_dropout_rate = embed_dropout_rate self.embed_dropout = nn.Dropout(self.embed_dropout_rate) - @staticmethod - def add_finetune_args(parser, - prefix=None, - skip=set(["in_feats", "blank_id", "vocab_size"])): - - if prefix is not None: - outer_parser = parser - parser = ArgumentParser(prog="") - - parser.add_argument( - "--override-dropouts", - default=False, - action=ActionYesNo, - help=( - "whether to use the dropout probabilities passed in the " - "arguments instead of the defaults in the pretrained model.")) - parser.add_argument("--embedding-dropout-rate", - default=0.0, - type=float, - help=("dropout prob for decoder input embeddings")) - parser.add_argument("--rnn-dropout-rate", - default=0.0, - type=float, - help=("dropout prob for decoder RNN ")) - - if prefix is not None: - 
outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + # def get_config(self): + # config = { + # "in_feats": self.in_feats, + # "blank_id": self.blank_id, + # "vocab_size": self.vocab_size, + # "embed_dim": self.embed_dim, + # "num_layers": self.num_layers, + # "hid_feats": self.hid_feats, + # "embed_dropout_rate": self.embed_dropout_rate, + # "rnn_dropout_rate": self.rnn_dropout_rate, + # } + + # # base_config = super().get_config() + # return dict(list(config.items())) + + # @staticmethod + # def filter_args(**kwargs): + # args = filter_func_args(TransducerPredictor.__init__, kwargs) + # return args + + # @staticmethod + # def filter_finetune_args(**kwargs): + # args = filter_func_args(TransducerPredictor.change_config, kwargs) + # return args + + # @staticmethod + # def add_class_args(parser, + # prefix=None, + # skip=set(["in_feats", "blank_id", "vocab_size"])): + + # if prefix is not None: + # outer_parser = parser + # parser = ArgumentParser(prog="") + + # if "in_feats" not in skip: + # parser.add_argument("--in-feats", + # type=int, + # required=True, + # help=("input feature dimension")) + # if "blank_id" not in skip: + # parser.add_argument("--blank-id", + # type=int, + # required=True, + # help=("blank id from sp model")) + # if "vocab_size" not in skip: + # parser.add_argument("--vocab-size", + # type=int, + # required=True, + # help=("output prediction dimension")) + # parser.add_argument("--embedding-dim", + # default=1024, + # type=int, + # help=("feature dimension")) + # parser.add_argument("--embedding-dropout-rate", + # default=0.0, + # type=float, + # help=("dropout prob for decoder input embeddings")) + # parser.add_argument("--rnn-dropout-rate", + # default=0.0, + # type=float, + # help=("dropout prob for decoder RNN ")) + + # parser.add_argument("--num-layers", default=2, type=int, help=("")) + + # parser.add_argument("--hidden-dim", default=512, type=int, help=("")) + + # if prefix is not None: + # outer_parser.add_argument("--" + prefix, + # action=ActionParser(parser=parser)) + + # @staticmethod + # def add_finetune_args(parser, + # prefix=None, + # skip=set(["in_feats", "blank_id", "vocab_size"])): + + # if prefix is not None: + # outer_parser = parser + # parser = ArgumentParser(prog="") + + # parser.add_argument( + # "--override-dropouts", + # default=False, + # action=ActionYesNo, + # help=( + # "whether to use the dropout probabilities passed in the " + # "arguments instead of the defaults in the pretrained model.")) + # parser.add_argument("--embedding-dropout-rate", + # default=0.0, + # type=float, + # help=("dropout prob for decoder input embeddings")) + # parser.add_argument("--rnn-dropout-rate", + # default=0.0, + # type=float, + # help=("dropout prob for decoder RNN ")) + + # if prefix is not None: + # outer_parser.add_argument("--" + prefix, + # action=ActionParser(parser=parser)) diff --git a/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py b/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py index bdd9b707..942e1313 100644 --- a/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py +++ b/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py @@ -6,6 +6,8 @@ import torch import torch.nn as nn +from ..layers import ActivationFactory as AF + class TransformerConv2dSubsampler(nn.Module): """Convolutional 2D subsampling (to 1/4 length) Tor transformer @@ -18,18 +20,23 @@ class TransformerConv2dSubsampler(nn.Module): time_dim: indicates which is the time dimension in the input tensor """ - def 
__init__(self, in_feats, out_feats, hid_act, pos_enc, time_dim=1): + def __init__(self, in_feats, out_feats, hid_act, pos_enc=None, time_dim=1): super().__init__() self.time_dim = time_dim + hid_act = AF.create(hid_act) self.conv = nn.Sequential( nn.Conv2d(1, out_feats, 3, 2, padding=(0, 1)), hid_act, nn.Conv2d(out_feats, out_feats, 3, 2, padding=(0, 1)), hid_act, ) - self.out = nn.Sequential( - nn.Linear(out_feats * (((in_feats - 1) // 2 - 1) // 2), out_feats), pos_enc - ) + + linear = nn.Linear(out_feats * (((in_feats - 1) // 2 - 1) // 2), + out_feats) + if pos_enc is None: + self.out = linear + else: + self.out = nn.Sequential(linear, pos_enc) def forward(self, x, x_mask=None): """Forward function. diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index 3e8347ee..95042aed 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -7,6 +7,7 @@ from .vae.vae import VAE from .vae.vq_vae import VQVAE from .wav2transducer import HFWav2Vec2Transducer +from .wav2transducer import HFWav2Vec2RNNTransducer, HFWav2Vec2RNNRNNTransducer from .wav2xvectors import (HFHubert2ResNet1dXVector, HFWav2Vec2ResNet1dXVector, HFWavLM2ResNet1dXVector) from .xvectors.efficient_net_xvector import EfficientNetXVector diff --git a/hyperion/torch/models/transducer/__init__.py b/hyperion/torch/models/transducer/__init__.py index ee711a8d..fe55e34d 100644 --- a/hyperion/torch/models/transducer/__init__.py +++ b/hyperion/torch/models/transducer/__init__.py @@ -4,7 +4,10 @@ """ +from .rnn_transducer import RNNTransducer +from .rnn_rnn_transducer import RNNRNNTransducer + from .transducer import Transducer -from .conformer import Conformer -from .decoder import Decoder -from .joiner import Joiner \ No newline at end of file +#from .conformer import Conformer +#from .decoder import Decoder +#from .joiner import Joiner diff --git a/hyperion/torch/models/transducer/lstm_rnn_transducer.py b/hyperion/torch/models/transducer/lstm_rnn_transducer.py new file mode 100644 index 00000000..5ab74483 --- /dev/null +++ b/hyperion/torch/models/transducer/lstm_rnn_transducer.py @@ -0,0 +1,149 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +from typing import Dict, Optional, Union +from jsonargparse import ArgumentParser, ActionParser, ActionYesNo +try: + import k2 +except ModuleNotFoundError: + from ...utils import dummy_k2 as k2 + +import torch + +from ...torch_model import TorchModel +from ..narchs import RNNTransducerDecoder + + +class RNNTransducer(TorchModel): + """ Base-class for RNN-T in + "Sequence Transduction with Recurrent Neural Networks" + https://arxiv.org/pdf/1211.3711.pdf + + Attributes: + encoder: Encoder network module + decoder: RNN-T Decoder config. dictionary or module. + """ + + def __init__( + self, + encoder: TorchModel, + decoder: Union[Dict, RNNTransducerDecoder], + ): + super().__init__() + assert isinstance(encoder, TorchModel) + if isinstance(decoder, dict): + decoder = RNNTransducerDecoder(**decoder) + else: + assert isinstance(decoder, RNNTransducerDecoder) + + self.encoder = encoder + self.decoder = decoder + + def forward( + self, + x: torch.Tensor, + x_lengths: torch.Tensor, + y: k2.RaggedTensor, + ) -> torch.Tensor: + """ + Args: + x: input features with shape = (N, T, C) + x_lengths: feature number for frames with shape = (N,) + y: ragged tensor with 2 axes [utt][label]. It contains labels of each + utterance. 
+        Returns:
+          - Token logits with shape = (N, vocab_size)
+          - RNN-T loss.
+        """
+        assert x.ndim == 3, x.shape
+        assert x_lengths.ndim == 1, x_lengths.shape
+        assert y.num_axes == 2, y.num_axes
+
+        assert x.size(0) == x_lengths.size(0) == y.dim0
+
+        x, x_lengths = self.encoder(x, x_lengths)
+        assert torch.all(x_lengths > 0)
+
+        logits, loss = self.decoder(x, x_lengths, y)
+        return logits, loss
+
+    def set_train_mode(self, mode):
+        if mode == self._train_mode:
+            return
+
+        if mode == "full":
+            self.unfreeze()
+        elif mode == "frozen":
+            self.freeze()
+        else:
+            raise ValueError(f"invalid train_mode={mode}")
+
+        self._train_mode = mode
+
+    def _train(self, train_mode: str):
+        if train_mode in ["full", "frozen"]:
+            super()._train(train_mode)
+        else:
+            raise ValueError(f"invalid train_mode={train_mode}")
+
+    @staticmethod
+    def valid_train_modes():
+        return ["full", "frozen"]
+
+    def get_config(self):
+        dec_cfg = self.decoder.get_config()
+        config = {
+            "decoder": dec_cfg,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @staticmethod
+    def filter_args(**kwargs):
+        # get the decoder arguments
+        args = {}
+        decoder_args = RNNTransducerDecoder.filter_args(**kwargs["decoder"])
+        args["decoder"] = decoder_args
+        return args
+
+    @staticmethod
+    def add_class_args(parser, prefix=None, skip=set()):
+
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        RNNTransducerDecoder.add_class_args(parser, prefix="decoder")
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))
+
+    def change_config(
+        self,
+        decoder,
+    ):
+        logging.info("changing transducer config")
+        self.decoder.change_config(**decoder)
+
+    @staticmethod
+    def filter_finetune_args(**kwargs):
+        # get the decoder arguments
+        args = {}
+        decoder_args = RNNTransducerDecoder.filter_finetune_args(
+            **kwargs["decoder"])
+        args["decoder"] = decoder_args
+        return args
+
+    @staticmethod
+    def add_finetune_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        RNNTransducerDecoder.add_finetune_args(parser, prefix="decoder")
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))
diff --git a/hyperion/torch/models/transducer/rnn_rnn_transducer.py b/hyperion/torch/models/transducer/rnn_rnn_transducer.py
new file mode 100644
index 00000000..0e1c7a85
--- /dev/null
+++ b/hyperion/torch/models/transducer/rnn_rnn_transducer.py
@@ -0,0 +1,84 @@
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import logging
+from typing import Dict, Optional, Union, Tuple
+from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+try:
+    import k2
+except ModuleNotFoundError:
+    from ...utils import dummy_k2 as k2
+
+import torch
+
+from .rnn_transducer import RNNTransducer
+from ...narchs import RNNEncoder
+
+
+class RNNRNNTransducer(RNNTransducer):
+    """RNN-T with RNN encoder.
+
+    Attributes:
+      encoder: dictionary of options to initialize RNNEncoder class or RNNEncoder object
+      decoder: RNN-T Decoder config. dictionary or module.
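+
+    Example (illustrative sketch; the option dictionaries below only show a
+    subset of the RNNEncoder and RNNTransducerDecoder arguments):
+
+      >>> model = RNNRNNTransducer(
+      ...     encoder={"in_feats": 80, "hid_feats": 512, "out_feats": 512,
+      ...              "num_layers": 5},
+      ...     decoder={"in_feats": 512, "vocab_size": 1000, "blank_id": 0})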
+ + """ + + def __init__(self, encoder, decoder): + if isinstance(encoder, dict): + encoder = RNNEncoder(**encoder) + else: + assert isinstance(encoder, RNNEncoder) + + super().__init__(encoder, decoder) + + @staticmethod + def filter_args(**kwargs): + args = RNNTransducer.filter_args(**kwargs) + encoder_args = RNNEncoder.filter_args(**kwargs["encoder"]) + args["encoder"] = encoder_args + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + RNNEncoder.add_class_args(parser, prefix="encoder") + RNNTransducer.add_class_args(parser) + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) + + def change_config( + self, + encoder, + decoder, + ): + logging.info("changing transducer encoder config") + self.encoder.change_config(**encoder) + super().chage_config(**decoder) + + @staticmethod + def filter_finetune_args(**kwargs): + args = RNNTransducer.filter_finetune_args(**kwargs) + encoder_args = RNNEncoder.filter_finetune_args(**kwargs["encoder"]) + args["encoder"] = encoder_args + return args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + RNNEncoder.add_finetune_args(parser, prefix="encoder") + RNNTransducer.add_finetune_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/transducer/rnn_transducer.py b/hyperion/torch/models/transducer/rnn_transducer.py index dd91da5f..ef54a5eb 100644 --- a/hyperion/torch/models/transducer/rnn_transducer.py +++ b/hyperion/torch/models/transducer/rnn_transducer.py @@ -1,101 +1,64 @@ -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. """ -Note we use `rnnt_loss` from torchaudio, which exists only in -torchaudio >= v0.10.0. 
It also means you have to use torch >= v1.10.0 + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + +import logging +from typing import Dict, Optional, Union, Tuple, List from jsonargparse import ArgumentParser, ActionParser, ActionYesNo try: import k2 except ModuleNotFoundError: from ...utils import dummy_k2 as k2 -import logging import torch -import torch.nn as nn -import torchaudio -import torchaudio.functional -#from .encoder_interface import EncoderInterface +from ....utils.misc import filter_func_args from ...torch_model import TorchModel -from hyperion.utils.text import add_sos -# from .conformer import Conformer -from .decoder import Decoder -from .joiner import Joiner +from ...narchs import RNNTransducerDecoder -class Transducer(TorchModel): - """It implements https://arxiv.org/pdf/1211.3711.pdf +class RNNTransducer(TorchModel): + """ Base-class for RNN-T in "Sequence Transduction with Recurrent Neural Networks" + https://arxiv.org/pdf/1211.3711.pdf + + Attributes: + encoder: Encoder network module + decoder: RNN-T Decoder config. dictionary or module. """ def __init__( self, - encoder, - # conformer_enc, - decoder, - joiner, - vocab_size, - blank_id, + encoder: Union[TorchModel, None], + decoder: Union[Dict, RNNTransducerDecoder], ): - """ - Args: - encoder: - It is the transcription network in the paper. Its accepts - two inputs: `x` of (N, T, C) and `x_lengths` of shape (N,). - It returns two tensors: `logits` of shape (N, T, C) and - `logit_lens` of shape (N,). - decoder: - It is the prediction network in the paper. Its input shape - is (N, U) and its output shape is (N, U, C). It should contain - one attribute: `blank_id`. - joiner: - It has two inputs with shapes: (N, T, C) and (N, U, C). Its - output shape is (N, T, U, C). Note that its output contains - unnormalized probs, i.e., not processed by log-softmax. - """ super().__init__() - decoder["blank_id"] = blank_id - decoder["vocab_size"] = vocab_size - joiner["out_dims"] = vocab_size + if encoder is not None: + assert isinstance(encoder, TorchModel) + if isinstance(decoder, dict): + decoder = RNNTransducerDecoder(**decoder) + else: + assert isinstance(decoder, RNNTransducerDecoder) - self.vocab_size = vocab_size - self.blank_id = blank_id self.encoder = encoder - self.decoder = Decoder(**decoder) - self.joiner = Joiner(**joiner) + self.decoder = decoder def forward( self, x: torch.Tensor, x_lengths: torch.Tensor, y: k2.RaggedTensor, - ) -> torch.Tensor: + ) -> Tuple[torch.Tensor, torch.Tensor]: """ Args: - x: - A 3-D tensor of shape (N, T, C). - x_lengths: - A 1-D tensor of shape (N,). It contains the number of frames in `x` - before padding. - y: - A ragged tensor with 2 axes [utt][label]. It contains labels of each + x: input features with shape = (N, T, C) + x_lengths: feature number for frames with shape = (N,) + y: ragged tensor with 2 axes [utt][label]. It contains labels of each utterance. Returns: - Return the transducer loss. + - Token logits with shape = (N, vocab_size) + - RNN-T loss. 
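+
+        Example (illustrative sketch; assumes k2 is installed):
+
+          >>> y = k2.RaggedTensor([[3, 5, 2], [1, 4]])
+          >>> logits, loss = model(x, x_lengths, y)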
""" assert x.ndim == 3, x.shape assert x_lengths.ndim == 1, x_lengths.shape @@ -103,45 +66,52 @@ def forward( assert x.size(0) == x_lengths.size(0) == y.dim0 - # wav2vec2 works as encoder - # encoder_out, x_lengths = self.encoder(x, x_lengths) - assert torch.all(x_lengths > 0) - - encoder_out = x - # Now for the decoder, i.e., the prediction network - row_splits = y.shape.row_splits(1) - y_lens = row_splits[1:] - row_splits[:-1] - - blank_id = self.decoder.blank_id - sos_y = add_sos(y, sos_id=blank_id) - - sos_y_padded = sos_y.pad(mode="constant", padding_value=blank_id) - sos_y_padded = sos_y_padded.to(torch.int64) + if self.encoder is not None: + x, x_lengths = self.encoder(x, x_lengths) + assert torch.all(x_lengths > 0) - decoder_out, _ = self.decoder(sos_y_padded) - - logits = self.joiner(encoder_out, decoder_out) - - # rnnt_loss requires 0 padded targets - # Note: y does not start with SOS - y_padded = y.pad(mode="constant", padding_value=0) + print("zz", x.shape, x_lengths, y, flush=True) + logits, loss = self.decoder(x, x_lengths, y) + return logits, loss - assert hasattr(torchaudio.functional, "rnnt_loss"), ( - f"Current torchaudio version: {torchaudio.__version__}\n" - "Please install a version >= 0.10.0") + def infer(self, + x: torch.Tensor, + x_lengths: torch.Tensor, + decoding_method="time_sync_beam_search", + beam_width: int = 5, + max_sym_per_frame: int = 3, + max_sym_per_utt: int = 1000) -> List[List[int]]: + """ + ASR tokens inference + Args: + x: input features with shape = (N, T, C) + x_lengths: feature number for frames with shape = (N,) + decoding_method: greedy, time_sync_beam_search or align_length_sync_beam_search + max_sym_per_frame: maximum number of symbols RNN-T can emit in 1 frame. + max_sym_per_utt: maximimum number of symbols in a single utterance. + Returns: + List of list of integer indexes of the recognizer's symbols. 
+ """ + assert x.ndim == 3, x.shape + assert x_lengths.ndim == 1, x_lengths.shape + assert x.size(0) == x_lengths.size(0) - x_lengths = x_lengths.to(torch.int32) + if self.encoder is not None: + x, x_lengths = self.encoder(x, x_lengths) + assert torch.all(x_lengths > 0) - loss = torchaudio.functional.rnnt_loss( - logits=logits, - targets=y_padded.to(torch.int32), - logit_lengths=x_lengths, - target_lengths=y_lens, - blank=blank_id, - reduction="sum", - ) + batch_size = x.size(0) + y = [] + for i in range(batch_size): + x_i = x[i:i + 1, :x_lengths[i]] + y_i = self.decoder.decode(x_i, + method=decoding_method, + beam_width=beam_width, + max_sym_per_frame=max_sym_per_frame, + max_sym_per_utt=max_sym_per_utt) + y.append(y_i) - return logits, loss + return y def set_train_mode(self, mode): if mode == self._train_mode: @@ -151,9 +121,6 @@ def set_train_mode(self, mode): self.unfreeze() elif mode == "frozen": self.freeze() - elif mode == "ft-embed-affine": - self.unfreeze() - self.freeze_preembed_layers() else: raise ValueError(f"invalid train_mode={mode}") @@ -167,34 +134,30 @@ def _train(self, train_mode: str): @staticmethod def valid_train_modes(): - return ["full", "frozen", "ft-embed-affine"] + return ["full", "frozen"] def get_config(self): - dec_cfg = self.decoder.get_config() - join_cfg = self.joiner.get_config() + if self.encoder is None: + enc_cfg = None + else: + enc_cfg = self.encoder.get_config() + del enc_cfg["class_name"] + dec_cfg = self.decoder.get_config() + del dec_cfg["class_name"] config = { - "blank_id": self.blank_id, - "vocab_size": self.vocab_size, + "encoder": enc_cfg, "decoder": dec_cfg, - "joiner": join_cfg, } - - # base_config = super().get_config() - return dict(list(config.items())) + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) @staticmethod def filter_args(**kwargs): - # get arguments for pooling - decoder_args = Decoder.filter_args(**kwargs["decoder"]) - joiner_args = Joiner.filter_args(**kwargs["joiner"]) - - valid_args = () - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - + args = {} + decoder_args = RNNTransducerDecoder.filter_args(**kwargs["decoder"]) args["decoder"] = decoder_args - args["joiner"] = joiner_args return args @staticmethod @@ -204,8 +167,7 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - Decoder.add_class_args(parser, prefix="decoder") - Joiner.add_class_args(parser, prefix="joiner") + RNNTransducerDecoder.add_class_args(parser, prefix="decoder") if prefix is not None: outer_parser.add_argument("--" + prefix, @@ -213,24 +175,16 @@ def add_class_args(parser, prefix=None, skip=set()): def change_config( self, - decoder, - # joiner, + decoder: Dict, ): - logging.info("changing transducer config") + logging.info("changing decoder config") self.decoder.change_config(**decoder) - # self.joiner.change_config(**joiner) @staticmethod def filter_finetune_args(**kwargs): - # get arguments for pooling + args = {} decoder_args = Decoder.filter_finetune_args(**kwargs["decoder"]) - # joiner_args = Joiner.filter_finetune_args(**kwargs["joiner"]) - - valid_args = () - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - args["decoder"] = decoder_args - # args["joiner"] = joiner_args return args @staticmethod @@ -239,12 +193,42 @@ def add_finetune_args(parser, prefix=None): outer_parser = parser parser = ArgumentParser(prog="") - Decoder.add_finetune_args(parser, prefix="decoder") - # Joiner.add_finetune_args(parser, 
prefix="joiner") + RNNTransducerDecoder.add_finetune_args(parser, prefix="decoder") if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - add_argparse_args = add_class_args - add_argparse_finetune_args = add_finetune_args + @staticmethod + def add_infer_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument("--decoding-method", + default="time_sync_beam_search", + choices=[ + "greedy", "time_sync_beam_search", + "align_length_sync_beam_search" + ]) + + parser.add_argument("--beam-width", + default=5, + type=int, + help="beam width for beam search") + parser.add_argument("--max-sym-per-frame", + default=3, + type=int, + help="max symbols RNN-T can emit in 1 frame") + parser.add_argument("--max-sym-per-utt", + default=1000, + type=int, + help="max symbols RNN-T can emit in 1 frame") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) + + @staticmethod + def filter_infer_args(**kwargs): + return filter_func_args(RNNTransducer.infer, kwargs) diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index 855e1590..bae35e0e 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -28,7 +28,7 @@ import torch.nn as nn import torchaudio import torchaudio.functional -#from .encoder_interface import EncoderInterface +from .encoder_interface import EncoderInterface from ...torch_model import TorchModel from hyperion.utils.text import add_sos @@ -44,12 +44,11 @@ class Transducer(TorchModel): def __init__( self, - encoder_net, + vocab_size, + blank_id, # conformer_enc, decoder, joiner, - vocab_size, - blank_id, ): """ Args: @@ -68,6 +67,9 @@ def __init__( unnormalized probs, i.e., not processed by log-softmax. """ super().__init__() + # assert isinstance(encoder, EncoderInterface) + # assert hasattr(decoder, "blank_id") + decoder["blank_id"] = blank_id decoder["vocab_size"] = vocab_size joiner["out_dims"] = vocab_size diff --git a/hyperion/torch/models/wav2transducer/__init__.py b/hyperion/torch/models/wav2transducer/__init__.py index 5346bc78..de4879a5 100644 --- a/hyperion/torch/models/wav2transducer/__init__.py +++ b/hyperion/torch/models/wav2transducer/__init__.py @@ -5,3 +5,6 @@ """ from .hf_wav2vec2_transducer import HFWav2Vec2Transducer + +from .hf_wav2vec2rnn_transducer import HFWav2Vec2RNNTransducer +from .hf_wav2vec2rnn_rnn_transducer import HFWav2Vec2RNNRNNTransducer diff --git a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py new file mode 100644 index 00000000..922996f6 --- /dev/null +++ b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py @@ -0,0 +1,375 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import contextlib +from typing import Union, Dict, List +from jsonargparse import ArgumentParser, ActionParser + +import torch +import torch.nn as nn + +from ...utils import remove_silence +from ...torch_model import TorchModel +from ..transducer import RNNTransducer + + +class HFWav2RNNTransducer(TorchModel): + """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. + + Attributes: + hf_feats: hugging face model wrapper object. + transducer: transducer model object. 
+      feat_fusion_start: the input to the transducer will fuse the wav2vec layers from "feat_fusion_start" to
+                         the wav2vec "num_layers".
+      feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more
+                          than one layer is used.
+    """
+
+    def __init__(self,
+                 hf_feats: TorchModel,
+                 transducer: Union[Dict, TorchModel],
+                 feat_fusion_start: int = 0,
+                 feat_fusion_method: str = "weighted-avg"):
+
+        super().__init__()
+        self.hf_feats = hf_feats
+        if isinstance(transducer, dict):
+            transducer["decoder"]["in_feats"] = hf_feats.hidden_size
+            #transducer["joiner"]["in_feats"] = hf_feats.hidden_size
+            if "class_name" in transducer:
+                del transducer["class_name"]
+
+            transducer["encoder"] = None
+            transducer = RNNTransducer(**transducer)
+        else:
+            assert isinstance(transducer, RNNTransducer)
+            if transducer.encoder is None:
+                assert transducer.decoder.in_feats == hf_feats.hidden_size
+                #assert transducer.joiner.in_feats == hf_feats.hidden_size
+
+        self.transducer = transducer
+        self.feat_fusion_start = feat_fusion_start
+        self.feat_fusion_method = feat_fusion_method
+        self._hf_context = contextlib.nullcontext()
+        self._make_fuser()
+
+    def _make_fuser(self):
+        if self.feat_fusion_method == "last":
+            self.feat_fuser = None
+            return
+
+        num_layers = self.hf_feats.num_encoder_layers + 1 - self.feat_fusion_start
+        layer_dim = self.hf_feats.hidden_size
+        if self.feat_fusion_method == "weighted-avg":
+            self.feat_fuser = nn.Parameter(torch.zeros(num_layers))
+        elif self.feat_fusion_method == "linear":
+            self.feat_fuser = nn.Linear(num_layers, 1, bias=False)
+            self.feat_fuser.weight.data = torch.ones(1,
+                                                     num_layers) / num_layers
+        elif self.feat_fusion_method == "cat":
+            self.feat_fuser = nn.Linear(num_layers * layer_dim,
+                                        layer_dim,
+                                        bias=False)
+
+    def _fuse_hid_feats(self, hid_feats):
+        """Fuses the hidden features from the Wav2Vec model.
+
+        Args:
+          hid_feats: list of hidden features Tensors from Wav2Vec model.
+
+        Returns:
+          Tensor of fused features (batch, channels, time)
+        """
+        if len(hid_feats) == 1:
+            # There is only one layer of features
+            return hid_feats[0]
+
+        hid_feats = hid_feats[self.feat_fusion_start:]
+        if self.feat_fusion_method == "weighted-avg":
+            hid_feats = torch.stack(hid_feats, dim=-1)
+            norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1)
+            feats = torch.sum(hid_feats * norm_weights, dim=-1)
+        elif self.feat_fusion_method == "linear":
+            hid_feats = torch.stack(hid_feats, dim=-1)
+            feats = self.feat_fuser(hid_feats).squeeze(dim=-1)
+        elif self.feat_fusion_method == "cat":
+            hid_feats = torch.cat(hid_feats, dim=-1)
+            feats = self.feat_fuser(hid_feats)
+        elif self.feat_fusion_method == "last":
+            feats = hid_feats[-1]
+
+        return feats
+
+    def forward_feats(self,
+                      x,
+                      x_lengths,
+                      return_feat_layers=None,
+                      chunk_length=0,
+                      detach_chunks=False):
+        return_hid_states = (False if return_feat_layers is None
+                             and self.feat_fusion_method == "last" else True)
+        with self._hf_context:
+            hf_output = self.hf_feats(
+                x,
+                x_lengths,
+                return_hid_states=return_hid_states,
+                chunk_length=chunk_length,
+                detach_chunks=detach_chunks,
+            )
+        feat_lengths = hf_output["hidden_states_lengths"]
+        if return_hid_states:
+            hid_feats = hf_output["hidden_states"]
+            feats = self._fuse_hid_feats(hid_feats)
+        else:
+            hid_feats = None
+            feats = hf_output["last_hidden_state"]
+
+        feats = feats.transpose(1, 2)
+        if return_feat_layers is not None:
+            # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time)
+            # as the hidden features of the x-vector encoder.
+            hid_feats = [
+                f.transpose(1, 2) for i, f in enumerate(hid_feats)
+                if i in return_feat_layers
+            ]
+        else:
+            hid_feats = None
+
+        return feats, hid_feats, feat_lengths
+
+    def forward(
+        self,
+        x,
+        x_lengths=None,
+        y=None,
+        return_feat_layers=None,
+        # return_enc_layers=None,
+        return_logits=True,
+    ):
+        """Forward function. It returns the token logits and the RNN-T loss.
+        It can also return the hidden representations from the wav2vec
+        feature extractor; in that case the output variable is a dictionary.
+
+        Args:
+          x: input features tensor with shape=(batch, in_feats, time)
+          x_lengths: time lengths of the features with shape=(batch,)
+          y: ragged tensor with 2 axes [utt][label] containing the token
+            labels of each utterance (k2.RaggedTensor)
+          return_feat_layers: list of integers indicating, which wav2vec layers
+            we should return. If None, no wav2vec layers are returned.
+          return_enc_layers: list of integers indicating, which encoder layers
+            we should return. If None, no encoder layers are returned.
+          return_logits: if True, it adds the logits to the output dictionary.
+        Returns:
+          Token logits and RNN-T loss, or a dictionary with "logits" and
+          "h_feats" (wav2vec features) when return_feat_layers is given.
+        """
+        feats, hid_feats, feat_lengths = self.forward_feats(
+            x, x_lengths, return_feat_layers)
+
+        feats = feats.permute(0, 2, 1)  # (N, C, T) ->(N, T, C)
+
+        output, loss = self.transducer(
+            feats,
+            feat_lengths,
+            y,
+        )
+
+        if not return_feat_layers:
+            return output, loss
+
+        if not isinstance(output, dict):
+            # if the transducer just returned the logits, we put them into a
+            # dictionary to append the hid feats later.
+            output = {"logits": output}
+
+        output["h_feats"] = hid_feats
+        return output, loss
+
+    def infer(self,
+              x: torch.Tensor,
+              x_lengths: torch.Tensor,
+              decoding_method="time_sync_beam_search",
+              beam_width: int = 5,
+              max_sym_per_frame: int = 3,
+              max_sym_per_utt: int = 1000):
+        """
+        ASR tokens inference
+        Args:
+          x: input features with shape = (N, T, C)
+          x_lengths: feature number for frames with shape = (N,)
+          decoding_method: greedy, time_sync_beam_search or align_length_sync_beam_search
+          max_sym_per_frame: maximum number of symbols RNN-T can emit in 1 frame.
+          max_sym_per_utt: maximum number of symbols in a single utterance.
+        Returns:
+          List of list of integer indexes of the recognizer's symbols.
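+
+        Example (sketch; here x is the raw waveform, since hf_feats extracts
+        the features internally):
+
+          >>> tokens = model.infer(wav, wav_lengths,
+          ...                      decoding_method="time_sync_beam_search",
+          ...                      beam_width=5)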
+ """ + + feats, _, feat_lengths = self.forward_feats(x, x_lengths) + + feats = feats.permute(0, 2, 1) # (N, C, T) ->(N, T, C) + + y = self.transducer.infer(feats, + feat_lengths, + decoding_method=decoding_method, + beam_width=beam_width, + max_sym_per_frame=max_sym_per_frame, + max_sym_per_utt=max_sym_per_utt) + return y + + def freeze_feat_fuser(self): + if self.feat_fuser is None: + return + + if self.feat_fusion_method == "weighted-avg": + self.feat_fuser.requires_grad = False + return + + for param in self.feat_fuser.parameters(): + param.requires_grad = False + + def freeze_hf_feats(self): + self.hf_feats.freeze() + + def freeze_hf_feature_encoder(self): + self.hf_feats.freeze_feature_encoder() + + def set_train_mode(self, mode): + if mode == self._train_mode: + return + + if mode == "full": + self.unfreeze() + elif mode == "frozen": + self.freeze() + elif mode in ["ft-transducer", "ft-transducer-nograd"]: + self.unfreeze() + self.freeze_hf_feats() + self.freeze_feat_fuser() + elif mode in ["hf-feats-frozen", "hf-feats-frozen-nograd"]: + self.unfreeze() + self.freeze_hf_feats() + elif mode == "hf-feat-extractor-frozen": + self.unfreeze() + self.freeze_hf_feature_encoder() + else: + raise ValueError(f"invalid train_mode={mode}") + + logging.info("train mode set to %s", mode) + + if "nograd" in mode: + logging.info("using torch.no_grad for hf_feats") + self._hf_context = torch.no_grad() + else: + self._hf_context = contextlib.nullcontext() + + self._train_mode = mode + + def _train(self, train_mode: str): + + if train_mode in ["full", "frozen"]: + super()._train(train_mode) + elif train_mode in [ + "ft-transducer", + "hf-feats-frozen", + "ft-transducer-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", + ]: + self.hf_feats.train() + self.transducer._train("full") + else: + raise ValueError(f"invalid train_mode={train_mode}") + + @staticmethod + def valid_train_modes(): + return [ + "full", + "frozen", + "ft-embed-affine", + "ft-transducer", + "hf-feats-frozen", + "ft-transducer-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", + ] + + @staticmethod + def filter_args(**kwargs): + valid_args = ( + "hf_feats", + "transducer", + "feat_fusion_start", + "feat_fusion_method", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + def get_config(self): + hf_cfg = self.hf_feats.get_config() + tran_cfg = self.transducer.get_config() + del hf_cfg["class_name"] + del tran_cfg["class_name"] + config = { + "hf_feats": hf_cfg, + "transducer": tran_cfg, + "feat_fusion_start": self.feat_fusion_start, + "feat_fusion_method": self.feat_fusion_method, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def change_config(self, hf_feats, transducer): + logging.info("changing hf wav2transducer config") + self.hf_feats.change_config(**hf_feats) + self.transducer.change_config(**transducer) + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--feat-fusion-start", + default=0, + type=int, + help=""" + the input to x-vector model will fuse the wav2vec + layers from feat_fusion_start to + the wav2vec num_layers""", + ) + parser.add_argument( + "--feat-fusion-method", + default="weighted-avg", + choices=["weighted-avg", "linear", "cat", "last"], + help=("method to fuse the hidden layers from the wav2vec model " + "in [weighted-avg, linear, cat, last]"), + ) + 
+        if prefix is not None:
+            outer_parser.add_argument(
+                "--" + prefix,
+                action=ActionParser(parser=parser),
+            )
+
+    @staticmethod
+    def add_infer_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        RNNTransducer.add_infer_args(parser)
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))
+
+    @staticmethod
+    def filter_infer_args(**kwargs):
+        return RNNTransducer.filter_infer_args(**kwargs)
diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py
new file mode 100644
index 00000000..412a182b
--- /dev/null
+++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py
@@ -0,0 +1,103 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+from jsonargparse import ArgumentParser, ActionParser
+from typing import Union, Dict, Optional
+
+import torch
+import torch.nn as nn
+
+from ..transducer import RNNRNNTransducer
+from ...tpm import HFWav2Vec2
+from .hf_wav2rnn_transducer import HFWav2RNNTransducer
+
+
+class HFWav2Vec2RNNRNNTransducer(HFWav2RNNTransducer):
+    """Class for RNN-T with Wav2Vec2 features
+
+    Attributes:
+      hf_feats: HFWav2Vec configuration dictionary or object.
+                This is a wrapper over Hugging Face Wav2Vec model.
+      transducer: Transducer configuration dictionary or object.
+      feat_fusion_start: the input to the transducer will fuse the wav2vec layers from "feat_fusion_start" to
+                         the wav2vec "num_layers".
+      feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more
+                          than one layer is used.
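+
+    Example (illustrative sketch; the hf_feats options are placeholders for
+    the HFWav2Vec2 constructor arguments):
+
+      >>> model = HFWav2Vec2RNNRNNTransducer(
+      ...     hf_feats={...},
+      ...     transducer={"encoder": {...}, "decoder": {...}},
+      ...     feat_fusion_method="weighted-avg")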
+ """ + + def __init__( + self, + hf_feats: Union[Dict, HFWav2Vec2], + transducer: Union[Dict, RNNRNNTransducer], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): + + if isinstance(hf_feats, dict): + if "class_name" in hf_feats: + del hf_feats["class_name"] + hf_feats = HFWav2Vec2(**hf_feats) + else: + assert isinstance(hf_feats, HFWav2Vec2) + + if isinstance(transducer, dict): + transducer["decoder"]["in_feats"] = hf_feats.hidden_size + #transducer["joiner"]["in_feats"] = hf_feats.hidden_size + if "class_name" in transducer: + del transducer["class_name"] + + transducer = RNNRNNTransducer(**transducer) + else: + assert isinstance(transducer, RNNRNNTransducer) + + super().__init__(hf_feats, transducer, feat_fusion_start, + feat_fusion_method) + + @staticmethod + def filter_args(**kwargs): + base_args = HFWav2RNNTransducer.filter_args(**kwargs) + child_args = HFWav2Vec2.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = RNNRNNTransducer.filter_args(**kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_class_args(parser, prefix="hf_feats") + RNNRNNTransducer.add_class_args(parser, prefix="transducer") + HFWav2RNNTransducer.add_class_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFWav2Vec2.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = RNNRNNTransducer.filter_finetune_args( + **kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_finetune_args(parser, prefix="hf_feats") + RNNRNNTransducer.add_finetune_args(parser, prefix="transducer") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py new file mode 100644 index 00000000..d89953b2 --- /dev/null +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py @@ -0,0 +1,103 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from jsonargparse import ArgumentParser, ActionParser +from typing import Union, Dict, Optional + +import torch +import torch.nn as nn + +from ..transducer import RNNTransducer +from ...tpm import HFWav2Vec2 +from .hf_wav2rnn_transducer import HFWav2RNNTransducer + + +class HFWav2Vec2RNNTransducer(HFWav2RNNTransducer): + """Class for RNN-T with Wav2Vec2 features + + Attributes: + Attributes: + hf_feats: HFWav2Vec configuration dictionary or object. + This is a warpper over Hugging Face Wav2Vec model. + transducer: Transducer configuration dictionary or object. + feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to + the wav2vec "num_layers". + feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more + than one layer is used. 
+ """ + + def __init__( + self, + hf_feats: Union[Dict, HFWav2Vec2], + transducer: Union[Dict, RNNTransducer], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): + + if isinstance(hf_feats, dict): + if "class_name" in hf_feats: + del hf_feats["class_name"] + hf_feats = HFWav2Vec2(**hf_feats) + else: + assert isinstance(hf_feats, HFWav2Vec2) + + # if isinstance(transducer, dict): + # transducer["decoder"]["in_feats"] = hf_feats.hidden_size + # transducer["joiner"]["in_feats"] = hf_feats.hidden_size + # if "class_name" in transducer: + # del transducer["class_name"] + # transducer = Transducer(**transducer) + # else: + # assert isinstance(transducer, Transducer) + # assert transducer.decoder.in_feats == hf_feats.hidden_size + # assert transducer.joiner.in_feats == hf_feats.hidden_size + + super().__init__(hf_feats, transducer, feat_fusion_start, + feat_fusion_method) + + @staticmethod + def filter_args(**kwargs): + base_args = HFWav2RNNTransducer.filter_args(**kwargs) + child_args = HFWav2Vec2.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = RNNTransducer.filter_args(**kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_class_args(parser, prefix="hf_feats") + RNNTransducer.add_class_args(parser, prefix="transducer") + HFWav2RNNTransducer.add_class_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFWav2Vec2.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = RNNTransducer.filter_finetune_args(**kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_finetune_args(parser, prefix="hf_feats") + RNNTransducer.add_finetune_args(parser, prefix="transducer") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py new file mode 100644 index 00000000..a5df4b8a --- /dev/null +++ b/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py @@ -0,0 +1,103 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from typing import Dict, Optional, Union, Tuple +from jsonargparse import ActionParser, ArgumentParser + +import torch +import torch.nn as nn + +from ...narchs import AudioFeatsMVN +from ...torch_model import TorchModel +from ...utils import remove_silence + + +class Wav2RNNTransducer(TorchModel): + """Base class for models that integrate the acoustic feature extractor and and x-vector model that takes acoustic features as input. + + Attributes: + feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. + xvector: x-vector model object. 
+ """ + + def __init__(self, feats, transducer): + + super().__init__() + + if isinstance(feats, dict): + feats = AudioFeatsMVN.filter_args(**feats) + feats["trans"] = True + feats = AudioFeatsMVN(**feats) + else: + assert isinstance(feats, AudioFeatsMVN) + + self.feats = feats + self.transducer = transducer + + def forward( + self, + x: torch.Tensor, + x_lengths: torch.Tensor, + y: k2.RaggedTensor, + vad_samples: Optional[torch.Tensor] = None, + vad_feats: Optional[torch.Tensor] = None + ) -> Tuple[torch.Tensor, torch.Tensor]: + + if vad_samples is not None: + x, x_lengths = remove_silence(x, x_lengths) + feats, feat_lengths = self.feats(x, x_lengths) + if vad_feats is not None: + feats, feat_lengths = remove_silence(feats, feat_lengths) + + return self.transducer(feats, feat_lengths, y) + + def set_train_mode(self, mode): + self.transducer.set_train_mode(mode) + + def get_config(self): + feat_cfg = self.feats.get_config() + xvector_cfg = self.xvector.get_config() + config = { + "feats": feat_cfg, + "xvector": xvector_cfg, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @staticmethod + def filter_args(*kwargs): + """Filters Wav2XVector class arguments from arguments dictionary. + + Args: + kwargs: Arguments dictionary. + + Returns: + Dictionary with SpecAugment options. + """ + valid_args = ( + "feats", + "xvector", + ) + + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + """Adds Wav2XVector options common to all child classes to parser. + + Args: + parser: Arguments parser + prefix: Options prefix. + """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + AudioFeatsMVN.add_class_args(parser, prefix="feats") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py index 43ab2382..8a17379c 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py @@ -19,7 +19,6 @@ class HFWav2Vec2ResNet1dXVector(HFWav2XVector): """Class extracting Wav2Vec2 + ResNet1d x-vectors from waveform. Attributes: - Attributes: hf_feats: HFWav2Vec configuration dictionary or object. This is a warpper over Hugging Face Wav2Vec model. xvector: ResNet1dXVector configuration dictionary or object. 
@@ -53,7 +52,8 @@ def __init__( assert isinstance(xvector, ResNet1dXVector) assert xvector.encoder_net.in_feats == hf_feats.hidden_size - super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + super().__init__(hf_feats, xvector, feat_fusion_start, + feat_fusion_method) @staticmethod def filter_args(**kwargs): @@ -76,7 +76,8 @@ def add_class_args(parser, prefix=None): HFWav2XVector.add_class_args(parser) if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) @staticmethod def filter_finetune_args(**kwargs): @@ -97,4 +98,5 @@ def add_finetune_args(parser, prefix=None): ResNet1dXVector.add_finetune_args(parser, prefix="xvector") if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index 8a65f12e..5599fa1e 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -13,9 +13,6 @@ from ...torch_model import TorchModel from ...utils import remove_silence -# import torch.nn.functional as nnf - - class HFWav2XVector(TorchModel): """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. @@ -29,9 +26,11 @@ class HFWav2XVector(TorchModel): than one layer is used. """ - def __init__( - self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg" - ): + def __init__(self, + hf_feats, + xvector, + feat_fusion_start=0, + feat_fusion_method="weighted-avg"): super().__init__() self.hf_feats = hf_feats @@ -52,9 +51,12 @@ def _make_fuser(self): self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) elif self.feat_fusion_method == "linear": self.feat_fuser = nn.Linear(num_layers, 1, bias=False) - self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers + self.feat_fuser.weight.data = torch.ones(1, + num_layers) / num_layers elif self.feat_fusion_method == "cat": - self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) + self.feat_fuser = nn.Linear(num_layers * layer_dim, + layer_dim, + bias=False) def _fuse_hid_feats(self, hid_feats): """Fuses the hidden features from the Wav2Vec model. @@ -69,7 +71,7 @@ def _fuse_hid_feats(self, hid_feats): # There is only one layer of features return hid_feats[0] - hid_feats = hid_feats[self.feat_fusion_start :] + hid_feats = hid_feats[self.feat_fusion_start:] if self.feat_fusion_method == "weighted-avg": hid_feats = torch.stack(hid_feats, dim=-1) norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) @@ -123,14 +125,14 @@ def rebuild_output_layer( num_subcenters=num_subcenters, ) - def forward_feats( - self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False - ): - return_hid_states = ( - False - if return_feat_layers is None and self.feat_fusion_method == "last" - else True - ) + def forward_feats(self, + x, + x_lengths, + return_feat_layers=None, + chunk_length=0, + detach_chunks=False): + return_hid_states = (False if return_feat_layers is None + and self.feat_fusion_method == "last" else True) with self._hf_context: hf_output = self.hf_feats( x, @@ -152,8 +154,7 @@ def forward_feats( # add hidden feats from wav2vec to the output. 
We transpose to be (batch, C, time) # as the hidden features of the x-vector encoder. hid_feats = [ - f.transpose(1, 2) - for i, f in enumerate(hid_feats) + f.transpose(1, 2) for i, f in enumerate(hid_feats) if i in return_feat_layers ] else: @@ -193,8 +194,7 @@ def forward( "h_classif" (list hidden classification head layers), "h_feats" (wav2vec features) """ feats, hid_feats, feat_lengths = self.forward_feats( - x, x_lengths, return_feat_layers - ) + x, x_lengths, return_feat_layers) output = self.xvector( feats, feat_lengths, @@ -230,17 +230,16 @@ def extract_embed( x, x_lengths = remove_silence(x, x_lengths) feats, _, feat_lengths = self.forward_feats( - x, x_lengths, chunk_length=hf_chunk_length, detach_chunks=detach_chunks - ) - xvec_chunk_length = int( - xvec_chunk_length - * self.hf_feats.sample_frequency - * feats.size(-1) - // x.size(-1) - ) - return self.xvector.extract_embed( - feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks - ) + x, + x_lengths, + chunk_length=hf_chunk_length, + detach_chunks=detach_chunks) + xvec_chunk_length = int(xvec_chunk_length * + self.hf_feats.sample_frequency * + feats.size(-1) // x.size(-1)) + return self.xvector.extract_embed(feats, feat_lengths, + xvec_chunk_length, embed_layer, + detach_chunks) def freeze_feat_fuser(self): if self.feat_fuser is None: @@ -303,11 +302,11 @@ def _train(self, train_mode: str): self.hf_feats.train() self.xvector._train("ft-embed_affine") elif train_mode in [ - "ft-xvector", - "hf-feats-frozen", - "ft-xvector-nograd", - "hf-feats-frozen-nograd", - "hf-feat-extractor-frozen", + "ft-xvector", + "hf-feats-frozen", + "ft-xvector-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", ]: self.hf_feats.train() self.xvector._train("full") @@ -370,19 +369,16 @@ def add_class_args(parser, prefix=None, skip=set()): "--feat-fusion-start", default=0, type=int, - help=( - "the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" - "the wav2vec num_layers" - ), + help= + ("the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" + "the wav2vec num_layers"), ) parser.add_argument( "--feat-fusion-method", default="weighted-avg", choices=["weighted-avg", "linear", "cat", "last"], - help=( - "method to fuse the hidden layers from the wav2vec model " - "in [weighted-avg, cat]" - ), + help=("method to fuse the hidden layers from the wav2vec model " + "in [weighted-avg, cat]"), ) if prefix is not None: diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index 824b5830..4c21f478 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -80,9 +80,8 @@ def forward( feats, feat_lengths = remove_silence(feats, feat_lengths) # feat_lengths = torch.div(x_lengths * feats.size(-1), x.size(-1)) - return self.xvector( - feats, feat_lengths, y, enc_layers, classif_layers, return_output - ) + return self.xvector(feats, feat_lengths, y, enc_layers, classif_layers, + return_output) def extract_embed( self, @@ -102,12 +101,11 @@ def extract_embed( feats, feat_lengths = remove_silence(feats, feat_lengths) feats = feats.transpose(1, 2) - return self.xvector.extract_embed( - feats, feat_lengths, chunk_length, embed_layer, detach_chunks - ) + return self.xvector.extract_embed(feats, feat_lengths, chunk_length, + embed_layer, detach_chunks) - def train_mode(self, mode="ft-embed-affine"): - self.xvector.train_mode(mode) + def set_train_mode(self, mode): + 
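        """Sets the train mode of the wrapped x-vector model, delegating to XVector.set_train_mode."""
+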
self.xvector.set_train_mode(mode) def get_config(self): feat_cfg = self.feats.get_config() @@ -152,4 +150,5 @@ def add_class_args(parser, prefix=None): AudioFeatsMVN.add_class_args(parser, prefix="feats") if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py index 1bce0f87..09136b7d 100644 --- a/hyperion/torch/models/xvectors/resnet1d_xvector.py +++ b/hyperion/torch/models/xvectors/resnet1d_xvector.py @@ -15,6 +15,7 @@ class ResNet1dXVector(XVector): + def __init__( self, resnet_enc, @@ -22,7 +23,10 @@ def __init__( pool_net="mean+stddev", embed_dim=256, num_embed_layers=1, - hid_act={"name": "relu", "inplace": True}, + hid_act={ + "name": "relu", + "inplace": True + }, loss_type="arc-softmax", cos_scale=64, margin=0.3, @@ -41,7 +45,8 @@ def __init__( ): if isinstance(resnet_enc, dict): - logging.info("making %s resnet1d encoder network", resnet_enc["resb_type"]) + logging.info("making %s resnet1d encoder network", + resnet_enc["resb_type"]) resnet_enc = Encoder(**resnet_enc) super().__init__( @@ -145,12 +150,12 @@ def add_class_args(parser, prefix=None): parser = ArgumentParser(prog="") XVector.add_class_args(parser, skip=set(["in_feats"])) - Encoder.add_class_args(parser, prefix="resnet_enc", skip=set(["head_channels"])) - # parser.link_arguments("in_feats", "resnet_enc.in_feats", apply_on="parse") - # parser.link_arguments("norm_layer", "encoder_net.norm_layer", apply_on="parse") - + Encoder.add_class_args(parser, + prefix="resnet_enc", + skip=set(["head_channels"])) if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) add_argparse_args = add_class_args @@ -168,9 +173,10 @@ def add_finetune_args(parser, prefix=None): parser = ArgumentParser(prog="") XVector.add_finetune_args(parser) - Encoder.add_finetune_args( - parser, prefix="resnet_enc", skip=set(["head_channels"]) - ) + Encoder.add_finetune_args(parser, + prefix="resnet_enc", + skip=set(["head_channels"])) if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/__init__.py b/hyperion/torch/narchs/__init__.py index c8504425..5f333fc8 100644 --- a/hyperion/torch/narchs/__init__.py +++ b/hyperion/torch/narchs/__init__.py @@ -26,3 +26,5 @@ from .tdnn_factory import TDNNFactory from .torch_na_loader import TorchNALoader from .transformer_encoder_v1 import TransformerEncoderV1 +from .rnn_transducer_decoder import RNNTransducerDecoder +from .rnn_encoder import RNNEncoder diff --git a/hyperion/torch/narchs/rnn_encoder.py b/hyperion/torch/narchs/rnn_encoder.py new file mode 100644 index 00000000..dcf02564 --- /dev/null +++ b/hyperion/torch/narchs/rnn_encoder.py @@ -0,0 +1,281 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import math +from typing import Dict, Optional, Union, Tuple + +import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +import torch +import torch.nn as nn +from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence + +from ..layer_blocks 
import TransformerConv2dSubsampler as Subsampler
+from ..layers import ActivationFactory as AF
+#from ..layers import NormLayer1dFactory as NLF
+from ..utils import seq_lengths_to_mask
+from ...utils.misc import filter_func_args
+from .net_arch import NetArch
+
+
+class RNNEncoder(NetArch):
+    """ RNN Encoder network
+
+    Attributes:
+      in_feats: input features
+      hid_feats: hidden features in RNN layers
+      out_feats: output features, if 0 we remove last projection layer
+      num_layers: number of RNN layers
+      proj_feats: projection features in LSTM layers
+      rnn_type: type of RNN in [lstm, gru]
+      bidirectional: whether RNN layers are bidirectional
+      dropout_rate: dropout rate
+      subsample_input: whether to subsample the input features time dimension x4
+      subsampling_act: activation function of the subsampling block
+    """
+
+    def __init__(self,
+                 in_feats: int,
+                 hid_feats: int,
+                 out_feats: int,
+                 num_layers: int,
+                 proj_feats: int = 0,
+                 rnn_type: str = "lstm",
+                 bidirectional: bool = False,
+                 dropout_rate: float = 0.0,
+                 subsample_input: bool = False,
+                 subsampling_act: str = "relu6"):
+        super().__init__()
+        if rnn_type != "lstm":
+            proj_feats = 0
+
+        self.in_feats = in_feats
+        self.hid_feats = hid_feats
+        self.out_feats = out_feats
+        self.num_layers = num_layers
+        self.proj_feats = proj_feats
+        self.rnn_type = rnn_type
+        self.bidirectional = bidirectional
+        self.subsample_input = subsample_input
+        self.subsampling_act = subsampling_act
+
+        rnn_feats = hid_feats if proj_feats == 0 else proj_feats
+        if subsample_input:
+            # the subsampler creates its hidden activation internally from the
+            # activation name
+            self.subsampler = Subsampler(in_feats,
+                                         hid_feats,
+                                         hid_act=subsampling_act)
+            lstm_in_dim = hid_feats
+        else:
+            self.subsampler = None
+            lstm_in_dim = in_feats
+
+        if rnn_type == "lstm":
+            self.rnn = nn.LSTM(
+                input_size=lstm_in_dim,
+                hidden_size=hid_feats,
+                num_layers=num_layers,
+                bias=True,
+                proj_size=proj_feats,
+                batch_first=True,
+                dropout=dropout_rate,
+                bidirectional=bidirectional,
+            )
+        else:
+            self.rnn = nn.GRU(
+                input_size=lstm_in_dim,
+                hidden_size=hid_feats,
+                num_layers=num_layers,
+                bias=True,
+                batch_first=True,
+                dropout=dropout_rate,
+                bidirectional=bidirectional,
+            )
+
+        if out_feats > 0:
+            self.output = nn.Sequential(
+                nn.Dropout(p=dropout_rate),
+                nn.Linear(rnn_feats, out_feats),
+            )
+
+    def forward(self, x: torch.Tensor,
+                x_lengths: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        if self.subsample_input:
+            t1 = x.size(1)
+            x = self.subsampler(x)
+            t2 = x.size(1)
+            x_lengths = torch.div(t2 * x_lengths, t1, rounding_mode="floor")
+
+        x = pack_padded_sequence(input=x,
+                                 lengths=x_lengths.cpu(),
+                                 batch_first=True,
+                                 enforce_sorted=True)
+        x, _ = self.rnn(x)
+        x, _ = pad_packed_sequence(x, batch_first=True)
+        if self.out_feats > 0:
+            x = self.output(x)
+
+        return x, x_lengths
+
+    def in_context(self):
+        return (self._context, self._context)
+
+    def in_shape(self):
+        return (None, None, self.in_feats)
+
+    def out_shape(self, in_shape=None):
+        out_feats = self.out_feats if self.out_feats > 0 else (
+            self.proj_feats if self.proj_feats > 0 else self.hid_feats)
+
+        if in_shape is None:
+            return (None, None, out_feats)
+
+        assert len(in_shape) == 3
+        return (*in_shape[:2], out_feats)
+
+    def get_config(self):
+        config = filter_func_args(RNNEncoder.__init__, self.__dict__)
+        base_config = super().get_config()
+        base_config.update(config)
+        return base_config
+        #return dict(list(base_config.items()) + list(config.items()))
+
+    def change_config(self, override_dropouts, dropout_rate):
+        if override_dropouts:
+            logging.info("changing RNNEncoder dropouts")
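+            # change_dropouts is assumed to come from the NetArch/TorchModel
+            # base class; it resets the dropout probabilities of the module.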
+            self.change_dropouts(dropout_rate)
+
+    @staticmethod
+    def filter_args(**kwargs):
+        args = filter_func_args(RNNEncoder.__init__, kwargs)
+        return args
+
+    @staticmethod
+    def add_class_args(parser, prefix=None, skip=set()):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        if "in_feats" not in skip:
+            parser.add_argument("--in-feats",
+                                type=int,
+                                required=True,
+                                help=("input feature dimension"))
+
+        parser.add_argument(
+            "--hid-feats",
+            default=512,
+            type=int,
+            help=("num of hidden dimensions of RNN layers"),
+        )
+
+        parser.add_argument(
+            "--out-feats",
+            default=512,
+            type=int,
+            help=
+            ("number of output dimensions of the encoder, if 0 output projection is removed"
+             ),
+        )
+
+        parser.add_argument(
+            "--proj-feats",
+            default=512,
+            type=int,
+            help=("projection features of LSTM layers"),
+        )
+
+        parser.add_argument(
+            "--num-layers",
+            default=5,
+            type=int,
+            help=("number of RNN layers"),
+        )
+
+        parser.add_argument(
+            "--in-kernel-size",
+            default=3,
+            type=int,
+            help=("kernel size of input convolution"),
+        )
+
+        parser.add_argument(
+            "--rnn-type",
+            default="lstm",
+            choices=[
+                "lstm",
+                "gru",
+            ],
+            help=("RNN type in [lstm, gru]"),
+        )
+
+        parser.add_argument(
+            "--bidirectional",
+            default=False,
+            action=ActionYesNo,
+            help="whether to use bidirectional RNN",
+        )
+
+        parser.add_argument(
+            "--subsample-input",
+            default=False,
+            action=ActionYesNo,
+            help="whether to subsample input features x4",
+        )
+        parser.add_argument("--subsampling-act",
+                            default="relu6",
+                            help="activation for subsampler block")
+
+        if "dropout_rate" not in skip:
+            parser.add_argument("--dropout-rate",
+                                default=0,
+                                type=float,
+                                help="dropout probability")
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))
+
+    @staticmethod
+    def filter_finetune_args(**kwargs):
+
+        valid_args = (
+            "override_dropouts",
+            "dropout_rate",
+        )
+        args = dict((k, kwargs[k]) for k in valid_args if k in kwargs)
+        return args
+
+    @staticmethod
+    def add_finetune_args(parser, prefix=None, skip=set([])):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        try:
+            parser.add_argument(
+                "--override-dropouts",
+                default=False,
+                action=ActionYesNo,
+                help=(
+                    "whether to use the dropout probabilities passed in the "
+                    "arguments instead of the defaults in the pretrained model."
+
+    @staticmethod
+    def add_finetune_args(parser, prefix=None, skip=set([])):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        try:
+            parser.add_argument(
+                "--override-dropouts",
+                default=False,
+                action=ActionYesNo,
+                help=(
+                    "whether to use the dropout probabilities passed in the "
+                    "arguments instead of the defaults in the pretrained model."
+                ),
+            )
+        except Exception:
+            # the argument may already exist in the parser
+            pass
+
+        try:
+            parser.add_argument("--dropout-rate",
+                                default=0,
+                                type=float,
+                                help="dropout probability")
+        except Exception:
+            # the argument may already exist in the parser
+            pass
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))

diff --git a/hyperion/torch/narchs/rnn_transducer_decoder.py b/hyperion/torch/narchs/rnn_transducer_decoder.py
index ef153776..8f1a60be 100644
--- a/hyperion/torch/narchs/rnn_transducer_decoder.py
+++ b/hyperion/torch/narchs/rnn_transducer_decoder.py
@@ -4,20 +4,34 @@
 """

 from jsonargparse import ActionParser, ArgumentParser
+from typing import Optional, Dict, List, Tuple
+from dataclasses import dataclass

 import torch
 import torch.nn as nn
+import torchaudio
+import torchaudio.functional

 try:
     import k2
 except ModuleNotFoundError:
     from ...utils import dummy_k2 as k2

-from ...utils import filter_func_args
+from ...utils.misc import filter_func_args
+from ...utils.text import add_sos
 from ..layer_blocks import TransducerPredictor as Predictor, TransducerJoiner as Joiner
 from .net_arch import NetArch


+@dataclass
+class Hypothesis:
+    ys: List[int]  # predicted sequences
+    log_prob: float  # log prob of ys
+
+    # Optional LSTM predictor state.
+    pred_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None
+
+
 class RNNTransducerDecoder(NetArch):
     """ RNN-T Decoder composed of Predictor and Joiner networks
     Implementation based on
@@ -70,16 +84,13 @@ def forward(self, x: torch.Tensor, x_lengths: torch.Tensor,
         # get y_lengths
         row_splits = y.shape.row_splits(1)
         y_lengths = row_splits[1:] - row_splits[:-1]
-
         # shift y adding <sos> token
         sos_y = add_sos(y, sos_id=self.blank_id)
         sos_y_padded = sos_y.pad(mode="constant", padding_value=self.blank_id)
         sos_y_padded = sos_y_padded.to(torch.int64)
-
         # apply predictor and joiner
         pred_out, _ = self.predictor(sos_y_padded)
         logits = self.joiner(x, pred_out)
-
         # rnnt_loss requires 0 padded targets
         # Note: y does not start with SOS
         y_padded = y.pad(mode="constant", padding_value=0)
@@ -89,7 +100,438 @@ def forward(self, x: torch.Tensor, x_lengths: torch.Tensor,
             targets=y_padded.to(torch.int32),
             logit_lengths=x_lengths,
             target_lengths=y_lengths,
-            blank=blank_id,
+            blank=self.blank_id,
             reduction="sum",
         )
-        return loss
+        return logits, loss
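
The loss call above appears to follow torchaudio's RNN-T loss interface (the call site itself sits above this hunk, and the later configs select rnnt_loss: torchaudio). As a reference for the expected shapes, an illustrative, self-contained call with made-up sizes:

    import torch
    import torchaudio.functional as taF

    N, T, U, V = 2, 10, 4, 32                 # batch, frames, target len, vocab
    logits = torch.randn(N, T, U + 1, V)      # joiner output over (t, u) lattice
    targets = torch.randint(1, V, (N, U), dtype=torch.int32)
    logit_lengths = torch.tensor([T, T - 2], dtype=torch.int32)
    target_lengths = torch.tensor([U, U - 1], dtype=torch.int32)
    loss = taF.rnnt_loss(logits, targets, logit_lengths, target_lengths,
                         blank=0, reduction="sum")
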
+
+    def decode(self,
+               x: torch.Tensor,
+               x_lengths: torch.Tensor = None,
+               method="time_sync_beam_search",
+               beam_width: int = 5,
+               max_sym_per_frame: int = 3,
+               max_sym_per_utt: int = 1000) -> List[int]:
+        if method == "time_sync_beam_search":
+            return self.decode_time_sync_beam_search(x,
+                                                     x_lengths,
+                                                     beam_width=beam_width)
+        elif method == "align_length_sync_beam_search":
+            return self.decode_align_length_sync_beam_search(
+                x,
+                x_lengths,
+                beam_width=beam_width,
+                max_sym_per_utt=max_sym_per_utt)
+        elif method == "greedy":
+            return self.decode_greedy(x,
+                                      x_lengths,
+                                      max_sym_per_frame=max_sym_per_frame,
+                                      max_sym_per_utt=max_sym_per_utt)
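
The dispatcher exposes three search strategies: greedy emits the single best label per step, time-synchronous beam search keeps a beam per input frame, and align-length-synchronous beam search advances frame index and label count together. A hedged sketch of driving it (decoder and x_enc are assumed to exist; batch size must be 1):

    tokens = decoder.decode(x_enc, method="greedy")
    tokens = decoder.decode(x_enc, method="time_sync_beam_search",
                            beam_width=5)
    tokens = decoder.decode(x_enc, method="align_length_sync_beam_search",
                            beam_width=5, max_sym_per_utt=1000)
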
+
+    def decode_greedy(self,
+                      x: torch.Tensor,
+                      x_lengths: torch.Tensor = None,
+                      max_sym_per_frame: int = 3,
+                      max_sym_per_utt: int = 1000) -> List[int]:
+        """
+        Args:
+          x: encoder embeddings with shape = (N, T, C)
+        Returns:
+          Decoded tokens
+        """
+        assert x.ndim == 3
+
+        # support only batch_size == 1 for now
+        assert x.size(0) == 1, x.size(0)
+        blank_id = self.blank_id
+        device = x.device
+
+        sos = torch.tensor([blank_id], device=device,
+                           dtype=torch.int64).reshape(1, 1)
+        pred_out, (h, c) = self.predictor(sos)
+        T = x.size(1)
+        t = 0
+        hyp = []
+
+        sym_per_frame = 0
+        sym_per_utt = 0
+
+        while t < T and sym_per_utt < max_sym_per_utt:
+            x_t = x[:, t:t + 1, :]
+            logits = self.joiner(x_t, pred_out)
+            # logits is (1, 1, 1, vocab_size)
+
+            log_prob = logits.log_softmax(dim=-1)  # (1, 1, 1, vocab_size)
+            # TODO: Use logits.argmax()
+            y = log_prob.argmax()
+            if y != blank_id:
+                hyp.append(y.item())
+                y = y.reshape(1, 1)
+                pred_out, (h, c) = self.predictor(y, (h, c))
+
+                sym_per_utt += 1
+                sym_per_frame += 1
+
+            if y == blank_id or sym_per_frame > max_sym_per_frame:
+                sym_per_frame = 0
+                t += 1
+
+        return hyp
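
To make the two counters concrete, an invented trace of the greedy loop (token ids are arbitrary; a blank advances the frame, and max_sym_per_frame caps emissions before a forced advance):

    # t=0: argmax -> 7      emit 7, stay on frame 0, sym_per_frame=1
    # t=0: argmax -> blank  advance to frame 1, sym_per_frame resets
    # t=1: argmax -> 9      emit 9, sym_per_frame=1
    # t=1: argmax -> 4      emit 4, sym_per_frame=2
    # t=1: argmax -> blank  advance to frame 2
    # result so far: hyp == [7, 9, 4]
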
+
+    def decode_time_sync_beam_search(self,
+                                     x: torch.Tensor,
+                                     x_lengths: torch.Tensor = None,
+                                     beam_width: int = 5) -> List[int]:
+        assert x.ndim == 3
+        assert x.size(0) == 1, x.size(0)
+
+        blank_id = self.blank_id
+        device = x.device
+
+        sos = torch.tensor([blank_id], device=device).reshape(1, 1)
+        pred_out, (h, c) = self.predictor(sos)
+        T = x.size(1)
+        t = 0
+        B = [Hypothesis(ys=[blank_id], log_prob=0.0, pred_state=None)]
+        max_u = 20000  # terminate after this number of steps
+        u = 0
+
+        cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor,
+                                                   torch.Tensor]]] = {}
+
+        while t < T and u < max_u:
+            x_t = x[:, t:t + 1, :]
+            A = B
+            B = []
+
+            while u < max_u:
+                y_star = max(A, key=lambda hyp: hyp.log_prob)
+                A.remove(y_star)
+
+                # Note: y_star.ys is unhashable, i.e., cannot be used
+                # as a key into a dict
+                cached_key = "_".join(map(str, y_star.ys))
+
+                if cached_key not in cache:
+                    pred_in = torch.tensor([y_star.ys[-1]],
+                                           device=device).reshape(1, 1)
+
+                    pred_out, pred_state = self.predictor(
+                        pred_in,
+                        y_star.pred_state,
+                    )
+                    cache[cached_key] = (pred_out, pred_state)
+                else:
+                    pred_out, pred_state = cache[cached_key]
+
+                logits = self.joiner(x_t, pred_out)
+                log_prob = logits.log_softmax(dim=-1)
+                # log_prob is (1, 1, 1, vocab_size)
+                log_prob = log_prob.squeeze()
+                # Now log_prob is (vocab_size,)
+
+                # If we choose blank here, add the new hypothesis to B.
+                # Otherwise, add the new hypothesis to A
+
+                # First, choose blank
+                skip_log_prob = log_prob[blank_id]
+                new_y_star_log_prob = y_star.log_prob + skip_log_prob.item()
+                # print("tuAB0", t, u, len(y_star.ys), y_star.log_prob,
+                #       skip_log_prob.item(), new_y_star_log_prob)
+                # ys[:] returns a copy of ys
+                new_y_star = Hypothesis(
+                    ys=y_star.ys[:],
+                    log_prob=new_y_star_log_prob,
+                    # Caution: Use y_star.pred_state here
+                    pred_state=y_star.pred_state,
+                )
+                B.append(new_y_star)
+
+                topk_log_prob = log_prob.topk(beam_width, dim=-1)
+
+                # Second, choose other labels
+                #for i, v in enumerate(log_prob.tolist()):
+                for v, i in zip(*topk_log_prob):
+                    v = v.item()
+                    i = i.item()
+                    if i == blank_id:
+                        continue
+                    new_ys = y_star.ys + [i]
+                    new_log_prob = y_star.log_prob + v
+                    new_hyp = Hypothesis(
+                        ys=new_ys,
+                        log_prob=new_log_prob,
+                        pred_state=pred_state,
+                    )
+                    A.append(new_hyp)
+
+                u += 1
+                # check whether B contains more than beam_width elements that
+                # are more probable than the most probable in A
+                A_most_probable = max(A, key=lambda hyp: hyp.log_prob)
+                #print("tuAB1", t, u, len(A), A_most_probable.log_prob, len(B))
+                B = sorted(
+                    [
+                        hyp
+                        for hyp in B if hyp.log_prob > A_most_probable.log_prob
+                    ],
+                    key=lambda hyp: hyp.log_prob,
+                    reverse=True,
+                )
+                # print("tuAB2",
+                #       t,
+                #       u,
+                #       len(A),
+                #       A_most_probable.log_prob,
+                #       len(B),
+                #       flush=True)
+                if len(B) >= beam_width:
+                    B = B[:beam_width]
+                    break
+            t += 1
+
+        best_hyp = max(B,
+                       key=lambda hyp: hyp.log_prob / max(1, len(hyp.ys[1:])))
+        ys = best_hyp.ys[1:]  # [1:] to remove the blank
+        return ys
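
The next method implements an align-length-synchronous search: hypotheses are grouped by the sum i = t + u, so at outer step i a hypothesis that has already emitted u labels reads encoder frame t = i - u, and hypotheses that consume the last frame are collected in F as finalists. A toy illustration of the diagonal indexing (values invented; <b> stands for the leading blank):

    # i = 5, beam contains:
    #   ys=[<b>, 7, 9]        -> u=2, reads frame t=3
    #   ys=[<b>, 7]           -> u=1, reads frame t=4
    #   ys=[<b>, 7, 9, 4, 2]  -> u=4, reads frame t=1
    # every hypothesis therefore moves through the (t, u) lattice on the
    # same anti-diagonal, which is what makes the search "length sync".
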
+
+    def decode_align_length_sync_beam_search(
+            self,
+            x: torch.Tensor,
+            x_lengths: torch.Tensor,
+            beam_width: int = 5,
+            max_sym_per_utt: int = 1000) -> List[int]:
+        assert x.ndim == 3
+        assert x.size(0) == 1, x.size(0)
+
+        blank_id = self.blank_id
+        device = x.device
+
+        sos = torch.tensor([blank_id], device=device).reshape(1, 1)
+        pred_out, (h, c) = self.predictor(sos)
+        T = x.size(1)
+        #t = 0
+        B = [Hypothesis(ys=[blank_id], log_prob=0.0, decoder_state=None)]
+        #max_u = 20000 # terminate after this number of steps
+        #u = 0
+
+        cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor,
+                                                   torch.Tensor]]] = {}
+        F = []
+        #for t < T and u < max_u:
+        for i in range(T + max_sym_per_utt):
+            A = []
+            for y_star in B:
+                #while u < max_u:
+                u = len(y_star.ys) - 1
+                t = i - u
+                if t >= T:
+                    continue
+
+                #y_star = max(A, key=lambda hyp: hyp.log_prob)
+                #A.remove(y_star)
+                x_t = x[:, t:t + 1, :]
+                # Note: y_star.ys is unhashable, i.e., cannot be used
+                # as a key into a dict
+                cached_key = "_".join(map(str, y_star.ys))
+
+                if cached_key not in cache:
+                    pred_in = torch.tensor([y_star.ys[-1]],
+                                           device=device).reshape(1, 1)
+
+                    pred_out, pred_state = self.predictor(
+                        pred_in,
+                        y_star.pred_state,
+                    )
+                    cache[cached_key] = (pred_out, pred_state)
+                else:
+                    pred_out, pred_state = cache[cached_key]
+
+                logits = self.joiner(x_t, pred_out)
+                log_prob = logits.log_softmax(dim=-1)  # (1, 1, 1, vocab_size)
+                log_prob = log_prob.squeeze()  # (vocab_size,)
+
+                # First, choose blank
+                skip_log_prob = log_prob[blank_id]
+                new_y_star_log_prob = y_star.log_prob + skip_log_prob.item()
+                # print("tuAB0", t, u, len(y_star.ys), y_star.log_prob,
+                #       skip_log_prob.item(), new_y_star_log_prob)
+                # ys[:] returns a copy of ys
+                new_y_star = Hypothesis(
+                    ys=y_star.ys[:],
+                    log_prob=new_y_star_log_prob,
+                    # Caution: Use y_star.decoder_state here
+                    pred_state=y_star.pred_state,
+                )
+                A.append(new_y_star)
+                if t == T - 1:
+                    F.append(y_star)
+
+                topk_log_prob = log_prob.topk(beam_width, dim=-1)
+
+                # Second, choose other labels
+                #for i, v in enumerate(log_prob.tolist()):
+                for v, i in zip(*topk_log_prob):
+                    v = v.item()
+                    i = i.item()
+                    if i == blank_id:
+                        continue
+                    new_ys = y_star.ys + [i]
+                    new_log_prob = y_star.log_prob + v
+                    new_hyp = Hypothesis(
+                        ys=new_ys,
+                        log_prob=new_log_prob,
+                        pred_state=pred_state,
+                    )
+                    A.append(new_hyp)
+
+            # check whether B contains more than beam_width elements that are
+            # more probable than the most probable in A
+            #A_most_probable = max(A, key=lambda hyp: hyp.log_prob)
+            #print("tuAB1", t, u, len(A), A_most_probable.log_prob, len(B))
+            B0 = sorted(
+                [hyp for hyp in A],
+                key=lambda hyp: hyp.log_prob,
+                reverse=True,
+            )
+            B = []
+            B_ys = set()
+            for hyp in B0:
+                if hyp.ys not in B_ys:
+                    B.append(hyp)
+                    B_ys.add(hyp.ys)
+            # print("tuAB2",
+            #       t,
+            #       u,
+            #       len(A),
+            #       A_most_probable.log_prob,
+            #       len(B),
+            #       flush=True)
+            if len(B) >= beam_width:
+                B = B[:beam_width]
+                break
+
+        best_hyp = max(F,
+                       key=lambda hyp: hyp.log_prob / max(1, len(hyp.ys[1:])))
+        ys = best_hyp.ys[1:]  # [1:] to remove the blank
+        return ys
+
+    def change_config(
+        self,
+        override_dropouts=False,
+        embed_dropout_rate: float = 0.0,
+        rnn_dropout_rate: float = 0.0,
+    ):
+        logging.info("changing decoder config")
+        self.predictor.change_config(override_dropouts, embed_dropout_rate,
+                                     rnn_dropout_rate)
+
+    def get_config(self):
+
+        config = {
+            "in_feats": self.in_feats,
+            "vocab_size": self.vocab_size,
+            "embed_dim": self.embed_dim,
+            "num_pred_layers": self.num_pred_layers,
+            "pred_hid_feats": self.pred_hid_feats,
+            "embed_dropout_rate": self.embed_dropout_rate,
+            "rnn_dropout_rate": self.rnn_dropout_rate,
+            "blank_id": self.blank_id,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @staticmethod
+    def filter_args(**kwargs):
+        args = filter_func_args(RNNTransducerDecoder.__init__, kwargs)
+        return args
+
+    @staticmethod
+    def filter_finetune_args(**kwargs):
+        args = filter_func_args(RNNTransducerDecoder.change_config, kwargs)
+        return args
+
+    @staticmethod
+    def add_class_args(parser,
+                       prefix=None,
+                       skip=set(["in_feats", "blank_id", "vocab_size"])):
+
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        if "in_feats" not in skip:
+            parser.add_argument("--in-feats",
+                                type=int,
+                                required=True,
+                                help=("input feature dimension"))
+        if "blank_id" not in skip:
+            parser.add_argument("--blank-id",
+                                type=int,
+                                default=0,
+                                help=("blank id from tokenizer model"))
+        if "vocab_size" not in skip:
+            parser.add_argument("--vocab-size",
+                                type=int,
+                                required=True,
+                                help=("output prediction dimension"))
+        parser.add_argument("--embed-dim",
+                            default=1024,
+                            type=int,
+                            help=("token embedding dimension"))
+        parser.add_argument(
+            "--embed-dropout-rate",
+            default=0.0,
+            type=float,
+            help=("dropout prob for predictor input embeddings"))
+        parser.add_argument("--rnn-dropout-rate",
+                            default=0.0,
+                            type=float,
+                            help=("dropout prob for decoder RNN"))
+        parser.add_argument(
+            "--rnn-type",
+            default="lstm",
+            choices=["lstm", "gru"],
+            help=(
+                "type of recurrent network for the predictor in [lstm, gru]"))
+
+        parser.add_argument("--num-pred-layers",
+                            default=2,
+                            type=int,
+                            help="""number of layers of the predictor""")
+
+        parser.add_argument("--pred-hid-feats",
+                            default=512,
+                            type=int,
+                            help="""hidden features of the predictor""")
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))
+
+    @staticmethod
+    def add_finetune_args(parser, prefix=None, skip=set()):
+
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        parser.add_argument(
+            "--override-dropouts",
+            default=False,
+            action=ActionYesNo,
+            help=(
+                "whether to use the dropout probabilities passed in the "
+                "arguments instead of the defaults in the pretrained model."))
+        parser.add_argument("--embed-dropout-rate",
+                            default=0.0,
+                            type=float,
+                            help=("dropout prob for decoder input embeddings"))
+        parser.add_argument("--rnn-dropout-rate",
+                            default=0.0,
+                            type=float,
+                            help=("dropout prob for decoder RNN"))
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))

diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py
index 1d01e02b..540697f7 100644
--- a/hyperion/torch/torch_model.py
+++ b/hyperion/torch/torch_model.py
@@ -15,6 +15,7 @@


 class TorchModel(nn.Module):
+
     def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
         torch_model_registry[cls.__name__] = cls
@@ -56,6 +57,8 @@ def change_dropouts(self, dropout_rate):
         for module in self.modules():
             if isinstance(module, nn.modules.dropout._DropoutNd):
                 module.p = dropout_rate
+            if isinstance(module, nn.RNNBase):
+                module.dropout = dropout_rate

         if hasattr(self, "dropout_rate"):
             assert dropout_rate == 0 or self.dropout_rate > 0
@@ -67,7 +70,6 @@ def train_mode(self):

     @train_mode.setter
     def train_mode(self, mode):
-        print("hola3", mode, flush=True)
         self.set_train_mode(mode)

     def set_train_mode(self, mode):
@@ -106,9 +108,10 @@ def save(self, file_path):
             os.makedirs(file_dir, exist_ok=True)

         config = self.get_config()
-        torch.save(
-            {"model_cfg": self.get_config(), "model_state_dict": self.state_dict()}
-        )
+        torch.save({
+            "model_cfg": self.get_config(),
+            "model_state_dict": self.state_dict()
+        }, file_path)

     @staticmethod
     def _load_cfg_state_dict(file_path=None, cfg=None, state_dict=None):
@@ -128,7 +131,8 @@ def _load_cfg_state_dict(file_path=None, cfg=None, state_dict=None):

     @classmethod
     def load(cls, file_path=None, cfg=None, state_dict=None):
-        cfg, state_dict = TorchModel._load_cfg_state_dict(file_path, cfg, state_dict)
+        cfg, state_dict = TorchModel._load_cfg_state_dict(
+            file_path, cfg, state_dict)

         model = cls(**cfg)
         if state_dict is not None:
@@ -143,15 +147,14 @@ def get_loss(self):

     @property
     def device(self):
-        devices = {param.device for param in self.parameters()} | {
-            buf.device for buf in self.buffers()
-        }
+        devices = {param.device
+                   for param in self.parameters()
+                   } | {buf.device
+                        for buf in self.buffers()}
         if len(devices) != 1:
             raise RuntimeError(
                 "Cannot determine device: {} different devices found".format(
-                    len(devices)
-                )
-            )
+                    len(devices)))
         return next(iter(devices))

@@ -213,4 +216,5 @@ def auto_load(file_path, extra_objs={}, map_location=None):
         # if it failed the 3 trials raise exception
         raise err
     # remove module prefix when is trained with dataparallel
-    state_dict = ODict((p.sub("", k), v) for k, v in state_dict.items())
+    state_dict = ODict(
+        (p.sub("", k), v) for k, v in state_dict.items())
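
One detail of the TorchModel change above: nn.RNNBase modules (LSTM, GRU) expose their inter-layer dropout as a plain float attribute rather than as a child nn.Dropout module, so they need the separate isinstance branch. An illustrative comparison (toy modules, not part of the patch):

    import torch.nn as nn

    rnn = nn.LSTM(input_size=80, hidden_size=64, num_layers=2, dropout=0.2)
    drop = nn.Dropout(p=0.2)
    # after change_dropouts(0.0) both end up updated:
    #   drop.p      -> 0.0  (an nn.modules.dropout._DropoutNd instance)
    #   rnn.dropout -> 0.0  (a float attribute on nn.RNNBase)
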
From 97050f91772146db8aaa30733561951efa295a52 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Fri, 10 Mar 2023 16:58:46 -0500
Subject: [PATCH 087/154] working align length sync dec

---
 egs/librispeech/v1/conf/infer.yaml              | 3 +++
 hyperion/torch/narchs/rnn_transducer_decoder.py | 9 +++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/egs/librispeech/v1/conf/infer.yaml b/egs/librispeech/v1/conf/infer.yaml
index ddfd25e2..cd50a2cb 100644
--- a/egs/librispeech/v1/conf/infer.yaml
+++ b/egs/librispeech/v1/conf/infer.yaml
@@ -1 +1,4 @@
 beam_width: 5
+decoding_method: time_sync_beam_search
+#decoding_method: greedy
+#decoding_method: align_length_sync_beam_search
\ No newline at end of file
diff --git a/hyperion/torch/narchs/rnn_transducer_decoder.py b/hyperion/torch/narchs/rnn_transducer_decoder.py
index 8f1a60be..64c71dcd 100644
--- a/hyperion/torch/narchs/rnn_transducer_decoder.py
+++ b/hyperion/torch/narchs/rnn_transducer_decoder.py
@@ -311,7 +311,7 @@ def decode_align_length_sync_beam_search(
         pred_out, (h, c) = self.predictor(sos)
         T = x.size(1)
         #t = 0
-        B = [Hypothesis(ys=[blank_id], log_prob=0.0, decoder_state=None)]
+        B = [Hypothesis(ys=[blank_id], log_prob=0.0, pred_state=None)]
         #max_u = 20000 # terminate after this number of steps
         #u = 0

@@ -360,7 +360,7 @@ def decode_align_length_sync_beam_search(
                 new_y_star = Hypothesis(
                     ys=y_star.ys[:],
                     log_prob=new_y_star_log_prob,
-                    # Caution: Use y_star.decoder_state here
+                    # Caution: Use y_star.pred_state here
                     pred_state=y_star.pred_state,
                 )
                 A.append(new_y_star)
@@ -397,9 +397,10 @@ def decode_align_length_sync_beam_search(
             B = []
             B_ys = set()
             for hyp in B0:
-                if hyp.ys not in B_ys:
+                hyp_ys = tuple(hyp.ys)  # to make ys hashable
+                if hyp_ys not in B_ys:
                     B.append(hyp)
-                    B_ys.add(hyp.ys)
+                    B_ys.add(hyp_ys)
             # print("tuAB2",
             #       t,
             #       u,

From db14742c3a6fc01c789aa3f53dfe149b3da5d88a Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Tue, 21 Mar 2023 20:55:17 -0400
Subject: [PATCH 088/154] finished refactoring rnn transducer

---
 ...v2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml |  69 +++
 ...v2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml |  69 +++
 ...v2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml |  70 +++
 ...rain_wav2vec2base_rnnt_k2_stage1_v1.0.yaml |  69 +++
 ...n_wav2vec2base_transducer_stage1_v7.1.yaml |  53 +++
 .../v1/conf/wav2vec2base_rnnt_ta_do0.4.yaml   |  16 +
 .../v1/global_conf/config_transducer_v7.1.sh  |  33 ++
 ...config_wav2vec2base_rnnt_k2_pruned_v1.0.sh |  31 ++
 ...config_wav2vec2base_rnnt_k2_pruned_v1.2.sh |  30 ++
 ...config_wav2vec2base_rnnt_k2_pruned_v1.3.sh |  30 ++
 .../config_wav2vec2base_rnnt_k2_v1.0.sh       |  36 ++
 egs/librispeech/v1/run_011_train_asr.sh       |   2 +-
 ...train_asr2.sh => run_011_train_asr_old.sh} |   2 +-
 egs/librispeech/v1/run_030_inference.sh       |   4 +-
 ...inference2.sh => run_030_inference_old.sh} |   4 +-
 hyperion/bin/apply_mvn_select_frames.py       |   5 +-
 hyperion/bin/audio_to_duration.py             |  15 +-
 hyperion/bin/compute_energy_vad.py            |   5 +-
 hyperion/bin/compute_mfcc_feats.py            |   5 +-
 hyperion/bin/copy_feats.py                    |   1 -
 hyperion/bin/decode_wav2transducer.py         |  33 +-
 hyperion/bin/decode_wav2vec2rnn_transducer.py |  35 +-
 ...l_xvec_cosine_scoring_from_adv_test_wav.py |   5 +-
 ...osine_scoring_from_adv_test_wav_wavegan.py |   5 +-
 ...l_xvec_cosine_scoring_from_art_test_wav.py |   7 +-
 ...eval_xvec_cosine_scoring_from_test_wav.py  |   5 +-
 ...sine_scoring_from_transfer_adv_test_wav.py |   5 +-
 ...sine_scoring_from_transfer_art_test_wav.py |   7 +-
 hyperion/bin/extract_wav2vec2xvectors.py      |   7 +-
 hyperion/bin/extract_xvectors_from_feats.py   |   5 +-
 hyperion/bin/extract_xvectors_from_wav.py     |   5 +-
 ...extract_xvectors_slidwin_from_feats.py     |   7 +-
 .../bin/extract_xvectors_slidwin_from_wav.py  |   7 +-
 hyperion/bin/finetune_wav2vec2transducer.py   |  28 +-
 hyperion/bin/finetune_wav2vec2xvector.py      |   5 +-
 .../bin/finetune_xvector_dfr_from_feats.py    |   5 +-
 hyperion/bin/finetune_xvector_dfr_from_wav.py |   5 +-
 hyperion/bin/finetune_xvector_from_feats.py   |   5 +-
 hyperion/bin/finetune_xvector_from_wav.py     |   9 +-
hyperion/bin/make_babble_noise_audio_files.py | 7 +- hyperion/bin/pack_wav_rirs.py | 5 +- hyperion/bin/plot_embedding_tsne.py | 5 +- hyperion/bin/plot_embedding_tsne_per_class.py | 5 +- hyperion/bin/preprocess_audio_files.py | 7 +- hyperion/bin/train_wav2rnn_transducer.py | 24 +- hyperion/bin/train_wav2vec2rnn_transducer.py | 28 +- hyperion/bin/train_wav2vec2transducer.py | 24 +- hyperion/bin/train_wav2vec2xvector.py | 5 +- hyperion/bin/train_xvector_from_feats.py | 5 +- hyperion/bin/train_xvector_from_wav.py | 5 +- hyperion/bin_deprec/ark2hyp.py | 1 - hyperion/bin_deprec/arkvad2nist.py | 1 - hyperion/bin_deprec/compute-gmm-post.py | 3 +- .../bin_deprec/eval-2class-performance.py | 1 - hyperion/bin_deprec/eval-elbo-ubm.py | 1 - .../bin_deprec/eval-q-scoring-homo-gbe.py | 1 - hyperion/bin_deprec/eval-score-norm.py | 1 - hyperion/bin_deprec/h5vad2nist.py | 1 - hyperion/bin_deprec/init-ubm.py | 3 +- hyperion/bin_deprec/scores2lre_format.py | 1 - .../torch-train-conformer-enc-v1-vq-dvae.py | 1 - .../torch-train-conformer-enc-v1-vq-vae.py | 1 - hyperion/bin_deprec/torch-train-dc1d-dvae.py | 1 - hyperion/bin_deprec/torch-train-dc1d-vae.py | 1 - hyperion/bin_deprec/torch-train-dc2d-dvae.py | 1 - hyperion/bin_deprec/torch-train-dc2d-vae.py | 1 - .../bin_deprec/torch-train-resnet1d-dvae.py | 1 - .../bin_deprec/torch-train-resnet1d-vae.py | 1 - .../torch-train-resnet1d-vq-dvae.py | 1 - .../bin_deprec/torch-train-resnet1d-vq-vae.py | 1 - .../bin_deprec/torch-train-resnet2d-dvae.py | 1 - .../bin_deprec/torch-train-resnet2d-vae.py | 5 +- .../torch-train-resnet2d-vq-dvae.py | 1 - .../bin_deprec/torch-train-resnet2d-vq-vae.py | 1 - .../torch-train-transformer-enc-v1-dvae.py | 1 - .../torch-train-transformer-enc-v1-vae.py | 1 - .../torch-train-transformer-enc-v1-vq-dvae.py | 1 - .../torch-train-transformer-enc-v1-vq-vae.py | 1 - hyperion/bin_deprec/torch-train-xvector.py | 1 - .../bin_deprec/train-q-scoring-homo-gbe.py | 1 - hyperion/bin_deprec/vectors2scores.py | 1 - .../bin_deprec2/apply-mvn-select-frames.py | 5 +- hyperion/bin_deprec2/compute-mfcc-feats.py | 5 +- hyperion/bin_deprec2/copy-feats.py | 1 - hyperion/bin_deprec2/eval-cos-1vs1.py | 1 - hyperion/bin_deprec2/eval-linear-gbe-up.py | 1 - hyperion/bin_deprec2/eval-linear-gbe.py | 1 - hyperion/bin_deprec2/eval-linear-svmc.py | 1 - .../bin_deprec2/eval-logistic-regression.py | 1 - hyperion/bin_deprec2/eval-plda-1vs1.py | 1 - hyperion/bin_deprec2/eval-plda-nvs1.py | 1 - hyperion/bin_deprec2/merge-h5-files.py | 1 - hyperion/bin_deprec2/pack-audio-files.py | 3 +- hyperion/bin_deprec2/plot-vector-hist.py | 1 - hyperion/bin_deprec2/rttm-to-bin-vad.py | 1 - hyperion/bin_deprec2/segments-to-bin-vad.py | 5 +- .../torch-adv-finetune-xvec-from-wav.py | 5 +- .../bin_deprec2/torch-adv-finetune-xvec.py | 5 +- .../bin_deprec2/torch-compute-mfcc-feats.py | 5 +- hyperion/bin_deprec2/torch-eval-vae.py | 1 - ...osine-scoring-from-adv-test-wav-wavegan.py | 5 +- ...l-xvec-cosine-scoring-from-adv-test-wav.py | 5 +- ...l-xvec-cosine-scoring-from-art-test-wav.py | 7 +- ...-eval-xvec-cosine-scoring-from-test-wav.py | 5 +- ...sine-scoring-from-transfer-adv-test-wav.py | 5 +- ...sine-scoring-from-transfer-art-test-wav.py | 7 +- .../torch-eval-xvec-logits-from-wav.py | 5 +- ...rch-extract-xvectors-from-wav-with-rttm.py | 5 +- ...torch-extract-xvectors-slidwin-from-wav.py | 7 +- .../torch-extract-xvectors-slidwin.py | 5 +- .../torch-extract-xvectors-vae-preproc.py | 5 +- .../bin_deprec2/torch-extract-xvectors.py | 5 +- ...ch-generate-adv-attacks-xvector-classif.py | 7 +- 
...orch-generate-adv-attacks-xvector-verif.py | 7 +- hyperion/bin_deprec2/torch-train-dc1d-ae.py | 1 - hyperion/bin_deprec2/torch-train-dvae.py | 5 +- .../torch-train-efficientnet-xvec-from-wav.py | 5 +- .../torch-train-efficientnet-xvec.py | 5 +- .../torch-train-resnet-xvec-from-wav.py | 5 +- .../bin_deprec2/torch-train-resnet-xvec.py | 5 +- .../torch-train-resnet1d-xvec-from-wav.py | 5 +- .../torch-train-spinenet-xvec-from-wav.py | 5 +- .../torch-train-tdnn-xvec-from-wav.py | 5 +- hyperion/bin_deprec2/torch-train-tdnn-xvec.py | 5 +- ...orch-train-transformer-xvec-v1-from-wav.py | 5 +- .../torch-train-transformer-xvec-v1.py | 5 +- hyperion/bin_deprec2/torch-train-vae.py | 5 +- hyperion/bin_deprec2/torch-train-vq-dvae.py | 5 +- hyperion/bin_deprec2/torch-train-vq-vae.py | 5 +- hyperion/bin_deprec2/train-cw-up.py | 1 - hyperion/bin_deprec2/train-cw.py | 1 - hyperion/bin_deprec2/train-gaussianizer.py | 1 - hyperion/bin_deprec2/train-lda.py | 1 - hyperion/bin_deprec2/train-linear-gbe-up.py | 1 - hyperion/bin_deprec2/train-linear-gbe.py | 1 - hyperion/bin_deprec2/train-linear-svmc.py | 1 - .../bin_deprec2/train-logistic-regression.py | 1 - hyperion/bin_deprec2/train-mvn.py | 1 - hyperion/bin_deprec2/train-nda.py | 1 - hyperion/bin_deprec2/train-pca.py | 1 - hyperion/bin_deprec2/train-plda.py | 1 - hyperion/np/score_norm/adapt_s_norm.py | 1 + .../adv_attacks/random_attack_factory.py | 3 +- hyperion/torch/adv_defenses/wave_gan_white.py | 4 +- hyperion/torch/data/__init__.py | 4 +- hyperion/torch/data/audio_dataset.py | 19 +- .../data/class_weighted_embed_sampler.py | 3 +- .../data/class_weighted_seg_chunk_sampler.py | 3 +- hyperion/torch/data/embed_dataset.py | 3 +- hyperion/torch/data/embed_sampler.py | 3 +- hyperion/torch/data/feat_seq_dataset.py | 3 +- hyperion/torch/data/hyp_sampler.py | 3 +- .../torch/data/paired_feat_seq_dataset.py | 1 - hyperion/torch/data/seg_chunk_sampler.py | 3 +- hyperion/torch/data/seg_sampler.py | 3 +- hyperion/torch/data/weighted_embed_sampler.py | 1 - hyperion/torch/data/weighted_seq_sampler.py | 3 +- hyperion/torch/layer_blocks/__init__.py | 4 +- hyperion/torch/layer_blocks/etdnn_blocks.py | 1 - .../torch/layer_blocks/resetdnn_blocks.py | 1 - .../torch/layer_blocks/transducer_joiner.py | 106 ++--- .../layer_blocks/transducer_predictor.py | 274 ++++++----- hyperion/torch/layers/global_pool.py | 1 - hyperion/torch/layers/mvn.py | 3 +- hyperion/torch/layers/pool_factory.py | 3 +- hyperion/torch/layers/spec_augment.py | 3 +- hyperion/torch/loggers/logger.py | 1 - hyperion/torch/loggers/logger_list.py | 1 - hyperion/torch/lr_schedulers/factory.py | 3 +- hyperion/torch/models/__init__.py | 5 +- hyperion/torch/models/transducer/__init__.py | 4 +- hyperion/torch/models/transducer/conformer.py | 6 +- hyperion/torch/models/transducer/decoder.py | 3 +- hyperion/torch/models/transducer/joiner.py | 2 +- .../models/transducer/lstm_rnn_transducer.py | 4 +- .../models/transducer/rnn_rnn_transducer.py | 8 +- .../torch/models/transducer/rnn_transducer.py | 30 +- .../torch/models/transducer/transducer.py | 8 +- .../torch/models/transducer/transformer.py | 4 +- hyperion/torch/models/tvector/tvector.py | 3 +- .../torch/models/wav2transducer/__init__.py | 3 +- .../wav2transducer/hf_wav2rnn_transducer.py | 31 +- .../wav2transducer/hf_wav2transducer.py | 9 +- .../wav2transducer/hf_wav2vec2_transducer.py | 7 +- .../hf_wav2vec2rnn_rnn_transducer.py | 6 +- .../hf_wav2vec2rnn_transducer.py | 6 +- .../wav2transducer/wav2rnn_transducer.py | 4 +- .../hf_hubert2resnet1d_xvector.py | 3 +- 
.../hf_wav2vec2resnet1d_xvector.py | 3 +- .../models/wav2xvectors/hf_wav2xvector.py | 3 +- .../wav2xvectors/hf_wavlm2resnet1d_xvector.py | 3 +- .../wav2xvectors/wav2resnet1d_xvector.py | 3 +- .../models/wav2xvectors/wav2resnet_xvector.py | 3 +- .../torch/models/wav2xvectors/wav2xvector.py | 3 +- .../models/xvectors/efficient_net_xvector.py | 3 +- .../torch/models/xvectors/resnet1d_xvector.py | 3 +- .../torch/models/xvectors/resnet_xvector.py | 3 +- .../torch/models/xvectors/spinenet_xvector.py | 3 +- .../torch/models/xvectors/tdnn_xvector.py | 3 +- .../models/xvectors/transformer_xvector_v1.py | 3 +- hyperion/torch/models/xvectors/xvector.py | 3 +- hyperion/torch/narchs/__init__.py | 4 +- hyperion/torch/narchs/audio_feats_mvn.py | 3 +- hyperion/torch/narchs/classif_head.py | 3 +- hyperion/torch/narchs/conformer_encoder_v1.py | 3 +- hyperion/torch/narchs/dc1d_decoder.py | 3 +- hyperion/torch/narchs/dc1d_encoder.py | 3 +- hyperion/torch/narchs/dc2d_decoder.py | 3 +- hyperion/torch/narchs/dc2d_encoder.py | 3 +- hyperion/torch/narchs/efficient_net.py | 3 +- hyperion/torch/narchs/etdnn.py | 1 - hyperion/torch/narchs/net_arch.py | 1 - hyperion/torch/narchs/resetdnn.py | 1 - hyperion/torch/narchs/resnet.py | 1 - hyperion/torch/narchs/resnet1d_decoder.py | 3 +- hyperion/torch/narchs/resnet1d_encoder.py | 3 +- hyperion/torch/narchs/resnet2d_decoder.py | 3 +- hyperion/torch/narchs/resnet2d_encoder.py | 3 +- hyperion/torch/narchs/rnn_encoder.py | 5 +- .../torch/narchs/rnn_transducer_decoder.py | 442 ++++++++++++++---- hyperion/torch/narchs/spinenet.py | 1 - hyperion/torch/narchs/tdnn.py | 1 - .../torch/narchs/transformer_encoder_v1.py | 3 +- hyperion/torch/optim/factory.py | 3 +- hyperion/torch/tpm/hf/hf_hubert.py | 5 +- hyperion/torch/tpm/hf/hf_wav2vec2.py | 5 +- hyperion/torch/tpm/hf/hf_wav2vec_base.py | 5 +- hyperion/torch/tpm/hf/hf_wavlm.py | 5 +- hyperion/torch/trainers/ae_trainer.py | 3 +- hyperion/torch/trainers/dvae_trainer.py | 3 +- hyperion/torch/trainers/torch_trainer.py | 5 +- hyperion/torch/trainers/transducer_trainer.py | 68 +-- hyperion/torch/trainers/vae_trainer.py | 3 +- hyperion/torch/trainers/vq_dvae_trainer.py | 3 +- hyperion/torch/trainers/vq_vae_trainer.py | 3 +- .../torch/trainers/xvector_adv_trainer.py | 3 +- .../trainers/xvector_adv_trainer_from_wav.py | 3 +- .../trainers/xvector_trainer_deep_feat_reg.py | 3 +- hyperion/torch/utils/ddp.py | 7 +- hyperion/torch/utils/metric_acc.py | 1 - hyperion/utils/__init__.py | 1 + hyperion/utils/hyp_dataclass.py | 31 ++ 242 files changed, 1490 insertions(+), 918 deletions(-) create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml create mode 100644 egs/librispeech/v1/conf/wav2vec2base_rnnt_ta_do0.4.yaml create mode 100644 egs/librispeech/v1/global_conf/config_transducer_v7.1.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh rename 
egs/librispeech/v1/{run_011_train_asr2.sh => run_011_train_asr_old.sh} (98%) rename egs/librispeech/v1/{run_030_inference2.sh => run_030_inference_old.sh} (88%) create mode 100644 hyperion/utils/hyp_dataclass.py diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml new file mode 100644 index 00000000..bdb33845 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 70. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 70. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + decoder: + rnnt_loss: k2_pruned + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml new file mode 100644 index 00000000..cfd41553 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 70. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 70. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + decoder: + rnnt_loss: k2_pruned + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: sgd + lr: 0.005 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml new file mode 100644 index 00000000..2cf2d04c --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml @@ -0,0 +1,70 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 70. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 70. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: sgd + lr: 0.005 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml new file mode 100644 index 00000000..c66a1ca4 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 70. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 70. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + decoder: + rnnt_loss: k2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml new file mode 100644 index 00000000..7381bb01 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml @@ -0,0 +1,53 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 75. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 75. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: wav2vec2base_rnnt_ta_do0.4.yaml +trainer: + optim: + opt_type: sgd + lr: 0.003 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/wav2vec2base_rnnt_ta_do0.4.yaml b/egs/librispeech/v1/conf/wav2vec2base_rnnt_ta_do0.4.yaml new file mode 100644 index 00000000..cfab3fb9 --- /dev/null +++ b/egs/librispeech/v1/conf/wav2vec2base_rnnt_ta_do0.4.yaml @@ -0,0 +1,16 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h +transducer: + decoder: + rnnt_loss: torchaudio + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/librispeech/v1/global_conf/config_transducer_v7.1.sh b/egs/librispeech/v1/global_conf/config_transducer_v7.1.sh new file mode 100644 index 00000000..48f0d363 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_transducer_v7.1.sh @@ -0,0 +1,33 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_transducer_stage1_v7.1.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_transducer_v7.1 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name 
+nnet_s1=$nnet_s1_dir/model_ep0056.pth +nnet_s1=$nnet_s1_dir/model_ep0068.pth +nnet_s1=$nnet_s1_dir/model_ep0090.pth +nnet_s1=$nnet_s1_dir/model_ep0094.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh new file mode 100644 index 00000000..1fd43d23 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh @@ -0,0 +1,31 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_rnnt_k2_pruned.v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0081.pth +nnet_s1=$nnet_s1_dir/model_ep0120.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh new file mode 100644 index 00000000..7cd22d2d --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh @@ -0,0 +1,30 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_rnnt_k2_pruned.v1.2 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0110.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh new file mode 100644 index 00000000..18875086 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh @@ -0,0 +1,30 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_rnnt_k2_pruned.v1.3 +nnet_s1_name=$nnet_name.s1 + 
+nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0100.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh new file mode 100644 index 00000000..ed274e91 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh @@ -0,0 +1,36 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_rnnt_k2.v1.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0041.pth +nnet_s1=$nnet_s1_dir/model_ep0048.pth +nnet_s1=$nnet_s1_dir/model_ep0066.pth +nnet_s1=$nnet_s1_dir/model_ep0106.pth +# nnet_s1=$nnet_s1_dir/model_ep0075.pth +# nnet_s1=$nnet_s1_dir/model_ep0106.pth +# nnet_s1=$nnet_s1_dir/model_ep0646.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh index 3d0e6eb1..99b0065e 100755 --- a/egs/librispeech/v1/run_011_train_asr.sh +++ b/egs/librispeech/v1/run_011_train_asr.sh @@ -47,7 +47,7 @@ if [ $stage -le 1 ]; then $cuda_cmd \ --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu --max-split-size-mb 512 \ - train_wav2vec2transducer.py $nnet_type \ + train_wav2vec2rnn_transducer.py $nnet_type \ --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.audio-file $train_dir/wav.scp \ --data.train.dataset.segments-file $train_dir/utt2spk \ diff --git a/egs/librispeech/v1/run_011_train_asr2.sh b/egs/librispeech/v1/run_011_train_asr_old.sh similarity index 98% rename from egs/librispeech/v1/run_011_train_asr2.sh rename to egs/librispeech/v1/run_011_train_asr_old.sh index 99b0065e..3d0e6eb1 100755 --- a/egs/librispeech/v1/run_011_train_asr2.sh +++ b/egs/librispeech/v1/run_011_train_asr_old.sh @@ -47,7 +47,7 @@ if [ $stage -le 1 ]; then $cuda_cmd \ --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu --max-split-size-mb 512 \ - train_wav2vec2rnn_transducer.py $nnet_type \ + train_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.audio-file $train_dir/wav.scp \ --data.train.dataset.segments-file $train_dir/utt2spk \ diff --git a/egs/librispeech/v1/run_030_inference.sh b/egs/librispeech/v1/run_030_inference.sh index 02b97001..7ed9567a 100755 --- a/egs/librispeech/v1/run_030_inference.sh +++ b/egs/librispeech/v1/run_030_inference.sh @@ -38,10 +38,10 @@ test_data=test_clean # Extracts x-vectors for evaluation -for name in dev_clean dev_other test_clean test_other #$test_data +for name in dev_clean dev_other test_clean test_other do nj=40 - steps_transducer/decode_wav2vec2transducer.sh 
\ + steps_transducer/decode_wav2vec2rnn_transducer.sh \ --cmd "$transducer_cmd --mem 12G" --nj $nj ${transducer_args} \ $nnet data/$name \ $transducer_dir/$name $bpe_model diff --git a/egs/librispeech/v1/run_030_inference2.sh b/egs/librispeech/v1/run_030_inference_old.sh similarity index 88% rename from egs/librispeech/v1/run_030_inference2.sh rename to egs/librispeech/v1/run_030_inference_old.sh index 7ed9567a..02b97001 100755 --- a/egs/librispeech/v1/run_030_inference2.sh +++ b/egs/librispeech/v1/run_030_inference_old.sh @@ -38,10 +38,10 @@ test_data=test_clean # Extracts x-vectors for evaluation -for name in dev_clean dev_other test_clean test_other +for name in dev_clean dev_other test_clean test_other #$test_data do nj=40 - steps_transducer/decode_wav2vec2rnn_transducer.sh \ + steps_transducer/decode_wav2vec2transducer.sh \ --cmd "$transducer_cmd --mem 12G" --nj $nj ${transducer_args} \ $nnet data/$name \ $transducer_dir/$name $bpe_model diff --git a/hyperion/bin/apply_mvn_select_frames.py b/hyperion/bin/apply_mvn_select_frames.py index a2456dc9..53a01d6d 100755 --- a/hyperion/bin/apply_mvn_select_frames.py +++ b/hyperion/bin/apply_mvn_select_frames.py @@ -10,9 +10,6 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import RandomAccessDataReaderFactory as RDRF @@ -21,6 +18,8 @@ from hyperion.np.feats import MeanVarianceNorm as MVN from hyperion.utils import Utt2Info from hyperion.utils.kaldi_matrix import compression_methods +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def process_feats( diff --git a/hyperion/bin/audio_to_duration.py b/hyperion/bin/audio_to_duration.py index 04acb76c..ac8852a4 100755 --- a/hyperion/bin/audio_to_duration.py +++ b/hyperion/bin/audio_to_duration.py @@ -3,22 +3,17 @@ Copyright 2022 Jesus Villalba (Johns Hopkins University) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +import logging import os -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) +import sys import time -import logging import numpy as np - from hyperion.hyp_defs import config_logger -from hyperion.utils import SegmentSet from hyperion.io import SequentialAudioReader as AR +from hyperion.utils import SegmentSet +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def audio_to_duration(audio_file, output_file, **kwargs): diff --git a/hyperion/bin/compute_energy_vad.py b/hyperion/bin/compute_energy_vad.py index 15d74f3a..e4d47ef0 100755 --- a/hyperion/bin/compute_energy_vad.py +++ b/hyperion/bin/compute_energy_vad.py @@ -9,13 +9,12 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.np.feats import EnergyVAD +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def compute_vad(input_path, output_path, write_num_frames, **kwargs): diff --git a/hyperion/bin/compute_mfcc_feats.py b/hyperion/bin/compute_mfcc_feats.py index a83f95d1..c8193e5c 100755 --- a/hyperion/bin/compute_mfcc_feats.py +++ b/hyperion/bin/compute_mfcc_feats.py @@ -9,15 +9,14 @@ import time 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.io import compression_methods
 from hyperion.np.feats import MFCC
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def compute_mfcc_feats(
diff --git a/hyperion/bin/copy_feats.py b/hyperion/bin/copy_feats.py
index 0385cc55..4549caec 100755
--- a/hyperion/bin/copy_feats.py
+++ b/hyperion/bin/copy_feats.py
@@ -12,7 +12,6 @@
 import time
 
 import numpy as np
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import CopyFeats as CF
diff --git a/hyperion/bin/decode_wav2transducer.py b/hyperion/bin/decode_wav2transducer.py
index bbcd0dc7..420f8a9f 100755
--- a/hyperion/bin/decode_wav2transducer.py
+++ b/hyperion/bin/decode_wav2transducer.py
@@ -4,38 +4,29 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-from typing import Dict, List, Tuple
-
-import sentencepiece as spm
-import torch.nn as nn
-
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
-
+import sentencepiece as spm
 import torch
-
+import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.utils import Utt2Info
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.np.augment import SpeechAugment
-
-from hyperion.torch.utils import open_device
-from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch import TorchModelLoader as TML
-
-from hyperion.torch.models.wav2transducer.beam_search import greedy_search, beam_search
+from hyperion.torch.models.wav2transducer.beam_search import (beam_search,
+                                                              greedy_search)
+from hyperion.torch.narchs import AudioFeatsMVN as AF
+from hyperion.torch.utils import open_device
+from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/decode_wav2vec2rnn_transducer.py b/hyperion/bin/decode_wav2vec2rnn_transducer.py
index cc612628..4fdc3140 100755
--- a/hyperion/bin/decode_wav2vec2rnn_transducer.py
+++ b/hyperion/bin/decode_wav2vec2rnn_transducer.py
@@ -4,39 +4,30 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-from typing import Dict, List, Tuple
-
-import sentencepiece as spm
-import torch.nn as nn
-
-import sys
+import logging
 import os
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
+import sys
 import time
-import logging
+from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
-
+import sentencepiece as spm
 import torch
-
+import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
-from hyperion.utils import Utt2Info
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.np.augment import SpeechAugment
-
-from hyperion.torch.utils import open_device
-from hyperion.torch.narchs import AudioFeatsMVN as AF
-from hyperion.torch.models import HFWav2Vec2RNNTransducer
 from hyperion.torch import TorchModelLoader as TML
-
-from hyperion.torch.models.wav2transducer.beam_search import greedy_search, beam_search
+from hyperion.torch.models import HFWav2Vec2RNNTransducer
+from hyperion.torch.models.wav2transducer.beam_search import (beam_search,
+                                                              greedy_search)
+from hyperion.torch.narchs import AudioFeatsMVN as AF
+from hyperion.torch.utils import open_device
+from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py
index 437127b2..27d36d6f 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py
@@ -10,9 +10,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -29,6 +26,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py
index aaa91214..1c00ed2a 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py
@@ -12,9 +12,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -32,6 +29,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 torch.backends.cudnn.enabled = False
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py
index 8d4add76..f9b77f11 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py
@@ -11,12 +11,9 @@
 import numpy as np
 import pandas as pd
-from art.classifiers import PyTorchClassifier
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
+from art.classifiers import PyTorchClassifier
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -32,6 +29,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
index 0e9493c0..9f6801ef 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -27,6 +24,8 @@
 from hyperion.torch.utils.misc import l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
index e0754498..6fdca983 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
@@ -10,9 +10,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -29,6 +26,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
index 0f9f375d..7ef4815c 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
@@ -11,12 +11,9 @@
 import numpy as np
 import pandas as pd
-from art.classifiers import PyTorchClassifier
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
+from art.classifiers import PyTorchClassifier
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -32,6 +29,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py
index 2a92a83e..70b838e6 100755
--- a/hyperion/bin/extract_wav2vec2xvectors.py
+++ b/hyperion/bin/extract_wav2vec2xvectors.py
@@ -11,11 +11,8 @@
 import numpy as np
 import pandas as pd
-import torchaudio.transforms as tat
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
+import torchaudio.transforms as tat
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
@@ -24,6 +21,8 @@
 from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 resamplers = {}
diff --git a/hyperion/bin/extract_xvectors_from_feats.py b/hyperion/bin/extract_xvectors_from_feats.py
index 926e0bcc..13ad4277 100755
--- a/hyperion/bin/extract_xvectors_from_feats.py
+++ b/hyperion/bin/extract_xvectors_from_feats.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -22,6 +19,8 @@
 from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py
index addabbcf..4f48bbdc 100755
--- a/hyperion/bin/extract_xvectors_from_wav.py
+++ b/hyperion/bin/extract_xvectors_from_wav.py
@@ -11,9 +11,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -24,6 +21,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/extract_xvectors_slidwin_from_feats.py b/hyperion/bin/extract_xvectors_slidwin_from_feats.py
index e3d2fcbb..fb6583e2 100755
--- a/hyperion/bin/extract_xvectors_slidwin_from_feats.py
+++ b/hyperion/bin/extract_xvectors_slidwin_from_feats.py
@@ -10,11 +10,8 @@
 import time
 
 import numpy as np
-import yaml
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
+import yaml
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialDataReaderFactory as DRF
@@ -23,6 +20,8 @@
 from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py
index 2b1bba3b..9f1728eb 100755
--- a/hyperion/bin/extract_xvectors_slidwin_from_wav.py
+++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py
@@ -11,11 +11,8 @@
 import numpy as np
 import pandas as pd
-import yaml
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
+import yaml
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
@@ -25,6 +22,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/finetune_wav2vec2transducer.py b/hyperion/bin/finetune_wav2vec2transducer.py
index b940c024..6f17f800 100755
--- a/hyperion/bin/finetune_wav2vec2transducer.py
+++ b/hyperion/bin/finetune_wav2vec2transducer.py
@@ -3,37 +3,29 @@
 Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
-from pathlib import Path
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
-import k2
-import time
 import logging
 import multiprocessing
+import os
+import sys
+import time
+from pathlib import Path
 
+import k2
 import numpy as np
-
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import ddp
-from hyperion.torch.trainers import TransducerTrainer as Trainer
-from hyperion.torch.data import AudioDataset as AD
-
 from hyperion.torch import TorchModelLoader as TML
+from hyperion.torch.data import AudioDataset as AD
 from hyperion.torch.data import SegSamplerFactory
 from hyperion.torch.metrics import CategoricalAccuracy
 from hyperion.torch.models import HFWav2Vec2Transducer
+from hyperion.torch.trainers import TransducerTrainer as Trainer
+from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 from torch.nn.utils.rnn import pad_sequence
 
-
 model_dict = {
     "hf_wav2vec2transducer": HFWav2Vec2Transducer,
 }
diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py
index b3edd9b5..d9d9c281 100755
--- a/hyperion/bin/finetune_wav2vec2xvector.py
+++ b/hyperion/bin/finetune_wav2vec2xvector.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -26,6 +23,8 @@
                                    HFWavLM2ResNet1dXVector)
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 model_dict = {
     "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector,
diff --git a/hyperion/bin/finetune_xvector_dfr_from_feats.py b/hyperion/bin/finetune_xvector_dfr_from_feats.py
index 2ac01025..17cafb85 100755
--- a/hyperion/bin/finetune_xvector_dfr_from_feats.py
+++ b/hyperion/bin/finetune_xvector_dfr_from_feats.py
@@ -12,9 +12,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -25,6 +22,8 @@
 from hyperion.torch.models import XVector as XVec
 from hyperion.torch.trainers import XVectorTrainerDeepFeatReg as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin/finetune_xvector_dfr_from_wav.py b/hyperion/bin/finetune_xvector_dfr_from_wav.py
index ff97d3ca..f7832a47 100755
--- a/hyperion/bin/finetune_xvector_dfr_from_wav.py
+++ b/hyperion/bin/finetune_xvector_dfr_from_wav.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -24,6 +21,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerDeepFeatRegFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin/finetune_xvector_from_feats.py b/hyperion/bin/finetune_xvector_from_feats.py
index 7a1fb5a9..ac9c2d0b 100755
--- a/hyperion/bin/finetune_xvector_from_feats.py
+++ b/hyperion/bin/finetune_xvector_from_feats.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch import TorchModelLoader as TML
@@ -23,6 +20,8 @@
 from hyperion.torch.models import XVector as XVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py
index d7b1f17d..2e120815 100755
--- a/hyperion/bin/finetune_xvector_from_wav.py
+++ b/hyperion/bin/finetune_xvector_from_wav.py
@@ -10,13 +10,6 @@
 import time
 from pathlib import Path
 
-from jsonargparse import (
-    ActionConfigFile,
-    ActionParser,
-    ArgumentParser,
-    namespace_to_dict,
-)
-
 import torch
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch import TorchModelLoader as TML
@@ -32,6 +25,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 xvec_dict = {
     "resnet": RXVec,
diff --git a/hyperion/bin/make_babble_noise_audio_files.py b/hyperion/bin/make_babble_noise_audio_files.py
index 972ff01f..4a356037 100755
--- a/hyperion/bin/make_babble_noise_audio_files.py
+++ b/hyperion/bin/make_babble_noise_audio_files.py
@@ -10,15 +10,14 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-from scipy import ndimage, signal
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import AudioWriter as Writer
 from hyperion.io import RandomAccessAudioReader as AR
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+from scipy import ndimage, signal
 
 
 def make_noise(xs):
diff --git a/hyperion/bin/pack_wav_rirs.py b/hyperion/bin/pack_wav_rirs.py
index dccf58da..c5ddd25c 100755
--- a/hyperion/bin/pack_wav_rirs.py
+++ b/hyperion/bin/pack_wav_rirs.py
@@ -10,12 +10,11 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def pack_wav_rirs(input_path, output_spec, **kwargs):
diff --git a/hyperion/bin/plot_embedding_tsne.py b/hyperion/bin/plot_embedding_tsne.py
index e011dfe8..e2157e3e 100755
--- a/hyperion/bin/plot_embedding_tsne.py
+++ b/hyperion/bin/plot_embedding_tsne.py
@@ -13,13 +13,12 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo,
-                          ArgumentParser, namespace_to_dict)
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.np.transforms import PCA, LNorm, SklTSNE
 from hyperion.utils import SegmentSet
+from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo,
+                          ArgumentParser, namespace_to_dict)
 
 matplotlib.use("Agg")
 colors = ["b", "g", "r", "c", "m", "y", "k"]
diff --git a/hyperion/bin/plot_embedding_tsne_per_class.py b/hyperion/bin/plot_embedding_tsne_per_class.py
index 6f35f074..6af0202c 100755
--- a/hyperion/bin/plot_embedding_tsne_per_class.py
+++ b/hyperion/bin/plot_embedding_tsne_per_class.py
@@ -13,15 +13,14 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo,
-                          ArgumentParser, namespace_to_dict)
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.np.clustering import AHC
 from hyperion.np.transforms import PCA, LNorm, SklTSNE
 from hyperion.utils import SegmentSet
 from hyperion.utils.math import cosine_scoring
+from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo,
+                          ArgumentParser, namespace_to_dict)
 
 matplotlib.use("Agg")
 colors = ["b", "g", "r", "c", "m", "y", "k"]
diff --git a/hyperion/bin/preprocess_audio_files.py b/hyperion/bin/preprocess_audio_files.py
index 2f4e5cbc..e8adfd16 100755
--- a/hyperion/bin/preprocess_audio_files.py
+++ b/hyperion/bin/preprocess_audio_files.py
@@ -10,15 +10,14 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-from scipy import ndimage, signal
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import AudioWriter as Writer
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+from scipy import ndimage, signal
 
 
 def process_vad(vad, length, fs, dilation, erosion):
diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py
index 026c9330..8930b299 100755
--- a/hyperion/bin/train_wav2rnn_transducer.py
+++ b/hyperion/bin/train_wav2rnn_transducer.py
@@ -3,31 +3,25 @@
 Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
-from pathlib import Path
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
-import k2
-import time
 import logging
 import multiprocessing
+import os
+import sys
+import time
+from pathlib import Path
 
+import k2
 import numpy as np
-
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import ddp
-from hyperion.torch.trainers import TransducerTrainer as Trainer
 from hyperion.torch.data import AudioDataset as AD
 from hyperion.torch.data import SegSamplerFactory
 from hyperion.torch.models import Wav2RNNRNNTransducer
+from hyperion.torch.trainers import TransducerTrainer as Trainer
+from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 from torch.nn.utils.rnn import pad_sequence
 
 model_dict = {
diff --git a/hyperion/bin/train_wav2vec2rnn_transducer.py b/hyperion/bin/train_wav2vec2rnn_transducer.py
index a2d75ba9..67f5c6ba 100755
--- a/hyperion/bin/train_wav2vec2rnn_transducer.py
+++ b/hyperion/bin/train_wav2vec2rnn_transducer.py
@@ -3,32 +3,26 @@
 Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
-from pathlib import Path
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
-import k2
-import time
 import logging
 import multiprocessing
+import os
+import sys
+import time
+from pathlib import Path
 
+import k2
 import numpy as np
-
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import ddp
-from hyperion.torch.trainers import TransducerTrainer as Trainer
 from hyperion.torch.data import AudioDataset as AD
 from hyperion.torch.data import SegSamplerFactory
-from hyperion.torch.models import HFWav2Vec2RNNTransducer
-from hyperion.torch.models import HFWav2Vec2RNNRNNTransducer
+from hyperion.torch.models import (HFWav2Vec2RNNRNNTransducer,
+                                   HFWav2Vec2RNNTransducer)
+from hyperion.torch.trainers import TransducerTrainer as Trainer
+from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 from torch.nn.utils.rnn import pad_sequence
 
 model_dict = {
diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py
index 8b945217..55f3b996 100755
--- a/hyperion/bin/train_wav2vec2transducer.py
+++ b/hyperion/bin/train_wav2vec2transducer.py
@@ -3,32 +3,26 @@
 Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu, Jesus Villalba)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import sys
-import os
-from pathlib import Path
-from jsonargparse import (
-    ArgumentParser,
-    ActionConfigFile,
-    ActionParser,
-    namespace_to_dict,
-)
-import k2
-import time
 import logging
 import multiprocessing
+import os
+import sys
+import time
+from pathlib import Path
 
+import k2
 import numpy as np
-
 import torch
 import torch.nn as nn
-
 from hyperion.hyp_defs import config_logger, set_float_cpu
-from hyperion.torch.utils import ddp
-from hyperion.torch.trainers import TransducerTrainer as Trainer
 from hyperion.torch.data import AudioDataset as AD
 from hyperion.torch.data import SegSamplerFactory
 from hyperion.torch.metrics import CategoricalAccuracy
 from hyperion.torch.models import HFWav2Vec2Transducer
+from hyperion.torch.trainers import TransducerTrainer as Trainer
+from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 from torch.nn.utils.rnn import pad_sequence
 
 model_dict = {
diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py
index 5e7ecafa..8e1653b1 100755
--- a/hyperion/bin/train_wav2vec2xvector.py
+++ b/hyperion/bin/train_wav2vec2xvector.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -25,6 +22,8 @@
                                    HFWavLM2ResNet1dXVector)
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 model_dict = {
     "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector,
diff --git a/hyperion/bin/train_xvector_from_feats.py b/hyperion/bin/train_xvector_from_feats.py
index 7f4ab0fa..71bba080 100755
--- a/hyperion/bin/train_xvector_from_feats.py
+++ b/hyperion/bin/train_xvector_from_feats.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -28,6 +25,8 @@
 from hyperion.torch.models import TransformerXVectorV1 as TFXVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 xvec_dict = {
     "resnet": RXVec,
diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py
index 57a33b56..6fef9bbe 100755
--- a/hyperion/bin/train_xvector_from_wav.py
+++ b/hyperion/bin/train_xvector_from_wav.py
@@ -10,9 +10,6 @@
 import time
 from pathlib import Path
 
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, set_float_cpu
 # from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
@@ -28,6 +25,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 xvec_dict = {
     "resnet": RXVec,
diff --git a/hyperion/bin_deprec/ark2hyp.py b/hyperion/bin_deprec/ark2hyp.py
index abcb4457..a25c561b 100755
--- a/hyperion/bin_deprec/ark2hyp.py
+++ b/hyperion/bin_deprec/ark2hyp.py
@@ -13,7 +13,6 @@
 import time
 
 import numpy as np
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import HypDataWriter, KaldiDataReader
diff --git a/hyperion/bin_deprec/arkvad2nist.py b/hyperion/bin_deprec/arkvad2nist.py
index 559371be..15a04f67 100755
--- a/hyperion/bin_deprec/arkvad2nist.py
+++ b/hyperion/bin_deprec/arkvad2nist.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.io import KaldiDataReader
diff --git a/hyperion/bin_deprec/compute-gmm-post.py b/hyperion/bin_deprec/compute-gmm-post.py
index 58675336..45d17623 100755
--- a/hyperion/bin_deprec/compute-gmm-post.py
+++ b/hyperion/bin_deprec/compute-gmm-post.py
@@ -14,13 +14,12 @@
 import time
 
 import numpy as np
-from keras import backend as K
-
 from hyperion.helpers import SequenceReader as SR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter
 from hyperion.pdfs import DiagGMM
 from hyperion.transforms import TransformList
+from keras import backend as K
 
 
 def to_sparse(r, num_comp):
diff --git a/hyperion/bin_deprec/eval-2class-performance.py b/hyperion/bin_deprec/eval-2class-performance.py
index eff16830..d149deb2 100755
--- a/hyperion/bin_deprec/eval-2class-performance.py
+++ b/hyperion/bin_deprec/eval-2class-performance.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.hyp_defs import config_logger
 from hyperion.metrics import compute_eer
 from hyperion.utils.trial_key import TrialKey
diff --git a/hyperion/bin_deprec/eval-elbo-ubm.py b/hyperion/bin_deprec/eval-elbo-ubm.py
index bf4839db..5f2eab28 100755
--- a/hyperion/bin_deprec/eval-elbo-ubm.py
+++ b/hyperion/bin_deprec/eval-elbo-ubm.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.helpers import SequenceReader as SR
 from hyperion.hyp_defs import config_logger, float_cpu
 from hyperion.pdfs import DiagGMM
diff --git a/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py b/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py
index 4548e49b..7817b570 100755
--- a/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py
+++ b/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.classifiers import QScoringHomoGBE as GBE
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger
diff --git a/hyperion/bin_deprec/eval-score-norm.py b/hyperion/bin_deprec/eval-score-norm.py
index 4b620518..4f66a8e4 100755
--- a/hyperion/bin_deprec/eval-score-norm.py
+++ b/hyperion/bin_deprec/eval-score-norm.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.hyp_defs import config_logger
 from hyperion.score_norm import *
 from hyperion.utils.trial_ndx import TrialNdx
diff --git a/hyperion/bin_deprec/h5vad2nist.py b/hyperion/bin_deprec/h5vad2nist.py
index fb45c22b..21d61d3a 100755
--- a/hyperion/bin_deprec/h5vad2nist.py
+++ b/hyperion/bin_deprec/h5vad2nist.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import HypDataReader
diff --git a/hyperion/bin_deprec/init-ubm.py b/hyperion/bin_deprec/init-ubm.py
index 204ca855..77aed464 100755
--- a/hyperion/bin_deprec/init-ubm.py
+++ b/hyperion/bin_deprec/init-ubm.py
@@ -15,12 +15,11 @@
 import time
 
 import numpy as np
-from keras import backend as K
-
 from hyperion.helpers import SequenceReader as SR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.pdfs import DiagGMM
 from hyperion.utils.multithreading import threadsafe_generator
+from keras import backend as K
 
 
 @threadsafe_generator
diff --git a/hyperion/bin_deprec/scores2lre_format.py b/hyperion/bin_deprec/scores2lre_format.py
index 717c1535..fcba8804 100755
--- a/hyperion/bin_deprec/scores2lre_format.py
+++ b/hyperion/bin_deprec/scores2lre_format.py
@@ -12,7 +12,6 @@
 import time
 
 import numpy as np
-
 from hyperion.hyp_defs import config_logger
 from hyperion.utils.trial_ndx import TrialNdx
 from hyperion.utils.trial_scores import TrialScores
diff --git a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py
index 608a5271..5c1b19fc 100755
--- a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py
index a4cc54e6..326175ab 100755
--- a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-dc1d-dvae.py b/hyperion/bin_deprec/torch-train-dc1d-dvae.py
index 1b88beba..7a4f9634 100755
--- a/hyperion/bin_deprec/torch-train-dc1d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-dc1d-dvae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-dc1d-vae.py b/hyperion/bin_deprec/torch-train-dc1d-vae.py
index dd5d2e72..1de4560a 100755
--- a/hyperion/bin_deprec/torch-train-dc1d-vae.py
+++ b/hyperion/bin_deprec/torch-train-dc1d-vae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-dc2d-dvae.py b/hyperion/bin_deprec/torch-train-dc2d-dvae.py
index 3f7cb17d..5bbc53bf 100755
--- a/hyperion/bin_deprec/torch-train-dc2d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-dc2d-dvae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-dc2d-vae.py b/hyperion/bin_deprec/torch-train-dc2d-vae.py
index 5b97f55c..b073c4c0 100755
--- a/hyperion/bin_deprec/torch-train-dc2d-vae.py
+++ b/hyperion/bin_deprec/torch-train-dc2d-vae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-dvae.py b/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
index ca6f6996..c10c6fe7 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vae.py b/hyperion/bin_deprec/torch-train-resnet1d-vae.py
index a6218567..cf460a0a 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py b/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
index 89448754..a1b13d95 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py b/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
index 4a84bbff..a773d9aa 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-dvae.py b/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
index 3f6cd6ba..a3857701 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vae.py b/hyperion/bin_deprec/torch-train-resnet2d-vae.py
index 4e853230..695472cb 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vae.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -24,6 +21,8 @@
 from hyperion.torch.narchs import ResNet2dEncoder as Encoder
 from hyperion.torch.trainers import VAETrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py b/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
index 5e0add50..fdcc0c47 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py b/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
index 6398d959..17d4c474 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
index 0137e101..ff8ef4dc 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
@@ -11,7 +11,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
index 71021825..92dad725 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
index a6908c4f..18888706 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
index b3b07682..566ea106 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-xvector.py b/hyperion/bin_deprec/torch-train-xvector.py
index 4c69eb25..88147d37 100755
--- a/hyperion/bin_deprec/torch-train-xvector.py
+++ b/hyperion/bin_deprec/torch-train-xvector.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
diff --git a/hyperion/bin_deprec/train-q-scoring-homo-gbe.py b/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
index 8a348728..48967f58 100755
--- a/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
+++ b/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.classifiers import QScoringHomoGBE as GBE
 from hyperion.helpers import VectorClassReader as VCR
 from hyperion.hyp_defs import config_logger
diff --git a/hyperion/bin_deprec/vectors2scores.py b/hyperion/bin_deprec/vectors2scores.py
index ab4be8ac..2ff635c2 100755
--- a/hyperion/bin_deprec/vectors2scores.py
+++ b/hyperion/bin_deprec/vectors2scores.py
@@ -11,7 +11,6 @@
 import time
 
 import numpy as np
-
 from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.utils.trial_scores import TrialScores
diff --git a/hyperion/bin_deprec2/apply-mvn-select-frames.py b/hyperion/bin_deprec2/apply-mvn-select-frames.py
index a2456dc9..53a01d6d 100755
--- a/hyperion/bin_deprec2/apply-mvn-select-frames.py
+++ b/hyperion/bin_deprec2/apply-mvn-select-frames.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import RandomAccessDataReaderFactory as RDRF
@@ -21,6 +18,8 @@
 from hyperion.np.feats import MeanVarianceNorm as MVN
 from hyperion.utils import Utt2Info
 from hyperion.utils.kaldi_matrix import compression_methods
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def process_feats(
diff --git a/hyperion/bin_deprec2/compute-mfcc-feats.py b/hyperion/bin_deprec2/compute-mfcc-feats.py
index a83f95d1..c8193e5c 100755
--- a/hyperion/bin_deprec2/compute-mfcc-feats.py
+++ b/hyperion/bin_deprec2/compute-mfcc-feats.py
@@ -9,15 +9,14 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.io import compression_methods
 from hyperion.np.feats import MFCC
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def compute_mfcc_feats(
diff --git a/hyperion/bin_deprec2/copy-feats.py b/hyperion/bin_deprec2/copy-feats.py
index 0385cc55..4549caec 100755
--- a/hyperion/bin_deprec2/copy-feats.py
+++ b/hyperion/bin_deprec2/copy-feats.py
@@ -12,7 +12,6 @@
 import time
 
 import numpy as np
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import CopyFeats as CF
diff --git a/hyperion/bin_deprec2/eval-cos-1vs1.py b/hyperion/bin_deprec2/eval-cos-1vs1.py
index de508333..f60fdd4b 100755
--- a/hyperion/bin_deprec2/eval-cos-1vs1.py
+++ b/hyperion/bin_deprec2/eval-cos-1vs1.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.helpers import TrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.np.transforms import LNorm, TransformList
diff --git a/hyperion/bin_deprec2/eval-linear-gbe-up.py b/hyperion/bin_deprec2/eval-linear-gbe-up.py
index d82bf967..ba646498 100755
--- a/hyperion/bin_deprec2/eval-linear-gbe-up.py
+++ b/hyperion/bin_deprec2/eval-linear-gbe-up.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter as HDW
diff --git a/hyperion/bin_deprec2/eval-linear-gbe.py b/hyperion/bin_deprec2/eval-linear-gbe.py
index cf788392..9828944d 100755
--- a/hyperion/bin_deprec2/eval-linear-gbe.py
+++ b/hyperion/bin_deprec2/eval-linear-gbe.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter as HDW
diff --git a/hyperion/bin_deprec2/eval-linear-svmc.py b/hyperion/bin_deprec2/eval-linear-svmc.py
index ba4c5e81..3b8b644b 100755
--- a/hyperion/bin_deprec2/eval-linear-svmc.py
+++ b/hyperion/bin_deprec2/eval-linear-svmc.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter as HDW
diff --git a/hyperion/bin_deprec2/eval-logistic-regression.py b/hyperion/bin_deprec2/eval-logistic-regression.py
index 992ca7b8..56507a9a 100755
--- a/hyperion/bin_deprec2/eval-logistic-regression.py
+++ b/hyperion/bin_deprec2/eval-logistic-regression.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter as HDW
diff --git a/hyperion/bin_deprec2/eval-plda-1vs1.py b/hyperion/bin_deprec2/eval-plda-1vs1.py
index 5a810cf7..1a966f57 100755
--- a/hyperion/bin_deprec2/eval-plda-1vs1.py
+++ b/hyperion/bin_deprec2/eval-plda-1vs1.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.helpers import PLDAFactory as F
 from hyperion.helpers import TrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
diff --git a/hyperion/bin_deprec2/eval-plda-nvs1.py b/hyperion/bin_deprec2/eval-plda-nvs1.py
index 5c5d200c..5ead954a 100755
--- a/hyperion/bin_deprec2/eval-plda-nvs1.py
+++ b/hyperion/bin_deprec2/eval-plda-nvs1.py
@@ -14,7 +14,6 @@
 import time
 
 import numpy as np
-
 from hyperion.helpers import PLDAFactory as F
 from hyperion.helpers import TrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
diff --git a/hyperion/bin_deprec2/merge-h5-files.py b/hyperion/bin_deprec2/merge-h5-files.py
index aeda3bab..51207343 100755
--- a/hyperion/bin_deprec2/merge-h5-files.py
+++ b/hyperion/bin_deprec2/merge-h5-files.py
@@ -12,7 +12,6 @@
 import time
 
 import numpy as np
-
 from hyperion.io import H5Merger
diff --git a/hyperion/bin_deprec2/pack-audio-files.py b/hyperion/bin_deprec2/pack-audio-files.py
index 5d544df4..a843825a 100755
--- a/hyperion/bin_deprec2/pack-audio-files.py
+++ b/hyperion/bin_deprec2/pack-audio-files.py
@@ -11,13 +11,12 @@
 import time
 
 import numpy as np
-from scipy import ndimage, signal
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import PackedAudioWriter as Writer
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.io import WSpecifier as WS
+from scipy import ndimage, signal
 
 
 def process_vad(vad, length, fs, dilation, erosion):
diff --git a/hyperion/bin_deprec2/plot-vector-hist.py b/hyperion/bin_deprec2/plot-vector-hist.py
index 75236726..a4d842c0 100755
--- a/hyperion/bin_deprec2/plot-vector-hist.py
+++ b/hyperion/bin_deprec2/plot-vector-hist.py
@@ -15,7 +15,6 @@
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
-
 from hyperion.helpers import VectorReader as VR
 from hyperion.hyp_defs import config_logger
 from hyperion.np.transforms import TransformList
diff --git a/hyperion/bin_deprec2/rttm-to-bin-vad.py b/hyperion/bin_deprec2/rttm-to-bin-vad.py
index 19e98d8f..610a0019 100755
--- a/hyperion/bin_deprec2/rttm-to-bin-vad.py
+++ b/hyperion/bin_deprec2/rttm-to-bin-vad.py
@@ -11,7 +11,6 @@
 import numpy as np
 import pandas as pd
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.utils import RTTM, SegmentList
diff --git a/hyperion/bin_deprec2/segments-to-bin-vad.py b/hyperion/bin_deprec2/segments-to-bin-vad.py
index 24021a4b..56e6bf9f 100755
--- a/hyperion/bin_deprec2/segments-to-bin-vad.py
+++ b/hyperion/bin_deprec2/segments-to-bin-vad.py
@@ -10,12 +10,11 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.utils import SegmentList
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def segments_to_bin_vad(
diff --git a/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py b/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
index ad33515c..9dde434d 100755
--- a/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -26,6 +23,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorAdvTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-adv-finetune-xvec.py b/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
index 850233e2..88d21cdb 100755
--- a/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
+++ b/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
@@ -12,9 +12,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch import TorchModelLoader as TML
@@ -25,6 +22,8 @@
 from hyperion.torch.models import XVector as XVec
 from hyperion.torch.trainers import XVectorAdvTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec2/torch-compute-mfcc-feats.py b/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
index 07f71bfb..17565a3c 100755
--- a/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
+++ b/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
@@ -8,9 +8,6 @@
 import sys
 import time
 
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
@@ -18,6 +15,8 @@
 from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.io import compression_methods
 from hyperion.torch.layers import AudioFeatsFactory as AFF
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def compute_mfcc_feats(
diff --git a/hyperion/bin_deprec2/torch-eval-vae.py b/hyperion/bin_deprec2/torch-eval-vae.py
index d676b0f1..bf99dddd 100755
--- a/hyperion/bin_deprec2/torch-eval-vae.py
+++ b/hyperion/bin_deprec2/torch-eval-vae.py
@@ -16,7 +16,6 @@
 matplotlib.use("Agg")
 # matplotlib.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
 import matplotlib.pyplot as plt
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
index aaa91214..1c00ed2a 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
@@ -12,9 +12,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -32,6 +29,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 torch.backends.cudnn.enabled = False
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
index 437127b2..27d36d6f 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
@@ -10,9 +10,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -29,6 +26,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
index 8d4add76..f9b77f11 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
@@ -11,12 +11,9 @@
 import numpy as np
 import pandas as pd
-from art.classifiers import PyTorchClassifier
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
+from art.classifiers import PyTorchClassifier
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -32,6 +29,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
index 0e9493c0..9f6801ef 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -27,6 +24,8 @@
 from hyperion.torch.utils.misc import l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
index e0754498..6fdca983 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
@@ -10,9 +10,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -29,6 +26,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
index 0f9f375d..7ef4815c 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
@@ -11,12 +11,9 @@
 import numpy as np
 import pandas as pd
-from art.classifiers import PyTorchClassifier
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
+from art.classifiers import PyTorchClassifier
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -32,6 +29,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py
index da6389fb..b95b2a7c 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py
@@ -11,9 +11,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -24,6 +21,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py b/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py
index fc494448..a9785a61 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py
@@ -11,9 +11,6 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -24,6 +21,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import RTTM, Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py
index c85fe4c9..7453e0ba 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py
@@ -11,11 +11,8 @@
 import numpy as np
 import pandas as pd
-import yaml
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
+import yaml
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
@@ -25,6 +22,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py
index 6da57e16..3153b312 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -22,6 +19,8 @@
 from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py b/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py
index 6edf60ed..347c80f8 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -22,6 +19,8 @@
 from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors.py b/hyperion/bin_deprec2/torch-extract-xvectors.py
index 76d941e0..83d21692 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -22,6 +19,8 @@
 from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py
index 88b0b1d9..077da270 100755
--- a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py
+++ b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-classif.py
@@ -11,12 +11,9 @@
 import numpy as np
 import pandas as pd
-import yaml
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
+import yaml
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -27,6 +24,8 @@
 from hyperion.torch.utils import open_device
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialNdx, Utt2Info
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def read_utt_list(list_file, class2int_file, part_idx, num_parts):
diff --git a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py
index a4df5091..54d217e2 100755
--- a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py
+++ b/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py
@@ -11,12 +11,9 @@
 import numpy as np
 import pandas as pd
-import yaml
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
+import yaml
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -31,6 +28,8 @@
 from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-train-dc1d-ae.py b/hyperion/bin_deprec2/torch-train-dc1d-ae.py
index 50ac7d42..cdba46b3 100755
--- a/hyperion/bin_deprec2/torch-train-dc1d-ae.py
+++ b/hyperion/bin_deprec2/torch-train-dc1d-ae.py
@@ -10,7 +10,6 @@
 import time
 
 import numpy as np
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec2/torch-train-dvae.py b/hyperion/bin_deprec2/torch-train-dvae.py
index 808bfbba..6c21bbcf 100755
--- a/hyperion/bin_deprec2/torch-train-dvae.py
+++ b/hyperion/bin_deprec2/torch-train-dvae.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -27,6 +24,8 @@
                                    TransformerEncoderV1)
 from hyperion.torch.trainers import DVAETrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 enc_dict = {
     "dc1d": DC1dEncoder,
diff --git a/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py
index f256f735..9db86225 100755
--- a/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -23,6 +20,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py b/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py
index 622ac62e..124e9cb3 100755
--- a/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py
+++ b/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py
@@ -12,9 +12,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -24,6 +21,8 @@
 from hyperion.torch.models import EfficientNetXVector as XVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py
index 3d135b18..6b9455df 100755
--- a/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -28,6 +25,8 @@
 # from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 # import torch.multiprocessing as mp
diff --git a/hyperion/bin_deprec2/torch-train-resnet-xvec.py b/hyperion/bin_deprec2/torch-train-resnet-xvec.py
index f976cc6e..f035032a 100755
--- a/hyperion/bin_deprec2/torch-train-resnet-xvec.py
+++ b/hyperion/bin_deprec2/torch-train-resnet-xvec.py
@@ -12,9 +12,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -24,6 +21,8 @@
 from hyperion.torch.models import ResNetXVector as XVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py
index 3ee6bf18..616e2cd3 100755
--- a/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -24,6 +21,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py
index 0857ce5c..f579a807 100755
--- a/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py
@@ -12,9 +12,6 @@
 from pathlib import Path
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -25,6 +22,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py
index 7bbbff03..486b1d92 100755
--- a/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py
@@ -10,9 +10,6 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -23,6 +20,8 @@
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from
hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_data( diff --git a/hyperion/bin_deprec2/torch-train-tdnn-xvec.py b/hyperion/bin_deprec2/torch-train-tdnn-xvec.py index 5614f1b9..be429344 100755 --- a/hyperion/bin_deprec2/torch-train-tdnn-xvec.py +++ b/hyperion/bin_deprec2/torch-train-tdnn-xvec.py @@ -12,9 +12,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -24,6 +21,8 @@ from hyperion.torch.models import TDNNXVector as XVec from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py index 6b361583..3e91da90 100755 --- a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py +++ b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py @@ -10,9 +10,6 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -23,6 +20,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_data( diff --git a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py index 62164f15..d08a58a3 100755 --- a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py +++ b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py @@ -12,9 +12,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -24,6 +21,8 @@ from hyperion.torch.models import TransformerXVectorV1 as XVec from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin_deprec2/torch-train-vae.py b/hyperion/bin_deprec2/torch-train-vae.py index 4c41d49c..6f545795 100755 --- a/hyperion/bin_deprec2/torch-train-vae.py +++ b/hyperion/bin_deprec2/torch-train-vae.py @@ -11,9 +11,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -27,6 +24,8 @@ TransformerEncoderV1) from hyperion.torch.trainers import VAETrainer as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, 
ArgumentParser, + namespace_to_dict) enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/torch-train-vq-dvae.py b/hyperion/bin_deprec2/torch-train-vq-dvae.py index 5de1bbd4..449c3b49 100755 --- a/hyperion/bin_deprec2/torch-train-vq-dvae.py +++ b/hyperion/bin_deprec2/torch-train-vq-dvae.py @@ -11,9 +11,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -27,6 +24,8 @@ TransformerEncoderV1) from hyperion.torch.trainers import VQDVAETrainer as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/torch-train-vq-vae.py b/hyperion/bin_deprec2/torch-train-vq-vae.py index 2a95f853..17dea6aa 100755 --- a/hyperion/bin_deprec2/torch-train-vq-vae.py +++ b/hyperion/bin_deprec2/torch-train-vq-vae.py @@ -11,9 +11,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -27,6 +24,8 @@ TransformerEncoderV1) from hyperion.torch.trainers import VQVAETrainer as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/train-cw-up.py b/hyperion/bin_deprec2/train-cw-up.py index c1c372ad..bab22ce7 100755 --- a/hyperion/bin_deprec2/train-cw-up.py +++ b/hyperion/bin_deprec2/train-cw-up.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal diff --git a/hyperion/bin_deprec2/train-cw.py b/hyperion/bin_deprec2/train-cw.py index cabca7c2..e8c693c1 100755 --- a/hyperion/bin_deprec2/train-cw.py +++ b/hyperion/bin_deprec2/train-cw.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal diff --git a/hyperion/bin_deprec2/train-gaussianizer.py b/hyperion/bin_deprec2/train-gaussianizer.py index aeb51e46..4718d3df 100755 --- a/hyperion/bin_deprec2/train-gaussianizer.py +++ b/hyperion/bin_deprec2/train-gaussianizer.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal diff --git a/hyperion/bin_deprec2/train-lda.py b/hyperion/bin_deprec2/train-lda.py index 1887a72f..9dfe394f 100755 --- a/hyperion/bin_deprec2/train-lda.py +++ b/hyperion/bin_deprec2/train-lda.py @@ -13,7 +13,6 @@ import time import numpy as np - from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.transforms import LDA, SbSw, TransformList diff --git a/hyperion/bin_deprec2/train-linear-gbe-up.py b/hyperion/bin_deprec2/train-linear-gbe-up.py index 9986b6bc..9435d0ad 100755 --- a/hyperion/bin_deprec2/train-linear-gbe-up.py +++ b/hyperion/bin_deprec2/train-linear-gbe-up.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs 
import config_logger from hyperion.np.classifiers import LinearGBEUP as GBE diff --git a/hyperion/bin_deprec2/train-linear-gbe.py b/hyperion/bin_deprec2/train-linear-gbe.py index e9455cb8..75fe0b67 100755 --- a/hyperion/bin_deprec2/train-linear-gbe.py +++ b/hyperion/bin_deprec2/train-linear-gbe.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LinearGBE as GBE diff --git a/hyperion/bin_deprec2/train-linear-svmc.py b/hyperion/bin_deprec2/train-linear-svmc.py index 90ff8768..f48a573e 100755 --- a/hyperion/bin_deprec2/train-linear-svmc.py +++ b/hyperion/bin_deprec2/train-linear-svmc.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LinearSVMC as SVM diff --git a/hyperion/bin_deprec2/train-logistic-regression.py b/hyperion/bin_deprec2/train-logistic-regression.py index 1aa128a3..f7036879 100755 --- a/hyperion/bin_deprec2/train-logistic-regression.py +++ b/hyperion/bin_deprec2/train-logistic-regression.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LogisticRegression as LR diff --git a/hyperion/bin_deprec2/train-mvn.py b/hyperion/bin_deprec2/train-mvn.py index 2d10b116..ff03175b 100755 --- a/hyperion/bin_deprec2/train-mvn.py +++ b/hyperion/bin_deprec2/train-mvn.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal diff --git a/hyperion/bin_deprec2/train-nda.py b/hyperion/bin_deprec2/train-nda.py index 946a8baa..ec73db2a 100755 --- a/hyperion/bin_deprec2/train-nda.py +++ b/hyperion/bin_deprec2/train-nda.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.transforms import NDA, NSbSw, TransformList diff --git a/hyperion/bin_deprec2/train-pca.py b/hyperion/bin_deprec2/train-pca.py index 25dcb366..9d9ae7a9 100755 --- a/hyperion/bin_deprec2/train-pca.py +++ b/hyperion/bin_deprec2/train-pca.py @@ -13,7 +13,6 @@ import time import numpy as np - from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.transforms import PCA, TransformList diff --git a/hyperion/bin_deprec2/train-plda.py b/hyperion/bin_deprec2/train-plda.py index 520f4cd7..f8d24366 100755 --- a/hyperion/bin_deprec2/train-plda.py +++ b/hyperion/bin_deprec2/train-plda.py @@ -14,7 +14,6 @@ import time import numpy as np - from hyperion.helpers import PLDAFactory as F from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger diff --git a/hyperion/np/score_norm/adapt_s_norm.py b/hyperion/np/score_norm/adapt_s_norm.py index 944fcad5..294893ae 100644 --- a/hyperion/np/score_norm/adapt_s_norm.py +++ b/hyperion/np/score_norm/adapt_s_norm.py @@ -4,6 +4,7 @@ """ import math + import h5py import numpy as np diff --git a/hyperion/torch/adv_attacks/random_attack_factory.py b/hyperion/torch/adv_attacks/random_attack_factory.py index 0c83bc56..2f7cc7f3 100644 --- a/hyperion/torch/adv_attacks/random_attack_factory.py +++ b/hyperion/torch/adv_attacks/random_attack_factory.py @@ -5,9 +5,8 @@ import math -from 
jsonargparse import ActionParser, ArgumentParser - import torch +from jsonargparse import ActionParser, ArgumentParser from .attack_factory import AttackFactory as AF diff --git a/hyperion/torch/adv_defenses/wave_gan_white.py b/hyperion/torch/adv_defenses/wave_gan_white.py index 5d045f08..af51dc00 100644 --- a/hyperion/torch/adv_defenses/wave_gan_white.py +++ b/hyperion/torch/adv_defenses/wave_gan_white.py @@ -8,9 +8,8 @@ import librosa import numpy as np -import yaml - import torch +import yaml try: # import parallel_wavegan.models @@ -21,7 +20,6 @@ pass from sklearn.preprocessing import StandardScaler - from torch import nn diff --git a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index 252ac160..959a635a 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -4,11 +4,11 @@ """ from .audio_dataset import AudioDataset +# samplers +from .bucketing_seg_sampler import BucketingSegSampler from .embed_sampler_factory import EmbedSamplerFactory # datasets from .feat_seq_dataset import FeatSeqDataset from .paired_feat_seq_dataset import PairedFeatSeqDataset -# samplers -from .bucketing_seg_sampler import BucketingSegSampler # from .weighted_seq_sampler import ClassWeightedSeqSampler from .seg_sampler_factory import SegSamplerFactory diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index b352f94d..7a69c45f 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -9,28 +9,25 @@ import numpy as np import pandas as pd -import torchaudio.transforms as tat -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - +#import k2 +import sentencepiece as spm import torch import torch.distributed as dist +import torchaudio.transforms as tat +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.utils.data import Dataset from ...io import RandomAccessAudioReader as AR from ...np.augment import SpeechAugment - -#import k2 -import sentencepiece as spm -#from torch.nn.utils.rnn import pad_sequence - -from torch.utils.data import Dataset -import torch.distributed as dist - from ...utils.class_info import ClassInfo from ...utils.segment_set import SegmentSet from ...utils.text import read_text from ..torch_defs import floatstr_torch +#from torch.nn.utils.rnn import pad_sequence + + + class AudioDataset(Dataset): diff --git a/hyperion/torch/data/class_weighted_embed_sampler.py b/hyperion/torch/data/class_weighted_embed_sampler.py index edf1c00d..aed9105d 100644 --- a/hyperion/torch/data/class_weighted_embed_sampler.py +++ b/hyperion/torch/data/class_weighted_embed_sampler.py @@ -9,9 +9,8 @@ import numpy as np import pandas as pd -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 7fbfbd71..b551f342 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -9,9 +9,8 @@ import numpy as np import pandas as pd -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/embed_dataset.py b/hyperion/torch/data/embed_dataset.py index 519f498d..31fd00fd 100644 --- 
a/hyperion/torch/data/embed_dataset.py +++ b/hyperion/torch/data/embed_dataset.py @@ -10,10 +10,9 @@ import numpy as np import pandas as pd -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.distributed as dist +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.utils.data import Dataset from ...io import RandomAccessDataReaderFactory as RF diff --git a/hyperion/torch/data/embed_sampler.py b/hyperion/torch/data/embed_sampler.py index 65adcba6..8836fe2a 100644 --- a/hyperion/torch/data/embed_sampler.py +++ b/hyperion/torch/data/embed_sampler.py @@ -7,9 +7,8 @@ import math import numpy as np -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/feat_seq_dataset.py b/hyperion/torch/data/feat_seq_dataset.py index bb487dda..68dea5c3 100644 --- a/hyperion/torch/data/feat_seq_dataset.py +++ b/hyperion/torch/data/feat_seq_dataset.py @@ -12,10 +12,9 @@ import numpy as np import pandas as pd -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.distributed as dist +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.utils.data import Dataset from ...io import RandomAccessDataReaderFactory as RF diff --git a/hyperion/torch/data/hyp_sampler.py b/hyperion/torch/data/hyp_sampler.py index d1bcb0a8..c5097723 100644 --- a/hyperion/torch/data/hyp_sampler.py +++ b/hyperion/torch/data/hyp_sampler.py @@ -2,10 +2,9 @@ import math import numpy as np -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.distributed as dist +from jsonargparse import ActionParser, ArgumentParser from torch.utils.data import Sampler diff --git a/hyperion/torch/data/paired_feat_seq_dataset.py b/hyperion/torch/data/paired_feat_seq_dataset.py index eff2ed58..fc17593e 100644 --- a/hyperion/torch/data/paired_feat_seq_dataset.py +++ b/hyperion/torch/data/paired_feat_seq_dataset.py @@ -6,7 +6,6 @@ import logging import numpy as np - import torch from ...utils.utt2info import Utt2Info diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py index 2933dcc6..76054cd8 100644 --- a/hyperion/torch/data/seg_chunk_sampler.py +++ b/hyperion/torch/data/seg_chunk_sampler.py @@ -8,10 +8,9 @@ import numpy as np import pandas as pd -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.distributed as dist +from jsonargparse import ActionParser, ArgumentParser from ...utils.segment_set import SegmentSet from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 39d1eed2..74726f63 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -7,9 +7,8 @@ import math import numpy as np -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/weighted_embed_sampler.py b/hyperion/torch/data/weighted_embed_sampler.py index 5870512a..22da93f9 100644 --- a/hyperion/torch/data/weighted_embed_sampler.py +++ b/hyperion/torch/data/weighted_embed_sampler.py @@ -7,7 +7,6 @@ import math import numpy as np - import torch from torch.utils.data import Sampler diff --git a/hyperion/torch/data/weighted_seq_sampler.py 
b/hyperion/torch/data/weighted_seq_sampler.py index b6f0b670..345c2429 100644
--- a/hyperion/torch/data/weighted_seq_sampler.py
+++ b/hyperion/torch/data/weighted_seq_sampler.py
@@ -7,10 +7,9 @@
 import math
 
 import numpy as np
-from jsonargparse import ActionParser, ArgumentParser
-
 import torch
 import torch.distributed as dist
+from jsonargparse import ActionParser, ArgumentParser
 from torch.utils.data import Sampler
diff --git a/hyperion/torch/layer_blocks/__init__.py b/hyperion/torch/layer_blocks/__init__.py
index 7a738bca..0487ae4f 100644
--- a/hyperion/torch/layer_blocks/__init__.py
+++ b/hyperion/torch/layer_blocks/__init__.py
@@ -30,9 +30,9 @@
 from .seresnet_blocks import SEResNetBasicBlock, SEResNetBNBlock
 from .spine_blocks import BlockSpec, SpineConv, SpineEndpoints, SpineResample
 from .tdnn_blocks import TDNNBlock
+from .transducer_joiner import TransducerJoiner
+from .transducer_predictor import TransducerRNNPredictor, TransducerConvPredictor
 from .transformer_conv2d_subsampler import TransformerConv2dSubsampler
 from .transformer_encoder_v1 import TransformerEncoderBlockV1
 from .transformer_feedforward import (Conv1dLinear, Conv1dx2,
                                       PositionwiseFeedForward)
-from .transducer_predictor import TransducerPredictor
-from .transducer_joiner import TransducerJoiner
diff --git a/hyperion/torch/layer_blocks/etdnn_blocks.py b/hyperion/torch/layer_blocks/etdnn_blocks.py
index b6afdd29..10fd09b3 100644
--- a/hyperion/torch/layer_blocks/etdnn_blocks.py
+++ b/hyperion/torch/layer_blocks/etdnn_blocks.py
@@ -4,7 +4,6 @@
 """
 
 import numpy as np
-
 import torch.nn as nn
 from torch.nn import BatchNorm1d, Conv1d, Linear
diff --git a/hyperion/torch/layer_blocks/resetdnn_blocks.py b/hyperion/torch/layer_blocks/resetdnn_blocks.py
index dfea3720..1af632fb 100644
--- a/hyperion/torch/layer_blocks/resetdnn_blocks.py
+++ b/hyperion/torch/layer_blocks/resetdnn_blocks.py
@@ -5,7 +5,6 @@
 #
 
 import numpy as np
-
 import torch.nn as nn
 from torch.nn import BatchNorm1d, Conv1d, Linear
diff --git a/hyperion/torch/layer_blocks/transducer_joiner.py b/hyperion/torch/layer_blocks/transducer_joiner.py
index 482b5aa6..738c0cda 100644
--- a/hyperion/torch/layer_blocks/transducer_joiner.py
+++ b/hyperion/torch/layer_blocks/transducer_joiner.py
@@ -2,98 +2,68 @@
 Copyright 2023 Johns Hopkins University (Author: Jesus Villalba, Yen-Ju Lu)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
 import logging
 from typing import Optional, Tuple
 
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
 
 
 class TransducerJoiner(nn.Module):
     """ RNN-T Joiner network.
     Implementation based on
-    https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/transducer/joiner.py
+    https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/transducer_stateless7/joiner.py
 
     Attributes:
-      in_feats: input feature dimension.
+      enc_feats: encoder output feature dimension.
+      pred_feats: predictor output feature dimension.
+      hid_feats: hidden projection feature dimension.
       vocab_size: vocabulary size
     """
 
-    def __init__(self, in_feats: int, vocab_size: int):
+    def __init__(self, enc_feats: int, pred_feats: int, hid_feats: int,
+                 vocab_size: int):
         super().__init__()
-        self.in_feats = in_feats
+        self.enc_feats = enc_feats
+        self.pred_feats = pred_feats
+        self.hid_feats = hid_feats
         self.vocab_size = vocab_size
-        self.output = nn.Linear(in_feats, vocab_size)
-
-    def forward(self, encoder_out: torch.Tensor,
-                pred_out: torch.Tensor) -> torch.Tensor:
+        self.enc_proj = nn.Linear(enc_feats, hid_feats)
+        self.pred_proj = nn.Linear(pred_feats, hid_feats)
+        self.output = nn.Linear(hid_feats, vocab_size)
+
+    def get_config(self):
+        config = {
+            "joiner_type": "basic",
+            "hid_feats": self.hid_feats,
+        }
+        return config
+
+    def forward(self,
+                enc_out: torch.Tensor,
+                pred_out: torch.Tensor,
+                project_input: bool = True) -> torch.Tensor:
         """
         Args:
-          encoder_out: Output from the encoder with shape = (N, T, C).
-          pred_out: Output from the predictor with shape = (N, U, C).
+          enc_out: output from the encoder with shape = (N, T, C) or (N, T, s_range, C)
+          pred_out: output from the predictor with shape = (N, U, C) or (N, T, s_range, C)
+          project_input: if True, it projects the encoder and predictor features
+            inside the forward function; if False, it expects the inputs to be
+            projected beforehand.
         Returns:
-          Return a tensor of shape (N, T, U, C).
+          Symbols' logits of shape (N, T, U, C).
         """
-        assert encoder_out.ndim == pred_out.ndim == 3
-        assert encoder_out.size(0) == pred_out.size(0)
-        assert encoder_out.size(2) == pred_out.size(2)
+        assert enc_out.ndim == pred_out.ndim
+        assert enc_out.ndim in (3, 4)
+
+        if enc_out.ndim == 3:
+            enc_out = enc_out.unsqueeze(2)  # (N, T, 1, C)
+            pred_out = pred_out.unsqueeze(1)  # (N, 1, U, C)
 
-        encoder_out = encoder_out.unsqueeze(2)
-        # Now encoder_out is (N, T, 1, C)
-        pred_out = pred_out.unsqueeze(1)
-        # Now pred_out is (N, 1, U, C)
-        x = torch.tanh(encoder_out + pred_out)
+        if project_input:
+            x = self.enc_proj(enc_out) + self.pred_proj(pred_out)
+        else:
+            x = enc_out + pred_out
 
+        x = torch.tanh(x)
         logits = self.output(x)
         return logits
-
-    # def get_config(self):
-    #     config = {
-    #         "in_feats": self.in_feats,
-    #         "out_dims": self.out_dims,
-    #         "num_layers": self.num_layers,
-    #     }
-
-    #     # base_config = super().get_config()
-    #     return dict(list(config.items()))
-
-    # @staticmethod
-    # def filter_args(**kwargs):
-    #     valid_args = (
-    #         "in_feats",
-    #         "out_dims",
-    #         "num_layers",
-    #     )
-    #     args = dict((k, kwargs[k]) for k in valid_args if k in kwargs)
-
-    #     return args
-
-    # @staticmethod
-    # def add_class_args(parser,
-    #                    prefix=None,
-    #                    skip=set(["in_feats", "out_dims"])):
-    #     if prefix is not None:
-    #         outer_parser = parser
-    #         parser = ArgumentParser(prog="")
-
-    #     if "in_feats" not in skip:
-    #         parser.add_argument("--in-feats",
-    #                             type=int,
-    #                             required=True,
-    #                             help=("input feature dimension"))
-
-    #     if "out_dims" not in skip:
-    #         parser.add_argument("--out-dims",
-    #                             type=int,
-    #                             required=True,
-    #                             help=("output feature dimension (vocab size)"))
-    #     parser.add_argument("--num-layers",
-    #                         default=1,
-    #                         type=int,
-    #                         help=("layers of the joiner"))
-
-    #     if prefix is not None:
-    #         outer_parser.add_argument("--" + prefix,
-    #                                   action=ActionParser(parser=parser))
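For reference, a minimal shape-check sketch of the reworked joiner API above (illustrative only, not part of the patch; the sizes are arbitrary and the import path relies on the layer_blocks exports added in this patch):

import torch
from hyperion.torch.layer_blocks import TransducerJoiner

# 4 utterances, 100 encoder frames, 20 predictor steps, 1000-token vocab
joiner = TransducerJoiner(enc_feats=512, pred_feats=512, hid_feats=256,
                          vocab_size=1000)
enc_out = torch.randn(4, 100, 512)   # (N, T, enc_feats)
pred_out = torch.randn(4, 20, 512)   # (N, U, pred_feats)
logits = joiner(enc_out, pred_out)   # project_input=True applies the projections inside
assert logits.shape == (4, 100, 20, 1000)  # (N, T, U, vocab_size)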
diff --git a/hyperion/torch/layer_blocks/transducer_predictor.py b/hyperion/torch/layer_blocks/transducer_predictor.py
index ae354359..00339fe7 100644
--- a/hyperion/torch/layer_blocks/transducer_predictor.py
+++ b/hyperion/torch/layer_blocks/transducer_predictor.py
@@ -2,31 +2,32 @@
 Copyright 2023 Johns Hopkins University (Author: Jesus Villalba, Yen-Ju Lu)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
 import logging
 from typing import Optional, Tuple
 
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
 
 from ...utils.misc import filter_func_args
+from ..layers import ActivationFactory as AF
 
 
-class TransducerPredictor(nn.Module):
-    """ RNN-T prediction network.
+class TransducerRNNPredictor(nn.Module):
+    """ RNN-T prediction network with LSTM or GRU.
     Implementation based on:
     https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/transducer/decoder.py
 
     Attributes:
       vocab_size: Number of tokens of the modeling unit including blank.
       embed_dim: Dimension of the input embedding.
-      blank_id: The ID of the blank symbol.
       num_layers: Number of LSTM layers.
       hid_feats: Hidden dimension of LSTM layers.
       out_feats: Output dimension of the predictor.
       embed_dropout_rate: Dropout rate for the embedding layer.
       rnn_dropout_rate: Dropout for LSTM layers.
-
+      rnn_type: RNN type, lstm or gru.
+      blank_id: The ID of the blank symbol.
     """
 
     def __init__(self,
@@ -34,7 +35,7 @@ def __init__(self,
                  embed_dim: int,
                  num_layers: int,
                  hid_feats: int,
-                 out_feats: int,
+                 out_feats: Optional[int] = None,
                  embed_dropout_rate: float = 0.0,
                  rnn_dropout_rate: float = 0.0,
                  rnn_type: str = "lstm",
@@ -73,7 +74,29 @@ def __init__(self,
         self.hid_feats = hid_feats
         self.embed_dropout_rate = embed_dropout_rate
         self.rnn_dropout_rate = rnn_dropout_rate
-        self.output = nn.Linear(hid_feats, out_feats)
+        if out_feats is None:
+            out_feats = hid_feats
+
+        self.out_feats = out_feats
+        if out_feats != hid_feats:
+            self.output_proj = nn.Linear(hid_feats, out_feats)
+        else:
+            self.output_proj = None
+
+    def get_config(self):
+        config = {
+            "pred_type": "rnn",
+            "vocab_size": self.vocab_size,
+            "embed_dim": self.embed_dim,
+            "num_layers": self.num_layers,
+            "hid_feats": self.hid_feats,
+            "out_feats": self.out_feats,
+            "embed_dropout_rate": self.embed_dropout_rate,
+            "rnn_dropout_rate": self.rnn_dropout_rate,
+            "rnn_type": self.rnn_type,
+            "blank_id": self.blank_id,
+        }
+        return config
 
     def forward(
         self,
@@ -90,8 +113,9 @@
         """
         embed = self.embedding(y)
         embed = self.embed_dropout(embed)
-        rnn_out, (h, c) = self.rnn(embed, states)
-        out = self.output(rnn_out)
+        out, (h, c) = self.rnn(embed, states)
+        if self.output_proj:
+            out = self.output_proj(out)
 
         return out, (h, c)
 
@@ -110,101 +134,137 @@ def change_config(
         self.embed_dropout_rate = embed_dropout_rate
         self.embed_dropout = nn.Dropout(self.embed_dropout_rate)
 
-    # def get_config(self):
-    #     config = {
-    #         "in_feats": self.in_feats,
-    #         "blank_id": self.blank_id,
-    #         "vocab_size": self.vocab_size,
-    #         "embed_dim": self.embed_dim,
-    #         "num_layers": self.num_layers,
-    #         "hid_feats": self.hid_feats,
-    #         "embed_dropout_rate": self.embed_dropout_rate,
-    #         "rnn_dropout_rate": self.rnn_dropout_rate,
-    #     }
-
-    #     # base_config = super().get_config()
-    #     return dict(list(config.items()))
-
-    # @staticmethod
-    # def filter_args(**kwargs):
-    #     args = filter_func_args(TransducerPredictor.__init__, kwargs)
-    #     return args
-
-    # @staticmethod
-    # def filter_finetune_args(**kwargs):
-    #     args = filter_func_args(TransducerPredictor.change_config, kwargs)
-    #     return args
-
-    # @staticmethod
-    # def add_class_args(parser,
-    #                    prefix=None,
-    #                    skip=set(["in_feats", "blank_id", "vocab_size"])):
-
-    #     if prefix is not None:
-    #         outer_parser = parser
-    #         parser = ArgumentParser(prog="")
-
-    #     if "in_feats" not in skip:
-    #         parser.add_argument("--in-feats",
-    #                             type=int,
-    #                             required=True,
-    #                             help=("input feature dimension"))
-    #     if "blank_id" not in skip:
-    #         parser.add_argument("--blank-id",
-    #                             type=int,
-    #                             required=True,
-    #                             help=("blank id from sp model"))
-    #     if "vocab_size" not in skip:
-    #         parser.add_argument("--vocab-size",
-    #                             type=int,
-    #                             required=True,
-    #                             help=("output prediction dimension"))
-    #     parser.add_argument("--embedding-dim",
-    #                         default=1024,
-    #                         type=int,
-    #                         help=("feature dimension"))
-    #     parser.add_argument("--embedding-dropout-rate",
-    #                         default=0.0,
-    #                         type=float,
-    #                         help=("dropout prob for decoder input embeddings"))
-    #     parser.add_argument("--rnn-dropout-rate",
-    #                         default=0.0,
-    #                         type=float,
-    #                         help=("dropout prob for decoder RNN "))
-
-    #     parser.add_argument("--num-layers", default=2, type=int, help=(""))
-
-    #     parser.add_argument("--hidden-dim", default=512, type=int, help=(""))
-
-    #     if prefix is not None:
-    #         outer_parser.add_argument("--" + prefix,
-    #                                   action=ActionParser(parser=parser))
-
-    # @staticmethod
-    # def add_finetune_args(parser,
-    #                       prefix=None,
-    #                       skip=set(["in_feats", "blank_id", "vocab_size"])):
-
-    #     if prefix is not None:
-    #         outer_parser = parser
-    #         parser = ArgumentParser(prog="")
-
-    #     parser.add_argument(
-    #         "--override-dropouts",
-    #         default=False,
-    #         action=ActionYesNo,
-    #         help=(
-    #             "whether to use the dropout probabilities passed in the "
-    #             "arguments instead of the defaults in the pretrained model."))
-    #     parser.add_argument("--embedding-dropout-rate",
-    #                         default=0.0,
-    #                         type=float,
-    #                         help=("dropout prob for decoder input embeddings"))
-    #     parser.add_argument("--rnn-dropout-rate",
-    #                         default=0.0,
-    #                         type=float,
-    #                         help=("dropout prob for decoder RNN "))
-
-    #     if prefix is not None:
-    #         outer_parser.add_argument("--" + prefix,
-    #                                   action=ActionParser(parser=parser))
+
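For reference, a minimal usage sketch of the RNN predictor above (illustrative only, not part of the patch; the sizes are arbitrary, the constructor signature is the one shown in this hunk, and the import relies on the layer_blocks exports added in this patch):

import torch
from hyperion.torch.layer_blocks import TransducerRNNPredictor

predictor = TransducerRNNPredictor(vocab_size=1000, embed_dim=256,
                                   num_layers=2, hid_feats=512)
y = torch.randint(1, 1000, (4, 20))  # (N, U) label ids; 0 is the blank/padding id
out, (h, c) = predictor(y)           # out_feats defaults to hid_feats, so no output projection
print(out.shape)                     # expected: torch.Size([4, 20, 512])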
+ """ + + def __init__( + self, + vocab_size: int, + embed_dim: int, + out_feats: Optional[int] = None, + context_size: int = 2, + embed_dropout_rate: float = 0.0, + hid_act: str = "relu", + blank_id: int = 0, + ): + super().__init__() + self.embedding = nn.Embedding( + num_embeddings=vocab_size, + embedding_dim=embed_dim, + padding_idx=blank_id, + ) + self.embed_dropout = nn.Dropout(embed_dropout_rate) + assert context_size >= 1, context_size + if context_size > 1: + self.conv = nn.Conv1d( + in_channels=embed_dim, + out_channels=embed_dim, + kernel_size=context_size, + padding=0, + groups=out_feats // 4, + bias=False, + ) + + self.blank_id = blank_id + self.vocab_size = vocab_size + self.embed_dim = embed_dim + self.embed_dropout_rate = embed_dropout_rate + self.context_size = context_size + self.hid_act = AF.create(hid_act) + + if out_feats is None: + out_feats = embed_dim + + self.out_feats = out_feats + if out_feats != embed_feats: + self.output_proj = nn.Linear(embed_dim, out_feats) + else: + self.output_proj = None + + def get_config(self): + hid_act = AF.get_config(self.hid_act) + config = { + "pred_type": "conv", + "vocab_size": self.vocab_size, + "embed_dim": self.embed_dim, + "out_feats": self.out_feats, + "context_size": self.context_size, + "embed_dropout_rate": self.embed_dropout_rate, + "blank_id": self.blank_id, + "hid_act": hid_act, + } + return config + + def forward( + self, + y: torch.Tensor, + states: Optional[torch.Tensor] = None, + ) -> Tuple[torch.Tensor, None]: + """ + Args: + y: + A 2-D tensor of shape (N, U). + # need_pad: + # True to left pad the input. Should be True during training. + # False to not pad the input. Should be False during inference. + Returns: + Return a tensor of shape (N, U, decoder_dim). + """ + y = y.to(torch.int64) + embed = self.embedding(y) + if self.context > 1: + embed = embed.transpose(1, 2) + if states is None: + embed = F.pad(embedding_out, pad=(self.context_size - 1, 0)) + else: + raise NotImplementedError() + embed = self.conv(embed).transpose(1, 2) + + out = self.hid_act(embed) + if self.output_proj: + out = self.output_proj(out) + + return out, None + + # # this stuff about clamp() is a temporary fix for a mismatch + # # at utterance start, we use negative ids in beam_search.py + # if torch.jit.is_tracing(): + # # This is for exporting to PNNX via ONNX + # embedding_out = self.embedding(y) + # else: + # embedding_out = self.embedding(y.clamp(min=0)) * (y >= 0).unsqueeze(-1) + # if self.context_size > 1: + # embedding_out = embedding_out.permute(0, 2, 1) + # if need_pad is True: + # embedding_out = F.pad(embedding_out, pad=(self.context_size - 1, 0)) + # else: + # # During inference time, there is no need to do extra padding + # # as we only need one output + # assert embedding_out.size(-1) == self.context_size + # embedding_out = self.conv(embedding_out) + # embedding_out = embedding_out.permute(0, 2, 1) + # embedding_out = F.relu(embedding_out) + # return embedding_out + + def change_config( + self, + override_dropouts=False, + embed_dropout_rate: float = 0.0, + ): + logging.info("changing predictor config") + + if override_dropouts: + logging.info("overriding predictor dropouts") + self.embed_dropout_rate = embed_dropout_rate + self.embed_dropout = nn.Dropout(self.embed_dropout_rate) diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index 5e38494f..85ba92f6 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -6,7 +6,6 @@ import math import numpy 
as np - import torch import torch.nn as nn import torch.nn.functional as nnf diff --git a/hyperion/torch/layers/mvn.py b/hyperion/torch/layers/mvn.py index 4b4c5927..b90a65c8 100644 --- a/hyperion/torch/layers/mvn.py +++ b/hyperion/torch/layers/mvn.py @@ -2,10 +2,9 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser class MeanVarianceNorm(nn.Module): diff --git a/hyperion/torch/layers/pool_factory.py b/hyperion/torch/layers/pool_factory.py index 84d0cbf1..c0e573af 100644 --- a/hyperion/torch/layers/pool_factory.py +++ b/hyperion/torch/layers/pool_factory.py @@ -2,9 +2,8 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from .global_pool import * diff --git a/hyperion/torch/layers/spec_augment.py b/hyperion/torch/layers/spec_augment.py index f4e03842..761a4e31 100644 --- a/hyperion/torch/layers/spec_augment.py +++ b/hyperion/torch/layers/spec_augment.py @@ -4,11 +4,10 @@ """ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn import torch.nn.functional as nnf +from jsonargparse import ActionParser, ArgumentParser count = 0 diff --git a/hyperion/torch/loggers/logger.py b/hyperion/torch/loggers/logger.py index 7e9c91f2..46c1130d 100644 --- a/hyperion/torch/loggers/logger.py +++ b/hyperion/torch/loggers/logger.py @@ -4,7 +4,6 @@ """ import numpy as np - import torch.distributed as dist diff --git a/hyperion/torch/loggers/logger_list.py b/hyperion/torch/loggers/logger_list.py index 0291a01f..20ae58ec 100644 --- a/hyperion/torch/loggers/logger_list.py +++ b/hyperion/torch/loggers/logger_list.py @@ -4,7 +4,6 @@ """ import numpy as np - import torch.distributed as dist from .tensorboard_logger import TensorBoardLogger as TBL diff --git a/hyperion/torch/lr_schedulers/factory.py b/hyperion/torch/lr_schedulers/factory.py index d3111140..4bd086ad 100644 --- a/hyperion/torch/lr_schedulers/factory.py +++ b/hyperion/torch/lr_schedulers/factory.py @@ -2,9 +2,8 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch +from jsonargparse import ActionParser, ArgumentParser from .cos_lr import AdamCosineLR, CosineLR from .exp_lr import ExponentialLR diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index 95042aed..2df4e047 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -6,8 +6,9 @@ from .vae.vae import VAE from .vae.vq_vae import VQVAE -from .wav2transducer import HFWav2Vec2Transducer -from .wav2transducer import HFWav2Vec2RNNTransducer, HFWav2Vec2RNNRNNTransducer +from .transducer import RNNTransducer, RNNRNNTransducer +from .wav2transducer import (HFWav2Vec2RNNRNNTransducer, + HFWav2Vec2RNNTransducer, HFWav2Vec2Transducer) from .wav2xvectors import (HFHubert2ResNet1dXVector, HFWav2Vec2ResNet1dXVector, HFWavLM2ResNet1dXVector) from .xvectors.efficient_net_xvector import EfficientNetXVector diff --git a/hyperion/torch/models/transducer/__init__.py b/hyperion/torch/models/transducer/__init__.py index 
fe55e34d..ee3c85f5 100644 --- a/hyperion/torch/models/transducer/__init__.py +++ b/hyperion/torch/models/transducer/__init__.py @@ -4,10 +4,10 @@ """ -from .rnn_transducer import RNNTransducer from .rnn_rnn_transducer import RNNRNNTransducer - +from .rnn_transducer import RNNTransducer, RNNTransducerOutput from .transducer import Transducer + #from .conformer import Conformer #from .decoder import Decoder #from .joiner import Joiner diff --git a/hyperion/torch/models/transducer/conformer.py b/hyperion/torch/models/transducer/conformer.py index a350d579..f62621af 100644 --- a/hyperion/torch/models/transducer/conformer.py +++ b/hyperion/torch/models/transducer/conformer.py @@ -20,12 +20,12 @@ import warnings from typing import List, Optional, Tuple -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import torch +from hyperion.utils.text import make_pad_mask, subsequent_chunk_mask +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch import Tensor, nn -from .transformer import Transformer -from hyperion.utils.text import make_pad_mask, subsequent_chunk_mask +from .transformer import Transformer class Conformer(Transformer): diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py index e7a40ec0..7f3698d7 100644 --- a/hyperion/torch/models/transducer/decoder.py +++ b/hyperion/torch/models/transducer/decoder.py @@ -14,13 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from jsonargparse import ArgumentParser, ActionParser, ActionYesNo import logging from typing import Optional, Tuple -import logging import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser # TODO(fangjun): Support switching between LSTM and GRU diff --git a/hyperion/torch/models/transducer/joiner.py b/hyperion/torch/models/transducer/joiner.py index 0fc1fe51..983f064a 100644 --- a/hyperion/torch/models/transducer/joiner.py +++ b/hyperion/torch/models/transducer/joiner.py @@ -14,9 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
 
 
 class Joiner(nn.Module):
diff --git a/hyperion/torch/models/transducer/lstm_rnn_transducer.py b/hyperion/torch/models/transducer/lstm_rnn_transducer.py
index 5ab74483..983334d4 100644
--- a/hyperion/torch/models/transducer/lstm_rnn_transducer.py
+++ b/hyperion/torch/models/transducer/lstm_rnn_transducer.py
@@ -5,7 +5,9 @@
 import logging
 from typing import Dict, Optional, Union
-from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
+
 try:
     import k2
 except ModuleNotFoundError:
diff --git a/hyperion/torch/models/transducer/rnn_rnn_transducer.py b/hyperion/torch/models/transducer/rnn_rnn_transducer.py
index 0e1c7a85..1c0704f5 100644
--- a/hyperion/torch/models/transducer/rnn_rnn_transducer.py
+++ b/hyperion/torch/models/transducer/rnn_rnn_transducer.py
@@ -4,8 +4,10 @@
 import logging
-from typing import Dict, Optional, Union, Tuple
-from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+from typing import Dict, Optional, Tuple, Union
+
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
+
 try:
     import k2
 except ModuleNotFoundError:
@@ -13,8 +15,8 @@
 
 import torch
 
-from .rnn_transducer import RNNTransducer
 from ...narchs import RNNEncoder
+from .rnn_transducer import RNNTransducer
 
 
 class RNNRNNTransducer(RNNTransducer):
diff --git a/hyperion/torch/models/transducer/rnn_transducer.py b/hyperion/torch/models/transducer/rnn_transducer.py
index ef54a5eb..0b886fdf 100644
--- a/hyperion/torch/models/transducer/rnn_transducer.py
+++ b/hyperion/torch/models/transducer/rnn_transducer.py
@@ -4,8 +4,11 @@
 import logging
-from typing import Dict, Optional, Union, Tuple, List
-from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple, Union
+
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
+
 try:
     import k2
 except ModuleNotFoundError:
@@ -13,9 +16,19 @@
 
 import torch
 
+from ....utils import HypDataClass
 from ....utils.misc import filter_func_args
-from ...torch_model import TorchModel
 from ...narchs import RNNTransducerDecoder
+from ...torch_model import TorchModel
+
+
+@dataclass
+class RNNTransducerOutput(HypDataClass):
+
+    loss: torch.Tensor
+    loss_simple: Optional[torch.Tensor] = None
+    loss_pruned: Optional[torch.Tensor] = None
+    h_feats: Optional[List[torch.Tensor]] = None
 
 
 class RNNTransducer(TorchModel):
@@ -49,7 +62,7 @@ def forward(
         x: torch.Tensor,
         x_lengths: torch.Tensor,
         y: k2.RaggedTensor,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> RNNTransducerOutput:
         """
         Args:
           x: input features with shape = (N, T, C)
@@ -65,14 +78,17 @@
         assert y.num_axes == 2, y.num_axes
         assert x.size(0) == x_lengths.size(0) == y.dim0
+        assert torch.all(
+            x_lengths[:-1] >= x_lengths[1:]
+        ), f"x_lengths={x_lengths}"  # check x_lengths are sorted in decreasing order
 
         if self.encoder is not None:
             x, x_lengths = self.encoder(x, x_lengths)
         assert torch.all(x_lengths > 0)
-        print("zz", x.shape, x_lengths, y, flush=True)
-        logits, loss = self.decoder(x, x_lengths, y)
-        return logits, loss
+        dec_output = self.decoder(x, x_lengths, y)
+        output = RNNTransducerOutput(*dec_output)
+        return output
 
     def infer(self,
               x: torch.Tensor,
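For reference, the new calling contract of RNNTransducer.forward in a short sketch (illustrative only, not part of the patch; `model` is a hypothetical instance and `y` must be a k2.RaggedTensor, so the forward call is left commented):

import torch

# inputs must now be sorted by decreasing length to satisfy the new assert
x = torch.randn(4, 120, 80)                   # (N, T, C) input features
x_lengths = torch.tensor([100, 120, 90, 60])
order = torch.argsort(x_lengths, descending=True)
x, x_lengths = x[order], x_lengths[order]     # now non-increasing

# output = model(x, x_lengths, y)             # returns an RNNTransducerOutput dataclass
# loss = output.loss                          # instead of the old (logits, loss) tuple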
diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py
index bae35e0e..c9ba365e 100644
--- a/hyperion/torch/models/transducer/transducer.py
+++ b/hyperion/torch/models/transducer/transducer.py
@@ -17,23 +17,25 @@
 Note we use `rnnt_loss` from torchaudio, which exists only in
 torchaudio >= v0.10.0. It also means you have to use torch >= v1.10.0
 """
-from jsonargparse import ArgumentParser, ActionParser, ActionYesNo
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
+
 try:
     import k2
 except ModuleNotFoundError:
     from ...utils import dummy_k2 as k2
 
 import logging
+
 import torch
 import torch.nn as nn
 import torchaudio
 import torchaudio.functional
 
-from .encoder_interface import EncoderInterface
+from hyperion.utils.text import add_sos
 
 from ...torch_model import TorchModel
-from hyperion.utils.text import add_sos
 
 # from .conformer import Conformer
 from .decoder import Decoder
+from .encoder_interface import EncoderInterface
 from .joiner import Joiner
diff --git a/hyperion/torch/models/transducer/transformer.py b/hyperion/torch/models/transducer/transformer.py
index d9d5d6fb..a354b5f5 100644
--- a/hyperion/torch/models/transducer/transformer.py
+++ b/hyperion/torch/models/transducer/transformer.py
@@ -20,11 +20,11 @@
 import torch
 import torch.nn as nn
+from hyperion.utils.text import make_pad_mask
+
 from .encoder_interface import EncoderInterface
 from .subsampling import Conv2dSubsampling, VggSubsampling
 
-from hyperion.utils.text import make_pad_mask
-
 
 class Transformer(EncoderInterface):
     def __init__(
diff --git a/hyperion/torch/models/tvector/tvector.py b/hyperion/torch/models/tvector/tvector.py
index a46fc324..a4e4d148 100644
--- a/hyperion/torch/models/tvector/tvector.py
+++ b/hyperion/torch/models/tvector/tvector.py
@@ -5,10 +5,9 @@
 
 import logging
 
-from jsonargparse import ActionParser, ArgumentParser
-
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ArgumentParser
 
 from ...narchs import ClassifHead, ConformerEncoderV1, TorchNALoader
 from ..layer_blocks import TDNNBlock
diff --git a/hyperion/torch/models/wav2transducer/__init__.py b/hyperion/torch/models/wav2transducer/__init__.py
index de4879a5..79af6349 100644
--- a/hyperion/torch/models/wav2transducer/__init__.py
+++ b/hyperion/torch/models/wav2transducer/__init__.py
@@ -5,6 +5,5 @@
 """
 
 from .hf_wav2vec2_transducer import HFWav2Vec2Transducer
-
-from .hf_wav2vec2rnn_transducer import HFWav2Vec2RNNTransducer
 from .hf_wav2vec2rnn_rnn_transducer import HFWav2Vec2RNNRNNTransducer
+from .hf_wav2vec2rnn_transducer import HFWav2Vec2RNNTransducer
diff --git a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py
index 922996f6..f68a6f7a 100644
--- a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py
+++ b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py
@@ -2,17 +2,18 @@
 Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import logging
 import contextlib
-from typing import Union, Dict, List
-from jsonargparse import ArgumentParser, ActionParser
+import logging
+from dataclasses import dataclass
+from typing import Dict, List, Union
 
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ArgumentParser
 
-from ...utils import remove_silence
 from ...torch_model import TorchModel
-from ..transducer import RNNTransducer
+from ...utils import remove_silence
+from ..transducer import RNNTransducer, RNNTransducerOutput
 
 
 class HFWav2RNNTransducer(TorchModel):
@@ -163,31 +164,23 @@
            we should return. If None, no encoder layers are returned.
           return_logits: if True, it adds the logits to the output dictionary.
         Returns:
-          Tensor with class logits with shape=(batch, num_classes) or
-          Dictionary with "logits", "h_enc" (list of hidden encoder layers),
-          "h_classif" (list hidden classification head layers), "h_feats" (wav2vec features)
+          Dataclass with the transducer losses and, when return_feat_layers is
+          given, "h_feats" (list of hidden wav2vec feature layers).
         """
         feats, hid_feats, feat_lengths = self.forward_feats(
             x, x_lengths, return_feat_layers)
 
         feats = feats.permute(0, 2, 1)  # (N, C, T) ->(N, T, C)
-
-        output, loss = self.transducer(
+        output = self.transducer(
             feats,
             feat_lengths,
             y,
         )
 
-        if not return_feat_layers:
-            return output, loss
-
-        if not isinstance(output, dict):
-            # if the transducer just returned the logits we put then into a dictionary
-            # to append the hid feats later.
-            output["logits"] = output
+        if return_feat_layers:
+            output.h_feats = hid_feats
 
-        output["h_feats"] = hid_feats
-        return output, loss
+        return output
 
     def infer(self,
               x: torch.Tensor,
diff --git a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py
index ec4c83b0..4cebfd66 100644
--- a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py
+++ b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py
@@ -2,17 +2,18 @@
 Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu)
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-import logging
 import contextlib
-from jsonargparse import ArgumentParser, ActionParser
+import logging
 
 import torch
 import torch.nn as nn
-
-# import torch.nn.functional as nnf
+from jsonargparse import ActionParser, ArgumentParser
 
 from ...torch_model import TorchModel
 from ...utils import remove_silence
+
+# import torch.nn.functional as nnf
+
 # from ..wav2xvectors.hf_wav2xvector import HFWav2XVector
diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py
index 242a5ca1..bd58e2cd 100644
--- a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py
+++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py
@@ -3,18 +3,17 @@
 Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 import logging
-from jsonargparse import ArgumentParser, ActionParser
-from typing import Union, Dict, Optional
+from typing import Dict, Optional, Union
 
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ArgumentParser
 
-from ..transducer import Transducer
 from ...tpm import HFWav2Vec2
+from ..transducer import Transducer
 from .hf_wav2transducer import HFWav2Transducer
-
 
 class HFWav2Vec2Transducer(HFWav2Transducer):
     """Class extracting Wav2Vec2 + ResNet1d x-vectors from waveform.
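For reference, a sketch of how a caller of HFWav2RNNTransducer.forward adapts to the dataclass return above (illustrative only; `model`, `x`, `x_lengths`, and `y` are hypothetical, and the layer indices are arbitrary):

# before this patch:  output, loss = model(x, x_lengths, y)
output = model(x, x_lengths, y, return_feat_layers=[6, 12])
loss = output.loss            # RNNTransducerOutput field
w2v_layers = output.h_feats   # hidden wav2vec layers, set only when requested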
diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py index 412a182b..d2b13fb6 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py @@ -3,14 +3,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from jsonargparse import ArgumentParser, ActionParser -from typing import Union, Dict, Optional +from typing import Dict, Optional, Union import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser -from ..transducer import RNNRNNTransducer from ...tpm import HFWav2Vec2 +from ..transducer import RNNRNNTransducer from .hf_wav2rnn_transducer import HFWav2RNNTransducer diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py index d89953b2..f4e02a23 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py @@ -3,14 +3,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from jsonargparse import ArgumentParser, ActionParser -from typing import Union, Dict, Optional +from typing import Dict, Optional, Union import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser -from ..transducer import RNNTransducer from ...tpm import HFWav2Vec2 +from ..transducer import RNNTransducer from .hf_wav2rnn_transducer import HFWav2RNNTransducer diff --git a/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py index a5df4b8a..458e7cae 100644 --- a/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py @@ -3,11 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from typing import Dict, Optional, Union, Tuple -from jsonargparse import ActionParser, ArgumentParser +from typing import Dict, Optional, Tuple, Union import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import AudioFeatsMVN from ...torch_model import TorchModel diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py index b75ac53f..fb528809 100644 --- a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py @@ -5,10 +5,9 @@ import logging from typing import Dict, Optional, Union -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFHubert from ..xvectors import ResNet1dXVector diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py index 8a17379c..739213b4 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py @@ -5,10 +5,9 @@ import logging from typing import Dict, Optional, Union -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWav2Vec2 from ..xvectors import 
ResNet1dXVector diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index 5599fa1e..6ff8f8b4 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -5,10 +5,9 @@ import contextlib import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...torch_model import TorchModel from ...utils import remove_silence diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py index 56a19130..87e9a6f8 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py @@ -5,10 +5,9 @@ import logging from typing import Dict, Optional, Union -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWavLM from ..xvectors import ResNet1dXVector diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py index 0d9f1bc4..b545bfaf 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ..xvectors import ResNet1dXVector from .wav2xvector import Wav2XVector diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py index 1f7283a0..51e045da 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ..xvectors import ResNetXVector from .wav2xvector import Wav2XVector diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index 4c21f478..838f3342 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -4,10 +4,9 @@ """ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import AudioFeatsMVN from ...torch_model import TorchModel diff --git a/hyperion/torch/models/xvectors/efficient_net_xvector.py b/hyperion/torch/models/xvectors/efficient_net_xvector.py index a8663cd9..e2c46be5 100644 --- a/hyperion/torch/models/xvectors/efficient_net_xvector.py +++ b/hyperion/torch/models/xvectors/efficient_net_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import EfficientNet as EN from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py index 09136b7d..2e3973a2 100644 --- a/hyperion/torch/models/xvectors/resnet1d_xvector.py +++ 
b/hyperion/torch/models/xvectors/resnet1d_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import ResNet1dEncoder as Encoder from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/resnet_xvector.py b/hyperion/torch/models/xvectors/resnet_xvector.py index c6889626..2d1fe95a 100644 --- a/hyperion/torch/models/xvectors/resnet_xvector.py +++ b/hyperion/torch/models/xvectors/resnet_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import ResNetFactory as RNF from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/spinenet_xvector.py b/hyperion/torch/models/xvectors/spinenet_xvector.py index 203008be..e1c3e4a4 100644 --- a/hyperion/torch/models/xvectors/spinenet_xvector.py +++ b/hyperion/torch/models/xvectors/spinenet_xvector.py @@ -5,10 +5,9 @@ """ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import SpineNetFactory as SNF from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/tdnn_xvector.py b/hyperion/torch/models/xvectors/tdnn_xvector.py index 530ca63b..ae2a9d78 100644 --- a/hyperion/torch/models/xvectors/tdnn_xvector.py +++ b/hyperion/torch/models/xvectors/tdnn_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import TDNNFactory as TF from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/transformer_xvector_v1.py b/hyperion/torch/models/xvectors/transformer_xvector_v1.py index 7c55844a..9d884ed6 100644 --- a/hyperion/torch/models/xvectors/transformer_xvector_v1.py +++ b/hyperion/torch/models/xvectors/transformer_xvector_v1.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import TransformerEncoderV1 as TE from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 3807bbd8..eae4a48f 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -6,10 +6,9 @@ from enum import Enum from typing import Optional -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ...layer_blocks import TDNNBlock from ...layers import GlobalPool1dFactory as PF diff --git a/hyperion/torch/narchs/__init__.py b/hyperion/torch/narchs/__init__.py index 5f333fc8..4fe8b4ed 100644 --- a/hyperion/torch/narchs/__init__.py +++ b/hyperion/torch/narchs/__init__.py @@ -20,11 +20,11 @@ from .resnet2d_decoder import ResNet2dDecoder from .resnet2d_encoder import ResNet2dEncoder from .resnet_factory import ResNetFactory +from .rnn_encoder import RNNEncoder +from .rnn_transducer_decoder import RNNTransducerDecoder from .spinenet import * from .spinenet_factory import SpineNetFactory from .tdnn import TDNNV1 from .tdnn_factory import TDNNFactory from .torch_na_loader import TorchNALoader from 
.transformer_encoder_v1 import TransformerEncoderV1 -from .rnn_transducer_decoder import RNNTransducerDecoder -from .rnn_encoder import RNNEncoder diff --git a/hyperion/torch/narchs/audio_feats_mvn.py b/hyperion/torch/narchs/audio_feats_mvn.py index 160ee61b..8a877d5e 100644 --- a/hyperion/torch/narchs/audio_feats_mvn.py +++ b/hyperion/torch/narchs/audio_feats_mvn.py @@ -2,10 +2,9 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ..layers import AudioFeatsFactory as AFF from ..layers import MeanVarianceNorm as MVN diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index 5d179fdb..028efe29 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -3,10 +3,9 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from torch.nn import Linear from ..layer_blocks import FCBlock diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index 3acd44d2..98160a25 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -3,10 +3,9 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ..layer_blocks import ConformerEncoderBlockV1 as EBlock from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler diff --git a/hyperion/torch/narchs/dc1d_decoder.py b/hyperion/torch/narchs/dc1d_decoder.py index f5ab74d5..57d9adec 100644 --- a/hyperion/torch/narchs/dc1d_decoder.py +++ b/hyperion/torch/narchs/dc1d_decoder.py @@ -5,10 +5,9 @@ import math -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import DC1dDecBlock from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/narchs/dc1d_encoder.py b/hyperion/torch/narchs/dc1d_encoder.py index 0c331a5e..aaf1bb2d 100644 --- a/hyperion/torch/narchs/dc1d_encoder.py +++ b/hyperion/torch/narchs/dc1d_encoder.py @@ -4,10 +4,9 @@ """ import math -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks.dc1d_blocks import DC1dEncBlock from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/narchs/dc2d_decoder.py b/hyperion/torch/narchs/dc2d_decoder.py index 4106cbfd..87a18bfe 100644 --- a/hyperion/torch/narchs/dc2d_decoder.py +++ b/hyperion/torch/narchs/dc2d_decoder.py @@ -5,10 +5,9 @@ import math -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import DC2dDecBlock from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/narchs/dc2d_encoder.py b/hyperion/torch/narchs/dc2d_encoder.py index ce7b9677..70eeac3c 100644 --- a/hyperion/torch/narchs/dc2d_encoder.py +++ 
b/hyperion/torch/narchs/dc2d_encoder.py @@ -5,10 +5,9 @@ import math -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import DC2dEncBlock from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/narchs/efficient_net.py b/hyperion/torch/narchs/efficient_net.py index b9efdcef..1eddc3ff 100644 --- a/hyperion/torch/narchs/efficient_net.py +++ b/hyperion/torch/narchs/efficient_net.py @@ -5,10 +5,9 @@ import math -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.nn import Dropout, Linear from ..layer_blocks import MBConvBlock, MBConvInOutBlock diff --git a/hyperion/torch/narchs/etdnn.py b/hyperion/torch/narchs/etdnn.py index a73439b7..d2b2d298 100644 --- a/hyperion/torch/narchs/etdnn.py +++ b/hyperion/torch/narchs/etdnn.py @@ -4,7 +4,6 @@ """ import numpy as np - import torch import torch.nn as nn from torch.nn import Conv1d, Linear diff --git a/hyperion/torch/narchs/net_arch.py b/hyperion/torch/narchs/net_arch.py index 9a3fc65f..4b39804c 100644 --- a/hyperion/torch/narchs/net_arch.py +++ b/hyperion/torch/narchs/net_arch.py @@ -4,7 +4,6 @@ """ import numpy as np - import torch.nn as nn from ..torch_model import TorchModel diff --git a/hyperion/torch/narchs/resetdnn.py b/hyperion/torch/narchs/resetdnn.py index eb964fa5..c4dc7784 100644 --- a/hyperion/torch/narchs/resetdnn.py +++ b/hyperion/torch/narchs/resetdnn.py @@ -4,7 +4,6 @@ """ import numpy as np - import torch import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear diff --git a/hyperion/torch/narchs/resnet.py b/hyperion/torch/narchs/resnet.py index e3264f33..eb605bff 100644 --- a/hyperion/torch/narchs/resnet.py +++ b/hyperion/torch/narchs/resnet.py @@ -5,7 +5,6 @@ import logging import numpy as np - import torch import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear diff --git a/hyperion/torch/narchs/resnet1d_decoder.py b/hyperion/torch/narchs/resnet1d_decoder.py index 0c577174..d65bab00 100644 --- a/hyperion/torch/narchs/resnet1d_decoder.py +++ b/hyperion/torch/narchs/resnet1d_decoder.py @@ -4,10 +4,9 @@ """ import math -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import (DC1dDecBlock, ResNet1dBasicDecBlock, ResNet1dBNDecBlock, SEResNet1dBasicDecBlock, diff --git a/hyperion/torch/narchs/resnet1d_encoder.py b/hyperion/torch/narchs/resnet1d_encoder.py index 5bdad186..ab184467 100644 --- a/hyperion/torch/narchs/resnet1d_encoder.py +++ b/hyperion/torch/narchs/resnet1d_encoder.py @@ -7,10 +7,9 @@ import math import numpy as np -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import (DC1dEncBlock, Res2Net1dBasicBlock, Res2Net1dBNBlock, ResNet1dBasicBlock, diff --git a/hyperion/torch/narchs/resnet2d_decoder.py b/hyperion/torch/narchs/resnet2d_decoder.py index 426b37f5..50369c8d 100644 --- a/hyperion/torch/narchs/resnet2d_decoder.py +++ b/hyperion/torch/narchs/resnet2d_decoder.py @@ -5,10 +5,9 @@ import math -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import 
ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import (DC2dDecBlock, ResNet2dBasicDecBlock, ResNet2dBNDecBlock, SEResNet2dBasicDecBlock, diff --git a/hyperion/torch/narchs/resnet2d_encoder.py b/hyperion/torch/narchs/resnet2d_encoder.py index 84e6599e..8a76e348 100644 --- a/hyperion/torch/narchs/resnet2d_encoder.py +++ b/hyperion/torch/narchs/resnet2d_encoder.py @@ -6,10 +6,9 @@ import logging import math -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import (DC2dEncBlock, Res2Net2dBasicBlock, Res2Net2dBNBlock, ResNet2dBasicBlock, diff --git a/hyperion/torch/narchs/rnn_encoder.py b/hyperion/torch/narchs/rnn_encoder.py index dcf02564..593405c5 100644 --- a/hyperion/torch/narchs/rnn_encoder.py +++ b/hyperion/torch/narchs/rnn_encoder.py @@ -5,13 +5,12 @@ import logging import math -from typing import Dict, Optional, Union, Tuple +from typing import Dict, Optional, Tuple, Union import numpy as np -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence from ..layer_blocks import TransformerConv2dSubsampler as Subsampler diff --git a/hyperion/torch/narchs/rnn_transducer_decoder.py b/hyperion/torch/narchs/rnn_transducer_decoder.py index 64c71dcd..265f2c9b 100644 --- a/hyperion/torch/narchs/rnn_transducer_decoder.py +++ b/hyperion/torch/narchs/rnn_transducer_decoder.py @@ -3,14 +3,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser -from typing import Optional, Dict, List, Tuple from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple import torch import torch.nn as nn import torchaudio import torchaudio.functional +from jsonargparse import ActionParser, ArgumentParser try: import k2 @@ -19,7 +19,8 @@ from ...utils.misc import filter_func_args from ...utils.text import add_sos -from ..layer_blocks import TransducerPredictor as Predictor, TransducerJoiner as Joiner +from ..layer_blocks import TransducerJoiner as Joiner +from ..layer_blocks import TransducerRNNPredictor as RNNPredictor, TransducerConvPredictor as ConvPredictor from .net_arch import NetArch @@ -40,56 +41,117 @@ class RNNTransducerDecoder(NetArch): Attributes: in_feats: input features dimension (encoder output) vocab_size: Number of tokens of the modeling unit including blank. - embed_dim: Dimension of the predictor input embedding. - blank_id: The ID of the blank symbol. - num_layers: Number of LSTM layers. - hid_feats: Hidden dimension for predictor layers. - embed_dropout_rate: Dropout rate for the embedding layer. - rnn_dropout_rate: Dropout for LSTM layers. - + predictor: Dictionary with the predictor options. + joiner: Dictionary with the joiner options. + blank_id: id of the null symbol. + rnnt_loss: type of rnn-t loss between torchaudio, k2 or k2_pruned. + rnnt_type: rnn-t variation between regular, modified or constrained. + delay_penalty: penalize symbol delay, which is used to make symbols + emit earlier. + reduction: type of reduction for rnn-t loss between sum or mean. + prune_range: how many symbols to keep for each frame in k2 rnn-t + pruned loss. + lm_scale: language model scale in rnn-t smoothed loss. + am_scale: acoustic model scale in rnn-t smoothed loss.
+ simple_loss_scale: weight of rnn-t simple loss when using k2 pruned loss. + pruned_warmup_steps: number of steps to warm up the k2 rnn-t pruned loss + from 0.1 to 1. """ - def __init__(self, - in_feats: int, - vocab_size: int, - embed_dim: int, - num_pred_layers: int, - pred_hid_feats: int, - embed_dropout_rate: float = 0.0, - rnn_dropout_rate: float = 0.0, - rnn_type: str = "lstm", - blank_id: int = 0): + def __init__( + self, + in_feats: int, + vocab_size: int, + predictor: Dict, + joiner: Dict, + blank_id: int = 0, + rnnt_loss: str = "k2_pruned", + rnnt_type: str = "regular", + delay_penalty: float = 0.0, + reduction: str = "sum", + prune_range: int = 5, + lm_scale: float = 0.25, + am_scale: float = 0.0, + simple_loss_scale: float = 0.5, + pruned_warmup_steps: int = 2000, + ): super().__init__() self.in_feats = in_feats self.vocab_size = vocab_size - self.embed_dim = embed_dim - self.num_pred_layers = num_pred_layers - self.pred_hid_feats = pred_hid_feats - self.embed_dropout_rate = embed_dropout_rate - self.rnn_dropout_rate = rnn_dropout_rate - self.rnn_type = rnn_type + self.predictor_args = predictor + self.joiner_args = joiner self.blank_id = blank_id + self.rnnt_loss = rnnt_loss + self.rnnt_type = rnnt_type + self.delay_penalty = delay_penalty + self.reduction = reduction + self.prune_range = prune_range + self.lm_scale = lm_scale + self.am_scale = am_scale + self.simple_loss_scale = simple_loss_scale + self.pruned_warmup_steps = pruned_warmup_steps + + self._make_predictor() + self._make_joiner() + + if self.rnnt_loss == "k2_pruned": + self.simple_am_proj = nn.Linear(in_feats, vocab_size) + self.simple_lm_proj = nn.Linear(self.predictor.out_feats, + vocab_size) + self.register_buffer("cur_step", torch.as_tensor(0, + dtype=torch.int)) + + def _make_predictor(self): + pred_type = self.predictor_args["pred_type"] + self.predictor_args["in_feats"] = self.in_feats + self.predictor_args["vocab_size"] = self.vocab_size + self.predictor_args["blank_id"] = self.blank_id + if pred_type == "rnn": + pred_args = filter_func_args(RNNPredictor.__init__, + self.predictor_args) + self.predictor = RNNPredictor(**pred_args) + elif pred_type == "conv": + pred_args = filter_func_args(ConvPredictor.__init__, + self.predictor_args) + self.predictor = ConvPredictor(**pred_args) + else: + raise ValueError(f"Unknown predictor type {pred_type}") + + def _make_joiner(self): + joiner_type = self.joiner_args["joiner_type"] + + if joiner_type == "basic": + pred_feats = self.predictor_args["out_feats"] + hid_feats = self.joiner_args["hid_feats"] + self.joiner = Joiner(self.in_feats, pred_feats, hid_feats, + self.vocab_size) + else: + raise ValueError(f"Unknown joiner type {joiner_type}") - pred_args = filter_func_args(Predictor.__init__, locals()) - pred_args["num_layers"] = num_pred_layers - pred_args["hid_feats"] = pred_hid_feats - pred_args["out_feats"] = in_feats - self.predictor = Predictor(**pred_args) - self.joiner = Joiner(in_feats, vocab_size) - - def forward(self, x: torch.Tensor, x_lengths: torch.Tensor, - y: k2.RaggedTensor) -> torch.Tensor: + def get_config(self): + config = { + "in_feats": self.in_feats, + "vocab_size": self.vocab_size, + "predictor": self.predictor_args, + "joiner": self.joiner_args, + "blank_id": self.blank_id, + "rnnt_loss": self.rnnt_loss, + "rnnt_type": self.rnnt_type, + "delay_penalty": self.delay_penalty, + "reduction": self.reduction, + "prune_range": self.prune_range, + "lm_scale": self.lm_scale, + "am_scale": self.am_scale, + "simple_loss_scale": self.simple_loss_scale, 
+ "pruned_warmup_steps": self.pruned_warmup_steps, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) - # get y_lengths - row_splits = y.shape.row_splits(1) - y_lengths = row_splits[1:] - row_splits[:-1] - # shift y adding token - sos_y = add_sos(y, sos_id=self.blank_id) - sos_y_padded = sos_y.pad(mode="constant", padding_value=self.blank_id) - sos_y_padded = sos_y_padded.to(torch.int64) - # apply predictor and joiner - pred_out, _ = self.predictor(sos_y_padded) + def _rnnt_loss_torchaudio(self, x: torch.Tensor, x_lengths: torch.Tensor, + y: torch.Tensor, y_lengths: torch.Tensor, + pred_out: torch.Tensor): logits = self.joiner(x, pred_out) # rnnt_loss requires 0 padded targets # Note: y does not start with SOS @@ -101,9 +163,137 @@ def forward(self, x: torch.Tensor, x_lengths: torch.Tensor, logit_lengths=x_lengths, target_lengths=y_lengths, blank=self.blank_id, - reduction="sum", + reduction=self.reduction, ) - return logits, loss + return loss + + def _rnnt_loss_k2(self, x: torch.Tensor, x_lengths: torch.Tensor, + y: torch.Tensor, y_lengths: torch.Tensor, + pred_out: torch.Tensor): + y_padded = y.pad(mode="constant", padding_value=0) + y_padded = y_padded.to(torch.int64) + boundary = torch.zeros((x.size(0), 4), + dtype=torch.int64, + device=x.device) + boundary[:, 2] = y_lengths + boundary[:, 3] = x_lengths + + logits = self.joiner(x, pred_out) + + with torch.cuda.amp.autocast(enabled=False): + loss = k2.rnnt_loss( + logits=logits.float(), + symbols=y_padded, + termination_symbol=self.blank_id, + boundary=boundary, + rnnt_type=self.rnnt_type, + delay_penalty=self.delay_penalty, + reduction=self.reduction, + ) + return loss + + def _rnnt_loss_k2_pruned(self, x: torch.Tensor, x_lengths: torch.Tensor, + y: torch.Tensor, y_lengths: torch.Tensor, + pred_out: torch.Tensor): + + y_padded = y.pad(mode="constant", padding_value=0) + y_padded = y_padded.to(torch.int64) + boundary = torch.zeros((x.size(0), 4), + dtype=torch.int64, + device=x.device) + boundary[:, 2] = y_lengths + boundary[:, 3] = x_lengths + + am_simple = self.simple_am_proj(x) + lm_simple = self.simple_lm_proj(pred_out) + with torch.cuda.amp.autocast(enabled=False): + loss_simple, (px_grad, py_grad) = k2.rnnt_loss_smoothed( + lm=lm_simple.float(), + am=am_simple.float(), + symbols=y_padded, + termination_symbol=self.blank_id, + lm_only_scale=self.lm_scale, + am_only_scale=self.am_scale, + boundary=boundary, + rnnt_type=self.rnnt_type, + delay_penalty=self.delay_penalty, + reduction=self.reduction, + return_grad=True, + ) + + # ranges : [B, T, prune_range] + ranges = k2.get_rnnt_prune_ranges( + px_grad=px_grad, + py_grad=py_grad, + boundary=boundary, + s_range=self.prune_range, + ) + + # am_pruned : [B, T, prune_range, encoder_dim] + # lm_pruned : [B, T, prune_range, decoder_dim] + am_pruned, lm_pruned = k2.do_rnnt_pruning( + am=self.joiner.enc_proj(x), + lm=self.joiner.pred_proj(pred_out), + ranges=ranges, + ) + + # logits : [B, T, prune_range, vocab_size] + + # project_input=False since we applied the decoder's input projections + # prior to do_rnnt_pruning (this is an optimization for speed). 
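+ # For example (illustrative numbers, not from this patch): with + # prune_range=5 and a 500-token vocab, the pruned logits are + # [B, T, 5, 500] instead of the full [B, T, U+1, 500] rnn-t lattice, + # so joiner memory no longer grows with the transcript length U.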
+ logits = self.joiner(am_pruned, lm_pruned, project_input=False) + + with torch.cuda.amp.autocast(enabled=False): + loss_pruned = k2.rnnt_loss_pruned( + logits=logits.float(), + symbols=y_padded, + ranges=ranges, + termination_symbol=self.blank_id, + boundary=boundary, + rnnt_type=self.rnnt_type, + delay_penalty=self.delay_penalty, + reduction=self.reduction, + ) + + if self.cur_step > self.pruned_warmup_steps: + simple_loss_scale = self.simple_loss_scale + pruned_loss_scale = 1.0 + else: + r = self.cur_step / self.pruned_warmup_steps + simple_loss_scale = 1.0 - r * (1.0 - self.simple_loss_scale) + pruned_loss_scale = 0.1 + 0.9 * r + self.cur_step += 1 + + loss = simple_loss_scale * loss_simple + pruned_loss_scale * loss_pruned + + return loss, loss_simple, loss_pruned + + def forward( + self, x: torch.Tensor, x_lengths: torch.Tensor, y: k2.RaggedTensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + + # get y_lengths + row_splits = y.shape.row_splits(1) + y_lengths = row_splits[1:] - row_splits[:-1] + # shift y adding sos token + sos_y = add_sos(y, sos_id=self.blank_id) + sos_y_padded = sos_y.pad(mode="constant", padding_value=self.blank_id) + sos_y_padded = sos_y_padded.to(torch.int64) + # apply predictor and joiner + pred_out, _ = self.predictor(sos_y_padded) + loss_simple = loss_pruned = None + if self.rnnt_loss == "k2_pruned": + loss, loss_simple, loss_pruned = self._rnnt_loss_k2_pruned( + x, x_lengths, y, y_lengths, pred_out) + elif self.rnnt_loss == "k2": + loss = self._rnnt_loss_k2(x, x_lengths, y, y_lengths, pred_out) + elif self.rnnt_loss == "torchaudio": + loss = self._rnnt_loss_torchaudio(x, x_lengths, y, y_lengths, + pred_out) + + return loss, loss_simple, loss_pruned def decode(self, x: torch.Tensor, @@ -427,21 +617,6 @@ def change_config( self.predictor.change_config(override_dropouts, embed_dropout_rate, rnn_dropout_rate) - def get_config(self): - - config = { - "in_feats": self.in_feats, - "vocab_size": self.vocab_size, - "embed_dim": self.embed_dim, - "num_pred_layers": self.num_pred_layers, - "pred_hid_feats": self.pred_hid_feats, - "embed_dropout_rate": self.embed_dropout_rate, - "rnn_dropout_rate": self.rnn_dropout_rate, - "blank_id": self.blank_id, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - @staticmethod def filter_args(**kwargs): args = filter_func_args(RNNTransducerDecoder.__init__, kwargs) @@ -452,10 +627,79 @@ def filter_finetune_args(**kwargs): args = filter_func_args(RNNTransducerDecoder.change_config, kwargs) return args + @staticmethod + def add_pred_args(parser): + + pred_parser = ArgumentParser(prog="") + pred_parser.add_argument( + "--pred-type", + default="rnn", + choices=["rnn", "conv"], + help= + """type of predictor between RNN and Convolutional [rnn, conv]""") + pred_parser.add_argument("--embed-dim", + default=1024, + type=int, + help=("token embedding dimension")) + pred_parser.add_argument( + "--embed-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for predictor input embeddings")) + pred_parser.add_argument("--rnn-dropout-rate", + default=0.0, + type=float, + help="""dropout prob for decoder RNN """) + pred_parser.add_argument( + "--rnn-type", + default="lstm", + choices=["lstm", "gru"], + help= + """type of recurrent network for the predictor in [lstm, gru]""") + + pred_parser.add_argument("--num-layers", + default=2, + type=int, + help="""number of layers of the
predictor """) + + pred_parser.add_argument("--hid-feats", + default=512, + type=int, + help="""hidden features of the predictor""") + pred_parser.add_argument("--out-feats", + default=512, + type=int, + help="""output features of the predictor""") + pred_parser.add_argument("--context-size", + default=2, + type=int, + help="""context length of the convolutional + predictor, 1->bigram, 2-> trigram,...""") + + parser.add_argument("--predictor", + action=ActionParser(parser=pred_parser)) + + @staticmethod + def add_joiner_args(parser): + + joiner_parser = ArgumentParser(prog="") + joiner_parser.add_argument( + "--joiner-type", + default="basic", + choices=["basic"], + help= + """type of joiner network, there is only basic joiner for now""") + joiner_parser.add_argument("--hid-feats", + default=512, + type=int, + help="""hidden features of the joiner""") + parser.add_argument("--joiner", + action=ActionParser(parser=joiner_parser)) + @staticmethod def add_class_args(parser, prefix=None, - skip=set(["in_feats", "blanck_id", "vocab_size"])): + skip=set(["in_feats", "blank_id", "vocab_size"])): if prefix is not None: outer_parser = parser @@ -476,35 +720,59 @@ def add_class_args(parser, type=int, required=True, help=("output prediction dimension")) - parser.add_argument("--embed-dim", - default=1024, - type=int, - help=("token embedding dimension")) + + RNNTransducerDecoder.add_pred_args(parser) + RNNTransducerDecoder.add_joiner_args(parser) parser.add_argument( - "--embed-dropout-rate", + "--rnnt-loss", + default="k2_pruned", + choices=["torchaudio", "k2", "k2_pruned"], + help="""type of rnn-t loss between torchaudio, k2 or k2_pruned.""") + parser.add_argument( + "--rnnt-type", + default="regular", + choices=["regular", "modified", "constrained"], + help= + """rnn-t variation between regular, modified or constrained.""") + parser.add_argument( + "--delay-penalty", default=0.0, type=float, - help=("dropout prob for predictor input embeddings")) - parser.add_argument("--rnn-dropout-rate", - default=0.0, - type=float, - help=("dropout prob for decoder RNN ")) + help= + """penalize symbol delay, which is used to make symbols emit earlier + for streaming models.""") parser.add_argument( - "--rnn-type", - default="lstm", - choices=["lstm", "gru"], - help=( - "type of recurrent network for thep predictor in [lstm, gru]")) - - parser.add_argument("--num-pred-layers", - default=2, - type=int, - help="""number of layers of the predictor """) - - parser.add_argument("--pred-hid-feats", - default=512, - type=int, - help="""hidden features of the predictor""") + "--reduction", + default="sum", + choices=["sum", "mean"], + help="""type of reduction for rnn-t loss between sum or mean""") + parser.add_argument( + "--prune-range", + default=5, + type=int, + help="""how many symbols to keep for each frame in k2 rnn-t + pruned loss.""") + parser.add_argument( + "--lm-scale", + default=0.25, + type=float, + help="""language model scale in rnn-t smoothed loss""") + parser.add_argument( + "--am-scale", + default=0.0, + type=float, + help="""acoustic model scale in rnn-t smoothed loss""") + parser.add_argument( + "--simple-loss-scale", + default=0.5, + type=float, + help="""weight of rnn-t simple loss when using k2 pruned loss""") + parser.add_argument( + "--pruned-warmup-steps", + default=2000, + type=int, + help="""number of steps to warm up the k2 rnn-t pruned loss + from 0.1 to 1""") if prefix is not None: outer_parser.add_argument("--" + prefix, diff --git a/hyperion/torch/narchs/spinenet.py
b/hyperion/torch/narchs/spinenet.py index 117c0733..da47ffe5 100644 --- a/hyperion/torch/narchs/spinenet.py +++ b/hyperion/torch/narchs/spinenet.py @@ -6,7 +6,6 @@ import logging import numpy as np - import torch import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear diff --git a/hyperion/torch/narchs/tdnn.py b/hyperion/torch/narchs/tdnn.py index 55e47e6a..8ac9be79 100644 --- a/hyperion/torch/narchs/tdnn.py +++ b/hyperion/torch/narchs/tdnn.py @@ -4,7 +4,6 @@ """ import numpy as np - import torch import torch.nn as nn from torch.nn import Linear diff --git a/hyperion/torch/narchs/transformer_encoder_v1.py b/hyperion/torch/narchs/transformer_encoder_v1.py index 4468185e..fd3de235 100644 --- a/hyperion/torch/narchs/transformer_encoder_v1.py +++ b/hyperion/torch/narchs/transformer_encoder_v1.py @@ -3,10 +3,9 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler from ..layer_blocks import TransformerEncoderBlockV1 as EBlock diff --git a/hyperion/torch/optim/factory.py b/hyperion/torch/optim/factory.py index 95117b05..aa1acdc8 100644 --- a/hyperion/torch/optim/factory.py +++ b/hyperion/torch/optim/factory.py @@ -4,10 +4,9 @@ """ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.optim as optim +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_args from .radam import RAdam diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py index b2198924..659f9dde 100644 --- a/hyperion/torch/tpm/hf/hf_hubert.py +++ b/hyperion/torch/tpm/hf/hf_hubert.py @@ -6,11 +6,10 @@ import os from typing import Callable, List, Optional, Tuple, Union -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from transformers import HubertConfig, HubertModel - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import HubertConfig, HubertModel from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index e1f21153..76d80aa4 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -6,11 +6,10 @@ import os from typing import Callable, List, Optional, Tuple, Union -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from transformers import Wav2Vec2Config, Wav2Vec2Model - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import Wav2Vec2Config, Wav2Vec2Model from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index b0a815c7..5dd6a539 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -8,11 +8,10 @@ from turtle import right from typing import List, Optional, Tuple, Union -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Processor - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import 
Wav2Vec2FeatureExtractor, Wav2Vec2Processor from ...torch_model import TorchModel from ...utils import scale_seq_lengths, seq_lengths_to_mask diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py index 0d5c5ad3..eec88dec 100644 --- a/hyperion/torch/tpm/hf/hf_wavlm.py +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -6,11 +6,10 @@ import os from typing import Callable, List, Optional, Tuple, Union -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from transformers import WavLMConfig, WavLMModel - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import WavLMConfig, WavLMModel from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 9f5fafe6..69e97cc6 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index e2d2d1f6..0523ad44 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index a6f20a8e..00a218f9 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -11,13 +11,12 @@ from enum import Enum from pathlib import Path -from fairscale.optim.grad_scaler import ShardedGradScaler -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.distributed as dist import torch.nn as nn +from fairscale.optim.grad_scaler import ShardedGradScaler +from jsonargparse import ActionParser, ArgumentParser from torch.optim.swa_utils import SWALR, AveragedModel from ...utils.misc import filter_func_args diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index cbf94bc0..3f0b3f1f 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -2,21 +2,19 @@ Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging import os from collections import OrderedDict as ODict -import logging - -from jsonargparse import ActionParser, ArgumentParser - import torch -import torchaudio import torch.nn as nn +import torchaudio +from jsonargparse import ActionParser, ArgumentParser +from torch.distributed.elastic.multiprocessing.errors import record from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset from .torch_trainer import TorchTrainer -from torch.distributed.elastic.multiprocessing.errors import record class 
TransducerTrainer(TorchTrainer): @@ -87,35 +85,6 @@ def __init__( super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # None, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - @record def train_epoch(self, data_loader): """Training epoch loop @@ -146,13 +115,10 @@ def train_epoch(self, data_loader): batch_size = input_data.shape[0] with self.amp_autocast(): - # print("xx", data.shape, data.shape[0] * data.shape[1] / 16000, - # torch.sum(audio_length).item() / 16000, - # torch.min(audio_length).item() / 16000, - # torch.max(audio_length).item() / 16000) - output, loss = self.model(input_data, - x_lengths=input_lengths, - y=target) + output = self.model(input_data, + x_lengths=input_lengths, + y=target) + loss = output.loss loss = loss.mean() / self.grad_acc_steps if self.use_amp: @@ -165,7 +131,10 @@ def train_epoch(self, data_loader): self.lr_scheduler.on_opt_step() self.update_model() - batch_metrics["loss"] = loss.item() * self.grad_acc_steps + for k, v in output.items(): + if "loss" in k and v is not None: + batch_metrics[k] = output[k].item() + for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) @@ -213,13 +182,14 @@ def validation_epoch(self, data_loader, swa_update_bn=False): # batch_size = data.shape[0] with self.amp_autocast(): - output, loss = self.model(input_data, - x_lengths=input_lengths, - y=target) - # output = self.model(data) - # loss = self.loss(output, target) + output = self.model(input_data, + x_lengths=input_lengths, + y=target) + + for k, v in output.items(): + if "loss" in k and v is not None: + batch_metrics[k] = output[k].item() - batch_metrics["loss"] = loss.mean().item() for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index f4877dc6..ba401cb7 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index fc9d98f1..03800e0d 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/vq_vae_trainer.py 
b/hyperion/torch/trainers/vq_vae_trainer.py index 35946e96..40b6b10d 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 303427de..af915d6b 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -7,11 +7,10 @@ import time from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 2a012dde..1e1b1778 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -7,11 +7,10 @@ import time from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 9d04af42..4e791347 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -6,11 +6,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/utils/ddp.py b/hyperion/torch/utils/ddp.py index 1aefb3d4..ad9c825c 100644 --- a/hyperion/torch/utils/ddp.py +++ b/hyperion/torch/utils/ddp.py @@ -6,13 +6,12 @@ import logging import os -from fairscale.nn.data_parallel import \ - FullyShardedDataParallel as FullyShardedDDP -from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP - import torch import torch.distributed as dist import torch.nn as nn +from fairscale.nn.data_parallel import \ + FullyShardedDataParallel as FullyShardedDDP +from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP from .devices import open_device diff --git a/hyperion/torch/utils/metric_acc.py b/hyperion/torch/utils/metric_acc.py index a82c174a..7b423a3e 100644 --- a/hyperion/torch/utils/metric_acc.py +++ b/hyperion/torch/utils/metric_acc.py @@ -6,7 +6,6 @@ from collections import OrderedDict as ODict import numpy as np - import torch import torch.distributed as dist diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index 67f492f9..a8adf9a1 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -5,6 +5,7 @@ from .class_info import ClassInfo from .feature_set import 
FeatureSet +from .hyp_dataclass import HypDataClass from .kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix from .recording_set import RecordingSet from .rttm import RTTM diff --git a/hyperion/utils/hyp_dataclass.py b/hyperion/utils/hyp_dataclass.py new file mode 100644 index 00000000..f1e86d2c --- /dev/null +++ b/hyperion/utils/hyp_dataclass.py @@ -0,0 +1,31 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from dataclasses import dataclass + + +@dataclass +class HypDataClass: + """Dataclass that can imitate a dict""" + + def __getitem__(self, key): + return getattr(self, key) + + def __setitem__(self, key, val): + return setattr(self, key, val) + + def keys(self): + return self.__dict__.keys() + #return self.__annotations__.keys() + + def items(self): + return self.__dict__.items() + # for k in self.keys(): + # yield k, getattr(self, k) + + @classmethod + def from_parent(cls, parent, **kwargs): + args = parent.__dict__ + args.update(kwargs) + return cls(**args) From 7115dbb65004ca6d7f95e14409d314fef9119890 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 24 Mar 2023 10:47:06 -0400 Subject: [PATCH 089/154] change configuration of residual networks with batchnorm after nonlinearity --- egs/voxceleb/v1.1/README.md | 72 +++--- ...rain_ecapatdnn2048x4_xvec_stage1_v3.0.yaml | 4 +- ...rain_ecapatdnn2048x4_xvec_stage2_v3.0.yaml | 7 +- ...train_ecapatdnn512x3_xvec_stage1_v3.0.yaml | 93 +++++++ ...train_ecapatdnn512x3_xvec_stage2_v3.0.yaml | 69 ++++++ ...onfig_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | 8 +- ...config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | 45 ++++ egs/voxceleb/v1.1/run_040_eval_be.sh | 179 +------------- egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py | 28 ++- egs/voxceleb/v1/steps_be/train_qmf.py | 26 +- .../torch/layer_blocks/res2net1d_blocks.py | 107 ++++++++- .../torch/layer_blocks/res2net2d_blocks.py | 55 +++-- hyperion/torch/layer_blocks/res2net_blocks.py | 42 ++-- .../torch/layer_blocks/resnet1d_blocks.py | 226 ++++++++++-------- .../torch/layer_blocks/resnet2d_blocks.py | 183 +++++++------- hyperion/torch/layer_blocks/resnet_blocks.py | 30 ++- .../torch/layer_blocks/seresnet_blocks.py | 35 +-- hyperion/torch/layers/activation_factory.py | 12 +- hyperion/torch/layers/swish.py | 112 +++++++++ .../models/xvectors/efficient_net_xvector.py | 4 +- .../torch/models/xvectors/resnet1d_xvector.py | 3 +- .../torch/models/xvectors/resnet_xvector.py | 2 + .../torch/models/xvectors/spinenet_xvector.py | 2 + .../torch/models/xvectors/tdnn_xvector.py | 4 +- .../models/xvectors/transformer_xvector_v1.py | 9 +- hyperion/torch/models/xvectors/xvector.py | 128 ++++++---- hyperion/torch/narchs/classif_head.py | 73 +++--- .../trainers/xvector_trainer_from_wav.py | 33 --- 28 files changed, 968 insertions(+), 623 deletions(-) create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index 7b6b278f..1d438868 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -85,65 +85,67 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr ## Results - - ### VoxCeleb 1 Original-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | ------ 
| ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.09 | 0.068 | 0.121 | -| | | | Cosine + AS-Norm | 1.0 | 0.064 | 0.110 | -| | | | Cosine + QMF | 0.87 | 0.059 | 0.076 | -| config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.84 | 0.053 | 0.083 | -| | | | Cosine + AS-Norm | 0.78 | 0.046 | 0.078 | -| | | | Cosine + QMF | 0.74 | 0.046 | 0.077 | -| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.78 | 0.061 | 0.110 | -| | | | Cosine + AS-Norm | 0.70 | 0.054 | 0.102 | +| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.10 | 0.069 | 0.124 | +| | | | Cosine + AS-Norm | 1.09 | 0.065 | 0.105 | +| | | | Cosine + QMF | 0.92 | 0.059 | 0.090 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.68 | 0.052 | 0.088 | +| | | | Cosine + AS-Norm | 0.63 | 0.048 | 0.083 | | | | | Cosine + QMF | 0.66 | 0.047 | 0.090 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | || | + + + + ### VoxCeleb 1 Entire-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.21 | 0.075 | 0.129 | -| | | | Cosine + AS-Norm | 1.15 | 0.069 | 0.113 | -| | | | Cosine + QMF | 1.12 | 0.067 | 0.111 | -| config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.92 | 0.058 | 0.104 | -| | | | Cosine + AS-Norm | 0.87 | 0.053 | 0.089 | -| | | | Cosine + QMF | 0.88 | 0.054 | 0.092 | -| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.93 | 0.058 | 0.103 | -| | | | Cosine + AS-Norm | 0.88 | 0.052 | 0.092 | +| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.16 | 0.073 | 0.130 | +| | | | Cosine + AS-Norm | 1.13 | 0.068 | 0.117 | +| | | | Cosine + QMF | 1.06 | 0.065 | 0.108 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.85 | 0.055 | 0.100 | +| | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.088 | | | | | Cosine + QMF | 0.90 | 0.053 | 0.090 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | ### VoxCeleb 1 Hard-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.17 | 0.129 | 0.212 | -| | | | Cosine + AS-Norm | 1.98 | 0.116 | 0.190 | -| | | | Cosine + QMF | 1.88 | 0.112 | 0.181 | -| config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.80 | 0.106 | 0.171 | -| | | | Cosine + AS-Norm | 1.59 | 0.091 | 0.146 | -| | | | Cosine + QMF | 1.59 | 0.092 | 0.151 | -| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.78 | 0.110 | 0.180 | -| | | | Cosine + AS-Norm | 1.61 | 0.097 | 0.159 | +| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.10 | 0.128 | 0.209 | +| | | | Cosine + AS-Norm | 1.99 | 0.117 | 0.191 | +| | | | Cosine + QMF | 1.82 | 0.111 | 0.183 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.66 | 0.103 | 0.168 | +| | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.150 | | | | | Cosine + QMF | 1.62 | 0.096 | 0.158 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | + ### VoxSRC2022 dev | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_ecapatdnn512x3.v2.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.85 | 0.187 | 0.310 | -| | | | Cosine + AS-Norm | 2.69 | 0.182 | 0.310 | -| | | | Cosine + QMF | 2.80 | 0.196 | 0.338 | -| config_fbank80_stmn_idrnd_resnet100.v2.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.50 | 0.160 | 0.270 | -| | | | Cosine + AS-Norm | 2.31 | 0.139 | 0.240 | -| | | | Cosine + QMF | 2.54 | 0.153 | 0.248 | -| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.42 | 0.160 | 0.265 | -| | | | Cosine + AS-Norm | 2.32 | 0.152 | 0.273 | +| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.87 | 0.185 | 0.301 | +| | | | Cosine + AS-Norm | 2.84 | 0.182 | 0.307 | +| | | | Cosine + QMF | 2.62 | 0.175 | 0.282 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.33 | 0.156 | 0.260 | +| | | | Cosine + AS-Norm | 2.19 | 0.145 | 0.265 | | | | | Cosine + QMF | 2.54 | 0.179 | 0.304 | - +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | || | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | ## Results before 2023 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml index 408bad1a..1633f4a2 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml @@ -61,6 +61,7 @@ model: endpoint_channels: 4096 norm_before: false dropout_rate: 0.2 + hid_act: swish pool_net: pool_type: ch-wise-att-mean+stddev inner_feats: 128 @@ -87,7 +88,8 @@ trainer: min_lr: 1.0e-06 warmup_steps: 15000 update_lr_on_opt_step: true + grad_clip: 250 use_amp: true log_interval: 1000 - epochs: 40 + epochs: 35 eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml index 91a7d0b8..877736b3 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml @@ -12,8 +12,6 @@ data: min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 - # max_chunk_length: 6.0 - # min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 class_name: class_id seg_weight_mode: data-prior @@ -31,8 +29,8 @@ data: sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 - max_chunk_length: 6.0 - min_chunk_length: 6.0 + max_chunk_length: 4.0 + min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 class_name: class_id seg_weight_mode: data-prior @@ -62,6 +60,7 @@ trainer: min_lr: 1.0e-6 warmup_steps: 8000 update_lr_on_opt_step: true + grad_clip: 250 use_amp: true log_interval: 1000 epochs: 15 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..f15d453d --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml @@ -0,0 +1,93 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + dropout_rate: 0.002 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + 
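+ # assumed exp_lr semantics: linear warmup for warmup_steps, hold the base lr + # for hold_steps, then scale by decay_rate every decay_steps, floored at min_lr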
warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 40 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..45e55d97 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + resnet_enc: + override_dropouts: true + dropout_rate: 0. +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + swa_start: 31 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh index 5f7ed094..b093b37a 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh @@ -12,14 +12,14 @@ nnet_data=voxceleb2cat_train # x-vector cfg nnet_type=resnet1d -nnet_name=${feat_type}_ecapatdnn2048x4.v3.0 +nnet_name=${feat_type}_ecapatdnn2048x4.v4.0 -nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml +nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v4.0.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0030.pth +nnet_s1=$nnet_s1_dir/model_ep0035.pth -nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml +nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v4.0.yaml nnet_s2_name=${nnet_name}.s2 nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh new file mode 100644 index 00000000..5288f66b --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh @@ -0,0 +1,45 @@ +# ECAPA-TDNN small + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn512x3.v3.12 + +nnet_s1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1_v3.12.yaml +nnet_s1_name=$nnet_name.s1 
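+# two-stage recipe: stage 1 trains from scratch for 40 epochs with the
+# exp_lr schedule (linear warmup -> hold -> halve every decay_steps);
+# stage 2 fine-tunes with a larger margin (m=0.3, intertop_m=0.1) and SWA.
+# The second nnet_s2 assignment below wins, so scoring uses the
+# SWA-averaged stage-2 checkpoint.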
+nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0040.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage2_v3.12.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0030.pth +nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.1/run_040_eval_be.sh b/egs/voxceleb/v1.1/run_040_eval_be.sh index 37a344b6..4c7c2091 100755 --- a/egs/voxceleb/v1.1/run_040_eval_be.sh +++ b/egs/voxceleb/v1.1/run_040_eval_be.sh @@ -98,7 +98,7 @@ if [ $stage -le 3 ];then $xvector_dir/voxceleb1_test/xvector.scp \ $score_cosine_dir/voxceleb1_scores - $train_cmd --mem 10G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ + $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_dir for f in $(ls $score_cosine_dir/*_results); @@ -143,7 +143,7 @@ if [ "$do_snorm" == "true" ];then if [ $stage -le 5 ];then echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm" steps_be/eval_be_cos_snorm.sh \ - --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + --cmd "$train_cmd --mem 22G" --coh-nbest 1000 \ data/voxceleb1_test/trials \ data/voxceleb1_test/utt2model \ $xvector_dir/voxceleb1_test/xvector.scp \ @@ -209,7 +209,7 @@ if [ "$do_qmf" == "true" ];then $score_cosine_qmf_dir/voxceleb2_qmf_scores fi - + stage=9 if [ $stage -le 8 ];then echo "Eval Voxceleb 1 with Cosine scoring" @@ -333,176 +333,3 @@ if [ $stage -le 11 ];then done fi -exit -if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then - - echo "Eval voxsrc2 with Cosine scoring" - steps_be/eval_be_cos.sh --cmd "$train_cmd" \ - data/voxsrc22_dev/trials \ - data/voxsrc22_dev/utt2model \ - $xvector_dir/voxsrc22_dev/xvector.scp \ - $score_cosine_dir/voxsrc22_dev_scores & - - # steps_be/eval_be_cos.sh --cmd "$train_cmd" \ - # data/voxsrc22_test/trials \ - # data/voxsrc22_test/utt2model \ - # $xvector_dir/voxsrc22_test/xvector.scp \ - # $score_cosine_dir/voxsrc22_test_scores - - wait - $train_cmd --mem 10G --num-threads 1 $score_cosine_dir/log/score_voxsrc22_dev.log \ - local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_dir - - for f in $(ls $score_cosine_dir/voxsrc22_dev_results); - do - echo $f - cat $f - echo "" - done - -fi - - -if [ "$do_snorm" == "true" ];then - if [ $stage -le 5 ];then - echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm" - steps_be/eval_be_cos_snorm.sh \ - --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - data/voxceleb2cat_train/utt2spk \ - $xvector_dir/voxceleb2cat_train/xvector.scp \ - $score_cosine_snorm_dir/voxceleb1_scores - - $train_cmd --mem 10G --num-threads 6 $score_cosine_snorm_dir/log/score_voxceleb1.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_snorm_dir - - for f in $(ls $score_cosine_snorm_dir/*_results); - do - echo $f - cat $f - echo "" - done - fi - - if [ $stage -le 6 ];then - echo "Eval voxsrc2 with Cosine scoring" - steps_be/eval_be_cos_snorm.sh \ - --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ - data/voxsrc22_dev/trials \ - data/voxsrc22_dev/utt2model \ - 
$xvector_dir/voxsrc22_dev/xvector.scp \ - data/voxceleb2cat_train/utt2spk \ - $xvector_dir/voxceleb2cat_train/xvector.scp \ - $score_cosine_snorm_dir/voxsrc22_dev_scores & - - # steps_be/eval_be_cos_snorm.sh --cmd "$train_cmd" \ - # data/voxsrc22_test/trials \ - # data/voxsrc22_test/utt2model \ - # $xvector_dir/voxsrc22_test/xvector.scp \ - # data/voxceleb2cat_train/utt2spk \ - # $xvector_dir/voxceleb2cat_train/xvector.scp \ - # $score_cosine_snorm_dir/voxsrc22_test_scores - - wait - $train_cmd --mem 10G --num-threads 1 $score_cosine_snorm_dir/log/score_voxsrc22_dev.log \ - local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_snorm_dir - - for f in $(ls $score_cosine_snorm_dir/voxsrc22_dev_results); - do - echo $f - cat $f - echo "" - done - fi -fi - - -if [ "$do_qmf" == "true" ];then - if [ $stage -le 7 ];then - echo "Train QMF in Vox2" - steps_be/train_be_cos_qmf.sh \ - --cmd "$train_cmd" --coh-nbest 1000 \ - data/voxceleb2cat_train/trials \ - data/voxceleb2cat_train/utt2model \ - $xvector_dir/voxceleb2cat_train/xvector.scp \ - $xvector_dir/voxceleb2cat_train/utt2num_frames \ - data/voxceleb2cat_train/snorm_utt2spk \ - $xvector_dir/voxceleb2cat_train/xvector.scp \ - $score_cosine_qmf_dir/voxceleb2_qmf_scores - - fi - - if [ $stage -le 8 ];then - - echo "Eval Voxceleb 1 with Cosine scoring" - steps_be/eval_be_cos_qmf.sh \ - --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $xvector_dir/voxceleb1_test/utt2num_frames \ - data/voxceleb2cat_train/utt2spk \ - $xvector_dir/voxceleb2cat_train/xvector.scp \ - $score_cosine_qmf_dir/qmf.h5 \ - $score_cosine_qmf_dir/voxceleb1_scores - - $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir - $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1_snorm.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir _snorm - $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1_qmf.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir _qmf - - for f in $(ls $score_cosine_qmf_dir/voxceleb1{,_snorm,_qmf}_[oeh]_clean_results); - do - echo $f - cat $f - echo "" - done - - fi - - if [ $stage -le 9 ];then - echo "Eval voxsrc2 with Cosine scoring" - steps_be/eval_be_cos_qmf.sh \ - --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ - data/voxsrc22_dev/trials \ - data/voxsrc22_dev/utt2model \ - $xvector_dir/voxsrc22_dev/xvector.scp \ - $xvector_dir/voxsrc22_dev/utt2num_frames \ - data/voxceleb2cat_train/utt2spk \ - $xvector_dir/voxceleb2cat_train/xvector.scp \ - $score_cosine_qmf_dir/qmf.h5 \ - $score_cosine_qmf_dir/voxsrc22_dev_scores & - - # steps_be/eval_be_cos_qmf.sh --cmd "$train_cmd" \ - # data/voxsrc22_test/trials \ - # data/voxsrc22_test/utt2model \ - # $xvector_dir/voxsrc22_test/xvector.scp \ - # $xvector_dir/voxsrc22_test/utt2num_frames \ - # data/voxceleb2cat_train/utt2spk \ - # $xvector_dir/voxceleb2cat_train/xvector.scp \ - # $score_cosine_qmf_dir/qmf.h5 \ - # $score_cosine_qmf_dir/voxsrc22_test_scores - - wait - $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev.log \ - local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir - $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev_snorm.log \ - local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir _snorm - $train_cmd --mem 
10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev_qmf.log \ - local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir _qmf - - for f in $(ls $score_cosine_qmf_dir/voxsrc22_dev{,_snorm,_qmf}_results); - do - echo $f - cat $f - echo "" - done - fi - -fi - - diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py index e0e1c2da..e6b68ae8 100755 --- a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py +++ b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py @@ -52,6 +52,11 @@ def save_scores(s, score_file, q_name, i, j, p): s.save_txt(score_file) +def print_q_stats(scores, name): + s = f"{name} stats mean={np.mean(scores)} min={np.min(scores)} max={np.max(scores)} median={np.median(scores)}" + logging.info(s) + + def eval_plda( v_file, ndx_file, @@ -67,7 +72,7 @@ def eval_plda( seg_part_idx, num_seg_parts, coh_nbest, - **kwargs + **kwargs, ): if preproc_file is not None: @@ -105,20 +110,31 @@ def eval_plda( logging.info("read num_frames") u2nf = Utt2Info.load(num_frames_file) + # enroll_nf = np.log( + # np.clip( + # u2nf.filter(enroll_segs).info.astype(float) / 100, a_min=0.1, a_max=15.0, + # ) + # ) + # test_nf = np.log( + # np.clip( + # u2nf.filter(ndx.seg_set).info.astype(float) / 100, a_min=0.1, a_max=15.0, + # ) + # ) enroll_nf = np.log( np.clip( - u2nf.filter(enroll_segs).info.astype(float) / 100 - 2.0, + u2nf.filter(enroll_segs).info.astype(float) / 100 - 1.0, a_min=0.1, - a_max=12.0, # 6.0, + a_max=15.0, ) ) test_nf = np.log( np.clip( - u2nf.filter(ndx.seg_set).info.astype(float) / 100 - 2.0, + u2nf.filter(ndx.seg_set).info.astype(float) / 100 - 1.0, a_min=0.1, - a_max=12.0, # 6.0, + a_max=15.0, ) ) + t1 = time.time() logging.info("computing llr") scores = cosine_scoring(x_e, x_t) @@ -160,6 +176,8 @@ def eval_plda( "maxcohmu": np.maximum(mu_z, mu_t), "mincohmu": np.minimum(mu_z, mu_t), } + for k, v in q_measures.items(): + print_q_stats(v, k) f, loc = ismember(enroll, ndx.model_set) trial_mask = ndx.trial_mask[loc] diff --git a/egs/voxceleb/v1/steps_be/train_qmf.py b/egs/voxceleb/v1/steps_be/train_qmf.py index afd9d218..ee9733d8 100755 --- a/egs/voxceleb/v1/steps_be/train_qmf.py +++ b/egs/voxceleb/v1/steps_be/train_qmf.py @@ -26,6 +26,12 @@ from hyperion.np.classifiers import BinaryLogisticRegression as LR +def print_q_stats(q, name): + scores = q.scores[q.score_mask] + s = f"{name} stats mean={np.mean(scores)} min={np.min(scores)} max={np.max(scores)} median={np.median(scores)}" + logging.info(s) + + def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbose): logging.info("load key: %s", key_file) @@ -40,29 +46,37 @@ def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbo q_file = f"{score_file}_maxnf" logging.info("load max num-frames: %s", q_file) q = TrialScores.load_txt(q_file) + print_q_stats(q, "max-nf") maxnf_tar, maxnf_non = q.get_tar_non(key) q_file = f"{score_file}_minnf" logging.info("load min num-frames: %s", q_file) q = TrialScores.load_txt(q_file) + print_q_stats(q, "min-nf") minnf_tar, minnf_non = q.get_tar_non(key) q_file = f"{score_file}_maxcohmu" logging.info("load max cohort mean: %s", q_file) q = TrialScores.load_txt(q_file) + print_q_stats(q, "max-cohmu") maxcohmu_tar, maxcohmu_non = q.get_tar_non(key) q_file = f"{score_file}_mincohmu" logging.info("load min cohort mean: %s", q_file) q = TrialScores.load_txt(q_file) + print_q_stats(q, "min-cohmu") mincohmu_tar, mincohmu_non = q.get_tar_non(key) min_dcf, p_miss, p_fa = compute_min_dcf(tar, non, prior) 
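+    # min_dcf here is computed on the raw scores, before QMF; the act_dcf
+    # logged after calibration should approach it if the logistic
+    # regression on the score plus the four quality measures above fits well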
n_miss = p_miss * ntar n_fa = p_fa * nnon logging.info( - "min_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f" - % (min_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa) + "min_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f", + min_dcf, + p_miss * 100, + p_fa * 100, + n_miss, + n_fa, ) logging.info("train calibration") @@ -92,8 +106,12 @@ def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbo n_miss = p_miss * ntar n_fa = p_fa * nnon logging.info( - "act_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f" - % (act_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa) + "act_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f", + act_dcf, + p_miss * 100, + p_fa * 100, + n_miss, + n_fa, ) output_file = f"{score_file}_qmf" diff --git a/hyperion/torch/layer_blocks/res2net1d_blocks.py b/hyperion/torch/layer_blocks/res2net1d_blocks.py index 1decc327..0fbdc301 100644 --- a/hyperion/torch/layer_blocks/res2net1d_blocks.py +++ b/hyperion/torch/layer_blocks/res2net1d_blocks.py @@ -76,7 +76,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation={"name": "relu6", "inplace": True}, + activation={"name": "relu", "inplace": True}, stride=1, dropout_rate=0, drop_connect_rate=0, @@ -195,6 +195,9 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, time). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) + split_size = [self.width_in for i in range(self.scale - 1)] split_size.append(self.in_channels % self.width_in + self.width_in) split_x = torch.split(x, split_size, 1) @@ -213,7 +216,7 @@ def forward(self, x, x_mask=None): if self.norm_before: x_i = self.bn1s[i](x_i) x_i = self.act1(x_i) - if not self.norm_before: + if self.norm_after: x_i = self.bn1s[i](x_i) x.append(x_i) @@ -223,23 +226,28 @@ def forward(self, x, x_mask=None): x = torch.cat(x, dim=1) x = self.conv2(x) - if self.norm_before: + if self.norm_after: + x = self.act2(x) x = self.bn2(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - if self.se_layer: - x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) + x += residual + else: + if self.norm_before: + x = self.bn2(x) - if self.downsample is not None: - residual = self.downsample(residual) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - x += residual - x = self.act2(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if not self.norm_before: - x = self.bn2(x) + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -377,6 +385,79 @@ def expansion(self): def forward(self, x, x_mask=None): """Forward function. + Args: + x: input tensor with shape = (batch, in_channels, in_heigh, in_width). + x_mask: Binary mask indicating which spatial dimensions are valid of + shape=(batch, time), (batch, 1, time). + + Returns: + Tensor with shape = (batch, out_channels, time). 
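+        Note: the shortcut is now downsampled at the top of forward so both
+        orderings below can reuse it; with norm_after the trunk ends with
+        act3 -> bn3 -> SE -> drop-connect before the residual addition, and
+        the output is not re-activated after the add.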
+ """ + residual = x + if self.downsample is not None: + residual = self.downsample(residual) + + x = self.conv1(x) + if self.norm_before: + x = self.bn1(x) + x = self.act1(x) + if self.norm_after: + x = self.bn1(x) + + split_x = torch.split(x, self.width, 1) + x = [] + for i in range(self.num_k): + if i == 0 or self.stride > 1: + x_i = split_x[i] + else: + x_i = x_i + split_x[i] + x_i = self.conv2s[i](x_i) + if self.norm_before: + x_i = self.bn2s[i](x_i) + x_i = self.act2(x_i) + if self.norm_after: + x_i = self.bn2s[i](x_i) + x.append(x_i) + + if self.scale > 1: + if self.stride == 1: + x.append(split_x[-1]) + else: + x.append(self.pool(split_x[-1])) + + x = torch.cat(x, dim=1) + + x = self.conv3(x) + if self.norm_after: + x = self.act3(x) + x = self.bn3(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) + + if self.drop_connect_rate > 0: + x = self.drop_connect(x) + + x += residual + else: + if self.norm_before: + x = self.bn3(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) + + if self.drop_connect_rate > 0: + x = self.drop_connect(x) + + x += residual + x = self.act3(x) + + if self.dropout_rate > 0: + x = self.dropout(x) + + return x + + def forward0(self, x, x_mask=None): + """Forward function. + Args: x: input tensor with shape = (batch, in_channels, in_heigh, in_width). x_mask: Binary mask indicating which spatial dimensions are valid of diff --git a/hyperion/torch/layer_blocks/res2net2d_blocks.py b/hyperion/torch/layer_blocks/res2net2d_blocks.py index d833a5e3..4050f936 100644 --- a/hyperion/torch/layer_blocks/res2net2d_blocks.py +++ b/hyperion/torch/layer_blocks/res2net2d_blocks.py @@ -195,6 +195,9 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) + split_size = [self.width_in for i in range(self.scale - 1)] split_size.append(self.in_channels % self.width_in + self.width_in) split_x = torch.split(x, split_size, 1) @@ -213,7 +216,7 @@ def forward(self, x, x_mask=None): if self.norm_before: x_i = self.bn1s[i](x_i) x_i = self.act1(x_i) - if not self.norm_before: + if self.norm_after: x_i = self.bn1s[i](x_i) x.append(x_i) @@ -223,20 +226,22 @@ def forward(self, x, x_mask=None): x = torch.cat(x, dim=1) x = self.conv2(x) - if self.norm_before: + if self.norm_after: + x = self.act2(x) x = self.bn2(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - if self.downsample is not None: - residual = self.downsample(residual) - - if self.se_layer: - x = self.se_layer(x, x_mask=x_mask) + x += residual + else: + if self.norm_before: + x = self.bn2(x) - x += residual - x = self.act2(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - if not self.norm_before: - x = self.bn2(x) + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -383,12 +388,14 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: x = self.bn1(x) x = self.act1(x) - if not self.norm_before: + if self.norm_after: x = self.bn1(x) split_x = torch.split(x, self.width, 1) @@ -402,7 +409,7 @@ def forward(self, x, x_mask=None): if self.norm_before: x_i = self.bn2s[i](x_i) x_i = self.act2(x_i) - if not self.norm_before: + if self.norm_after: x_i = self.bn2s[i](x_i) x.append(x_i) @@ -415,20 +422,22 @@ def forward(self, x, x_mask=None): x = torch.cat(x, dim=1) x = self.conv3(x) - if self.norm_before: + if self.norm_after: + x = self.act3(x) x = self.bn3(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - if self.downsample is not None: - residual = self.downsample(residual) - - if self.se_layer: - x = self.se_layer(x, x_mask=x_mask) + x += residual + else: + if self.norm_before: + x = self.bn3(x) - x += residual - x = self.act3(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - if not self.norm_before: - x = self.bn3(x) + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) diff --git a/hyperion/torch/layer_blocks/res2net_blocks.py b/hyperion/torch/layer_blocks/res2net_blocks.py index 6a785956..73255a24 100644 --- a/hyperion/torch/layer_blocks/res2net_blocks.py +++ b/hyperion/torch/layer_blocks/res2net_blocks.py @@ -179,6 +179,9 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) + split_size = [self.width_in for i in range(self.scale - 1)] split_size.append(self.in_channels % self.width_in + self.width_in) split_x = torch.split(x, split_size, 1) @@ -209,18 +212,18 @@ def forward(self, x, x_mask=None): x = self.conv2(x) if self.norm_before: x = self.bn2(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - if self.downsample is not None: - residual = self.downsample(residual) - - if self.se_layer: - x = self.se_layer(x, x_mask=x_mask) - - x += residual - x = self.act2(x) - - if not self.norm_before: + x += residual + x = self.act2(x) + else: + x = self.act2(x) x = self.bn2(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) + + x += residual if self.dropout_rate > 0: x = self.dropout(x) @@ -358,6 +361,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -392,15 +397,18 @@ def forward(self, x, x_mask=None): x = self.conv3(x) if self.norm_before: x = self.bn3(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - if self.downsample is not None: - residual = self.downsample(residual) - - if self.se_layer: - x = self.se_layer(x, x_mask=x_mask) + x += residual + x = self.act3(x) + else: + x = self.act3(x) + x = self.bn3(x) + if self.se_layer: + x = self.se_layer(x, x_mask=x_mask) - x += residual - x = self.act3(x) + x += residual if not self.norm_before: x = self.bn3(x) diff --git a/hyperion/torch/layer_blocks/resnet1d_blocks.py b/hyperion/torch/layer_blocks/resnet1d_blocks.py index dd914eba..4ad9b8ce 100644 --- a/hyperion/torch/layer_blocks/resnet1d_blocks.py +++ b/hyperion/torch/layer_blocks/resnet1d_blocks.py @@ -139,7 +139,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, drop_connect_rate=0, @@ -210,6 +210,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -221,21 +223,22 @@ def forward(self, x, x_mask=None): x = self.bn1(x) x = self.conv2(x) - - if self.norm_before: + if self.norm_after: + x = self.act2(x) x = self.bn2(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) - - if self.downsample is not None: - residual = self.downsample(residual) + x += residual + else: + if self.norm_before: + x = self.bn2(x) - x += residual - x = self.act2(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.norm_after: - x = self.bn2(x) + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -270,7 +273,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, drop_connect_rate=0, @@ -342,6 +345,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.upsample is not None: + residual = self.upsample(residual) x = self.conv1(x) if self.norm_before: @@ -353,21 +358,22 @@ def forward(self, x, x_mask=None): x = self.bn1(x) x = self.conv2(x) - - if self.norm_before: + if self.norm_after: + x = self.act2(x) x = self.bn2(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) - - if self.upsample is not None: - residual = self.upsample(residual) + x += residual + else: + if self.norm_before: + x = self.bn2(x) - x += residual - x = self.act2(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.norm_after: - x = self.bn2(x) + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -400,7 +406,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, drop_connect_rate=0, @@ -484,6 +490,8 @@ def forward(self, x, x_mask=None): """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -502,20 +510,22 @@ def forward(self, x, x_mask=None): x = self.bn2(x) x = self.conv3(x) - if self.norm_before: + if self.norm_after: + x = self.act3(x) x = self.bn3(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) - - if self.downsample is not None: - residual = self.downsample(residual) + x += residual + else: + if self.norm_before: + x = self.bn3(x) - x += residual - x = self.act3(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.norm_after: - x = self.bn3(x) + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -548,7 +558,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, drop_connect_rate=0, @@ -625,6 +635,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.upsample is not None: + residual = self.upsample(residual) x = self.conv1(x) if self.norm_before: @@ -643,20 +655,22 @@ def forward(self, x, x_mask=None): x = self.bn2(x) x = self.conv3(x) - if self.norm_before: + if self.norm_after: + x = self.act3(x) x = self.bn3(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) - - if self.upsample is not None: - residual = self.upsample(residual) + x += residual + else: + if self.norm_before: + x = self.bn3(x) - x += residual - x = self.act3(x) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.norm_after: - x = self.bn2(x) + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -690,7 +704,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, drop_connect_rate=0, @@ -731,6 +745,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -742,22 +758,24 @@ def forward(self, x, x_mask=None): x = self.bn1(x) x = self.conv2(x) - - if self.norm_before: + if self.norm_after: + x = self.act2(x) x = self.bn2(x) + x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - x = self.se_layer(x, x_mask=x_mask) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) - - if self.downsample is not None: - residual = self.downsample(residual) + x += residual + else: + if self.norm_before: + x = self.bn2(x) - x += residual - x = self.act2(x) + x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.norm_after: - x = self.bn2(x) + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -791,7 +809,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, drop_connect_rate=0, @@ -836,6 +854,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.upsample is not None: + residual = self.upsample(residual) x = self.conv1(x) if self.norm_before: @@ -847,22 +867,24 @@ def forward(self, x, x_mask=None): x = self.bn1(x) x = self.conv2(x) - - if self.norm_before: + if self.norm_after: + x = self.act2(x) x = self.bn2(x) + x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - x = self.se_layer(x, x_mask=x_mask) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) - - if self.upsample is not None: - residual = self.upsample(residual) + x += residual + else: + if self.norm_before: + x = self.bn2(x) - x += residual - x = self.act2(x) + x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.norm_after: - x = self.bn2(x) + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -896,7 +918,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, drop_connect_rate=0, @@ -939,6 +961,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -957,21 +981,24 @@ def forward(self, x, x_mask=None): x = self.bn2(x) x = self.conv3(x) - if self.norm_before: + if self.norm_after: + x = self.act3(x) x = self.bn3(x) + x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - x = self.se_layer(x, x_mask=x_mask) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) - - if self.downsample is not None: - residual = self.downsample(residual) + x += residual + else: + if self.norm_before: + x = self.bn3(x) - x += residual - x = self.act3(x) + x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.norm_after: - x = self.bn3(x) + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -1005,7 +1032,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, drop_connect_rate=0, @@ -1048,6 +1075,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.upsample is not None: + residual = self.upsample(residual) x = self.conv1(x) if self.norm_before: @@ -1066,21 +1095,24 @@ def forward(self, x, x_mask=None): x = self.bn2(x) x = self.conv3(x) - if self.norm_before: + if self.norm_after: + x = self.act3(x) x = self.bn3(x) + x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - x = self.se_layer(x, x_mask=x_mask) - if self.drop_connect_rate > 0: - x = self.drop_connect(x) - - if self.upsample is not None: - residual = self.upsample(residual) + x += residual + else: + if self.norm_before: + x = self.bn3(x) - x += residual - x = self.act3(x) + x = self.se_layer(x, x_mask=x_mask) + if self.drop_connect_rate > 0: + x = self.drop_connect(x) - if self.norm_after: - x = self.bn3(x) + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -1115,7 +1147,7 @@ def __init__( in_scale, scale, upsampling_mode="nearest", - activation={"name": "relu6", "inplace": True}, + activation={"name": "relu", "inplace": True}, use_norm=True, norm_layer=None, norm_before=True, diff --git a/hyperion/torch/layer_blocks/resnet2d_blocks.py b/hyperion/torch/layer_blocks/resnet2d_blocks.py index 7fe89b56..6c2dca74 100644 --- a/hyperion/torch/layer_blocks/resnet2d_blocks.py +++ b/hyperion/torch/layer_blocks/resnet2d_blocks.py @@ -103,7 +103,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, groups=1, @@ -168,6 +168,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -180,17 +182,16 @@ def forward(self, x, x_mask=None): x = self.conv2(x) - if self.norm_before: - x = self.bn2(x) - - if self.downsample is not None: - residual = self.downsample(residual) - - x += residual - x = self.act2(x) - if self.norm_after: + x = self.act2(x) x = self.bn2(x) + x += residual + else: + if self.norm_before: + x = self.bn2(x) + + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -223,7 +224,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, groups=1, @@ -289,6 +290,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.upsample is not None: + residual = self.upsample(residual) x = self.conv1(x) if self.norm_before: @@ -300,18 +303,16 @@ def forward(self, x, x_mask=None): x = self.bn1(x) x = self.conv2(x) - - if self.norm_before: - x = self.bn2(x) - - if self.upsample is not None: - residual = self.upsample(residual) - - x += residual - x = self.act2(x) - if self.norm_after: + x = self.act2(x) x = self.bn2(x) + x += residual + else: + if self.norm_before: + x = self.bn2(x) + + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -342,7 +343,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, groups=1, @@ -419,6 +420,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -437,17 +440,16 @@ def forward(self, x, x_mask=None): x = self.bn2(x) x = self.conv3(x) - if self.norm_before: - x = self.bn3(x) - - if self.downsample is not None: - residual = self.downsample(residual) - - x += residual - x = self.act3(x) - if self.norm_after: + x = self.act3(x) x = self.bn3(x) + x += residual + else: + if self.norm_before: + x = self.bn3(x) + + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -478,7 +480,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, groups=1, @@ -549,6 +551,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.upsample is not None: + residual = self.upsample(residual) x = self.conv1(x) if self.norm_before: @@ -567,17 +571,16 @@ def forward(self, x, x_mask=None): x = self.bn2(x) x = self.conv3(x) - if self.norm_before: + if self.norm_after: + x = self.act3(x) x = self.bn3(x) + x += residual + else: + if self.norm_before: + x = self.bn3(x) - if self.upsample is not None: - residual = self.upsample(residual) - - x += residual - x = self.act3(x) - - if self.norm_after: - x = self.bn2(x) + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -611,7 +614,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, groups=1, @@ -650,6 +653,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -661,19 +666,18 @@ def forward(self, x, x_mask=None): x = self.bn1(x) x = self.conv2(x) - - if self.norm_before: - x = self.bn2(x) - - if self.downsample is not None: - residual = self.downsample(residual) - - x = self.se_layer(x, x_mask=x_mask) - x += residual - x = self.act2(x) - if self.norm_after: + x = self.act2(x) x = self.bn2(x) + x = self.se_layer(x, x_mask=x_mask) + x += residual + else: + if self.norm_before: + x = self.bn2(x) + + x = self.se_layer(x, x_mask=x_mask) + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -707,7 +711,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, groups=1, @@ -750,6 +754,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.upsample is not None: + residual = self.upsample(residual) x = self.conv1(x) if self.norm_before: @@ -761,19 +767,18 @@ def forward(self, x, x_mask=None): x = self.bn1(x) x = self.conv2(x) - - if self.norm_before: - x = self.bn2(x) - - if self.upsample is not None: - residual = self.upsample(residual) - - x = self.se_layer(x, x_mask=x_mask) - x += residual - x = self.act2(x) - if self.norm_after: + x = self.act2(x) x = self.bn2(x) + x = self.se_layer(x, x_mask=x_mask) + x += residual + else: + if self.norm_before: + x = self.bn2(x) + + x = self.se_layer(x, x_mask=x_mask) + x += residual + x = self.act2(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -805,7 +810,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, groups=1, @@ -846,6 +851,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -864,18 +871,18 @@ def forward(self, x, x_mask=None): x = self.bn2(x) x = self.conv3(x) - if self.norm_before: - x = self.bn3(x) - - if self.downsample is not None: - residual = self.downsample(residual) - - x = self.se_layer(x, x_mask=x_mask) - x += residual - x = self.act3(x) - if self.norm_after: + x = self.act3(x) x = self.bn3(x) + x = self.se_layer(x, x_mask=x_mask) + x += residual + else: + if self.norm_before: + x = self.bn3(x) + + x = self.se_layer(x, x_mask=x_mask) + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) @@ -907,7 +914,7 @@ def __init__( in_channels, channels, kernel_size=3, - activation="relu6", + activation="relu", stride=1, dropout_rate=0, groups=1, @@ -948,6 +955,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.upsample is not None: + residual = self.upsample(residual) x = self.conv1(x) if self.norm_before: @@ -966,18 +975,18 @@ def forward(self, x, x_mask=None): x = self.bn2(x) x = self.conv3(x) - if self.norm_before: - x = self.bn3(x) - - if self.upsample is not None: - residual = self.upsample(residual) - - x = self.se_layer(x, x_mask=x_mask) - x += residual - x = self.act3(x) - if self.norm_after: + x = self.act3(x) x = self.bn3(x) + x = self.se_layer(x, x_mask=x_mask) + x += residual + else: + if self.norm_before: + x = self.bn3(x) + + x = self.se_layer(x, x_mask=x_mask) + x += residual + x = self.act3(x) if self.dropout_rate > 0: x = self.dropout(x) diff --git a/hyperion/torch/layer_blocks/resnet_blocks.py b/hyperion/torch/layer_blocks/resnet_blocks.py index e25c0cbb..c077a54b 100644 --- a/hyperion/torch/layer_blocks/resnet_blocks.py +++ b/hyperion/torch/layer_blocks/resnet_blocks.py @@ -190,6 +190,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -204,15 +206,12 @@ def forward(self, x, x_mask=None): if self.norm_before: x = self.bn2(x) - - if self.downsample is not None: - residual = self.downsample(residual) - - x += residual - x = self.act2(x) - - if not self.norm_before: + x += residual + x = self.act2(x) + else: + x = self.act2(x) x = self.bn2(x) + x += residual if self.dropout_rate > 0: x = self.dropout(x) @@ -303,6 +302,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). """ residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -321,15 +322,12 @@ def forward(self, x, x_mask=None): x = self.conv3(x) if self.norm_before: x = self.bn3(x) - - if self.downsample is not None: - residual = self.downsample(residual) - - x += residual - x = self.act3(x) - - if not self.norm_before: + x += residual + x = self.act3(x) + else: + x = self.act3(x) x = self.bn3(x) + x += residual if self.dropout_rate > 0: x = self.dropout(x) diff --git a/hyperion/torch/layer_blocks/seresnet_blocks.py b/hyperion/torch/layer_blocks/seresnet_blocks.py index 4807e94b..b13a7ff3 100644 --- a/hyperion/torch/layer_blocks/seresnet_blocks.py +++ b/hyperion/torch/layer_blocks/seresnet_blocks.py @@ -95,18 +95,19 @@ def forward(self, x, x_mask=None): x = self.conv2(x) - if self.norm_before: - x = self.bn2(x) - if self.downsample is not None: residual = self.downsample(residual) - x = self.se_layer(x, x_mask=x_mask) - x += residual - x = self.act2(x) - - if not self.norm_before: + if self.norm_before: + x = self.bn2(x) + x = self.se_layer(x, x_mask=x_mask) + x += residual + x = self.act2(x) + else: + x = self.act2(x) x = self.bn2(x) + x = self.se_layer(x, x_mask=x_mask) + x += residual if self.dropout_rate > 0: x = self.dropout(x) @@ -186,6 +187,8 @@ def forward(self, x, x_mask=None): Tensor with shape = (batch, out_channels, out_heigh, out_width). 
""" residual = x + if self.downsample is not None: + residual = self.downsample(residual) x = self.conv1(x) if self.norm_before: @@ -204,16 +207,14 @@ def forward(self, x, x_mask=None): x = self.conv3(x) if self.norm_before: x = self.bn3(x) - - if self.downsample is not None: - residual = self.downsample(residual) - - x = self.se_layer(x, x_mask=x_mask) - x += residual - x = self.act3(x) - - if not self.norm_before: + x = self.se_layer(x, x_mask=x_mask) + x += residual + x = self.act3(x) + else: + x = self.act3(x) x = self.bn3(x) + x = self.se_layer(x, x_mask=x_mask) + x += residual if self.dropout_rate > 0: x = self.dropout(x) diff --git a/hyperion/torch/layers/activation_factory.py b/hyperion/torch/layers/activation_factory.py index d07b184e..9d972f95 100644 --- a/hyperion/torch/layers/activation_factory.py +++ b/hyperion/torch/layers/activation_factory.py @@ -6,7 +6,7 @@ import torch.nn as nn -from .swish import Swish +from .swish import Swish, DoubleSwish, Swish6, DoubleSwish6 act_dict = { "elu": nn.ELU, @@ -33,6 +33,9 @@ "logsoftmax": nn.LogSoftmax, "alogsoftmax": nn.AdaptiveLogSoftmaxWithLoss, "swish": Swish, + "double_swish": DoubleSwish, + "swish6": Swish6, + "double_swish6": DoubleSwish6, } @@ -90,7 +93,6 @@ def create_from_str(activation_name, **kwargs): except: # activation didn't have inplace option del kwargs["inplace"] - pass return act_dict[activation_name](**kwargs) @@ -186,3 +188,9 @@ def get_config(activation): } if isinstance(activation, Swish): return {"name": "swish"} + if isinstance(activation, DoubleSwish): + return {"name": "double_swish"} + if isinstance(activation, Swish6): + return {"name": "swish6"} + if isinstance(activation, DoubleSwish6): + return {"name": "double_swish6"} diff --git a/hyperion/torch/layers/swish.py b/hyperion/torch/layers/swish.py index a313455e..62225ad9 100644 --- a/hyperion/torch/layers/swish.py +++ b/hyperion/torch/layers/swish.py @@ -36,3 +36,115 @@ def __repr__(self): def __str__(self): s = "{}()".format(self.__class__.__name__) return s + + +class Swish6(nn.Module): + """Swish activation class, clamped to 6 + y = min(x, 6) * sigmoid(min(x,6)) + """ + + def forward(self, x): + return SwishImplementation.apply(x.clamp(max=6)) + + def __repr__(self): + return self.__str__() + + def __str__(self): + s = "{}()".format(self.__class__.__name__) + return s + + +class DoubleSwishImplementation(torch.autograd.Function): + """ Implementation for DoubleSwish Activation from + https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py + + f(x) = x * torch.sigmoid(x-1) = swish(swish(x)), + where swish(x) = x * sigmoid(x). + + Memory-efficient derivative computation: + f'(x) = = x * s'(x) + x' * s(x) = x * s'(x) + s(x). + where s(x) = simoid(x), and s'(x) = s(x) * (1-s(x)). + + f'(x) = x * s(x) * (1-s(x)) + s(x) = f(x) * (1-s(x)) + s(x) + """ + + @staticmethod + def forward(ctx, x: torch.Tensor) -> torch.Tensor: + requires_grad = x.requires_grad + x_dtype = x.dtype + if x.dtype == torch.float16: + x = x.to(torch.float32) + + s = torch.sigmoid(x - 1.0) + y = x * s + + if requires_grad: + deriv = y * (1 - s) + s + # notes on derivative of x * sigmoid(x - 1): + # https://www.wolframalpha.com/input?i=d%2Fdx+%28x+*+sigmoid%28x-1%29%29 + # min \simeq -0.043638. Take floor as -0.043637 so it's a lower bound + # max \simeq 1.1990. Take ceil to be 1.2 so it's an upper bound. + # the combination of "+ torch.rand_like(deriv)" and casting to torch.uint8 (which + # floors), should be expectation-preserving. 
+ floor = -0.043637 + ceil = 1.2 + d_scaled = (deriv - floor) * (255.0 / (ceil - floor)) + torch.rand_like( + deriv + ) + d_int = d_scaled.to(torch.uint8) + ctx.save_for_backward(d_int) + if x_dtype == torch.float16 or torch.is_autocast_enabled(): + y = y.to(torch.float16) + return y + + @staticmethod + def backward(ctx, y_grad: torch.Tensor) -> torch.Tensor: + (d,) = ctx.saved_tensors + # the same constants as used in forward pass. + floor = -0.043637 + ceil = 1.2 + d = d * ((ceil - floor) / 255.0) + floor + return y_grad * d + + +class DoubleSwish(torch.nn.Module): + """ DoubleSwish activation + f(x) = x * torch.sigmoid(x-1) = swish(swish(x)), + where swish(x) = x * sigmoid(x). + """ + + def forward(self, x: torch.Tensor) -> torch.Tensor: + + if torch.jit.is_scripting() or torch.jit.is_tracing(): + return (x * torch.sigmoid(x - 1.0)).clamp(max=6) + + return DoubleSwishImplementation.apply(x) + + def __repr__(self): + return self.__str__() + + def __str__(self): + s = "{}()".format(self.__class__.__name__) + return s + + +class DoubleSwish6(torch.nn.Module): + """ DoubleSwish activation clamped to 6 + x = min(x, 6) + f(x) = x * torch.sigmoid(x-1) = swish(swish(x)), + where swish(x) = x * sigmoid(x). + """ + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = x.clamp(max=6) + if torch.jit.is_scripting() or torch.jit.is_tracing(): + return (x * torch.sigmoid(x - 1.0)).clamp(max=6) + + return DoubleSwishImplementation.apply(x) + + def __repr__(self): + return self.__str__() + + def __str__(self): + s = "{}()".format(self.__class__.__name__) + return s diff --git a/hyperion/torch/models/xvectors/efficient_net_xvector.py b/hyperion/torch/models/xvectors/efficient_net_xvector.py index a8663cd9..132bb51d 100644 --- a/hyperion/torch/models/xvectors/efficient_net_xvector.py +++ b/hyperion/torch/models/xvectors/efficient_net_xvector.py @@ -52,11 +52,12 @@ def __init__( head_norm_layer=None, use_norm=True, norm_before=True, + head_use_in_norm=False, embed_layer=0, proj_feats=None, ): - logging.info("making %s encoder network" % (effnet_type)) + logging.info("making %s encoder network", effnet_type) encoder_net = EN( effnet_type, in_channels, @@ -98,6 +99,7 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_in_norm=head_use_in_norm, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=in_feats, diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py index 1bce0f87..7af207c4 100644 --- a/hyperion/torch/models/xvectors/resnet1d_xvector.py +++ b/hyperion/torch/models/xvectors/resnet1d_xvector.py @@ -35,7 +35,7 @@ def __init__( head_norm_layer=None, use_norm=True, norm_before=True, - in_norm=False, + head_use_in_norm=False, embed_layer=0, proj_feats=None, ): @@ -62,6 +62,7 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_in_norm=head_use_in_norm, dropout_rate=dropout_rate, embed_layer=embed_layer, proj_feats=proj_feats, diff --git a/hyperion/torch/models/xvectors/resnet_xvector.py b/hyperion/torch/models/xvectors/resnet_xvector.py index c6889626..0e9eba22 100644 --- a/hyperion/torch/models/xvectors/resnet_xvector.py +++ b/hyperion/torch/models/xvectors/resnet_xvector.py @@ -46,6 +46,7 @@ def __init__( use_norm=True, norm_before=True, in_norm=False, + head_use_in_norm=False, embed_layer=0, proj_feats=None, se_r=16, @@ -94,6 +95,7 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, 
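+            # new option: batch-norm applied to the pooled embedding at the
+            # input of the classification head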
+ head_use_in_norm=head_use_in_norm, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=in_feats, diff --git a/hyperion/torch/models/xvectors/spinenet_xvector.py b/hyperion/torch/models/xvectors/spinenet_xvector.py index 203008be..0b27a840 100644 --- a/hyperion/torch/models/xvectors/spinenet_xvector.py +++ b/hyperion/torch/models/xvectors/spinenet_xvector.py @@ -50,6 +50,7 @@ def __init__( use_norm=True, norm_before=True, in_norm=False, + head_use_in_norm=False, embed_layer=0, proj_feats=None, se_r=16, @@ -102,6 +103,7 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_in_norm=head_use_in_norm, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=in_feats, diff --git a/hyperion/torch/models/xvectors/tdnn_xvector.py b/hyperion/torch/models/xvectors/tdnn_xvector.py index 530ca63b..38262cc3 100644 --- a/hyperion/torch/models/xvectors/tdnn_xvector.py +++ b/hyperion/torch/models/xvectors/tdnn_xvector.py @@ -43,11 +43,12 @@ def __init__( use_norm=True, norm_before=False, in_norm=False, + head_use_in_norm=False, embed_layer=0, proj_feats=None, ): - logging.info("making %s encoder network" % (tdnn_type)) + logging.info("making %s encoder network", tdnn_type) encoder_net = TF.create( tdnn_type, num_enc_blocks, @@ -83,6 +84,7 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_in_norm=head_use_in_norm, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=None, diff --git a/hyperion/torch/models/xvectors/transformer_xvector_v1.py b/hyperion/torch/models/xvectors/transformer_xvector_v1.py index 7c55844a..25e9c894 100644 --- a/hyperion/torch/models/xvectors/transformer_xvector_v1.py +++ b/hyperion/torch/models/xvectors/transformer_xvector_v1.py @@ -47,7 +47,6 @@ class TransformerXVectorV1(XVector): use_norm: if True use batch/layer norm norm_before: if True, use layer norm before layers, otherwise after - in_norm: add batchnorm at the input embed_layer: which layer to use to extract x-vectors proj_feats: add linear projection layer after the encoder to project feature dimension to proj_feats """ @@ -84,7 +83,7 @@ def __init__( head_norm_layer=None, use_norm=True, norm_before=False, - in_norm=False, + head_use_in_norm=False, embed_layer=0, proj_feats=None, ): @@ -128,6 +127,7 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_in_norm=head_use_in_norm, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=None, @@ -186,10 +186,6 @@ def enc_concat_after(self): def enc_ff_type(self): return self.encoder_net.ff_type - # @property - # def in_norm(self): - # return self.encoder_net.in_norm - def get_config(self): """Gets network config Returns: @@ -215,7 +211,6 @@ def get_config(self): "in_layer_type": self.in_layer_type, "enc_concat_after": self.enc_concat_after, } - #'in_norm': self.in_norm } config.update(base_config) return config diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 3807bbd8..8dc15fbc 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -11,6 +11,7 @@ import torch import torch.nn as nn +from ....utils.misc import filter_func_args from ...layer_blocks import TDNNBlock from ...layers import GlobalPool1dFactory as PF from ...narchs import ClassifHead, TorchNALoader @@ -18,10 +19,10 @@ from ...utils import eval_nnet_by_chunks, scale_seq_lengths -class XVectorTrainMode(Enum): - full = 0 - frozen = 1 
- ft_embed_affine = 2 +# class XVectorTrainMode(Enum): +# full = 0 +# frozen = 1 +# ft_embed_affine = 2 class XVector(TorchModel): @@ -46,6 +47,7 @@ def __init__( head_norm_layer=None, use_norm=True, norm_before=True, + head_use_in_norm=False, dropout_rate=0, embed_layer=0, in_feats=None, @@ -106,7 +108,7 @@ def __init__( # if head_norm_layer is none we use the global norm_layer if head_norm_layer is None and norm_layer is not None: - if norm_layer == "instance-norm" or norm_layer == "instance-norm-affine": + if norm_layer in ("instance-norm", "instance-norm-affine"): head_norm_layer = "batch-norm" else: head_norm_layer = norm_layer @@ -130,6 +132,7 @@ def __init__( use_norm=use_norm, norm_before=norm_before, dropout_rate=dropout_rate, + use_in_norm=head_use_in_norm, ) self.hid_act = hid_act @@ -137,6 +140,7 @@ def __init__( self.head_norm_layer = head_norm_layer self.use_norm = use_norm self.norm_before = norm_before + self.head_use_in_norm = head_use_in_norm self.dropout_rate = dropout_rate self.embed_layer = embed_layer @@ -282,12 +286,47 @@ def forward_logits(self, x, x_lengths=None, y=None): Returns: class logits tensor with shape=(batch, num_classes). """ + f = x max_in_length = x.size(-1) x = self._pre_enc(x) x = self.encoder_net(x) x, x_lengths = self._post_enc(x, x_lengths, max_in_length) p = self.pool_net(x, x_lengths=x_lengths) y = self.classif_net(p, y) + # if not self.training: + # fnf = ( + # torch.any(torch.any(torch.logical_not(torch.isfinite(f)), dim=1), dim=1) + # .sum() + # .cpu() + # .item() + # ) + # xnf = ( + # torch.any(torch.any(torch.logical_not(torch.isfinite(x)), dim=1), dim=1) + # .sum() + # .cpu() + # .item() + # ) + # pnf = ( + # torch.any(torch.logical_not(torch.isfinite(p)), dim=1) + # .sum() + # .cpu() + # .item() + # ) + # ynf = ( + # torch.any(torch.logical_not(torch.isfinite(y)), dim=1) + # .sum() + # .cpu() + # .item() + # ) + # # if xnf + pnf + ynf > 0: + # logging.warning("ff %d xnf %d pnf %d ynf %d", fnf, xnf, pnf, ynf) + # if xnf > 0: + # ii = torch.any( + # torch.any(torch.logical_not(torch.isfinite(x)), dim=1), dim=1 + # ) + # xx = x[ii] + # logging.info(f"xx={xx}") + return y def forward_hid_feats( @@ -510,6 +549,7 @@ def get_config(self): "head_norm_layer": self.head_norm_layer, "use_norm": self.use_norm, "norm_before": self.norm_before, + "head_use_in_norm": self.head_use_in_norm, "dropout_rate": self.dropout_rate, "embed_layer": self.embed_layer, "in_feats": self.in_feats, @@ -656,42 +696,38 @@ def valid_train_modes(): @staticmethod def filter_args(**kwargs): - # if "wo_norm" in kwargs: - # kwargs["use_norm"] = not kwargs["wo_norm"] - # del kwargs["wo_norm"] - - # if "norm_after" in kwargs: - # kwargs["norm_before"] = not kwargs["norm_after"] - # del kwargs["norm_after"] - # get arguments for pooling pool_args = PF.filter_args(**kwargs["pool_net"]) - - valid_args = ( - "num_classes", - "embed_dim", - "num_embed_layers", - "hid_act", - "loss_type", - "cos_scale", - "margin", - "margin_warmup_epochs", - "intertop_k", - "intertop_margin", - "num_subcenters", - "use_norm", - "norm_before", - "in_feats", - "proj_feats", - "dropout_rate", - "norm_layer", - "head_norm_layer", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - + args = filter_func_args(ClassifHead.__init__, kwargs) args["pool_net"] = pool_args return args + # valid_args = ( + # "num_classes", + # "embed_dim", + # "num_embed_layers", + # "hid_act", + # "loss_type", + # "cos_scale", + # "margin", + # "margin_warmup_epochs", + # "intertop_k", + # "intertop_margin", + 
# "num_subcenters", + # "use_norm", + # "norm_before", + # "in_feats", + # "proj_feats", + # "dropout_rate", + # "norm_layer", + # "head_norm_layer", + # "head_use_in_norm", + # ) + # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + # args["pool_net"] = pool_args + # return args + @staticmethod def add_class_args(parser, prefix=None, skip=set()): @@ -793,19 +829,6 @@ def add_class_args(parser, prefix=None, skip=set()): except: pass - # parser.add_argument( - # "--wo-norm", - # default=False, - # action="store_true", - # help="without batch normalization", - # ) - - # parser.add_argument( - # "--norm-after", - # default=False, - # action="store_true", - # help="batch normalizaton after activation", - # ) parser.add_argument( "--use-norm", default=True, @@ -820,6 +843,13 @@ def add_class_args(parser, prefix=None, skip=set()): help="batch normalizaton before activation", ) + parser.add_argument( + "--head-use-in-norm", + default=False, + action=ActionYesNo, + help="batch normalizaton at the head input", + ) + try: parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") except: diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index 5d179fdb..3e2997a6 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -3,12 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser +from jsonargparse import ActionParser, ArgumentParser, ActionYesNo import torch import torch.nn as nn from torch.nn import Linear +from ...utils.misc import filter_func_args from ..layer_blocks import FCBlock from ..layers import ActivationFactory as AF from ..layers import ArcLossOutput, CosLossOutput @@ -37,6 +38,7 @@ class ClassifHead(NetArch): norm_layer: norm_layer object or str indicating type norm layer, if None it uses BatchNorm1d use_norm: it True it uses layer/batch-normalization norm_before: if True, layer-norm is before the activation function + use_in_norm: put batchnorm at the input """ def __init__( @@ -57,6 +59,7 @@ def __init__( use_norm=True, norm_before=True, dropout_rate=0, + use_in_norm=False, ): super().__init__() @@ -67,6 +70,7 @@ def __init__( self.embed_dim = embed_dim self.num_classes = num_classes self.norm_layer = norm_layer + self.use_in_norm = use_in_norm if use_norm: norm_groups = None @@ -88,6 +92,10 @@ def __init__( self.intertop_margin = intertop_margin self.num_subcenters = num_subcenters + if self.use_in_norm: + assert not self.norm_before + self.in_norm = self._norm_layer(prev_feats) + prev_feats = in_feats fc_blocks = [] for i in range(num_embed_layers - 1): @@ -109,16 +117,21 @@ def __init__( else: act = hid_act - fc_blocks.append( - FCBlock( - prev_feats, - embed_dim, - activation=act, - norm_layer=self._norm_layer, - use_norm=use_norm, - norm_before=norm_before, + if self.use_in_norm: + fc_blocks.append( + FCBlock(prev_feats, embed_dim, activation=act, use_norm=False) + ) + else: + fc_blocks.append( + FCBlock( + prev_feats, + embed_dim, + activation=act, + norm_layer=self._norm_layer, + use_norm=use_norm, + norm_before=norm_before, + ) ) - ) self.fc_blocks = nn.ModuleList(fc_blocks) @@ -270,6 +283,9 @@ def put_layers_in_eval_mode(self, layer_list): def forward(self, x, y=None): + if self.use_in_norm: + x = self.in_norm(x) + for l in range(self.num_embed_layers): x = self.fc_blocks[l](x) @@ -286,6 +302,9 @@ def forward_hid_feats(self, x, y=None, return_layers=None, return_logits=False): if return_layers 
is None: return_layers = [] + if self.use_in_norm: + x = self.in_norm(x) + h = [] for l in range(self.num_embed_layers): x = self.fc_blocks[l](x) @@ -303,6 +322,9 @@ def forward_hid_feats(self, x, y=None, return_layers=None, return_logits=False): def extract_embed(self, x, embed_layer=0): + if self.use_in_norm: + x = self.in_norm(x) + for l in range(embed_layer): x = self.fc_blocks[l](x) @@ -341,6 +363,7 @@ def get_config(self): "use_norm": self.use_norm, "norm_before": self.norm_before, "dropout_rate": self.dropout_rate, + "use_in_norm": self.use_in_norm, } base_config = super().get_config() @@ -357,24 +380,7 @@ def filter_args(**kwargs): kwargs["norm_before"] = not kwargs["norm_after"] del kwargs["norm_after"] - valid_args = ( - "num_classes", - "embed_dim", - "num_embed_layers", - "hid_act", - "loss_type", - "s", - "margin", - "margin_warmup_epochs", - "intertop_k", - "intertop_margin", - "num_subcenters", - "use_norm", - "norm_before", - "dropout_rate", - "norm_layer", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args = filter_func_args(ClassifHead.__init__, kwargs) return args @staticmethod @@ -455,17 +461,24 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--wo-norm", default=False, - action="store_true", + action=ActionYesNo, help="without batch normalization", ) parser.add_argument( "--norm-after", default=False, - action="store_true", + action=ActionYesNo, help="batch normalizaton after activation", ) + parser.add_argument( + "--use-in-norm", + default=False, + action=ActionYesNo, + help="batch normalizaton in the classif head input", + ) + try: parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") except: diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 4a66f0eb..9541d7b0 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -84,35 +84,6 @@ def __init__( super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # loss=loss, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - self.feat_extractor = feat_extractor if device is not None: self.feat_extractor.to(device) @@ -135,10 +106,6 @@ def train_epoch(self, data_loader): if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - # input_data, target = ( - # data[self.input_key].to(self.device), - # data[self.target_key].to(self.device), - # ) input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) with torch.no_grad(): From 69884850fbad2a701c6e024671199f9d4ee34011 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 27 Mar 2023 16:30:02 -0400 Subject: [PATCH 090/154] add results with new ecapa v3 --- egs/voxceleb/v1.1/README.md | 46 ++++++++----------- ...onfig_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | 6 +-- ...config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | 6 +-- 
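A note on the `classif_head.py` hunk above: the `filter_args` rewrite replaces a hand-maintained `valid_args` tuple with `filter_func_args(ClassifHead.__init__, kwargs)`, so the accepted keys are derived from the constructor signature instead of being listed twice; `xvector_trainer_from_wav.py` applies the same pattern via `filter_func_args(super().__init__, locals())`. The real helper is added in `hyperion/utils/misc.py`; a minimal sketch of the idea (illustrative only, not the actual implementation) could look like:

```python
import inspect

def filter_func_args(func, kwargs):
    """Keep only the entries of kwargs that name a keyword parameter of func.

    Sketch for illustration; hyperion's version in hyperion/utils/misc.py
    may differ in signature and corner cases.
    """
    sig = inspect.signature(func)
    valid = {
        name
        for name, p in sig.parameters.items()
        if name != "self"
        and p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY)
    }
    return {k: v for k, v in kwargs.items() if k in valid}
```

With this, a newly added constructor argument such as `use_in_norm` automatically survives `filter_args` without anyone remembering to extend a tuple.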
 egs/voxceleb/v1.1/run_030_extract_xvectors.sh |  4 +-
 egs/voxceleb/v1.1/run_040_eval_be.sh          |  2 +-
 egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py   | 36 +++++++--------
 6 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md
index 1d438868..1ee9468f 100644
--- a/egs/voxceleb/v1.1/README.md
+++ b/egs/voxceleb/v1.1/README.md
@@ -89,63 +89,57 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
 | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
 | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
-| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.10 | 0.069 | 0.124 |
-| | | | Cosine + AS-Norm | 1.09 | 0.065 | 0.105 |
-| | | | Cosine + QMF | 0.92 | 0.059 | 0.090 |
+| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.11 | 0.069 | 0.126 |
+| | | | Cosine + AS-Norm | 1.10 | 0.065 | 0.108 |
+| | | | Cosine + QMF | 0.95 | 0.059 | 0.084 |
 | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.68 | 0.052 | 0.088 |
-| | | | Cosine + AS-Norm | 0.63 | 0.048 | 0.083 |
-| | | | Cosine + QMF | 0.66 | 0.047 | 0.090 |
+| | | | Cosine + AS-Norm | 0.63 | 0.049 | 0.083 |
+| | | | Cosine + QMF | 0.57 | 0.037 | 0.071 |
 | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | |
 | | | | Cosine + AS-Norm | | | |
 | | | | Cosine + QMF | || |
-
-
-
-
 ### VoxCeleb 1 Entire-Clean trial list

 | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
 | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
 | config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.16 | 0.073 | 0.130 |
-| | | | Cosine + AS-Norm | 1.13 | 0.068 | 0.117 |
-| | | | Cosine + QMF | 1.06 | 0.065 | 0.108 |
+| | | | Cosine + AS-Norm | 1.13 | 0.068 | 0.118 |
+| | | | Cosine + QMF | 1.06 | 0.064 | 0.112 |
 | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.85 | 0.055 | 0.100 |
-| | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.088 |
-| | | | Cosine + QMF | 0.90 | 0.053 | 0.090 |
+| | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.087 |
+| | | | Cosine + QMF | 0.76 | 0.047 | 0.083 |
 | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | |
 | | | | Cosine + AS-Norm | | | |
 | | | | Cosine + QMF | | | |
-
 ### VoxCeleb 1 Hard-Clean trial list

 | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
 | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
 | config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.10 | 0.128 | 0.209 |
-| | | | Cosine + AS-Norm | 1.99 | 0.117 | 0.191 |
-| | | | Cosine + QMF | 1.82 | 0.111 | 0.183 |
+| | | | Cosine + AS-Norm | 1.99 | 0.118 | 0.190 |
+| | | | Cosine + QMF | 1.84 | 0.111 | 0.184 |
 | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.66 | 0.103 | 0.168 |
-| | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.150 |
-| | | | Cosine + QMF | 1.62 | 0.096 | 0.158 |
+| | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.151 |
+| | | | Cosine + QMF | 1.44 | 0.087 | 0.145 |
 | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | |
 | | | | Cosine + AS-Norm | | | |
 | | | | Cosine + QMF | | | |
-
 ### VoxSRC2022 dev

 | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
 | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
-| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.87 | 0.185 | 0.301 |
-| | | | Cosine + AS-Norm | 2.84 | 0.182 | 0.307 |
-| | | | Cosine + QMF | 2.62 | 0.175 | 0.282 |
+| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.87 | 0.185 | 0.304 |
+| | | | Cosine + AS-Norm | 2.84 | 0.182 | 0.304 |
+| | | | Cosine + QMF | 2.61 | 0.172 | 0.283 |
 | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.33 | 0.156 | 0.260 |
-| | | | Cosine + AS-Norm | 2.19 | 0.145 | 0.265 |
-| | | | Cosine + QMF | 2.54 | 0.179 | 0.304 |
-| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | || |
+| | | | Cosine + AS-Norm | 2.19 | 0.144 | 0.263 |
+| | | | Cosine + QMF | 2.06 | 0.137 | 0.251 |
+| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | || |
 | | | | Cosine + AS-Norm | | | |
-| | | | Cosine + QMF | | | |
+| | | | Cosine + QMF | | | |

 ## Results before 2023

diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh
index b093b37a..f2622b0e 100644
--- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh
+++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh
@@ -12,14 +12,14 @@ nnet_data=voxceleb2cat_train

 # x-vector cfg
 nnet_type=resnet1d
-nnet_name=${feat_type}_ecapatdnn2048x4.v4.0
+nnet_name=${feat_type}_ecapatdnn2048x4.v3.0

-nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v4.0.yaml
+nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml
 nnet_s1_name=$nnet_name.s1
 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name
 nnet_s1=$nnet_s1_dir/model_ep0035.pth

-nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v4.0.yaml
+nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml
 nnet_s2_name=${nnet_name}.s2
 nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name
 nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth
diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh
index 5288f66b..a3ad0c29 100644
--- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh
+++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh
@@ -12,14 +12,14 @@ nnet_data=voxceleb2cat_train

 # x-vector cfg
 nnet_type=resnet1d
-nnet_name=${feat_type}_ecapatdnn512x3.v3.12
+nnet_name=${feat_type}_ecapatdnn512x3.v3.0

-nnet_s1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1_v3.12.yaml
+nnet_s1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml
 nnet_s1_name=$nnet_name.s1
 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name
 nnet_s1=$nnet_s1_dir/model_ep0040.pth

-nnet_s2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage2_v3.12.yaml
+nnet_s2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml
 nnet_s2_name=${nnet_name}.s2
 nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name
 nnet_s2=$nnet_s2_dir/model_ep0030.pth
diff --git a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh
index 5bd2c17d..8c0949f4 100755
--- a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh
+++ b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh
@@ -51,14 +51,14 @@ if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qm
   if [ $plda_num_augs -eq 0 ]; then
     steps_xvec/extract_xvectors_from_wav.sh \
       --cmd "$xvec_cmd" --nj 100 ${xvec_args} \
-      --random-utt-length true --min-utt-length 200 --max-utt-length 14000 \
+      --random-utt-length true --min-utt-length 200 --max-utt-length 3000 \
       --feat-config $feat_config \
       $nnet data/${name} \
       $xvector_dir/${name}
   else
     steps_xvec/extract_xvectors_from_wav.sh \
       --cmd "$xvec_cmd" --nj 300 ${xvec_args} \
-      --random-utt-length true --min-utt-length 200 --max-utt-length 14000 \
+      --random-utt-length true --min-utt-length 200 --max-utt-length 3000 \
       --feat-config $feat_config --aug-config $plda_aug_config --num-augs $plda_num_augs \
       $nnet data/${name} \
       $xvector_dir/${name}_augx${plda_num_augs} \
diff --git a/egs/voxceleb/v1.1/run_040_eval_be.sh b/egs/voxceleb/v1.1/run_040_eval_be.sh
index 4c7c2091..0780584c 100755
--- a/egs/voxceleb/v1.1/run_040_eval_be.sh
+++ b/egs/voxceleb/v1.1/run_040_eval_be.sh
@@ -209,7 +209,7 @@ if [ "$do_qmf" == "true" ];then
$score_cosine_qmf_dir/voxceleb2_qmf_scores fi - stage=9 + if [ $stage -le 8 ];then echo "Eval Voxceleb 1 with Cosine scoring" diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py index e6b68ae8..7034126a 100755 --- a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py +++ b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py @@ -110,30 +110,27 @@ def eval_plda( logging.info("read num_frames") u2nf = Utt2Info.load(num_frames_file) - # enroll_nf = np.log( - # np.clip( - # u2nf.filter(enroll_segs).info.astype(float) / 100, a_min=0.1, a_max=15.0, - # ) - # ) - # test_nf = np.log( - # np.clip( - # u2nf.filter(ndx.seg_set).info.astype(float) / 100, a_min=0.1, a_max=15.0, - # ) - # ) + min_dur = 0.1 + max_dur = 30.0 + enroll_nf = np.log( np.clip( - u2nf.filter(enroll_segs).info.astype(float) / 100 - 1.0, - a_min=0.1, - a_max=15.0, + u2nf.filter(enroll_segs).info.astype(float) / 100, + a_min=min_dur, + a_max=max_dur, ) ) test_nf = np.log( np.clip( - u2nf.filter(ndx.seg_set).info.astype(float) / 100 - 1.0, - a_min=0.1, - a_max=15.0, + u2nf.filter(ndx.seg_set).info.astype(float) / 100, + a_min=min_dur, + a_max=max_dur, ) ) + log_min_dur = np.log(min_dur) + log_max_dur = np.log(max_dur) + enroll_nf = (enroll_nf - log_min_dur) / (log_max_dur - log_min_dur) + test_nf = (test_nf - log_min_dur) / (log_max_dur - log_min_dur) t1 = time.time() logging.info("computing llr") @@ -166,8 +163,9 @@ def eval_plda( dt = time.time() - t1 num_trials = len(enroll) * x_t.shape[0] logging.info( - "scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms." - % (dt, dt / num_trials * 1000) + "scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.", + dt, + dt / num_trials * 1000, ) q_measures = { @@ -193,7 +191,7 @@ def eval_plda( return logging.info("applying qmf") - scores_fus = [scores.ravel()] + # scores_fus = [scores.ravel()] scores_fus = [scores_norm.ravel()] for q_name in ["maxnf", "minnf", "maxcohmu", "mincohmu"]: scores_fus.append(q_measures[q_name].ravel()) From b475d370538e2579a3d7b58e0f7650268f81e7c8 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 27 Mar 2023 17:09:31 -0400 Subject: [PATCH 091/154] changed default config --- egs/voxceleb/v1.1/default_config.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/voxceleb/v1.1/default_config.sh b/egs/voxceleb/v1.1/default_config.sh index 8f713463..fd0e1bb1 120000 --- a/egs/voxceleb/v1.1/default_config.sh +++ b/egs/voxceleb/v1.1/default_config.sh @@ -1 +1 @@ -global_conf/config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh \ No newline at end of file +global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh \ No newline at end of file From 537071616378bafd38ee046e16604574fa525a77 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 14 Apr 2023 08:59:35 -0400 Subject: [PATCH 092/154] adapted adv.v1 recipe to persephone --- egs/voxceleb/adv.v1.1/README.md | 51 +- egs/voxceleb/adv.v1.1/conf | 1 - .../adv.v1.1/conf/advft_resnet34_xvec.yaml | 67 + egs/voxceleb/adv.v1.1/conf/clsp.conf | 11 + egs/voxceleb/adv.v1.1/conf/coe_gpu_long.conf | 13 + egs/voxceleb/adv.v1.1/conf/coe_gpu_rtx.conf | 11 + egs/voxceleb/adv.v1.1/conf/coe_gpu_short.conf | 11 + .../adv.v1.1/conf/fbank80_stmn_16k.yaml | 12 + .../adv.v1.1/conf/reverb_noise_aug.yaml | 35 + .../adv.v1.1/conf/train_lresnet34_xvec.yaml | 68 + .../adv.v1.1/conf/train_resetdnn_xvec.yaml | 79 ++ .../adv.v1.1/conf/train_resnet34_xvec.yaml | 68 + egs/voxceleb/adv.v1.1/conf/vad_16k.yaml | 8 + ...k80_stmn_lresnet34_transfer_resetdnn.v1.sh | 86 +- 
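In the `eval_be_cos_qmf.py` hunk of PATCH 090 above, the duration-based quality measures are now computed by clipping the segment duration to `[min_dur, max_dur] = [0.1, 30]` seconds, taking the log, and min-max normalizing into `[0, 1]`, replacing the earlier clipped `log(dur - 1.0)` variant. A standalone check of that transformation (the frame counts below are made-up examples, assuming 10 ms frames):

```python
import numpy as np

min_dur, max_dur = 0.1, 30.0  # seconds, as in the patch
num_frames = np.array([250.0, 1200.0, 60000.0])  # hypothetical utterances

dur = np.clip(num_frames / 100, min_dur, max_dur)  # 2.5 s, 12 s, 600 s -> 30 s
log_dur = np.log(dur)
# map log-duration into [0, 1] so all QMF inputs share a common scale
qmf_dur = (log_dur - np.log(min_dur)) / (np.log(max_dur) - np.log(min_dur))
print(qmf_dur.round(2))  # [0.56 0.84 1.  ]
```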
...34_transfer_resetdnn.v1_wavegan_defense.sh | 86 +- ...net34_transfer_fbank40_stmn_resetdnn.v1.sh | 68 +- ...ank80_stmn_resnet34_transfer_lresnet.v1.sh | 69 +- ...nk80_stmn_resnet34_transfer_resetdnn.v1.sh | 70 +- egs/voxceleb/adv.v1.1/local | 1 - .../adv.v1.1/local/attack_analysis.py | 200 +++ .../adv.v1.1/local/attack_analysis.sh | 79 ++ .../local/calibrate_voxceleb1_o_clean.sh | 55 + egs/voxceleb/adv.v1.1/local/make_musan.py | 189 +++ egs/voxceleb/adv.v1.1/local/make_musan.sh | 48 + egs/voxceleb/adv.v1.1/local/make_rirs_data.sh | 29 + .../adv.v1.1/local/make_trials_subset.py | 61 + .../adv.v1.1/local/make_voxceleb1_o.pl | 180 +++ .../adv.v1.1/local/make_voxceleb2cat.pl | 136 ++ egs/voxceleb/adv.v1.1/local/score_dcf.py | 88 ++ .../adv.v1.1/local/score_voxceleb1_o_clean.sh | 23 + .../local/score_voxceleb1_single_cond.sh | 21 + egs/voxceleb/adv.v1.1/run_002_compute_evad.sh | 1 - ...run_004_prepare_victim_xvec_train_data.sh} | 4 +- .../adv.v1.1/run_005_train_victim_xvector.sh | 58 + ...n_006_prepare_transfer_xvec_train_data.sh} | 0 .../run_007_train_transfer_xvector.sh | 68 + .../run_008_adv_finetune_victim_xvector.sh | 131 ++ ... run_009_extract_xvectors_victim_model.sh} | 10 +- ...un_010_extract_xvectors_transfer_model.sh} | 11 +- ...del.sh => run_011_eval_be_victim_model.sh} | 0 ...l.sh => run_012_eval_be_transfer_model.sh} | 0 ...ks.sh => run_013_eval_whitebox_attacks.sh} | 0 ...run_014_eval_transfer_blackbox_attacks.sh} | 0 ...itebox_attacks_with_randsmooth_defense.sh} | 178 --- ...ttacks_with_randsmooth_wavegan_defense.sh} | 0 ...h => run_017_eval_art_whitebox_attacks.sh} | 842 ++++++----- ..._018_eval_art_transfer_blackbox_attacks.sh | 633 +++++++++ ..._054_eval_art_transfer_blackbox_attacks.sh | 1260 ----------------- egs/voxceleb/adv.v1/conf | 1 - ...g_victim_lresnet34_transfer_resetdnn.v1.sh | 62 +- ...ig_victim_resnet34_transfer_resetdnn.v1.sh | 128 +- egs/voxceleb/adv.v1/local | 1 - egs/voxceleb/adv.v1/run_002_compute_evad.sh | 1 - egs/voxceleb/adv.v1/run_003_compute_fbank.sh | 69 - .../adv.v1/run_003_prepare_noises_rirs.sh | 67 + .../adv.v1/run_004_prepare_augment.sh | 123 -- .../run_004_prepare_victim_xvec_train_data.sh | 42 + .../adv.v1/run_005_compute_fbank_augment.sh | 57 - ...un_006_prepare_transfer_xvec_train_data.sh | 48 + .../run_008_extract_xvectors_victim_model.sh | 37 + .../run_010_prepare_victim_xvec_train_data.sh | 45 - .../adv.v1/run_011_train_victim_xvector.sh | 76 - ...un_012_prepare_transfer_xvec_train_data.sh | 53 - .../adv.v1/run_013_train_transfer_xvector.sh | 102 -- .../run_030_extract_xvectors_victim_model.sh | 38 - egs/voxceleb/v1.1/README.md | 24 +- ...rain_idrnd_resnet100_xvec_stage1_v3.0.yaml | 72 + ...rain_idrnd_resnet100_xvec_stage2_v3.0.yaml | 69 + ...onfig_fbank80_stmn_idrnd_resnet100.v3.0.sh | 44 + egs/voxceleb/v1.2/README.md | 263 ++++ egs/voxceleb/v1.2/cmd.sh | 28 + egs/voxceleb/v1.2/conf/clsp.conf | 11 + egs/voxceleb/v1.2/conf/coe_gpu_bigmem.conf | 11 + egs/voxceleb/v1.2/conf/coe_gpu_long.conf | 13 + egs/voxceleb/v1.2/conf/coe_gpu_rtx.conf | 11 + egs/voxceleb/v1.2/conf/coe_gpu_short.conf | 11 + egs/voxceleb/v1.2/conf/coe_gpu_v100.conf | 11 + .../v1.2/conf/fbank80_specaug1_stmn_16k.yaml | 24 + egs/voxceleb/v1.2/conf/fbank80_stmn_16k.yaml | 12 + ...rain_ecapatdnn2048x4_xvec_stage1_v3.0.yaml | 95 ++ ...rain_ecapatdnn2048x4_xvec_stage2_v3.0.yaml | 70 + ...train_ecapatdnn512x3_xvec_stage1_v3.0.yaml | 93 ++ ...train_ecapatdnn512x3_xvec_stage2_v3.0.yaml | 69 + egs/voxceleb/v1.2/conf/vad_16k.yaml | 8 + egs/voxceleb/v1.2/datapath.sh | 23 + 
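Besides hyperion's native attacks, the renamed `run_017`/`run_018` scripts in this file list drive attacks through ART, which the updated README installs with `pip install adversarial-robustness-toolbox[pytorch]`; hyperion's wrapper is `hyperion/torch/adv_attacks/art_attack_factory.py`, also modified in this patch. Independent of that factory, the underlying ART calls look roughly like the following sketch, where `model`, `num_classes`, and `x_benign` are placeholders:

```python
import torch.nn as nn
from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import PyTorchClassifier

# model: a trained torch.nn.Module mapping waveforms to speaker logits
classifier = PyTorchClassifier(
    model=model,
    loss=nn.CrossEntropyLoss(),
    input_shape=(16000 * 4,),  # e.g. 4 s of 16 kHz audio samples
    nb_classes=num_classes,
)
attack = FastGradientMethod(estimator=classifier, eps=0.004)
x_adv = attack.generate(x=x_benign)  # numpy batch of perturbed waveforms
```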
egs/voxceleb/v1.2/default_config.sh | 1 + ...onfig_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | 44 + ...config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | 45 + egs/voxceleb/v1.2/path.sh | 5 + egs/voxceleb/v1.2/run_001_prepare_data.sh | 50 + .../eval_cosine_scoring_from_adv_test_wav.sh | 2 +- ...osine_scoring_from_adv_test_wav_wavegan.sh | 2 +- .../eval_cosine_scoring_from_art_test_wav.sh | 2 +- ...sine_scoring_from_transfer_adv_test_wav.sh | 2 +- ...sine_scoring_from_transfer_art_test_wav.sh | 2 +- .../eval_cosine_scoring_from_test_wav.sh | 2 +- .../xvectors/eval_xvec_logits_from_wav.sh | 2 +- hyperion/bin/adv_finetune_xvector_from_wav.py | 482 +++++++ ...l_xvec_cosine_scoring_from_adv_test_wav.py | 14 +- ...osine_scoring_from_adv_test_wav_wavegan.py | 15 +- ...l_xvec_cosine_scoring_from_art_test_wav.py | 57 +- .../eval_xvec_cosine_scoring_from_test_wav.py | 44 +- ...sine_scoring_from_transfer_adv_test_wav.py | 12 +- ...sine_scoring_from_transfer_art_test_wav.py | 27 +- hyperion/bin/prepare_data.py | 41 + hyperion/bin/train_xvector_from_wav.py | 9 +- hyperion/data_prep/__init__.py | 8 + hyperion/data_prep/data_prep.py | 56 + hyperion/data_prep/voxceleb2.py | 169 +++ .../torch/adv_attacks/art_attack_factory.py | 346 +++-- hyperion/torch/adv_attacks/attack_factory.py | 16 +- hyperion/utils/misc.py | 50 + 112 files changed, 5507 insertions(+), 3030 deletions(-) delete mode 120000 egs/voxceleb/adv.v1.1/conf create mode 100644 egs/voxceleb/adv.v1.1/conf/advft_resnet34_xvec.yaml create mode 100644 egs/voxceleb/adv.v1.1/conf/clsp.conf create mode 100644 egs/voxceleb/adv.v1.1/conf/coe_gpu_long.conf create mode 100644 egs/voxceleb/adv.v1.1/conf/coe_gpu_rtx.conf create mode 100644 egs/voxceleb/adv.v1.1/conf/coe_gpu_short.conf create mode 100644 egs/voxceleb/adv.v1.1/conf/fbank80_stmn_16k.yaml create mode 100644 egs/voxceleb/adv.v1.1/conf/reverb_noise_aug.yaml create mode 100644 egs/voxceleb/adv.v1.1/conf/train_lresnet34_xvec.yaml create mode 100644 egs/voxceleb/adv.v1.1/conf/train_resetdnn_xvec.yaml create mode 100644 egs/voxceleb/adv.v1.1/conf/train_resnet34_xvec.yaml create mode 100644 egs/voxceleb/adv.v1.1/conf/vad_16k.yaml delete mode 120000 egs/voxceleb/adv.v1.1/local create mode 100755 egs/voxceleb/adv.v1.1/local/attack_analysis.py create mode 100755 egs/voxceleb/adv.v1.1/local/attack_analysis.sh create mode 100755 egs/voxceleb/adv.v1.1/local/calibrate_voxceleb1_o_clean.sh create mode 100755 egs/voxceleb/adv.v1.1/local/make_musan.py create mode 100755 egs/voxceleb/adv.v1.1/local/make_musan.sh create mode 100755 egs/voxceleb/adv.v1.1/local/make_rirs_data.sh create mode 100755 egs/voxceleb/adv.v1.1/local/make_trials_subset.py create mode 100755 egs/voxceleb/adv.v1.1/local/make_voxceleb1_o.pl create mode 100755 egs/voxceleb/adv.v1.1/local/make_voxceleb2cat.pl create mode 100755 egs/voxceleb/adv.v1.1/local/score_dcf.py create mode 100755 egs/voxceleb/adv.v1.1/local/score_voxceleb1_o_clean.sh create mode 100755 egs/voxceleb/adv.v1.1/local/score_voxceleb1_single_cond.sh rename egs/voxceleb/adv.v1.1/{run_010_prepare_victim_xvec_train_data.sh => run_004_prepare_victim_xvec_train_data.sh} (94%) create mode 100755 egs/voxceleb/adv.v1.1/run_005_train_victim_xvector.sh rename egs/voxceleb/adv.v1.1/{run_012_prepare_transfer_xvec_train_data.sh => run_006_prepare_transfer_xvec_train_data.sh} (100%) create mode 100755 egs/voxceleb/adv.v1.1/run_007_train_transfer_xvector.sh create mode 100755 egs/voxceleb/adv.v1.1/run_008_adv_finetune_victim_xvector.sh rename egs/voxceleb/adv.v1.1/{run_030_extract_xvectors_victim_model.sh 
=> run_009_extract_xvectors_victim_model.sh} (81%) rename egs/voxceleb/adv.v1.1/{run_031_extract_xvectors_transfer_model.sh => run_010_extract_xvectors_transfer_model.sh} (81%) rename egs/voxceleb/adv.v1.1/{run_040_eval_be_victim_model.sh => run_011_eval_be_victim_model.sh} (100%) rename egs/voxceleb/adv.v1.1/{run_041_eval_be_transfer_model.sh => run_012_eval_be_transfer_model.sh} (100%) rename egs/voxceleb/adv.v1.1/{run_043_eval_whitebox_attacks.sh => run_013_eval_whitebox_attacks.sh} (100%) rename egs/voxceleb/adv.v1.1/{run_044_eval_transfer_blackbox_attacks.sh => run_014_eval_transfer_blackbox_attacks.sh} (100%) rename egs/voxceleb/adv.v1.1/{run_045_eval_whitebox_attacks_with_randsmooth_defense.sh => run_015_eval_whitebox_attacks_with_randsmooth_defense.sh} (67%) rename egs/voxceleb/adv.v1.1/{run_046_eval_whitebox_attacks_with_randsmooth_wavegan_defense.sh => run_016_eval_whitebox_attacks_with_randsmooth_wavegan_defense.sh} (100%) rename egs/voxceleb/adv.v1.1/{run_053_eval_art_whitebox_attacks.sh => run_017_eval_art_whitebox_attacks.sh} (54%) create mode 100755 egs/voxceleb/adv.v1.1/run_018_eval_art_transfer_blackbox_attacks.sh delete mode 100755 egs/voxceleb/adv.v1.1/run_054_eval_art_transfer_blackbox_attacks.sh delete mode 120000 egs/voxceleb/adv.v1/conf delete mode 120000 egs/voxceleb/adv.v1/local delete mode 100755 egs/voxceleb/adv.v1/run_003_compute_fbank.sh create mode 100755 egs/voxceleb/adv.v1/run_003_prepare_noises_rirs.sh delete mode 100755 egs/voxceleb/adv.v1/run_004_prepare_augment.sh create mode 100755 egs/voxceleb/adv.v1/run_004_prepare_victim_xvec_train_data.sh delete mode 100755 egs/voxceleb/adv.v1/run_005_compute_fbank_augment.sh create mode 100755 egs/voxceleb/adv.v1/run_006_prepare_transfer_xvec_train_data.sh create mode 100755 egs/voxceleb/adv.v1/run_008_extract_xvectors_victim_model.sh delete mode 100755 egs/voxceleb/adv.v1/run_010_prepare_victim_xvec_train_data.sh delete mode 100755 egs/voxceleb/adv.v1/run_011_train_victim_xvector.sh delete mode 100755 egs/voxceleb/adv.v1/run_012_prepare_transfer_xvec_train_data.sh delete mode 100755 egs/voxceleb/adv.v1/run_013_train_transfer_xvector.sh delete mode 100755 egs/voxceleb/adv.v1/run_030_extract_xvectors_victim_model.sh create mode 100644 egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.0.sh create mode 100644 egs/voxceleb/v1.2/README.md create mode 100755 egs/voxceleb/v1.2/cmd.sh create mode 100644 egs/voxceleb/v1.2/conf/clsp.conf create mode 100644 egs/voxceleb/v1.2/conf/coe_gpu_bigmem.conf create mode 100644 egs/voxceleb/v1.2/conf/coe_gpu_long.conf create mode 100644 egs/voxceleb/v1.2/conf/coe_gpu_rtx.conf create mode 100644 egs/voxceleb/v1.2/conf/coe_gpu_short.conf create mode 100644 egs/voxceleb/v1.2/conf/coe_gpu_v100.conf create mode 100644 egs/voxceleb/v1.2/conf/fbank80_specaug1_stmn_16k.yaml create mode 100644 egs/voxceleb/v1.2/conf/fbank80_stmn_16k.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/vad_16k.yaml create mode 100644 egs/voxceleb/v1.2/datapath.sh create mode 120000 
egs/voxceleb/v1.2/default_config.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh create mode 100755 egs/voxceleb/v1.2/path.sh create mode 100755 egs/voxceleb/v1.2/run_001_prepare_data.sh create mode 100755 hyperion/bin/adv_finetune_xvector_from_wav.py create mode 100755 hyperion/bin/prepare_data.py create mode 100644 hyperion/data_prep/__init__.py create mode 100644 hyperion/data_prep/data_prep.py create mode 100644 hyperion/data_prep/voxceleb2.py diff --git a/egs/voxceleb/adv.v1.1/README.md b/egs/voxceleb/adv.v1.1/README.md index bccc494b..49801544 100644 --- a/egs/voxceleb/adv.v1.1/README.md +++ b/egs/voxceleb/adv.v1.1/README.md @@ -1,25 +1,15 @@ # VoxCeleb Adversarial Attacks Version 1.1 -Last update 2021/05/17 +Last update 2023/04/10 Recipe to evaluate Adversarial Attacks to x-Vector Speaker Verification Systems -## Differences w.r.t VoxCeleb adv.v1 recipe - -In recipe version V1: - - We compute speech augmentations and acoustic features offline and dump them to disk. - - Augmentation is performed using Kaldi scripts and wav-reverbate tool - - Babble noise is created on-the-fly when computing features by mixing 3-7 single speaker files. - -In this recipe: - - We compute speech augmentations and acoustic features are computed always on-the-fly, - we don't dump any features to disk. - - Augmentation is performed using Hyperin SpeechAugment class. - - The behavior of this class is controlled - by the the configuration file `conf/reverb_noise_aug.yml`, - which mimics the proportions of noise and RIR types, and SNRs used in the V1 or the recipe. - - Babble noise is created offline by mixing 3-10 single speaker files. +## Setup +To run attacks with Adversarial Robustness toolbox, you need to install it in the environment by +``` +pip install adversarial-robustness-toolbox[pytorch] +``` ## Threat Model @@ -92,48 +82,45 @@ run_0*.sh --config-file global_conf/config_victim_resnet34_transfer_lresnet.v1.s - `run_002_compute_evad.sh` - Computes Energy VAD for all datasets - - `run_002b_compute_fbank.sh` - - Computes log-filter-banks acoustic features for all datasets - - `run_003_prepare_noises_rirs.sh` - Prepares MUSAN noises, music to be used by SpeechAugment class. - Creates Babble noise from MUSAN speech to be used by SpeechAugment class. - Prepares RIRs by compacting then into HDF5 files, to be used by SpeechAugment class. - - `run_010_prepare_xvec_train_data.sh` + - `run_004_prepare_victim_xvec_train_data.sh` - Prepares audios train the victim x-vector model - Transforms all the audios that we are going to use to train the x-vector into a common format, e.g., .flac. - Removes silence from the audios - Removes utterances shorter than 4secs and speakers with less than 8 utterances. 
     - Creates training and validation lists for x-vector training

-  - `run_011_train_victim_xvector.sh`
+  - `run_005_train_victim_xvector.sh`
     - Trains the victim x-vector network

-  - `run_012_prepare_transfer_xvec_train_data.sh`
+  - `run_006_prepare_transfer_xvec_train_data.sh`
     - Prepares audios to train the transfer white-box x-vector model
     - If the training data for the victim and transfer models is the same, it does nothing

-  - `run_013_train_transfer_xvector.sh`
+  - `run_007_train_transfer_xvector.sh`
     - Trains the transfer white-box x-vector network

-  - `run_030_extract_xvectors_victim_model.sh`
+  - `run_009_extract_xvectors_victim_model.sh`
     - Extracts x-vectors for the VoxCeleb1 test set using the victim model

-  - `run_031_extract_xvectors_transfer_model.sh`
+  - `run_010_extract_xvectors_transfer_model.sh`
     - Extracts x-vectors for the VoxCeleb1 test set using the transfer model

-  - `run_040_eval_be_victim_model.sh`
+  - `run_011_eval_be_victim_model.sh`
     - Eval cosine scoring back-end without attack on victim model x-vectors
     - Trains calibration for the victim model scores
     - Results are left in `exp/scores/$nnet_name/cosine/voxceleb1_o_clean_results`

-  - `run_041_eval_be_transfer_model.sh`
+  - `run_012_eval_be_transfer_model.sh`
     - Eval cosine scoring back-end without attack on transfer model x-vectors
     - Trains calibration for the transfer model scores
     - Results are left in `exp/scores/$transfer_nnet_name/cosine/voxceleb1_o_clean_results`

-  - `run_043_eval_whitebox_attacks.sh`
+  - `run_013_eval_whitebox_attacks.sh`
     - Eval white box attacks implemented in the Hyperion toolkit: FGSM, Iter-FGSM, PGD, Carlini-Wagner
     - Results are left in `exp/scores/$nnet_name/cosine_${attack_related_label}/voxceleb1_o_clean_results`
     - When using option `--do-analysis true` it calculates curves: SNR vs EER, SNR vs actual DCF, Linf vs EER, Linf vs actual DCF
@@ -141,7 +128,7 @@ run_0*.sh --config-file global_conf/config_victim_resnet34_transfer_lresnet.v1.s
     - When using `--save-wav true`, it writes adversarial wavs of successful attacks to disk
     - Wavs are saved to `exp/scores/$nnet_name/cosine_${attack_related_label}/wav`

-  - `run_044_eval_transfer_blackbox_attacks.sh`
+  - `run_014_eval_transfer_blackbox_attacks.sh`
     - Eval transfer black box attacks implemented in the Hyperion toolkit: FGSM, Iter-FGSM, PGD, Carlini-Wagner
     - Results are left in `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_${attack_related_label}/voxceleb1_o_clean_results`
     - When using option `--do-analysis true` it calculates curves: SNR vs EER, SNR vs actual DCF, Linf vs EER, Linf vs actual DCF
     - When using `--save-wav true`, it writes adversarial wavs of successful attacks to disk
     - Wavs are saved to `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_${attack_related_label}/wav`

-  - `run_045_eval_whitebox_attacks_with_randsmooth_defense.sh`
+  - `run_015_eval_whitebox_attacks_with_randsmooth_defense.sh`
     - Eval white box attacks with Gaussian randomized smoothing defense.
     - Results are left in `exp/scores/$nnet_name/cosine_${attack_related_label}_randsmooth${smooth_sigma}/voxceleb1_o_clean_results`

-  - `run_053_eval_art_whitebox_attacks.sh`
+  - `run_017_eval_art_whitebox_attacks.sh`
     - Eval white box attacks implemented in IBM's Adversarial Robustness Toolbox (ART): FGSM, Iter-FGSM, PGD, Carlini-Wagner
     - Results are left in `exp/scores/$nnet_name/cosine_art_${attack_related_label}/voxceleb1_o_clean_results`
     - When using option `--do-analysis true` it calculates curves: SNR vs EER, SNR vs actual DCF, Linf vs EER, Linf vs actual DCF
@@ -161,7 +148,7 @@ run_0*.sh --config-file global_conf/config_victim_resnet34_transfer_lresnet.v1.s
     - When using `--save-wav true`, it writes adversarial wavs of successful attacks to disk
     - Wavs are saved to `exp/scores/$nnet_name/cosine_art_${attack_related_label}/wav`

-  - `run_054_eval_art_transfer_blackbox_attacks.sh`
+  - `run_018_eval_art_transfer_blackbox_attacks.sh`
     - Eval transfer black box attacks implemented in IBM's Adversarial Robustness Toolbox (ART): FGSM, Iter-FGSM, PGD, Carlini-Wagner
     - Results are left in `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_art_${attack_related_label}/voxceleb1_o_clean_results`
     - When using option `--do-analysis true` it calculates curves: SNR vs EER, SNR vs actual DCF, Linf vs EER, Linf vs actual DCF
diff --git a/egs/voxceleb/adv.v1.1/conf b/egs/voxceleb/adv.v1.1/conf
deleted file mode 120000
index 7dfe9dce..00000000
--- a/egs/voxceleb/adv.v1.1/conf
+++ /dev/null
@@ -1 +0,0 @@
-../../sre19-cmn2/v1/conf
\ No newline at end of file
diff --git a/egs/voxceleb/adv.v1.1/conf/advft_resnet34_xvec.yaml b/egs/voxceleb/adv.v1.1/conf/advft_resnet34_xvec.yaml
new file mode 100644
index 00000000..fd9c95e1
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/conf/advft_resnet34_xvec.yaml
@@ -0,0 +1,67 @@
+data:
+  train:
+    dataset:
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+    sampler:
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 64
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+    data_loader:
+      num_workers: 8
+  val:
+    dataset:
+      class_names:
+      - class_id
+      aug_cfgs:
+      - conf/reverb_noise_aug.yaml
+      return_segment_info:
+      - class_id
+    sampler:
+      sampler_type: class_weighted_random_seg_chunk_sampler
+      min_batch_size: 32
+      max_chunk_length: 4.0
+      min_chunk_length: 4.0
+      num_chunks_per_seg_epoch: 6
+      class_name: class_id
+    data_loader:
+      num_workers: 8
+feats: fbank80_stmn_16k.yaml
+model:
+  cos_scale: 30.0
+  margin: 0.3
+  margin_warmup_epochs: 20.0
+attack:
+  attack_type: pgd
+  max_iters: 10
+  eps: 0.004
+  alpha: 0.0008
+  random_eps: true
+  p_attack: 0.5
+trainer:
+  optim:
+    opt_type: adam
+    lr: 0.05
+    amsgrad: true
+    beta1: 0.9
+    beta2: 0.95
+    weight_decay: 1.0e-05
+  lrsched: lrsched_exp_default.yaml
+    lrsch_type: exp_lr
+    decay_rate: 0.5
+    decay_steps: 8000
+    hold_steps: 8000
+    min_lr: 1.0e-05
+    update_lr_on_opt_step: true
+    warmup_steps: 1000
+  use_amp: true
+  log_interval: 1000
+  epochs: 20
+  eff_batch_size: 512
diff --git a/egs/voxceleb/adv.v1.1/conf/clsp.conf b/egs/voxceleb/adv.v1.1/conf/clsp.conf
new file mode 100644
index 00000000..4ed38246
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/conf/clsp.conf
@@ -0,0 +1,11 @@
+
+# Default configuration
+command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V
+option mem=* -l mem_free=$0,ram_free=$0
+option mem=0 # Do not add anything to qsub_opts
+option num_threads=* -pe smp $0
+option num_threads=1 # Do not add
anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/voxceleb/adv.v1.1/conf/coe_gpu_long.conf b/egs/voxceleb/adv.v1.1/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/voxceleb/adv.v1.1/conf/coe_gpu_rtx.conf b/egs/voxceleb/adv.v1.1/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/voxceleb/adv.v1.1/conf/coe_gpu_short.conf b/egs/voxceleb/adv.v1.1/conf/coe_gpu_short.conf new file mode 100644 index 00000000..81de5cb7 --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/voxceleb/adv.v1.1/conf/fbank80_stmn_16k.yaml b/egs/voxceleb/adv.v1.1/conf/fbank80_stmn_16k.yaml new file mode 100644 index 00000000..f4091f5d --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/fbank80_stmn_16k.yaml @@ -0,0 +1,12 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 16000 + frame_length: 25 + low_freq: 20 + high_freq: 7600 + num_filters: 80 + snip_edges: false + use_energy: false +mvn: + context: 150 + norm_var: false diff --git a/egs/voxceleb/adv.v1.1/conf/reverb_noise_aug.yaml b/egs/voxceleb/adv.v1.1/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + 
min_snr: 3 + max_snr: 18 + diff --git a/egs/voxceleb/adv.v1.1/conf/train_lresnet34_xvec.yaml b/egs/voxceleb/adv.v1.1/conf/train_lresnet34_xvec.yaml new file mode 100644 index 00000000..609f6829 --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/train_lresnet34_xvec.yaml @@ -0,0 +1,68 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + resnet_type: lresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +trainer: + optim: + opt_type: adam + lr: 0.05 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: lrsched_exp_default.yaml + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 8000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 1000 + use_amp: true + log_interval: 1000 + epochs: 70 + eff_batch_size: 512 diff --git a/egs/voxceleb/adv.v1.1/conf/train_resetdnn_xvec.yaml b/egs/voxceleb/adv.v1.1/conf/train_resetdnn_xvec.yaml new file mode 100644 index 00000000..c379ee76 --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/train_resetdnn_xvec.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + tdnn_type: resetdnn + in_feats: 80 + num_enc_blocks: 5 + enc_hid_units: 512 + enc_expand_units: 1536 + kernel_size: + - 5 + - 3 + - 3 + - 3 + - 1 + dilation: + - 1 + - 2 + - 3 + - 4 + - 1 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.1 +trainer: + optim: + opt_type: adam + lr: 0.05 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: lrsched_exp_default.yaml + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 8000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 1000 + use_amp: true + log_interval: 1000 + epochs: 70 + eff_batch_size: 512 diff --git a/egs/voxceleb/adv.v1.1/conf/train_resnet34_xvec.yaml b/egs/voxceleb/adv.v1.1/conf/train_resnet34_xvec.yaml new file mode 100644 index 00000000..73ddcb68 --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/train_resnet34_xvec.yaml @@ -0,0 +1,68 @@ +data: + train: + dataset: + class_names: + - 
class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + resnet_type: resnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +trainer: + optim: + opt_type: adam + lr: 0.05 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: lrsched_exp_default.yaml + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 8000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 1000 + use_amp: true + log_interval: 1000 + epochs: 70 + eff_batch_size: 512 diff --git a/egs/voxceleb/adv.v1.1/conf/vad_16k.yaml b/egs/voxceleb/adv.v1.1/conf/vad_16k.yaml new file mode 100644 index 00000000..5fb0111c --- /dev/null +++ b/egs/voxceleb/adv.v1.1/conf/vad_16k.yaml @@ -0,0 +1,8 @@ +sample_frequency: 16000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: 5.5 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 diff --git a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_lresnet34_transfer_resetdnn.v1.sh b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_lresnet34_transfer_resetdnn.v1.sh index d102a77a..b569604d 100644 --- a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_lresnet34_transfer_resetdnn.v1.sh +++ b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_lresnet34_transfer_resetdnn.v1.sh @@ -3,93 +3,41 @@ # Both models uses the same features: 80 fbanks # Both models uses the same training data. 
-# victim acoustic features +# acoustic features feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +#vad +vad_config=conf/vad_16k.yaml + # victim x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -min_chunk=4 -max_chunk=4 -ipe=$nnet_num_augs -lr=0.05 - -nnet_type=lresnet34 -dropout=0 -embed_dim=256 -s=30 -margin_warmup=20 -margin=0.3 +# victim x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 +nnet_cfg=conf/train_lresnet34_xvec.yaml nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - -# transfer model acoustic features +# transfer feature extractor transfer_feat_config=$feat_config transfer_feat_type=$feat_type # transfer model training -transfer_nnet_data=voxceleb2cat_train #this can be voxceleb2cat or voxceleb1cat -transfer_nnet_num_augs=6 -transfer_aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -transfer_batch_size_1gpu=128 -transfer_eff_batch_size=512 # effective batch size -transfer_min_chunk=4 -transfer_max_chunk=4 -transfer_ipe=$transfer_nnet_num_augs -transfer_lr=0.05 +transfer_nnet_data=voxceleb2cat_train transfer_nnet_type=resetdnn -transfer_num_layers=5 -transfer_layer_dim=512 -transfer_expand_dim=1536 -transfer_dilation="1 2 3 4 1" -transfer_kernel_sizes="5 3 3 3 1" -transfer_dropout=0.1 -transfer_embed_dim=256 - -transfer_s=30 -transfer_margin_warmup=20 -transfer_margin=0.3 - -transfer_nnet_opt="--tdnn-type $transfer_nnet_type --in-feats 80 --num-enc-blocks $transfer_num_layers --enc-hid-units $transfer_layer_dim --enc-expand-units $transfer_expand_dim --kernel-size $transfer_kernel_sizes --dilation $transfer_dilation" -transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -transfer_nnet_name=${transfer_feat_type}_${transfer_nnet_type}_nl${transfer_num_layers}ld${transfer_layer_dim}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1 -transfer_nnet_num_epochs=70 - +transfer_nnet_name=${transfer_feat_type}_resetdnn5x512 +transfer_nnet_cfg=conf/train_resetdnn_xvec.yaml transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name transfer_nnet=$transfer_nnet_dir/model_ep0070.pth +# adversarial finetuning +advft_nnet_name=${nnet_name}_advft +advft_nnet_cfg=conf/advft_lresnet34_xvec.yaml +advft_nnet_dir=exp/xvector_nnets/$advft_nnet_name +advft_nnet=$advft_nnet_dir/model_ep0070.pth -# options for adversarial finetuning of the victim model 
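The `advft_*` variables removed in the next lines configured PGD adversarial fine-tuning of the victim model (10 iterations, `eps=0.004`, step `alpha=eps/5`, attack probability 0.5); those settings now live in the `attack:` block of `conf/advft_resnet34_xvec.yaml` shown earlier. As a rough sketch of the L-inf PGD perturbation such settings describe (illustrative only, not hyperion's implementation in `hyperion/torch/adv_attacks`):

```python
import torch

def pgd_linf(model, loss_fn, x, y, eps=0.004, alpha=0.0008, max_iters=10):
    # Repeatedly step along the sign of the gradient to increase the loss,
    # then project back into the L-inf ball of radius eps around x.
    x_adv = x.clone().detach()
    for _ in range(max_iters):
        x_adv.requires_grad_(True)
        grad = torch.autograd.grad(loss_fn(model(x_adv), y), x_adv)[0]
        with torch.no_grad():
            x_adv = x_adv + alpha * grad.sign()
            x_adv = x + (x_adv - x).clamp(-eps, eps)
    return x_adv.detach()
```

During fine-tuning, each training batch would be perturbed this way with probability `p_attack` before the usual forward/backward pass.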
-advft_batch_size_1gpu=32 -advft_eff_batch_size=128 # effective batch size -advft_margin=0.3 -advft_margin_warmup=20 -advft_nnet_num_epochs=20 -advft_eps=0.004 -advft_eps_step=$(echo $advft_eps | awk '{ print $1/5}') -advft_p=0.5 -advft_lr=0.05 -advft_iters=10 -advft_attack_opts="--attack.attack-type pgd --attack.max-iter $advft_iters --attack.eps $advft_eps --attack.alpha $advft_eps_step --attack.random-eps --p-attack $advft_p" -advft_opt_opt="--optim.opt-type adam --optim.lr $advft_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -advft_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 8000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -advft_nnet_name=$nnet_name.advft_p${advft_p}_pgd${advft_iters}e${advft_eps}step${advft_eps_step}_arcm${advft_margin}wup${advft_margin_warmup}_optv1_adam_lr${advft_lr} -advft_nnet_dir=exp/xvector_nnets/$advft_nnet_name -advft_nnet=$advft_nnet_dir/model_ep0020.pth diff --git a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_lresnet34_transfer_resetdnn.v1_wavegan_defense.sh b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_lresnet34_transfer_resetdnn.v1_wavegan_defense.sh index 09d6b993..8105df2c 100644 --- a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_lresnet34_transfer_resetdnn.v1_wavegan_defense.sh +++ b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_lresnet34_transfer_resetdnn.v1_wavegan_defense.sh @@ -3,96 +3,42 @@ # Both models uses the same features: 80 fbanks # Both models uses the same training data. -# victim acoustic features +# acoustic features feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn +#vad +vad_config=conf/vad_16k.yaml + # victim x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -min_chunk=4 -max_chunk=4 -ipe=$nnet_num_augs -lr=0.05 - -nnet_type=lresnet34 -dropout=0 -embed_dim=256 -s=30 -margin_warmup=20 -margin=0.3 +# victim x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 +nnet_cfg=conf/train_lresnet34_xvec.yaml nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - -# transfer model acoustic features +# transfer feature extractor transfer_feat_config=$feat_config transfer_feat_type=$feat_type # transfer model training -transfer_nnet_data=voxceleb2cat_train #this can be voxceleb2cat or voxceleb1cat -transfer_nnet_num_augs=6 -transfer_aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -transfer_batch_size_1gpu=128 -transfer_eff_batch_size=512 # effective batch size -transfer_min_chunk=4 -transfer_max_chunk=4 -transfer_ipe=$transfer_nnet_num_augs -transfer_lr=0.05 +transfer_nnet_data=voxceleb2cat_train 
transfer_nnet_type=resetdnn -transfer_num_layers=5 -transfer_layer_dim=512 -transfer_expand_dim=1536 -transfer_dilation="1 2 3 4 1" -transfer_kernel_sizes="5 3 3 3 1" -transfer_dropout=0.1 -transfer_embed_dim=256 - -transfer_s=30 -transfer_margin_warmup=20 -transfer_margin=0.3 - -transfer_nnet_opt="--tdnn-type $transfer_nnet_type --in-feats 80 --num-enc-blocks $transfer_num_layers --enc-hid-units $transfer_layer_dim --enc-expand-units $transfer_expand_dim --kernel-size $transfer_kernel_sizes --dilation $transfer_dilation" -transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -transfer_nnet_name=${transfer_feat_type}_${transfer_nnet_type}_nl${transfer_num_layers}ld${transfer_layer_dim}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1 -transfer_nnet_num_epochs=70 - +transfer_nnet_name=${transfer_feat_type}_resetdnn5x512 +transfer_nnet_cfg=conf/train_resetdnn_xvec.yaml transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name transfer_nnet=$transfer_nnet_dir/model_ep0070.pth - -# options for adversarial finetuning of the victim model -advft_batch_size_1gpu=32 -advft_eff_batch_size=128 # effective batch size -advft_margin=0.3 -advft_margin_warmup=20 -advft_nnet_num_epochs=20 -advft_eps=0.004 -advft_eps_step=$(echo $advft_eps | awk '{ print $1/5}') -advft_p=0.5 -advft_lr=0.05 -advft_iters=10 -advft_attack_opts="--attack.attack-type pgd --attack.max-iter $advft_iters --attack.eps $advft_eps --attack.alpha $advft_eps_step --attack.random-eps --p-attack $advft_p" -advft_opt_opt="--optim.opt-type adam --optim.lr $advft_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -advft_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 8000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -advft_nnet_name=$nnet_name.advft_p${advft_p}_pgd${advft_iters}e${advft_eps}step${advft_eps_step}_arcm${advft_margin}wup${advft_margin_warmup}_optv1_adam_lr${advft_lr} +# adversarial finetuning +advft_nnet_name=${nnet_name}_advft +advft_nnet_cfg=conf/advft_lresnet34_xvec.yaml advft_nnet_dir=exp/xvector_nnets/$advft_nnet_name -advft_nnet=$advft_nnet_dir/model_ep0020.pth +advft_nnet=$advft_nnet_dir/model_ep0070.pth # WaveGAN configs smoothing_after_wavegan=true diff --git a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_fbank40_stmn_resetdnn.v1.sh b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_fbank40_stmn_resetdnn.v1.sh index 54e47a29..3e7739d0 100644 --- a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_fbank40_stmn_resetdnn.v1.sh +++ b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_fbank40_stmn_resetdnn.v1.sh @@ -3,7 +3,7 @@ # Both models uses the same features: 80 fbanks # Both models uses the same training data. 
-# victim acoustic features +# acoustic features feat_config=conf/fbank80_stmn_16k.yaml feat_type=fbank80_stmn @@ -12,70 +12,32 @@ vad_config=conf/vad_16k.yaml # victim x-vector training nnet_data=voxceleb2cat_train -nnet_num_augs=6 -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -min_chunk=4 -max_chunk=4 -ipe=1 -lr=0.05 +# victim x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_resnet34 -nnet_type=resnet34 -dropout=0 -embed_dim=256 - -s=30 -margin_warmup=20 -margin=0.3 - -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 -num_augs=5 +nnet_cfg=conf/train_resnet34_xvec.yaml nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - -# transfer model acoustic features +# transfer feature extractor transfer_feat_config=conf/fbank40_stmn_16k.yaml transfer_feat_type=fbank40_stmn # transfer model training -transfer_nnet_data=voxceleb2cat_train #this can be voxceleb2cat or voxceleb1cat -transfer_nnet_num_augs=6 -transfer_aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -transfer_batch_size_1gpu=128 -transfer_eff_batch_size=512 # effective batch size -transfer_min_chunk=4 -transfer_max_chunk=4 -transfer_ipe=$transfer_nnet_num_augs -transfer_lr=0.05 +transfer_nnet_data=voxceleb2cat_train transfer_nnet_type=resetdnn -transfer_num_layers=5 -transfer_layer_dim=512 -transfer_expand_dim=1536 -transfer_dilation="1 2 3 4 1" -transfer_kernel_sizes="5 3 3 3 1" -transfer_dropout=0.1 -transfer_embed_dim=256 - -transfer_s=30 -transfer_margin_warmup=20 -transfer_margin=0.3 - -transfer_nnet_opt="--tdnn-type $transfer_nnet_type --in-feats 40 --num-enc-blocks $transfer_num_layers --enc-hid-units $transfer_layer_dim --enc-expand-units $transfer_expand_dim --kernel-size $transfer_kernel_sizes --dilation $transfer_dilation" -transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -transfer_nnet_name=${transfer_feat_type}_${transfer_nnet_type}_nl${transfer_num_layers}ld${transfer_layer_dim}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1 -transfer_nnet_num_epochs=70 - +transfer_nnet_name=${transfer_feat_type}_resetdnn5x512 +transfer_nnet_cfg=conf/train_resetdnn_xvec.yaml transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name transfer_nnet=$transfer_nnet_dir/model_ep0070.pth +# adversarial finetuning +advft_nnet_name=${nnet_name}_advft +advft_nnet_cfg=conf/advft_resnet34_xvec.yaml +advft_nnet_dir=exp/xvector_nnets/$advft_nnet_name +advft_nnet=$advft_nnet_dir/model_ep0070.pth + diff --git 
diff --git a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_lresnet.v1.sh b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_lresnet.v1.sh
index 777b8b5d..00dfd4ff 100644
--- a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_lresnet.v1.sh
+++ b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_lresnet.v1.sh
@@ -3,70 +3,39 @@
 # Both models uses the same features: 80 fbanks
 # Both models uses the same training data.
 
-# victim acoustic features
+# Victim model ResNet34 x-vector
+# For the black-box attacks we use Residual E-TDNN to generate the attacks and transfer them to the ResNet34
+# Both models use the same features: 80 fbanks
+# Both models use the same training data.
+
+# acoustic features
 feat_config=conf/fbank80_stmn_16k.yaml
 feat_type=fbank80_stmn
 
-# victim x-vector training
-nnet_data=voxceleb2cat
-nnet_num_augs=6
-aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml"
-
-batch_size_1gpu=32
-eff_batch_size=512 # effective batch size
-min_chunk=4
-max_chunk=4
-ipe=$nnet_num_augs
-lr=0.05
+#vad
+vad_config=conf/vad_16k.yaml
 
-nnet_type=resnet34
-dropout=0
-embed_dim=256
+# victim x-vector training
+nnet_data=voxceleb2cat_train
 
-s=30
-margin_warmup=20
-margin=0.3
+# victim x-vector cfg
+nnet_type=resnet
+nnet_name=${feat_type}_resnet34
 
-nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool"
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1
-nnet_num_epochs=70
-num_augs=5
+nnet_cfg=conf/train_resnet34_xvec.yaml
 nnet_dir=exp/xvector_nnets/$nnet_name
 nnet=$nnet_dir/model_ep0070.pth
-
-# transfer model acoustic features
+# transfer feature extractor
 transfer_feat_config=$feat_config
 transfer_feat_type=$feat_type
 
 # transfer model training
-transfer_nnet_data=voxceleb2cat #this can be voxceleb2cat or voxceleb1cat
-transfer_nnet_num_augs=6
-transfer_aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml"
-
-transfer_batch_size_1gpu=128
-transfer_eff_batch_size=512 # effective batch size
-transfer_min_chunk=4
-transfer_max_chunk=4
-transfer_ipe=$transfer_nnet_num_augs
-transfer_lr=0.05
-
-transfer_nnet_type=lresnet34
-transfer_dropout=0
-transfer_embed_dim=256
-
-transfer_s=30
-transfer_margin_warmup=20
-transfer_margin=0.3
-
-transfer_nnet_opt="--resnet-type $transfer_nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool"
-transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp"
-transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-transfer_nnet_name=${transfer_feat_type}_${transfer_nnet_type}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1
-transfer_nnet_num_epochs=70
+transfer_nnet_data=voxceleb2cat_train
+transfer_nnet_type=resnet
+transfer_nnet_name=${transfer_feat_type}_lresnet34
+transfer_nnet_cfg=conf/train_lresnet34_xvec.yaml
 transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name
 transfer_nnet=$transfer_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_resetdnn.v1.sh b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_resetdnn.v1.sh
index 482f3b7b..6570f4a2 100644
--- a/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_resetdnn.v1.sh
+++ b/egs/voxceleb/adv.v1.1/global_conf/config_victim_fbank80_stmn_resnet34_transfer_resetdnn.v1.sh
@@ -3,75 +3,41 @@
 # Both models uses the same features: 80 fbanks
 # Both models uses the same training data.
 
-# victim acoustic features
+# acoustic features
 feat_config=conf/fbank80_stmn_16k.yaml
 feat_type=fbank80_stmn
 
+#vad
+vad_config=conf/vad_16k.yaml
+
 # victim x-vector training
 nnet_data=voxceleb2cat_train
-nnet_num_augs=6
-aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml"
-
-batch_size_1gpu=32
-eff_batch_size=512 # effective batch size
-min_chunk=4
-max_chunk=4
-ipe=$nnet_num_augs
-lr=0.05
-
-nnet_type=resnet34
-dropout=0
-embed_dim=256
-s=30
-margin_warmup=20
-margin=0.3
+# victim x-vector cfg
+nnet_type=resnet
+nnet_name=${feat_type}_resnet34
 
-nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool"
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-nnet_name=${feat_type}_${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1
-nnet_num_epochs=70
+nnet_cfg=conf/train_resnet34_xvec.yaml
 nnet_dir=exp/xvector_nnets/$nnet_name
 nnet=$nnet_dir/model_ep0070.pth
-
-# transfer model acoustic features
+# transfer feature extractor
 transfer_feat_config=$feat_config
 transfer_feat_type=$feat_type
 
 # transfer model training
-transfer_nnet_data=voxceleb2cat_train #this can be voxceleb2cat or voxceleb1cat
-transfer_nnet_num_augs=6
-transfer_aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml"
-
-transfer_batch_size_1gpu=128
-transfer_eff_batch_size=512 # effective batch size
-transfer_min_chunk=4
-transfer_max_chunk=4
-transfer_ipe=$transfer_nnet_num_augs
-transfer_lr=0.05
+transfer_nnet_data=voxceleb2cat_train
 transfer_nnet_type=resetdnn
-transfer_num_layers=5
-transfer_layer_dim=512
-transfer_expand_dim=1536
-transfer_dilation="1 2 3 4 1"
-transfer_kernel_sizes="5 3 3 3 1"
-transfer_dropout=0.1
-transfer_embed_dim=256
-
-transfer_s=30
-transfer_margin_warmup=20
-transfer_margin=0.3
-
-transfer_nnet_opt="--tdnn-type $transfer_nnet_type --in-feats 80 --num-enc-blocks $transfer_num_layers --enc-hid-units $transfer_layer_dim --enc-expand-units $transfer_expand_dim --kernel-size $transfer_kernel_sizes --dilation $transfer_dilation"
-transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp"
-transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-transfer_nnet_name=${transfer_feat_type}_${transfer_nnet_type}_nl${transfer_num_layers}ld${transfer_layer_dim}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1
-transfer_nnet_num_epochs=70
-
+transfer_nnet_name=${transfer_feat_type}_resetdnn5x512
+transfer_nnet_cfg=conf/train_resetdnn_xvec.yaml
 transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name
 transfer_nnet=$transfer_nnet_dir/model_ep0070.pth
+# adversarial finetuning
+advft_nnet_name=${nnet_name}_advft
+advft_nnet_cfg=conf/advft_resnet34_xvec.yaml
+advft_nnet_dir=exp/xvector_nnets/$advft_nnet_name
+advft_nnet=$advft_nnet_dir/model_ep0070.pth
+
diff --git a/egs/voxceleb/adv.v1.1/local b/egs/voxceleb/adv.v1.1/local
deleted file mode 120000
index ce1cbf90..00000000
--- a/egs/voxceleb/adv.v1.1/local
+++ /dev/null
@@ -1 +0,0 @@
-../v1/local
\ No newline at end of file
diff --git a/egs/voxceleb/adv.v1.1/local/attack_analysis.py b/egs/voxceleb/adv.v1.1/local/attack_analysis.py
new file mode 100755
index 00000000..2e0fdb42
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/attack_analysis.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+"""
+ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+
+import sys
+import os
+import argparse
+import time
+import logging
+
+import numpy as np
+import pandas as pd
+
+from hyperion.hyp_defs import float_cpu, config_logger
+from hyperion.np.metrics.verification_evaluator import (
+    VerificationAdvAttackEvaluator as Eval,
+)
+
+
+def evaluate_attacks(
+    key_file,
+    clean_score_file,
+    attack_score_files,
+    attack_stats_files,
+    output_path,
+    prior,
+):
+
+    output_dir = os.path.dirname(output_path)
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+
+    evaluator = Eval(
+        key_file, clean_score_file, attack_score_files, attack_stats_files, prior
+    )
+
+    # performance vs SNR
+    logging.info("compute perf vs snr for all trials")
+    df_clean = evaluator.compute_dcf_eer(return_df=True)
+    df_clean.insert(0, "snr", np.inf)
+
+    df = evaluator.compute_dcf_eer_vs_stats(
+        "snr",
+        [-10, 0, 10, 20, 30, 40, 50, 60],
+        "all",
+        higher_better=True,
+        return_df=True,
+    )
+    file_path = "%s_attack_all_snr_results.csv" % (output_path)
+    df = pd.concat([df_clean, df], ignore_index=True)
+    df.to_csv(file_path)
+    file_path = "%s_attack_all_snr" % (output_path)
+    evaluator.plot_dcf_eer_vs_stat_v1(
+        df, "snr", file_path, clean_ref=0, xlabel="SNR(dB)", higher_better=True
+    )
+
+    logging.info("compute perf vs snr for tar trials")
+    df = evaluator.compute_dcf_eer_vs_stats(
+        "snr",
+        [-10, 0, 10, 20, 30, 40, 50, 60],
+        "tar",
+        higher_better=True,
+        return_df=True,
+    )
+    file_path = "%s_attack_tar_snr_results.csv" % (output_path)
+    df = pd.concat([df_clean, df], ignore_index=True)
+    df.to_csv(file_path)
+    file_path = "%s_attack_tar_snr" % (output_path)
+    evaluator.plot_dcf_eer_vs_stat_v1(
+        df, "snr", file_path, clean_ref=0, xlabel="SNR(dB)", higher_better=True
+    )
+
+    logging.info("compute perf vs snr for non trials")
+    df = evaluator.compute_dcf_eer_vs_stats(
+        "snr",
+        [-10, 0, 10, 20, 30, 40, 50, 60],
+        "non",
+        higher_better=True,
+        return_df=True,
+    )
+    file_path = "%s_attack_non_snr_results.csv" % (output_path)
+    df = pd.concat([df_clean, df], ignore_index=True)
+    df.to_csv(file_path)
+    file_path = "%s_attack_non_snr" % (output_path)
+    evaluator.plot_dcf_eer_vs_stat_v1(
+        df, "snr", file_path, clean_ref=0, xlabel="SNR(dB)", higher_better=True
+    )
+
+    logging.info("find best attacks from snr point of view")
+    for i in range(len(attack_score_files)):
+        file_path = "%s_best_snr_tar_attacks_%d.csv" % (output_path, i)
+        evaluator.save_best_attacks(
+            file_path,
+            "snr",
+            "tar",
+            num_best=10,
+            min_delta=1,
+            attack_idx=i,
+            higher_better=True,
+        )
+
+        file_path = "%s_best_snr_non_attacks_%d.csv" % (output_path, i)
+        evaluator.save_best_attacks(
+            file_path,
+            "snr",
+            "non",
+            num_best=10,
+            min_delta=1,
+            attack_idx=i,
+            higher_better=True,
+        )
+
+    # performance vs Linf
+    logging.info("compute perf vs linf for all trials")
+    eps = np.ceil(np.asarray([0, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]) * 2 ** 15)
+    df = evaluator.compute_dcf_eer_vs_stats(
+        "n_linf", eps, "all", higher_better=False, return_df=True
+    )
+    file_path = "%s_attack_all_linf_results.csv" % (output_path)
+    df.to_csv(file_path)
+    file_path = "%s_attack_all_linf" % (output_path)
+    evaluator.plot_dcf_eer_vs_stat_v1(
+        df, "n_linf", file_path, clean_ref=0, xlabel=r"$L_{\infty}$", log_x=True
+    )
+
+    logging.info("compute perf vs linf for tar trials")
+    df = evaluator.compute_dcf_eer_vs_stats(
+        "n_linf", eps, "tar", higher_better=False, return_df=True
+    )
+    file_path = "%s_attack_tar_linf_results.csv" % (output_path)
+    df.to_csv(file_path)
+    file_path = "%s_attack_tar_linf" % (output_path)
+    evaluator.plot_dcf_eer_vs_stat_v1(
+        df, "n_linf", file_path, clean_ref=0, xlabel=r"$L_{\infty}$", log_x=True
+    )
+
+    logging.info("compute perf vs linf for non trials")
+    df = evaluator.compute_dcf_eer_vs_stats(
+        "n_linf", eps, "non", higher_better=False, return_df=True
+    )
+    file_path = "%s_attack_non_linf_results.csv" % (output_path)
+    df.to_csv(file_path)
+    file_path = "%s_attack_non_linf" % (output_path)
+    evaluator.plot_dcf_eer_vs_stat_v1(
+        df, "n_linf", file_path, clean_ref=0, xlabel=r"$L_{\infty}$", log_x=True
+    )
+
+    # find the best attacks in terms of linf
+    logging.info("find best attacks from linf point of view")
+    for i in range(len(attack_score_files)):
+        file_path = "%s_best_linf_tar_attacks_%d.csv" % (output_path, i)
+        evaluator.save_best_attacks(
+            file_path,
+            "n_linf",
+            "tar",
+            num_best=10,
+            min_delta=1,
+            attack_idx=i,
+            higher_better=False,
+        )
+
+        file_path = "%s_best_linf_non_attacks_%d.csv" % (output_path, i)
+        evaluator.save_best_attacks(
+            file_path,
+            "n_linf",
+            "non",
+            num_best=10,
+            min_delta=1,
+            attack_idx=i,
+            higher_better=False,
+        )
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        fromfile_prefix_chars="@",
+        description="Analyses performance of adversarial attacks for spk. verif.",
+    )
+
+    parser.add_argument("--key-file", required=True)
+    parser.add_argument("--clean-score-file", required=True)
+    parser.add_argument("--attack-score-files", required=True, nargs="+")
+    parser.add_argument("--attack-stats-files", required=True, nargs="+")
+    parser.add_argument("--output-path", required=True)
+    parser.add_argument("--prior", default=0.05, type=float)
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    evaluate_attacks(**vars(args))
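The script above is driven entirely by its command line. As a quick orientation, a minimal sketch of an invocation is shown below; the score/stats paths are illustrative placeholders that follow the recipe's naming conventions, not verified outputs of any particular stage.

    # Hypothetical call of local/attack_analysis.py; all paths are illustrative.
    from subprocess import run

    run(
        [
            "local/attack_analysis.py",
            "--key-file", "data/voxceleb1_test/trials_o_clean",
            "--clean-score-file", "exp/scores/clean/voxceleb1_scores",
            "--attack-score-files", "exp/scores/fgsm/voxceleb1_scores",
            "--attack-stats-files", "exp/scores/fgsm/voxceleb1_stats",
            "--output-path", "exp/attack_analysis/fgsm/voxceleb1",
            "--prior", "0.05",
        ],
        check=True,
    )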
diff --git a/egs/voxceleb/adv.v1.1/local/attack_analysis.sh b/egs/voxceleb/adv.v1.1/local/attack_analysis.sh
new file mode 100755
index 00000000..42249873
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/attack_analysis.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Copyright 2020 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+cmd=run.pl
+prior=0.05
+. parse_options.sh || exit 1;
+
+if [ $# -ne 5 ]; then
+  echo "Usage: $0 <key> <clean-scores> <attack-scores> <attack-stats> <output-path>"
+  exit 1;
+fi
+
+set -e
+
+key=$1
+clean_scores=$2
+adv_scores="$3"
+adv_stats="$4"
+output_path=$5
+
+output_dir=$(dirname $output_path)
+base=$(basename $output_path)
+logdir=$output_dir/log
+mkdir -p $logdir
+
+if [ "$(hostname --domain)" == "cm.gemini" ];then
+    module load texlive
+fi
+
+$cmd $logdir/analysis_${base}.log \
+    local/attack_analysis.py \
+    --key-file $key \
+    --clean-score-file $clean_scores \
+    --attack-score-files $adv_scores \
+    --attack-stats-files $adv_stats \
+    --output-path $output_path
+
+scores_v=($adv_scores)
+for((i=0;i<${#scores_v[@]};i++))
+do
+    scores_dir=$(dirname ${scores_v[$i]})
+    wav_out_dir0=${output_path}_wavs
+
+    for t in tar non
+    do
+        if [ "$t" == "tar" ];then
+            t2=tar2non
+        else
+            t2=non2tar
+        fi
+        wav_in_dir=$scores_dir/wav/$t2
+        if [ ! -d "$wav_in_dir" ];then
+            continue
+        fi
+        for m in snr linf
+        do
+            best_file=${output_path}_best_${m}_${t}_attacks_$i.csv
+            if [ ! -f $best_file ];then
+                continue
+            fi
+            wav_out_dir=${wav_out_dir0}/best_${m}_${t}_attacks_$i
+            mkdir -p $wav_out_dir
+            for f in $(awk -F "," 'BEGIN{getline;}{ print $2"-"$3".wav"}' $best_file)
+            do
+                ff=$wav_in_dir/$f
+                if [ -f $ff ];then
+                    cp -v $ff $wav_out_dir > $logdir/copywavs_${base}.log 2>&1
+                fi
+            done
+        done
+    done
+done
+
+
diff --git a/egs/voxceleb/adv.v1.1/local/calibrate_voxceleb1_o_clean.sh b/egs/voxceleb/adv.v1.1/local/calibrate_voxceleb1_o_clean.sh
new file mode 100755
index 00000000..736c3fb0
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/calibrate_voxceleb1_o_clean.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright 2019 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0.
+#
+
+set -e
+
+cmd=run.pl
+prior=0.05
+l2_reg=1e-5
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 <score-dir>"
+  exit 1;
+fi
+
+score_dir=$1
+cal_score_dir=${score_dir}_cal_v1
+
+mkdir -p $cal_score_dir
+
+echo "$0 train calibration on VoxCeleb1 Original Clean"
+
+model_file=$cal_score_dir/cal_tel.h5
+train_scores=$score_dir/voxceleb1_scores
+train_key=data/voxceleb1_test/trials_o_clean
+
+$cmd $cal_score_dir/train_cal_tel.log \
+    steps_be/train-calibration-v1.py --score-file $train_scores \
+    --key-file $train_key --model-file $model_file --prior $prior --lambda-reg $l2_reg
+
+ndxs=(voxceleb1_test/trials_o_clean)
+scores=(voxceleb1)
+n_ndx=${#ndxs[*]}
+for((i=0;i<$n_ndx;i++))
+do
+    echo "$0 eval calibration on ${scores[$i]}"
+    scores_in=$score_dir/${scores[$i]}_scores
+    scores_out=$cal_score_dir/${scores[$i]}_scores
+    ndx=data/${ndxs[$i]}
+    $cmd $cal_score_dir/eval_cal_${scores[$i]}.log \
+        steps_be/eval-calibration-v1.py --in-score-file $scores_in \
+        --ndx-file $ndx --model-file $model_file --out-score-file $scores_out &
+
+done
+wait
+
+
+
+
+
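For context, steps_be/train-calibration-v1.py fits a score calibration at a target prior with L2 regularization. The sketch below shows the general idea as I understand it, an affine log-likelihood-ratio calibration trained by prior-weighted logistic regression; the function name and objective are assumptions for illustration, not the recipe's actual implementation.

    # Assumed form of the calibration: s' = a*s + b, fitted by weighted
    # cross-entropy at the effective prior. Illustrative sketch only.
    import numpy as np
    from scipy.optimize import minimize

    def train_affine_calibration(tar, non, prior=0.05):
        logit_p = np.log(prior / (1 - prior))

        def weighted_xent(w):
            a, b = w
            p_tar = 1.0 / (1.0 + np.exp(-(a * tar + b) - logit_p))  # P(tar | score)
            p_non = 1.0 / (1.0 + np.exp((a * non + b) + logit_p))   # P(non | score)
            return -prior * np.log(p_tar).mean() - (1 - prior) * np.log(p_non).mean()

        return minimize(weighted_xent, x0=[1.0, 0.0], method="Nelder-Mead").x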
diff --git a/egs/voxceleb/adv.v1.1/local/make_musan.py b/egs/voxceleb/adv.v1.1/local/make_musan.py
new file mode 100755
index 00000000..b0ae6846
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/make_musan.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+# Copyright 2015   David Snyder
+# Copyright 2019   Johns Hopkins University (Jesus Villalba) (added fs support)
+# Apache 2.0.
+#
+# This file is meant to be invoked by make_musan.sh.
+
+import os, sys
+
+
+def process_music_annotations(path):
+    utt2spk = {}
+    utt2vocals = {}
+    lines = open(path, "r").readlines()
+    for line in lines:
+        utt, genres, vocals, musician = line.rstrip().split()[:4]
+        # For this application, the musician ID isn't important
+        utt2spk[utt] = utt
+        utt2vocals[utt] = vocals == "Y"
+    return utt2spk, utt2vocals
+
+
+def prepare_music(root_dir, fs, use_vocals):
+    utt2vocals = {}
+    utt2spk = {}
+    utt2wav = {}
+    num_good_files = 0
+    num_bad_files = 0
+    music_dir = os.path.join(root_dir, "music")
+    for root, dirs, files in os.walk(music_dir):
+        for file in files:
+            file_path = os.path.join(root, file)
+            if file.endswith(".wav"):
+                utt = str(file).replace(".wav", "")
+                utt2wav[utt] = file_path
+            elif str(file) == "ANNOTATIONS":
+                utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
+                utt2spk.update(utt2spk_part)
+                utt2vocals.update(utt2vocals_part)
+    utt2spk_str = ""
+    utt2wav_str = ""
+    for utt in utt2vocals:
+        if utt in utt2wav:
+            if use_vocals or not utt2vocals[utt]:
+                utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
+                if fs == 8:
+                    utt2wav_str = (
+                        utt2wav_str
+                        + utt
+                        + " sox -t wav "
+                        + utt2wav[utt]
+                        + " -r 8k -t wav - |\n"
+                    )
+                else:
+                    utt2wav_str = (
+                        utt2wav_str
+                        + utt
+                        + " sox -t wav "
+                        + utt2wav[utt]
+                        + " -r 16k -t wav - |\n"
+                    )
+                num_good_files += 1
+        else:
+            print("Missing file", utt)
+            num_bad_files += 1
+    print(
+        "In music directory, processed",
+        num_good_files,
+        "files;",
+        num_bad_files,
+        "had missing wav data",
+    )
+    return utt2spk_str, utt2wav_str
+
+
+def prepare_speech(root_dir, fs):
+    utt2spk = {}
+    utt2wav = {}
+    num_good_files = 0
+    num_bad_files = 0
+    speech_dir = os.path.join(root_dir, "speech")
+    for root, dirs, files in os.walk(speech_dir):
+        for file in files:
+            file_path = os.path.join(root, file)
+            if file.endswith(".wav"):
+                utt = str(file).replace(".wav", "")
+                utt2wav[utt] = file_path
+                utt2spk[utt] = utt
+    utt2spk_str = ""
+    utt2wav_str = ""
+    for utt in utt2spk:
+        if utt in utt2wav:
+            utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
+            if fs == 8:
+                utt2wav_str = (
+                    utt2wav_str
+                    + utt
+                    + " sox -t wav "
+                    + utt2wav[utt]
+                    + " -r 8k -t wav - |\n"
+                )
+            else:
+                utt2wav_str = (
+                    utt2wav_str
+                    + utt
+                    + " sox -t wav "
+                    + utt2wav[utt]
+                    + " -r 16k -t wav - |\n"
+                )
+            num_good_files += 1
+        else:
+            print("Missing file", utt)
+            num_bad_files += 1
+    print(
+        "In speech directory, processed",
+        num_good_files,
+        "files;",
+        num_bad_files,
+        "had missing wav data",
+    )
+    return utt2spk_str, utt2wav_str
+
+
+def prepare_noise(root_dir, fs):
+    utt2spk = {}
+    utt2wav = {}
+    num_good_files = 0
+    num_bad_files = 0
+    noise_dir = os.path.join(root_dir, "noise")
+    for root, dirs, files in os.walk(noise_dir):
+        for file in files:
+            file_path = os.path.join(root, file)
+            if file.endswith(".wav"):
+                utt = str(file).replace(".wav", "")
+                utt2wav[utt] = file_path
+                utt2spk[utt] = utt
+    utt2spk_str = ""
+    utt2wav_str = ""
+    for utt in utt2spk:
+        if utt in utt2wav:
+            utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
+            if fs == 8:
+                utt2wav_str = (
+                    utt2wav_str
+                    + utt
+                    + " sox -t wav "
+                    + utt2wav[utt]
+                    + " -r 8k -t wav - |\n"
+                )
+            else:
+                utt2wav_str = (
+                    utt2wav_str
+                    + utt
+                    + " sox -t wav "
+                    + utt2wav[utt]
+                    + " -r 16k -t wav - |\n"
+                )
+            num_good_files += 1
+        else:
+            print("Missing file", utt)
+            num_bad_files += 1
+    print(
+        "In noise directory, processed",
+        num_good_files,
+        "files;",
+        num_bad_files,
+        "had missing wav data",
+    )
+    return utt2spk_str, utt2wav_str
+
+
+def main():
+    in_dir = sys.argv[1]
+    fs = int(sys.argv[2])
+    out_dir = sys.argv[3]
+    use_vocals = sys.argv[4] == "Y"
+    utt2spk_music, utt2wav_music = prepare_music(in_dir, fs, use_vocals)
+    utt2spk_speech, utt2wav_speech = prepare_speech(in_dir, fs)
+    utt2spk_noise, utt2wav_noise = prepare_noise(in_dir, fs)
+    utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
+    utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
+    wav_fi = open(os.path.join(out_dir, "wav.scp"), "w")
+    wav_fi.write(utt2wav)
+    utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), "w")
+    utt2spk_fi.write(utt2spk)
+
+
+if __name__ == "__main__":
+    main()
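The wav.scp entries that make_musan.py writes are Kaldi-style piped-command entries rather than plain paths. A standalone illustration of the line format, with a made-up utterance id and path:

    # The utterance id and path below are made up; only the format matters.
    utt = "music-fma-0000"
    path = "/export/musan/music/fma/music-fma-0000.wav"
    for fs in (8, 16):
        print(f"{utt} sox -t wav {path} -r {fs}k -t wav - |")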
diff --git a/egs/voxceleb/adv.v1.1/local/make_musan.sh b/egs/voxceleb/adv.v1.1/local/make_musan.sh
new file mode 100755
index 00000000..4a6d30f9
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/make_musan.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Copyright 2015   David Snyder
+# Copyright 2019   Johns Hopkins University (Jesus Villalba) (added fs support)
+# Apache 2.0.
+#
+# This script, called by ../run.sh, creates the MUSAN
+# data directory. The required dataset is freely available at
+#   http://www.openslr.org/17/
+
+set -e
+use_vocals='Y'
+
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ];then
+  echo "Usage: $0 [options] <in-dir> <fs> <data-dir>";
+  echo "e.g.: $0 /export/corpora/JHU/musan 8 data"
+  exit 1;
+fi
+
+in_dir=$1
+fs=$2
+data_dir=$3
+
+mkdir -p $data_dir/musan.tmp
+
+echo "Preparing ${data_dir}/musan..."
+mkdir -p ${data_dir}/musan
+local/make_musan.py ${in_dir} $fs ${data_dir}/musan ${use_vocals}
+
+utils/fix_data_dir.sh ${data_dir}/musan
+
+grep "music" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_music
+grep "speech" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_speech
+grep "noise" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_noise
+utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_music \
+    ${data_dir}/musan ${data_dir}/musan_music
+utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_speech \
+    ${data_dir}/musan ${data_dir}/musan_speech
+utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_noise \
+    ${data_dir}/musan ${data_dir}/musan_noise
+
+utils/fix_data_dir.sh ${data_dir}/musan_music
+utils/fix_data_dir.sh ${data_dir}/musan_speech
+utils/fix_data_dir.sh ${data_dir}/musan_noise
+
+rm -rf $data_dir/musan.tmp
+
diff --git a/egs/voxceleb/adv.v1.1/local/make_rirs_data.sh b/egs/voxceleb/adv.v1.1/local/make_rirs_data.sh
new file mode 100755
index 00000000..c6652eda
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/make_rirs_data.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Copyright 2020 Johns Hopkins University (Jesus Villalba)
+#
+# Apache 2.0.
+set -e
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 <rir-dir> <fs> <data-dir>"
+  echo "e.g.: $0 RIRS_NOISES/simulated_rirs/smallroom 16 data/rirs_smallroom"
+fi
+
+rir_dir=$1
+fs=$2
+data_dir=$3
+
+mkdir -p $data_dir
+
+rir_list=$rir_dir/rir_list
+if [ "$fs" -eq 16 ];then
+    awk '{ key=$5; sub(/.*\//,"",key); print key,$5 }' $rir_list > $data_dir/wav.scp
+else
+    awk '{
+key=$5; sub(/.*\//,"",key);
+print key,"sox "$5" -r 8000 -t wav -b 16 -e signed-integer - |" }' \
+        $rir_list > $data_dir/wav.scp
+fi
+awk '{ key=$5; sub(/.*\//,"",key); print key,$4 }' $rir_list > $data_dir/rir2room
+
diff --git a/egs/voxceleb/adv.v1.1/local/make_trials_subset.py b/egs/voxceleb/adv.v1.1/local/make_trials_subset.py
new file mode 100755
index 00000000..da230842
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/make_trials_subset.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+"""
+ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+from jsonargparse import (
+    ArgumentParser,
+    ActionConfigFile,
+    ActionParser,
+    namespace_to_dict,
+)
+import logging
+import numpy as np
+
+from hyperion.hyp_defs import float_cpu, config_logger
+from hyperion.utils import SparseTrialKey
+
+
+def make_trials(in_key_file, out_key_file, ntar, nnon, seed):
+
+    rng = np.random.RandomState(seed=seed)
+
+    logging.info("Load key: %s" % in_key_file)
+    key = SparseTrialKey.load_txt(in_key_file)
+
+    nz_idx = key.tar.nonzero()
+    nnz = len(nz_idx[0])
+    p = rng.permutation(nnz)[ntar:]
+    nz_idx = (nz_idx[0][p], nz_idx[1][p])
+    key.tar[nz_idx] = False
+
+    nz_idx = key.non.nonzero()
+    nnz = len(nz_idx[0])
+    p = rng.permutation(nnz)[nnon:]
+    nz_idx = (nz_idx[0][p], nz_idx[1][p])
+    key.non[nz_idx] = False
+
+    logging.info("Saving key: %s" % out_key_file)
+    key.save_txt(out_key_file)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Makes a subset of a trial key")
+
+    parser.add_argument("--in-key-file", required=True)
+    parser.add_argument("--out-key-file", required=True)
+    parser.add_argument("--ntar", required=True, type=int)
+    parser.add_argument("--nnon", required=True, type=int)
+    parser.add_argument("--seed", default=112358, type=int)
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    make_trials(**namespace_to_dict(args))
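The subsetting logic in make_trials_subset.py keeps a random ntar/nnon subset of the true entries in each sparse matrix by permuting the nonzero indices and clearing everything past the first n. The same idea on a toy dense boolean mask:

    # Toy version of the subsetting above; SparseTrialKey is not needed
    # to see the logic.
    import numpy as np

    rng = np.random.RandomState(seed=112358)
    tar = np.eye(4, dtype=bool)                  # 4 target trials on the diagonal
    ntar = 2                                     # number of target trials to keep

    nz = tar.nonzero()
    drop = rng.permutation(len(nz[0]))[ntar:]    # indices beyond the first ntar
    tar[(nz[0][drop], nz[1][drop])] = False
    assert tar.sum() == ntar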
diff --git a/egs/voxceleb/adv.v1.1/local/make_voxceleb1_o.pl b/egs/voxceleb/adv.v1.1/local/make_voxceleb1_o.pl
new file mode 100755
index 00000000..dce92245
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/make_voxceleb1_o.pl
@@ -0,0 +1,180 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#           2018  David Snyder
+#           2020  Jesus Villalba
+#
+# Usage: make_voxceleb1.pl /export/voxceleb1 data/
+# Create trial lists for Voxceleb1 original,
+# with cleaned and non-cleaned versions
+# Attention:
+#  - This script is for the old version of the dataset without anonymized speaker-ids
+#  - This script assumes that the voxceleb1 dataset has all speaker directories
+#    dumped in the same wav directory, NOT separated dev and test directories
+
+if (@ARGV != 2) {
+  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb1 data/\n";
+  exit(1);
+}
+
+($data_base, $out_dir) = @ARGV;
+my $out_dir = "$out_dir/voxceleb1_test";
+
+if (system("mkdir -p $out_dir") != 0) {
+  die "Error making directory $out_dir";
+}
+
+my $url_base="http://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta";
+my @trials_basename = ("very_test.txt", "very_test2.txt");
+my @trials_url = ("$url_base/veri_test.txt", "$url_base/veri_test2.txt");
+my @trials = ("trials_o", "trials_o_clean");
+
+my $meta_url = "https://www.openslr.org/resources/49/vox1_meta.csv";
+my $meta_path = "$data_base/vox1_meta.csv";
+if (! -e "$meta_path") {
+  $meta_path = "$out_dir/vox1_meta.csv";
+  system("wget -O $meta_path $meta_url");
+}
+
+open(META_IN, "<", "$meta_path") or die "Could not open the meta data file $meta_path";
+my %id2spkr = ();
+my %spkr2gender = ();
+my %spkr2nation = ();
+while (<META_IN>) {
+  chomp;
+  my ($vox_id, $spkr_id, $gender, $nation, $set) = split "\t";
+  $id2spkr{$vox_id} = $spkr_id;
+  $spkr2gender{$spkr_id} = $gender;
+  $nation =~ s@ @-@g;
+  $spkr2nation{$spkr_id} = $nation;
+}
+close(META_IN) or die;
+
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv";
+my $lid_path = "$data_base/lang_vox1_final.csv";
+if (! -e "$lid_path") {
+  $lid_path = "$out_dir/lang_vox1_final.csv";
+  system("wget -O $lid_path $lid_url");
+}
+open(LID_IN, "<", "$lid_path") or die "Could not open the output file $lid_path";
+my %utt2lang = ();
+while (<LID_IN>) {
+  chomp;
+  my ($utt_id, $lang, $score) = split ',';
+  my ($vox_id, $vid_id, $file_id) = split '/', $utt_id;
+  my $spkr_id = $id2spkr{$vox_id};
+  my $utt_id = "$spkr_id-$vid_id-00$file_id";
+  $utt_id =~ s@\.wav$@@;
+  $utt2lang{$utt_id} = $lang;
+}
+close(LID_IN) or die;
+
+#download trials from voxceleb web page
+for($i = 0; $i <= $#trials; $i++) {
+
+  my $file_i = "$out_dir/$trials_basename[$i]";
+  my $url_i = $trials_url[$i];
+  my $trial_i = "$out_dir/$trials[$i]";
+  if (! -e $file_i) {
+    system("wget -O $file_i $url_i");
+  }
+  #mapping from new speaker ids and file-names to old ones
+  open(TRIAL_IN, "<", "$file_i") or die "Could not open the verification trials file $file_i";
+  open(TRIAL_OUT, ">", "$trial_i") or die "Could not open the output file $trial_i";
+  while (<TRIAL_IN>) {
+    chomp;
+    my ($tar_or_non, $path1, $path2) = split;
+
+    # Create entry for left-hand side of trial
+    my ($vox_id, $rec_id, $segment) = split('/', $path1);
+    $segment =~ s/\.wav$//;
+    my $spkr_id = $id2spkr{$vox_id};
+    my $utt_id1 = "$spkr_id-$rec_id-00$segment";
+
+    # Create entry for right-hand side of trial
+    my ($vox_id, $rec_id, $segment) = split('/', $path2);
+    $segment =~ s/\.wav$//;
+    my $spkr_id = $id2spkr{$vox_id};
+    my $utt_id2 = "$spkr_id-$rec_id-00$segment";
+
+    my $target = "nontarget";
+    if ($tar_or_non eq "1") {
+      $target = "target";
+    }
+    print TRIAL_OUT "$utt_id1 $utt_id2 $target\n";
+  }
+
+  close(TRIAL_IN) or die;
+  close(TRIAL_OUT) or die;
+
+}
+
+
+opendir my $dh, "$data_base/voxceleb1_wav" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh);
+closedir $dh;
+
+open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(NAT, ">", "$out_dir/spk2nation") or die "Could not open the output file $out_dir/spk2nation";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
+
+foreach (@spkr_dirs) {
+  my $spkr_id = $_;
+  my $new_spkr_id = $spkr_id;
+  # If we're using a newer version of VoxCeleb1, we need to "deanonymize"
+  # the speaker labels.
+  if (exists $id2spkr{$spkr_id}) {
+    $new_spkr_id = $id2spkr{$spkr_id};
+  }
+  print GENDER "$new_spkr_id $spkr2gender{$new_spkr_id}\n";
+  print NAT "$new_spkr_id $spkr2nation{$new_spkr_id}\n";
+
+  opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!";
+  my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
+  closedir $dh;
+  foreach (@files) {
+    my $filename = $_;
+    my $rec_id = substr($filename, 0, 11);
+    my $segment = substr($filename, 12, 7);
+    my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav";
+    my $utt_id = "$new_spkr_id-$rec_id-$segment";
+    print WAV "$utt_id", " $wav", "\n";
+    print SPKR "$utt_id", " $new_spkr_id", "\n";
+    if (exists $utt2lang{$utt_id}) {
+      print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+    }
+    else {
+      print LANG "$utt_id N/A\n";
+    }
+  }
+}
+
+close(SPKR) or die;
+close(WAV) or die;
+close(LANG) or die;
+close(GENDER) or die;
+close(NAT) or die;
+
+if (system(
+    "cat $out_dir/trials_* | sort -u > $out_dir/trials") != 0) {
+  die "Error creating trials file in directory $out_dir";
+}
+
+if (system(
+    "awk '{ print \$1,\$1 }' $out_dir/trials | sort -u > $out_dir/utt2model") != 0) {
+  die "Error creating utt2model file in directory $out_dir";
+}
+
+if (system(
+    "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
+
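The utterance ids built by this script follow the convention <speaker>-<recording>-00<segment>. A small illustration; the id-to-name entry is one well-known VoxCeleb1 pair, included only to make the format concrete:

    # Illustrative only; id2spkr would be read from vox1_meta.csv.
    id2spkr = {"id10001": "A.J._Buckley"}
    vox_id, rec_id, segment = "id10001", "1zcIwhmdeo4", "00001.wav"
    utt_id = f"{id2spkr[vox_id]}-{rec_id}-00{segment.replace('.wav', '')}"
    print(utt_id)  # A.J._Buckley-1zcIwhmdeo4-0000001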
-e "$meta_path") { + $meta_path = "$out_dir/vox2_meta.csv"; + system("wget --no-check-certificate -O $meta_path $meta_url"); +} +open(META_IN, "<", "$meta_path") or die "Could not open the output file $meta_path"; +my %spkr2gender = (); +while () { + chomp; + my ($spkr, $vox_id, $vgg_id, $gender, $set) = split; + $spkr2gender{$vox_id} = $gender; +} +close(META_IN) or die; + +print "Reading languages estimated voxlingua \n"; +my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox2_final.csv"; +my $lid_path = "$data_base/lang_vox2_final.csv"; +if (! -e "$lid_path") { + $lid_path = "$out_dir/lang_vox2_final.csv"; + system("wget -O $lid_path $lid_url"); +} +open(LID_IN, "<", "$lid_path") or die "Could not open the output file $lid_path"; +my %utt2lang = (); +while () { + chomp; + my ($utt_id, $lang, $score) = split ','; + $utt_id =~ s@/@-@g; + $utt_id =~ s@-[^-]*\.wav$@@; + $utt2lang{$utt_id} = $lang; +} +close(LID_IN) or die; + +open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk"; +open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp"; +open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang"; +open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender"; + +opendir my $dh, "$dataset_path" or die "Cannot open directory: $!"; +my @spkr_dirs = grep {-d "$dataset_path/$_" && ! /^\.{1,2}$/} readdir($dh); +closedir $dh; + +my $num_spkrs = @spkr_dirs; +my $count = 0; +foreach (@spkr_dirs) { + my $spkr_id = $_; + + $count++ ; + print " processing speaker $spkr_id $count / $num_spkrs \n"; + print GENDER "$spkr_id $spkr2gender{$spkr_id}\n"; + + opendir my $dh, "$dataset_path/$spkr_id/" or die "Cannot open directory: $!"; + my @rec_dirs = grep {-d "$dataset_path/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh); + closedir $dh; + + foreach (@rec_dirs) { + my $rec_id = $_; + my $utt_id = "$spkr_id-$rec_id"; + my $file_list = "$out_dir/lists_cat/$utt_id.txt"; + if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){ + die "Error creating $file_list"; + } + my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|"; + if($fs == 8){ + $wav = $wav." 
+      $wav = $wav." sox -t wav - -t wav -r 8k - |"
+    }
+    print WAV "$utt_id", " $wav", "\n";
+    print SPKR "$utt_id", " $spkr_id", "\n";
+    if (exists $utt2lang{$utt_id}) {
+      print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+    }
+    else {
+      print LANG "$utt_id N/A\n";
+    }
+  }
+}
+close(SPKR) or die;
+close(WAV) or die;
+close(LANG) or die;
+close(GENDER) or die;
+
+if (system(
+    "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
diff --git a/egs/voxceleb/adv.v1.1/local/score_dcf.py b/egs/voxceleb/adv.v1.1/local/score_dcf.py
new file mode 100755
index 00000000..3524d222
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/score_dcf.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+"""
+ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import sys
+import os
+import argparse
+import time
+import logging
+
+import numpy as np
+
+from hyperion.hyp_defs import float_cpu, config_logger
+from hyperion.utils import SparseTrialScores, SparseTrialKey
+from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval
+
+
+def score_dcf(key_file, score_file, output_path):
+
+    logging.info("Load key: %s" % key_file)
+    key = SparseTrialKey.load_txt(key_file)
+    logging.info("Load scores: %s" % score_file)
+    scr = SparseTrialScores.load_txt(score_file)
+    logging.info("separating tar/non")
+    tar, non = scr.get_tar_non(key)
+    logging.info("computing EER/DCF")
+    priors = np.array([0.001, 0.005, 0.01, 0.05])
+    min_dcf, act_dcf, eer, _, min_pmiss, min_pfa, act_pmiss, act_pfa = fast_eval(
+        tar, non, priors, return_probs=True
+    )
+
+    output_dir = os.path.dirname(output_path)
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+
+    ntar = len(tar)
+    nnon = len(non)
+
+    output_file = output_path + "_results"
+    with open(output_file, "w") as f:
+        s = "EER: {0:.2f} DCF5e-2: {1:.3f} / {2:.3f} DCF1e-2: {3:.3f} / {4:.3f} DCF5e-3: {5:.3f} / {6:.3f} DCF1e-3: {7:.3f} / {8:.3f} ntar: {9:d} nnon: {10:d}\n".format(
+            eer * 100,
+            min_dcf[3],
+            act_dcf[3],
+            min_dcf[2],
+            act_dcf[2],
+            min_dcf[1],
+            act_dcf[1],
+            min_dcf[0],
+            act_dcf[0],
+            ntar,
+            nnon,
+        )
+        f.write(s)
+        logging.info(s)
+        s = "min-pmiss={} min-pfa={} act-pmiss={} act-pfa={}".format(
+            min_pmiss, min_pfa, act_pmiss, act_pfa
+        )
+        logging.info(s)
+        s = "min-Nmiss={} min-Nfa={} act-Nmiss={} act-Nfa={}".format(
+            min_pmiss * ntar, min_pfa * nnon, act_pmiss * ntar, act_pfa * nnon
+        )
+        logging.info(s)
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        fromfile_prefix_chars="@",
+        description="Computes EER and DCF",
+    )
+
+    parser.add_argument("--key-file", required=True)
+    parser.add_argument("--score-file", required=True)
+    parser.add_argument("--output-path", required=True)
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    score_dcf(**vars(args))
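A quick way to sanity-check the fast_eval call pattern used in score_dcf.py is to feed it synthetic scores (this assumes hyperion is installed; the score distributions are made up):

    # Synthetic tar/non scores pushed through the same call as score_dcf.py.
    import numpy as np
    from hyperion.np.metrics import fast_eval_dcf_eer as fast_eval

    rng = np.random.RandomState(0)
    tar = rng.randn(1000) + 2.0    # target scores, shifted up
    non = rng.randn(10000)         # non-target scores
    priors = np.array([0.001, 0.005, 0.01, 0.05])
    min_dcf, act_dcf, eer, _, min_pmiss, min_pfa, act_pmiss, act_pfa = fast_eval(
        tar, non, priors, return_probs=True
    )
    print("EER: {:.2f}%".format(eer * 100))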
diff --git a/egs/voxceleb/adv.v1.1/local/score_voxceleb1_o_clean.sh b/egs/voxceleb/adv.v1.1/local/score_voxceleb1_o_clean.sh
new file mode 100755
index 00000000..b8247efc
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/score_voxceleb1_o_clean.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Copyright 2020 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0.
+#
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <data-dir> <score-dir>"
+  exit 1;
+fi
+
+set -e
+
+data_dir=$1
+score_dir=$2
+
+for cond in o_clean
+do
+    echo "Voxceleb $cond"
+    key=$data_dir/trials_$cond
+    #Compute performance
+    python local/score_dcf.py --key-file $key --score-file $score_dir/voxceleb1_scores --output-path $score_dir/voxceleb1_${cond} &
+done
+wait
+
diff --git a/egs/voxceleb/adv.v1.1/local/score_voxceleb1_single_cond.sh b/egs/voxceleb/adv.v1.1/local/score_voxceleb1_single_cond.sh
new file mode 100755
index 00000000..7531037e
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/local/score_voxceleb1_single_cond.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright 2020 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0.
+#
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 <data-dir> <condition> <score-dir>"
+  exit 1;
+fi
+
+set -e
+
+data_dir=$1
+cond=$2
+score_dir=$3
+
+echo "Voxceleb $cond"
+key=$data_dir/trials_$cond
+#Compute performance
+python local/score_dcf.py --key-file $key --score-file $score_dir/voxceleb1_scores --output-path $score_dir/voxceleb1_${cond}
+
+
diff --git a/egs/voxceleb/adv.v1.1/run_002_compute_evad.sh b/egs/voxceleb/adv.v1.1/run_002_compute_evad.sh
index eeae00ac..e854b393 100755
--- a/egs/voxceleb/adv.v1.1/run_002_compute_evad.sh
+++ b/egs/voxceleb/adv.v1.1/run_002_compute_evad.sh
@@ -9,7 +9,6 @@ set -e
 nodes=fs01
 storage_name=$(date +'%m_%d_%H_%M')
 vaddir=`pwd`/exp/vad_e
-vad_config=conf/vad_16k.yaml
 
 stage=1
 config_file=default_config.sh
diff --git a/egs/voxceleb/adv.v1.1/run_010_prepare_victim_xvec_train_data.sh b/egs/voxceleb/adv.v1.1/run_004_prepare_victim_xvec_train_data.sh
similarity index 94%
rename from egs/voxceleb/adv.v1.1/run_010_prepare_victim_xvec_train_data.sh
rename to egs/voxceleb/adv.v1.1/run_004_prepare_victim_xvec_train_data.sh
index f89c9822..0e10ea68 100755
--- a/egs/voxceleb/adv.v1.1/run_010_prepare_victim_xvec_train_data.sh
+++ b/egs/voxceleb/adv.v1.1/run_004_prepare_victim_xvec_train_data.sh
@@ -16,7 +16,7 @@ config_file=default_config.sh
 if [ $stage -le 2 ]; then
     # This script preprocess audio for x-vector training
     steps_xvec/preprocess_audios_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \
-	--storage_name voxceleb-adv.v2.1-$(date +'%m_%d_%H_%M') --use-bin-vad true \
+	--storage_name voxceleb-adv.v1.1-$(date +'%m_%d_%H_%M') --use-bin-vad true \
	data/${nnet_data} data/${nnet_data}_proc_audio_no_sil exp/${nnet_data}_proc_audio_no_sil
     utils/fix_data_dir.sh data/${nnet_data}_proc_audio_no_sil
@@ -29,7 +29,6 @@ if [ $stage -le 3 ]; then
     # We also want several utterances per speaker. Now we'll throw out speakers
     # with fewer than 4 utterances.
     hyp_utils/remove_spk_few_utts.sh --min-num-utts 4 data/${nnet_data}_proc_audio_no_sil
-
 fi
 
 if [ $stage -le 4 ]; then
@@ -39,4 +38,3 @@ if [ $stage -le 4 ]; then
	data/${nnet_data}_proc_audio_no_sil/lists_xvec
 fi
 
-exit
diff --git a/egs/voxceleb/adv.v1.1/run_005_train_victim_xvector.sh b/egs/voxceleb/adv.v1.1/run_005_train_victim_xvector.sh
new file mode 100755
index 00000000..37a91211
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/run_005_train_victim_xvector.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Copyright
+#                2019   Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+stage=1
+ngpu=4
+config_file=default_config.sh
+interactive=false
+num_workers=""
+use_tb=false
+use_wandb=false
+
+. parse_options.sh || exit 1;
+. $config_file
+. datapath.sh
+
+list_dir=data/${nnet_data}_proc_audio_no_sil
+
+#add extra args from the command line arguments
+if [ -n "$num_workers" ];then
+    extra_args="--data.train.data_loader.num-workers $num_workers"
+fi
+if [ "$use_tb" == "true" ];then
+    extra_args="$extra_args --trainer.use-tensorboard"
+fi
+if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-adv.v1 --trainer.wandb.name $nnet_name.$(date -Iminutes)"
+fi
+
+if [ "$interactive" == "true" ];then
+    export cuda_cmd=run.pl
+fi
+
+# Network Training
+if [ $stage -le 1 ]; then
+
+    mkdir -p $nnet_dir/log
+    $cuda_cmd \
+        --gpu $ngpu $nnet_dir/log/train.log \
+        hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+        train_xvector_from_wav.py $nnet_type --cfg $nnet_cfg $nnet_args $extra_args \
+        --data.train.dataset.audio-file $list_dir/wav.scp \
+        --data.train.dataset.time-durs-file $list_dir/utt2dur \
+        --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \
+        --data.train.dataset.class-files $list_dir/lists_xvec/class2int \
+        --data.val.dataset.audio-file $list_dir/wav.scp \
+        --data.val.dataset.time-durs-file $list_dir/utt2dur \
+        --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \
+        --trainer.exp-path $nnet_dir \
+        --num-gpus $ngpu
+
+fi
+
diff --git a/egs/voxceleb/adv.v1.1/run_012_prepare_transfer_xvec_train_data.sh b/egs/voxceleb/adv.v1.1/run_006_prepare_transfer_xvec_train_data.sh
similarity index 100%
rename from egs/voxceleb/adv.v1.1/run_012_prepare_transfer_xvec_train_data.sh
rename to egs/voxceleb/adv.v1.1/run_006_prepare_transfer_xvec_train_data.sh
diff --git a/egs/voxceleb/adv.v1.1/run_007_train_transfer_xvector.sh b/egs/voxceleb/adv.v1.1/run_007_train_transfer_xvector.sh
new file mode 100755
index 00000000..70bab280
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/run_007_train_transfer_xvector.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# Copyright
+#                2019   Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+stage=1
+ngpu=4
+config_file=default_config.sh
+interactive=false
+num_workers=""
+use_tb=false
+use_wandb=false
+
+. parse_options.sh || exit 1;
+. $config_file
+. datapath.sh
+
+if [ "$nnet" == "$transfer_nnet" ];then
+    echo "Victim and transfer model are the same"
+    echo "Skipping this step"
+    exit 0
+fi
+
+list_dir=data/${transfer_nnet_data}_proc_audio_no_sil
+nnet_type=$transfer_nnet_type
+nnet_dir=$transfer_nnet_dir
+nnet_cfg=$transfer_nnet_cfg
+nnet_args=$transfer_nnet_args
+
+#add extra args from the command line arguments
+if [ -n "$num_workers" ];then
+    extra_args="--data.train.data_loader.num-workers $num_workers"
+fi
+if [ "$use_tb" == "true" ];then
+    extra_args="$extra_args --trainer.use-tensorboard"
+fi
+if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-adv.v1 --trainer.wandb.name $nnet_name.$(date -Iminutes)"
+fi
+
+if [ "$interactive" == "true" ];then
+    export cuda_cmd=run.pl
+fi
+
+# Network Training
+if [ $stage -le 1 ]; then
+
+    mkdir -p $nnet_dir/log
+    $cuda_cmd \
+        --gpu $ngpu $nnet_dir/log/train.log \
+        hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+        train_xvector_from_wav.py $nnet_type --cfg $nnet_cfg $nnet_args $extra_args \
+        --data.train.dataset.audio-file $list_dir/wav.scp \
+        --data.train.dataset.time-durs-file $list_dir/utt2dur \
+        --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \
+        --data.train.dataset.class-files $list_dir/lists_xvec/class2int \
+        --data.val.dataset.audio-file $list_dir/wav.scp \
+        --data.val.dataset.time-durs-file $list_dir/utt2dur \
+        --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \
+        --trainer.exp-path $nnet_dir \
+        --num-gpus $ngpu
+
+fi
+
diff --git a/egs/voxceleb/adv.v1.1/run_008_adv_finetune_victim_xvector.sh b/egs/voxceleb/adv.v1.1/run_008_adv_finetune_victim_xvector.sh
new file mode 100755
index 00000000..12f1e5fd
--- /dev/null
+++ b/egs/voxceleb/adv.v1.1/run_008_adv_finetune_victim_xvector.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# Copyright
+#                2019   Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+stage=1
+ngpu=4
+config_file=default_config.sh
+interactive=false
+num_workers=""
+use_tb=false
+use_wandb=false
+
+. parse_options.sh || exit 1;
+. $config_file
+. datapath.sh
+
+if [ "$nnet" == "$transfer_nnet" ];then
+    echo "Victim and transfer model are the same"
+    echo "Skipping this step"
+    exit 0
+fi
+
+list_dir=data/${nnet_data}_proc_audio_no_sil
+nnet_dir=$advft_nnet_dir
+nnet_cfg=$advft_nnet_cfg
+nnet_args=$advft_nnet_args
+
+#add extra args from the command line arguments
+if [ -n "$num_workers" ];then
+    extra_args="--data.train.data_loader.num-workers $num_workers"
+fi
+if [ "$use_tb" == "true" ];then
+    extra_args="$extra_args --trainer.use-tensorboard"
+fi
+if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-adv.v1 --trainer.wandb.name $nnet_name.$(date -Iminutes)"
+fi
+
+if [ "$interactive" == "true" ];then
+    export cuda_cmd=run.pl
+fi
+
+# Network Training
+if [ $stage -le 1 ]; then
+
+    mkdir -p $nnet_dir/log
+    $cuda_cmd \
+        --gpu $ngpu $nnet_dir/log/train.log \
+        hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+        adv_finetune_xvector_from_wav.py $nnet_type --cfg $nnet_cfg $nnet_args $extra_args \
+        --data.train.dataset.audio-file $list_dir/wav.scp \
+        --data.train.dataset.time-durs-file $list_dir/utt2dur \
+        --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \
+        --data.train.dataset.class-files $list_dir/lists_xvec/class2int \
+        --data.val.dataset.audio-file $list_dir/wav.scp \
+        --data.val.dataset.time-durs-file $list_dir/utt2dur \
+        --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \
+        --trainer.exp-path $nnet_dir \
+        --num-gpus $ngpu
+
+fi
+
+
+
+# #!/bin/bash
+# # Copyright
+# #                2019   Johns Hopkins University (Author: Jesus Villalba)
+# # Apache 2.0.
+# #
+# . ./cmd.sh
+# . ./path.sh
+# set -e
+
+# stage=1
+# ngpu=4
+# config_file=default_config.sh
+# resume=false
+# interactive=false
+# num_workers=8
+
+# . parse_options.sh || exit 1;
+# . $config_file
+# . datapath.sh
+
+# batch_size=$(($advft_batch_size_1gpu*$ngpu))
+# grad_acc_steps=$(echo $batch_size $advft_eff_batch_size | awk '{ print int($2/$1+0.5)}')
+# log_interval=$(echo 100*$grad_acc_steps | bc)
+# list_dir=data/${nnet_data}_proc_audio_no_sil
+
+# args=""
+# if [ "$resume" == "true" ];then
+#     args="--resume"
+# fi
+
+# if [ "$interactive" == "true" ];then
+#     export cuda_cmd=run.pl
+# fi
+
+# # Network Training
+# if [ $stage -le 1 ]; then
+#     mkdir -p $advft_nnet_dir/log
+#     $cuda_cmd --gpu $ngpu $advft_nnet_dir/log/train.log \
+#         hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+#         torch-adv-finetune-xvec-from-wav.py --feats $feat_config $aug_opt \
+#         --audio-path $list_dir/wav.scp \
+#         --time-durs-file $list_dir/utt2dur \
+#         --train-list $list_dir/lists_xvec/train.scp \
+#         --val-list $list_dir/lists_xvec/val.scp \
+#         --class-file $list_dir/lists_xvec/class2int \
+#         --min-chunk-length $min_chunk --max-chunk-length $max_chunk \
+#         --iters-per-epoch $ipe \
+#         --batch-size $batch_size \
+#         --num-workers $num_workers \
+#         --grad-acc-steps $grad_acc_steps $advft_opt_opt $advft_lrs_opt \
+#         --epochs $advft_nnet_num_epochs \
+#         --s $s --margin $advft_margin --margin-warmup-epochs $advft_margin_warmup \
+#         --num-gpus $ngpu \
+#         --train-mode ft-full \
+#         --log-interval $log_interval \
+#         --in-model-path $nnet \
+#         --exp-path $advft_nnet_dir $advft_attack_opts $args
+
+# fi
+# #
+
+# exit
diff --git a/egs/voxceleb/adv.v1.1/run_030_extract_xvectors_victim_model.sh b/egs/voxceleb/adv.v1.1/run_009_extract_xvectors_victim_model.sh
similarity index 81%
rename from egs/voxceleb/adv.v1.1/run_030_extract_xvectors_victim_model.sh
rename to egs/voxceleb/adv.v1.1/run_009_extract_xvectors_victim_model.sh
index ff068c1b..2df747e6 100755
--- a/egs/voxceleb/adv.v1.1/run_030_extract_xvectors_victim_model.sh
+++ b/egs/voxceleb/adv.v1.1/run_009_extract_xvectors_victim_model.sh
@@ -36,11 +36,11 @@ if [ $stage -le 1 ]; then
     do
	num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}')
	nj=$(($num_spk < 100 ? $num_spk:100))
-	steps_xvec/extract_xvectors_from_wav.sh --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \
-	    --feat-config $feat_config \
-	    $nnet data/$name \
-	    $xvector_dir/$name
+	steps_xvec/extract_xvectors_from_wav.sh \
+	    --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \
+	    --feat-config $feat_config \
+	    $nnet data/$name \
+	    $xvector_dir/$name
     done
 fi
 
-exit
diff --git a/egs/voxceleb/adv.v1.1/run_031_extract_xvectors_transfer_model.sh b/egs/voxceleb/adv.v1.1/run_010_extract_xvectors_transfer_model.sh
similarity index 81%
rename from egs/voxceleb/adv.v1.1/run_031_extract_xvectors_transfer_model.sh
rename to egs/voxceleb/adv.v1.1/run_010_extract_xvectors_transfer_model.sh
index df29fc12..7e2488b3 100755
--- a/egs/voxceleb/adv.v1.1/run_031_extract_xvectors_transfer_model.sh
+++ b/egs/voxceleb/adv.v1.1/run_010_extract_xvectors_transfer_model.sh
@@ -33,11 +33,12 @@ if [ $stage -le 1 ]; then
     do
	num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}')
	nj=$(($num_spk < 100 ? $num_spk:100))
-	steps_xvec/extract_xvectors_from_wav.sh --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \
-	    --feat-config $feat_config \
-	    $nnet data/$name \
-	    $xvector_dir/$name
+	steps_xvec/extract_xvectors_from_wav.sh \
+	    --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \
+	    --feat-config $feat_config \
+	    $nnet data/$name \
+	    $xvector_dir/$name
     done
 fi
 
-exit
+
diff --git a/egs/voxceleb/adv.v1.1/run_040_eval_be_victim_model.sh b/egs/voxceleb/adv.v1.1/run_011_eval_be_victim_model.sh
similarity index 100%
rename from egs/voxceleb/adv.v1.1/run_040_eval_be_victim_model.sh
rename to egs/voxceleb/adv.v1.1/run_011_eval_be_victim_model.sh
diff --git a/egs/voxceleb/adv.v1.1/run_041_eval_be_transfer_model.sh b/egs/voxceleb/adv.v1.1/run_012_eval_be_transfer_model.sh
similarity index 100%
rename from egs/voxceleb/adv.v1.1/run_041_eval_be_transfer_model.sh
rename to egs/voxceleb/adv.v1.1/run_012_eval_be_transfer_model.sh
diff --git a/egs/voxceleb/adv.v1.1/run_043_eval_whitebox_attacks.sh b/egs/voxceleb/adv.v1.1/run_013_eval_whitebox_attacks.sh
similarity index 100%
rename from egs/voxceleb/adv.v1.1/run_043_eval_whitebox_attacks.sh
rename to egs/voxceleb/adv.v1.1/run_013_eval_whitebox_attacks.sh
diff --git a/egs/voxceleb/adv.v1.1/run_044_eval_transfer_blackbox_attacks.sh b/egs/voxceleb/adv.v1.1/run_014_eval_transfer_blackbox_attacks.sh
similarity index 100%
rename from egs/voxceleb/adv.v1.1/run_044_eval_transfer_blackbox_attacks.sh
rename to egs/voxceleb/adv.v1.1/run_014_eval_transfer_blackbox_attacks.sh
diff --git a/egs/voxceleb/adv.v1.1/run_045_eval_whitebox_attacks_with_randsmooth_defense.sh b/egs/voxceleb/adv.v1.1/run_015_eval_whitebox_attacks_with_randsmooth_defense.sh
similarity index 67%
rename from egs/voxceleb/adv.v1.1/run_045_eval_whitebox_attacks_with_randsmooth_defense.sh
rename to egs/voxceleb/adv.v1.1/run_015_eval_whitebox_attacks_with_randsmooth_defense.sh
index 3077ecf6..fc9d6a7d 100755
--- a/egs/voxceleb/adv.v1.1/run_045_eval_whitebox_attacks_with_randsmooth_defense.sh
+++ b/egs/voxceleb/adv.v1.1/run_015_eval_whitebox_attacks_with_randsmooth_defense.sh
@@ -376,181 +376,3 @@ fi
 
 exit
 
-
-# #!/bin/bash
-# # Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
-# #
-# # Apache 2.0.
-# #
-# . ./cmd.sh
-# . ./path.sh
-# set -e
-
-# stage=1
-# config_file=default_config.sh
-# use_gpu=false
-# do_analysis=false
-# save_wav=false
-
-# . parse_options.sh || exit 1;
-# . $config_file
-# . datapath.sh
-
-# if [ "$use_gpu" == "true" ];then
-#     eval_args="--use-gpu true"
-#     eval_cmd="$cuda_eval_cmd"
-# else
-#     eval_cmd="$train_cmd"
-# fi
-
-# xvector_dir=exp/xvectors/$nnet_name
-# score_dir=exp/scores/$nnet_name
-
-# score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores
-# cal_file=$score_dir/cosine_cal_v1/cal_tel.h5
-
-# #thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90)
-# thr005=2.94
-# thr001=4.60
-# thr0001=6.90
-# declare -a score_array
-# declare -a stats_array
-
-# if [ $stage -le 1 ];then
-
-#     for sigma in 0.001 0.01
-#     do
-#         score_array=()
-#         stats_array=()
-#         for eps in 0.00001 0.0001 0.001 0.01 0.1
-#         do
-#             score_plda_dir=$score_dir/cosine_fgsm_e${eps}_randsmooth${sigma}
-#             echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps"
-#             steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \
-#                 --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \
-#                 --attack-type fgsm --eps $eps \
-#                 --save-wav $save_wav --save-wav-path $score_plda_dir/wav \
-#                 --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \
-#                 $trial_list \
-#                 data/voxceleb1_test/utt2model \
-#                 data/voxceleb1_test \
-#                 $xvector_dir/voxceleb1_test/xvector.scp \
-#                 $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats
-
-#             score_array+=($score_plda_dir/voxceleb1_scores)
-#             stats_array+=($score_plda_dir/voxceleb1_stats)
-
-#             $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \
-#                 local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir
-
-#             for f in $(ls $score_plda_dir/*_results);
-#             do
-#                 echo $f
-#                 cat $f
-#                 echo ""
-#             done
-#         done
-#         if [ "${do_analysis}" == "true" ];then
-#             score_analysis_dir=$score_dir/cosine_fgsm_eall_randsmooth$sigma
-#             local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \
-#                 $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \
-#                 $score_analysis_dir/voxceleb1 &
-#         fi
-#     done
-
-# fi
-
-
-
-
-# if [ $stage -le 3 ];then
-#     for sigma in 0.001 0.01
-#     do
-#         score_array=()
-#         stats_array=()
-#         for eps in 0.00001 0.0001 0.001 0.01 0.1
-#         do
-#             alpha=$(echo $eps | awk '{ print $0/5.}')
-#             score_plda_dir=$score_dir/cosine_randfgsm_e${eps}_a${alpha}_randsmooth$sigma
-#             echo "Eval Voxceleb 1 with Cosine scoring with Rand-FGSM attack eps=$eps"
-#             steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \
-#                 --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \
-#                 --attack-type rand-fgsm --eps $eps --alpha $alpha --smooth-sigma $sigma\
-#                 --save-wav $save_wav --save-wav-path $score_plda_dir/wav \
-#                 --cal-file $cal_file --threshold $thr005 \
-#                 $trial_list \
-#                 data/voxceleb1_test/utt2model \
-#                 data/voxceleb1_test \
-#                 $xvector_dir/voxceleb1_test/xvector.scp \
-#                 $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats
-
-#             score_array+=($score_plda_dir/voxceleb1_scores)
-#             stats_array+=($score_plda_dir/voxceleb1_stats)
-
-#             $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \
-#                 local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir
-
-#             for f in $(ls $score_plda_dir/*_results);
-#             do
-#                 echo $f
-#                 cat $f
-#                 echo ""
-#             done
-
-#         done
-
-#         if [ "${do_analysis}" == "true" ];then
-#             score_analysis_dir=$score_dir/cosine_randfgsm_eall_randsmooth$sigma
-#             local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \
-#                 $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \
-#                 $score_analysis_dir/voxceleb1 &
-#         fi
-#     done
-# fi
-
-
-# if [ $stage -le 4 ];then
in 0.001 0.01 -# do -# score_array=() -# stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/cosine_iterfgsm_e${eps}_a${alpha}_randsmooth$sigma -# echo "Eval Voxceleb 1 with Cosine scoring with Iterative FGSM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type iter-fgsm --eps $eps --alpha $alpha \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_iterfgsm_eall_randsmooth$sigma -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi -# done -# fi - -# wait diff --git a/egs/voxceleb/adv.v1.1/run_046_eval_whitebox_attacks_with_randsmooth_wavegan_defense.sh b/egs/voxceleb/adv.v1.1/run_016_eval_whitebox_attacks_with_randsmooth_wavegan_defense.sh similarity index 100% rename from egs/voxceleb/adv.v1.1/run_046_eval_whitebox_attacks_with_randsmooth_wavegan_defense.sh rename to egs/voxceleb/adv.v1.1/run_016_eval_whitebox_attacks_with_randsmooth_wavegan_defense.sh diff --git a/egs/voxceleb/adv.v1.1/run_053_eval_art_whitebox_attacks.sh b/egs/voxceleb/adv.v1.1/run_017_eval_art_whitebox_attacks.sh similarity index 54% rename from egs/voxceleb/adv.v1.1/run_053_eval_art_whitebox_attacks.sh rename to egs/voxceleb/adv.v1.1/run_017_eval_art_whitebox_attacks.sh index 92fbcc92..3a92630f 100755 --- a/egs/voxceleb/adv.v1.1/run_053_eval_art_whitebox_attacks.sh +++ b/egs/voxceleb/adv.v1.1/run_017_eval_art_whitebox_attacks.sh @@ -510,7 +510,7 @@ if [ $stage -le 12 ];then echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner Linf attack confidence=$confidence" steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-linf --attack.confidence $confidence --attack.eps 0.3" \ + --attack-opts "--attack.attack-type cw-linf --attack.confidence $confidence --attack.initial-c 1e-5" \ --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ --cal-file $cal_file --threshold $thr005 \ $trial_list \ @@ -540,434 +540,510 @@ if [ $stage -le 12 ];then fi +if [ $stage -le 14 ];then + score_array=() + stats_array=() + for norm in inf 1 2 + do + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + alpha=$(echo $eps | awk '{ print $0/5.}') + score_plda_dir=$score_dir/cosine_art_autopgdl${norm}_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with Auto-PGD $norm attack eps=$eps" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh \ + --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type auto-pgd --attack.eps $eps --attack.eps-step 
$alpha --attack.max-iter 10 --attack.norm $norm" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done -# #!/bin/bash -# # Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# # -# # Apache 2.0. -# # -# . ./cmd.sh -# . ./path.sh -# set -e - -# stage=1 -# config_file=default_config.sh -# use_gpu=false -# do_analysis=false -# save_wav=false - -# . parse_options.sh || exit 1; -# . $config_file -# . datapath.sh - -# if [ "$use_gpu" == "true" ];then -# eval_args="--use-gpu true" -# eval_cmd="$cuda_eval_cmd" -# else -# eval_cmd="$train_cmd" -# fi + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) -# xvector_dir=exp/xvectors/$nnet_name -# score_dir=exp/scores/$nnet_name + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/cosine_art_autopgdl${norm}_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + done +fi + +if [ $stage -le 15 ];then + score_array=() + stats_array=() + for norm in inf 1 2 + do + for eps in 0.0001 0.001 0.01 0.1 + do + alpha=$(echo $eps | awk '{ print $0/5.}') + score_plda_dir=$score_dir/cosine_art_autocgdl${norm}_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with Auto-CGD $norm attack eps=$eps" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh \ + --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type auto-cgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm $norm" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done -# score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -# cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) -# #thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -# thr005=2.94 -# thr001=4.60 -# thr0001=6.90 + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/cosine_art_autocgdl${norm}_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + done +fi -# declare -a score_array -# declare -a stats_array +if [ $stage -le 16 ];then + score_array=() + stats_array=() + for eps in 0.0001 0.001 0.01 0.1 + do + alpha=$(echo $eps | awk '{ print $0/5.}') + score_plda_dir=$score_dir/cosine_art_deepfool_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with DeepFool attack 
eps=$eps" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh \ + --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type deepfool --attack.eps $eps --attack.max-iter 100" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/cosine_art_deepfool_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi +fi -# if [ $stage -le 1 ];then +if [ $stage -le 17 ];then -# score_array=() -# stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/cosine_art_fgsm_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type fgm --attack-opt "--attack-eps $eps" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + for confidence in 0 #1 + do + score_plda_dir=$score_dir/cosine_art_elasticnet_conf${confidence} + echo "Eval Voxceleb 1 with Cosine scoring with ElasticNet attack confidence=$confidence" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type elasticnet --attack.confidence $confidence --attack.max-iter 100 --attack.lr 0.01" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_plda_dir + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ + $score_analysis_dir/voxceleb1 & + fi -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) + done -# done -# if [ "${do_analysis}" == 
"true" ];then -# score_analysis_dir=$score_dir/cosine_art_fgsm_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi -# fi +fi -# if [ $stage -le 2 ];then -# score_array=() -# stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/cosine_art_fgsm_minimal_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGSM minimal attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type fgm --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-minimal" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats +if [ $stage -le 20 ];then + + for norm in inf 2 + do + score_plda_dir=$score_dir/cosine_art_hopskipjump_norm${norm} + echo "Eval Voxceleb 1 with Cosine scoring with Hopskipjump attack norm=$norm" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type hop-skip-jump --attack.norm $norm --attack.max-iter 50 --attack.max-eval 10000 --attack.init-eval 10 --attack.init-size 100" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_plda_dir + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ + $score_analysis_dir/voxceleb1 & + fi -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) + done -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_art_fgsm_minimal_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi +fi -# fi -# if [ $stage -le 3 ];then -# score_array=() -# stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/cosine_art_fgml1_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGM-L1 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type fgm --attack-opt "--attack-eps $eps --attack-norm 1" \ -# --save-wav $save_wav --save-wav-path 
$score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats +if [ $stage -le 23 ];then + + for eta in 0.01 + do + score_plda_dir=$score_dir/cosine_art_newtonfool_eta$eta + echo "Eval Voxceleb 1 with Cosine scoring with NewtonFool attack eta=$eta" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type newtonfool --attack.eta $eta" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_plda_dir + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ + $score_analysis_dir/voxceleb1 & + fi -# score_array+=($score_plda_dir/voxceleb1_scores) + done -# stats_array+=($score_plda_dir/voxceleb1_stats) + -# done +fi -# if [ "${do_analysis}" == "true" ];then +if [ $stage -le 25 ];then -# score_analysis_dir=$score_dir/cosine_art_fgml1_minimal_eall + -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + for lambda_tv in 0.3 -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + do -# $score_analysis_dir/voxceleb1 & + score_plda_dir=$score_dir/cosine_art_shadow_lambda${lambda_tv}
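+    # Note: the Shadow attack takes no eps budget; going by ART's ShadowAttack,
+    # --attack.lambda-tv weights a total-variation penalty that keeps the
+    # perturbation smooth, so larger values should give smoother, typically
+    # less audible noise. lambda_tv=0.3 is a single untuned operating point;
+    # a sweep could mirror the eps loops above, e.g.:
+    #   for lambda_tv in 0.1 0.3 1.0   # illustrative values, not tuned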
-# fi + echo "Eval Voxceleb 1 with Cosine scoring with Shadow attack lambda=$lambda_tv" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type shadow --attack.lambda-tv $lambda_tv" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# score_array+=($score_plda_dir/voxceleb1_scores) - -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_art_fgml1_minimal_eall + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_plda_dir + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ + $score_analysis_dir/voxceleb1 & + fi -# fi + done +fi -# if [ $stage -le 5 ];then -# score_array=() -# stats_array=() +if [ $stage -le 26 ];then + score_array=() + stats_array=() + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + alpha=$(echo $eps | awk '{ print $0/5.}') + score_plda_dir=$score_dir/cosine_art_wass_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with Wasserstein attack eps=$eps" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type wasserstein --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.reg 1" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done -# score_array+=($score_plda_dir/voxceleb1_scores) + score_array+=($score_plda_dir/voxceleb1_scores)
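+      # In the Wasserstein stage above, --attack.reg is assumed to map to the
+      # entropic regularization of ART's Wasserstein attack, which controls the
+      # Sinkhorn-style projection onto the Wasserstein ball; smaller values
+      # should give a tighter but slower projection. reg=1 is untuned here.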
+      stats_array+=($score_plda_dir/voxceleb1_stats) -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_art_fgml2_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi -# fi + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/cosine_art_wass_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi +fi -# if [ $stage -le 6 ];then -# score_array=() -# stats_array=() +if [ $stage -le 27 ];then -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/cosine_art_fgml2_minimal_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGM-L2 minimal attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type fgm --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-minimal --attack-norm 2" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + for confidence in 0 #1 + do + score_plda_dir=$score_dir/cosine_art_zoo_conf${confidence} + echo "Eval Voxceleb 1 with Cosine scoring with Zoo attack confidence=$confidence" + steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ + --feat-config $feat_config \ + --attack-opts "--attack.attack-type zoo --attack.confidence $confidence" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_plda_dir + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ + $score_analysis_dir/voxceleb1 & + fi -# score_array+=($score_plda_dir/voxceleb1_scores) + done -# stats_array+=($score_plda_dir/voxceleb1_stats) +fi -# done -# if [ "${do_analysis}" == "true" ];then +# The attacks below have issues when applied to audio -# score_analysis_dir=$score_dir/cosine_art_fgml2_minimal_eall ..." -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ +# if [ $stage -le 13 ];then -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi +# for eps in 0.0001 +# do +# score_plda_dir=$score_dir/cosine_art_boundary_eps${eps} +# alpha=$(echo $eps | awk '{ print $0/5.}') -# fi +# echo "Eval Voxceleb 1 with Cosine scoring with boundary attack eps=$eps" +# steps_adv/eval_cosine_scoring_from_art_test_wav.sh \ +# --cmd "$eval_cmd" $eval_args --nj 400 \ -#
score_plda_dir=$score_dir/cosine_art_iterfgsm_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with IterFGM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type bim --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-max-iter 10" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done +# --feat-config $feat_config \ +# --attack-opts "--attack.attack-type boundary --attack.eps $eps --attack.delta $eps --attack.max-iter 5000" \ +# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ +# --cal-file $cal_file --threshold $thr005 \ +# $trial_list \ +# data/voxceleb1_test/utt2model \ +# data/voxceleb1_test \ +# $xvector_dir/voxceleb1_test/xvector.scp \ +# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + +# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ +# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + +# for f in $(ls $score_plda_dir/*_results); +# do +# echo $f +# cat $f +# echo "" +# done +# if [ "${do_analysis}" == "true" ];then +# score_analysis_dir=$score_plda_dir +# local/attack_analysis.sh \ +# --cmd "$train_cmd --mem 10G" \ +# $trial_list $score_clean \ +# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ +# $score_analysis_dir/voxceleb1 & +# fi # done -# score_array+=($score_plda_dir/voxceleb1_scores) # fi -# stats_array+=($score_plda_dir/voxceleb1_stats) -# it needs access to hidden layers +# if [ $stage -le 18 ];then +# for eps in 0.00001 0.0001 0.001 0.01 0.1 +# do +# alpha=$(echo $eps | awk '{ print $0/5.}') +# score_plda_dir=$score_dir/cosine_art_fadv_e${eps} +# echo "Eval Voxceleb 1 with Cosine scoring with feature adversaries attack eps=$eps" +# steps_adv/eval_cosine_scoring_from_art_test_wav.sh \ +# --cmd "$eval_cmd" $eval_args --nj 80 \ +# --feat-config $feat_config \ +# --attack-opts "--attack.attack-type feature-adv --attack.delta $eps --attack.eps-step $alpha --attack.max-iter 100" \ +# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ +# --cal-file $cal_file --threshold $thr005 \ +# $trial_list \ +# data/voxceleb1_test/utt2model \ +# data/voxceleb1_test \ +# $xvector_dir/voxceleb1_test/xvector.scp \ +# $nnet
$score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + +# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ +# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + +# for f in $(ls $score_plda_dir/*_results); # do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/cosine_art_pgdlinf_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with PGD Linf attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type pgd --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-max-iter 10" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - +# echo $f +# cat $f +# echo "" # done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_art_pgdlinf_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi + +# score_array+=($score_plda_dir/voxceleb1_scores) +# stats_array+=($score_plda_dir/voxceleb1_stats) + +# done +# if [ "${do_analysis}" == "true" ];then +# score_analysis_dir=$score_dir/cosine_art_fadv_eall +# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ +# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ +# $score_analysis_dir/voxceleb1 & +# fi # fi - -# if [ $stage -le 9 ];then +# if [ $stage -le 19 ];then # score_array=() # stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 +# for norm in inf 1 2 # do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/cosine_art_pgdl1_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with PGD L1 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type pgd --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-max-iter 10 --attack-norm 1" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats +# for sigma in 0.0002 +# do +# score_plda_dir=$score_dir/cosine_art_geoda${norm}_s${sigma} +# echo "Eval Voxceleb 1 with Cosine scoring with GeoDA $norm sigma=$sigma" +# steps_adv/eval_cosine_scoring_from_art_test_wav.sh \ +# --cmd "$eval_cmd" $eval_args --nj 80 \ +# --feat-config $feat_config \ +# --attack-opts "--attack.attack-type geoda --attack.max-iter 4000 --attack.sigma-geoda $sigma --attack.norm $norm" \ +# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ +# --cal-file $cal_file --threshold $thr005 \ +# $trial_list \ +# data/voxceleb1_test/utt2model \ +# 
data/voxceleb1_test \ +# $xvector_dir/voxceleb1_test/xvector.scp \ +# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats # $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir +# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir # for f in $(ls $score_plda_dir/*_results); # do -# echo $f -# cat $f -# echo "" +# echo $f +# cat $f +# echo "" # done # score_array+=($score_plda_dir/voxceleb1_scores) # stats_array+=($score_plda_dir/voxceleb1_stats) -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_art_pgdl1_eall +# done +# if [ "${do_analysis}" == "true" ];then +# score_analysis_dir=$score_dir/cosine_art_geoda${norm}_sall # local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi +# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ +# $score_analysis_dir/voxceleb1 & +# fi +# done # fi -# if [ $stage -le 10 ];then -# score_array=() -# stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 +# +# if [ $stage -le 21 ];then + +# for norm in inf 1 2 # do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/cosine_art_pgdl2_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with PGD L2 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type pgd --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-max-iter 10 --attack-norm 2" \ +# score_plda_dir=$score_dir/cosine_art_brendel_norm${norm} +# echo "Eval Voxceleb 1 with Cosine scoring with Brendel attack norm=$norm" +# steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ +# --feat-config $feat_config \ +# --attack-opts "--attack.attack-type brendel --attack.norm $norm --attack.max-iter 1000 --attack.lr 1e-3 --attack.binary-search-steps 10 --attack.init-size 100" \ # --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ # --cal-file $cal_file --threshold $thr005 \ # $trial_list \ @@ -985,28 +1061,28 @@ fi # cat $f # echo "" # done - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) +# if [ "${do_analysis}" == "true" ];then +# score_analysis_dir=$score_plda_dir +# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ +# $trial_list $score_clean \ +# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ +# $score_analysis_dir/voxceleb1 & +# fi # done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_art_pgdl2_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi + # fi -# if [ $stage -le 11 ];then +## it needs to train some importance vector +# if [ $stage -le 22 ];then -# for confidence in 0 #1 +# for norm in 2 # do -# score_plda_dir=$score_dir/cosine_art_cwl2_conf${confidence} -# echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L2 attack confidence=$confidence" +# score_plda_dir=$score_dir/cosine_art_lowprofool_norm${norm} +# echo "Eval Voxceleb 1 with Cosine scoring with LowProFool attack norm=$norm" # steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ -# 
--feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type cw-l2 --attack-opt "--attack-confidence $confidence" \ +# --feat-config $feat_config \ +# --attack-opts "--attack.attack-type low-pro-fool --attack.norm $norm --attack.max-iter 100" \ # --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ # --cal-file $cal_file --threshold $thr005 \ # $trial_list \ @@ -1036,16 +1112,16 @@ fi # fi +## Too SLOW +# if [ $stage -le 24 ];then -# if [ $stage -le 12 ];then - -# for confidence in 0 #1 +# for theta in 0.1 # do -# score_plda_dir=$score_dir/cosine_art_cwlinf_conf${confidence} -# echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner Linf attack confidence=$confidence" +# score_plda_dir=$score_dir/cosine_art_jsma_theta$theta +# echo "Eval Voxceleb 1 with Cosine scoring with JSMA attack theta=$theta" # steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type cw-linf --attack-opt "--attack-confidence $confidence --attack-eps 0.3" \ +# --feat-config $feat_config \ +# --attack-opts "--attack.attack-type jsma --attack.theta $theta" \ # --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ # --cal-file $cal_file --threshold $thr005 \ # $trial_list \ @@ -1074,5 +1150,3 @@ fi # done # fi - - diff --git a/egs/voxceleb/adv.v1.1/run_018_eval_art_transfer_blackbox_attacks.sh b/egs/voxceleb/adv.v1.1/run_018_eval_art_transfer_blackbox_attacks.sh new file mode 100755 index 00000000..bc6390f2 --- /dev/null +++ b/egs/voxceleb/adv.v1.1/run_018_eval_art_transfer_blackbox_attacks.sh @@ -0,0 +1,633 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +use_gpu=false +do_analysis=false +save_wav=false +use_trials_subset=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh + +transfer_feat_config=$feat_config + +if [ "$use_gpu" == "true" ];then + eval_args="--use-gpu true" + eval_cmd="$cuda_eval_cmd" +else + eval_cmd="$train_cmd" +fi + +if [ "$use_trials_subset" == "true" ];then + condition=o_clean_1000_1000 +else + condition=o_clean +fi +trial_list=data/voxceleb1_test/trials_$condition + +xvector_dir=exp/xvectors/$nnet_name +score_dir=exp/scores/$nnet_name + +score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores +cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 + +transfer_xvector_dir=exp/xvectors/$transfer_nnet_name +transfer_score_dir=exp/scores/$transfer_nnet_name +transfer_cal_file=$transfer_score_dir/cosine_cal_v1/cal_tel.h5 + +#thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) +thr005=2.94 +thr001=4.60 +thr0001=6.90 +declare -a score_array +declare -a stats_array + +if [ $stage -le 1 ];then + + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type fgm --attack.eps $eps" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + +if [ $stage -le 2 ];then + + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_minimal_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with FGSM minimal attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls
$score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_minimal_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + + +if [ $stage -le 3 ];then + + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with FGM L1 attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.norm 1" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + +if [ $stage -le 4 ];then + + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_minimal_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with FGM minimal L1 attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal --attack.norm 1" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_minimal_eall + 
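+	# attack_analysis.sh contrasts the clean calibrated scores with the
+	# per-eps score/stats files accumulated in score_array/stats_array, so a
+	# single report covers the whole eps sweep of this attack family.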
local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + + +if [ $stage -le 5 ];then + + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with FGM L2 attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.norm 2" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + +if [ $stage -le 6 ];then + + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_minimal_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with FGM minimal L2 attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal --attack.norm 2" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_minimal_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + +if [ $stage -le 7 ];then + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + alpha=$(echo $eps | awk '{ print
$0/5.}') + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_iterfgsm_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with iter FGSM attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type bim --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_iterfgsm_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + + +if [ $stage -le 8 ];then + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + alpha=$(echo $eps | awk '{ print $0/5.}') + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdlinf_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with PGD Linf attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdlinf_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + +if [ $stage -le 9 ];then + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + alpha=$(echo $eps | awk '{ print $0/5.}') + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl1_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with PGD L1 attack eps=$eps" + 
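+	# PGD is iterative FGM with projection back onto the eps-ball; the step
+	# size is eps/5, so --attack.max-iter 10 lets each sample move up to
+	# 10*(eps/5) = 2*eps before projection. The awk step above computes, e.g.:
+	#   eps=0.01; awk -v e=$eps 'BEGIN{ print e/5. }'   # -> 0.002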
steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm 1" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl1_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + +if [ $stage -le 10 ];then + score_array=() + stats_array=() + + for eps in 0.00001 0.0001 0.001 0.01 0.1 + do + alpha=$(echo $eps | awk '{ print $0/5.}') + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl2_e${eps} + echo "Eval Voxceleb 1 with Cosine scoring with PGD L2 attack eps=$eps" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm 2" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + score_array+=($score_plda_dir/voxceleb1_scores) + stats_array+=($score_plda_dir/voxceleb1_stats) + + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl2_eall + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ + $score_analysis_dir/voxceleb1 & + fi + +fi + + +if [ $stage -le 11 ];then + + for confidence in 0 #1 + do
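+	# --attack.confidence is the Carlini-Wagner margin (kappa): 0 asks for
+	# minimally perturbed adversarial examples, while larger values force
+	# higher-confidence errors at the cost of more distortion, which in a
+	# transfer setup like this tends to carry over better from the white-box
+	# to the black-box model.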
"--attack.attack-type cw-l2 --attack.confidence $confidence" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_plda_dir + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ + $score_analysis_dir/voxceleb1 & + fi + + done + +fi + + +if [ $stage -le 12 ];then + + for confidence in 0 #1 + do + alpha=$(echo $eps | awk '{ print $0/5.}') + score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_cwlinf_conf${confidence} + echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner LInf attack confidence=$confidence" + steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 40 \ + --feat-config $feat_config \ + --transfer-feat-config $transfer_feat_config \ + --attack-opts "--attack.attack-type cw-linf --attack.confidence $confidence --attack.eps 0.3" \ + --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ + --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ + --threshold $thr005 \ + $trial_list \ + data/voxceleb1_test/utt2model \ + data/voxceleb1_test \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $nnet \ + $transfer_xvector_dir/voxceleb1_test/xvector.scp \ + $transfer_nnet \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats + + $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ + local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir + + for f in $(ls $score_plda_dir/*_results); + do + echo $f + cat $f + echo "" + done + if [ "${do_analysis}" == "true" ];then + score_analysis_dir=$score_plda_dir + local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ + $trial_list $score_clean \ + $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ + $score_analysis_dir/voxceleb1 & + fi + + done + +fi + +wait + diff --git a/egs/voxceleb/adv.v1.1/run_054_eval_art_transfer_blackbox_attacks.sh b/egs/voxceleb/adv.v1.1/run_054_eval_art_transfer_blackbox_attacks.sh deleted file mode 100755 index bdcdeae4..00000000 --- a/egs/voxceleb/adv.v1.1/run_054_eval_art_transfer_blackbox_attacks.sh +++ /dev/null @@ -1,1260 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false -do_analysis=false -save_wav=false -use_trials_subset=false - -. parse_options.sh || exit 1; -. $config_file -. 
datapath.sh - -transfer_feat_config=$feat_config - -if [ "$use_gpu" == "true" ];then - eval_args="--use-gpu true" - eval_cmd="$cuda_eval_cmd" -else - eval_cmd="$train_cmd" -fi - -if [ "$use_trials_subset" == "true" ];then - condition=o_clean_1000_1000 -else - condition=o_clean -fi -trial_list=data/voxceleb1_test/trials_$condition - -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name - -score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 - -transfer_xvector_dir=exp/xvectors/$transfer_nnet_name -transfer_score_dir=exp/scores/$transfer_nnet_name -transfer_cal_file=$transfer_score_dir/cosine_cal_v1/cal_tel.h5 - -#thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -thr005=2.94 -thr001=4.60 -thr0001=6.90 -declare -a score_array -declare -a stats_array - -if [ $stage -le 1 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 2 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls 
$score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_minimal_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - - -if [ $stage -le 3 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM L1 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 4 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM minimal L1 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_minimal_eall - 
local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - - -if [ $stage -le 5 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM L2 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 6 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring FGM minimal L2 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_minimal_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 7 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print 
$0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_iterfgsm_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with iter FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type bim --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_iterfgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - - -if [ $stage -le 8 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdlinf_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD Linf attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdlinf_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 9 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl1_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD L1 attack eps=$eps" - 
steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl1_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 10 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl2_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD L2 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl2_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 11 ];then - - for confidence in 0 #1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_cwl2_conf${confidence} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L2 attack confidence=$confidence" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts 
"--attack.attack-type cw-l2 --attack.confidence $confidence" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - -fi - - -if [ $stage -le 12 ];then - - for confidence in 0 #1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_cwlinf_conf${confidence} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner LInf attack confidence=$confidence" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 40 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type cw-linf --attack.confidence $confidence --attack.eps 0.3" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - $trial_list \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - $trial_list $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - -fi - -wait - - -# #!/bin/bash -# # Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# # -# # Apache 2.0. -# # -# . ./cmd.sh -# . ./path.sh -# set -e - -# stage=1 -# config_file=default_config.sh -# use_gpu=false -# do_analysis=false -# save_wav=false - -# . parse_options.sh || exit 1; -# . $config_file -# . 
datapath.sh - -# if [ "$use_gpu" == "true" ];then -# eval_args="--use-gpu true" -# eval_cmd="$cuda_eval_cmd" -# else -# eval_cmd="$train_cmd" -# fi - -# xvector_dir=exp/xvectors/$nnet_name -# score_dir=exp/scores/$nnet_name - -# score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -# cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 - -# transfer_xvector_dir=exp/xvectors/$transfer_nnet_name -# transfer_score_dir=exp/scores/$transfer_nnet_name -# transfer_cal_file=$transfer_score_dir/cosine_cal_v1/cal_tel.h5 - -# #thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -# thr005=2.94 -# thr001=4.60 -# thr0001=6.90 -# declare -a score_array -# declare -a stats_array - -# if [ $stage -le 1 ];then - -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type fgm --attack-opt "--attack-eps $eps" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - -# if [ $stage -le 2 ];then - -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_minimal_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type fgm --attack-opt "--attack-eps $eps --attack-minimal" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in 
$(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_minimal_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - - -# if [ $stage -le 3 ];then - -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGM L1 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type fgm --attack-opt "--attack-eps $eps --attack-norm 1" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - -# if [ $stage -le 4 ];then - -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_minimal_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGM minimal L1 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type fgm --attack-opt "--attack-eps $eps --attack-minimal --attack-norm 1" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# 
score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_minimal_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - - -# if [ $stage -le 5 ];then - -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with FGM L2 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type fgm --attack-opt "--attack-eps $eps --attack-norm 2" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - -# if [ $stage -le 6 ];then - -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_minimal_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring FGM minimal L2 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type fgm --attack-opt "--attack-eps $eps --attack-minimal --attack-norm 2" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ 
"${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_minimal_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - -# if [ $stage -le 7 ];then -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_iterfgsm_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with iter FGSM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type bim --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-max-iter 10" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_iterfgsm_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - - -# if [ $stage -le 8 ];then -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdlinf_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with PGD Linf attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat -# --attack-type pgd --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-max-iter 10" \ - -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if 
[ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdlinf_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - -# if [ $stage -le 9 ];then -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl1_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with PGD L1 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat -# --attack-type pgd --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-max-iter 10 --attack-norm 1" \ - -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl1_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - -# if [ $stage -le 10 ];then -# score_array=() -# stats_array=() - -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl2_e${eps} -# echo "Eval Voxceleb 1 with Cosine scoring with PGD L2 attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat -# --attack-type pgd --attack-opt "--attack-eps $eps --attack-eps-step $alpha --attack-max-iter 10 --attack-norm 2" \ - -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - 
-# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl2_eall -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi - -# fi - - -# if [ $stage -le 11 ];then - -# for confidence in 0 #1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_cwl2_conf${confidence} -# echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L2 attack confidence=$confidence" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type cw-l2 --attack-opt "--attack-confidence $confidence" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_plda_dir -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ -# $score_analysis_dir/voxceleb1 & -# fi - -# done - -# fi - - -# if [ $stage -le 12 ];then - -# for confidence in 0 #1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_cwlinf_conf${confidence} -# echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner LInf attack confidence=$confidence" -# steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 40 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type cw-linf --attack-opt "--attack-confidence $confidence --attack-eps 0.3" \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# $trial_list \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_single_cond.sh data/voxceleb1_test $condition $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_plda_dir -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# $trial_list $score_clean \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ -# $score_analysis_dir/voxceleb1 & -# fi - -# done - -# fi - -# wait - diff 
--git a/egs/voxceleb/adv.v1/conf b/egs/voxceleb/adv.v1/conf deleted file mode 120000 index 7dfe9dce..00000000 --- a/egs/voxceleb/adv.v1/conf +++ /dev/null @@ -1 +0,0 @@ -../../sre19-cmn2/v1/conf \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/global_conf/config_victim_lresnet34_transfer_resetdnn.v1.sh b/egs/voxceleb/adv.v1/global_conf/config_victim_lresnet34_transfer_resetdnn.v1.sh index 172da763..39016679 100644 --- a/egs/voxceleb/adv.v1/global_conf/config_victim_lresnet34_transfer_resetdnn.v1.sh +++ b/egs/voxceleb/adv.v1/global_conf/config_victim_lresnet34_transfer_resetdnn.v1.sh @@ -3,63 +3,29 @@ # Both models use the same features: 80 fbanks # Both models use the same training data. -# victim x-vector training -nnet_data=voxceleb2cat_train_combined - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.05 +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn -nnet_type=lresnet34 -dropout=0 -embed_dim=256 +#vad +vad_config=conf/vad_16k.yaml -s=30 -margin_warmup=20 -margin=0.3 +# victim x-vector training +nnet_data=voxceleb2cat_train -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 -num_augs=5 +# victim x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34 +nnet_cfg=conf/train_lresnet34_xvec.yaml nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # transfer model training -transfer_nnet_data=voxceleb2cat_train_combined #this can be voxceleb2cat or voxceleb2cat_combined - -transfer_batch_size_1gpu=128 -transfer_eff_batch_size=512 # effective batch size -transfer_min_chunk=400 -transfer_max_chunk=400 -transfer_ipe=1 -transfer_lr=0.05 +transfer_nnet_data=voxceleb2cat_train #this can be voxceleb2cat or voxceleb2cat_combined transfer_nnet_type=resetdnn -transfer_num_layers=5 -transfer_layer_dim=512 -transfer_expand_dim=1536 -transfer_dilation="1 2 3 4 1" -transfer_kernel_sizes="5 3 3 3 1" -transfer_dropout=0.1 -transfer_embed_dim=256 - -transfer_s=30 -transfer_margin_warmup=20 -transfer_margin=0.3 - -transfer_nnet_opt="--tdnn-type $transfer_nnet_type --in-feats 80 --num-enc-blocks $transfer_num_layers --enc-hid-units $transfer_layer_dim --enc-expand-units $transfer_expand_dim --kernel-size $transfer_kernel_sizes --dilation $transfer_dilation" -transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -transfer_nnet_name=${transfer_nnet_type}_nl${transfer_num_layers}ld${transfer_layer_dim}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1 -transfer_nnet_num_epochs=70 - +transfer_nnet_cfg=train_resetdnn_xvec.yaml +transfer_nnet_name=${feat_type}_resetdnn5x512 
transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name transfer_nnet=$transfer_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh b/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh index 407c0cfd..81f78c60 100644 --- a/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh +++ b/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh @@ -3,64 +3,92 @@ # Both models use the same features: 80 fbanks # Both models use the same training data. +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + # victim x-vector training -nnet_data=voxceleb2cat_train_combined - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.05 - -nnet_type=resnet34 -dropout=0 -embed_dim=256 - -s=30 -margin_warmup=20 -margin=0.3 - -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 -num_augs=5 +nnet_data=voxceleb2cat_train + +# victim x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34 + +nnet_cfg=conf/train_lresnet34_xvec.yaml nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth - # transfer model training -transfer_nnet_data=voxceleb2cat_train_combined #this can be voxceleb2cat or voxceleb2cat_combined - -transfer_batch_size_1gpu=128 -transfer_eff_batch_size=512 # effective batch size -transfer_min_chunk=400 -transfer_max_chunk=400 -transfer_ipe=1 -transfer_lr=0.05 +transfer_nnet_data=voxceleb2cat_train #this can be voxceleb2cat or voxceleb2cat_combined transfer_nnet_type=resetdnn -transfer_num_layers=5 -transfer_layer_dim=512 -transfer_expand_dim=1536 -transfer_dilation="1 2 3 4 1" -transfer_kernel_sizes="5 3 3 3 1" -transfer_dropout=0.1 -transfer_embed_dim=256 - -transfer_s=30 -transfer_margin_warmup=20 -transfer_margin=0.3 - -transfer_nnet_opt="--tdnn-type $transfer_nnet_type --in-feats 80 --num-enc-blocks $transfer_num_layers --enc-hid-units $transfer_layer_dim --enc-expand-units $transfer_expand_dim --kernel-size $transfer_kernel_sizes --dilation $transfer_dilation" -transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -transfer_nnet_name=${transfer_nnet_type}_nl${transfer_num_layers}ld${transfer_layer_dim}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1 -transfer_nnet_num_epochs=70 - +transfer_nnet_name=${feat_type}_resetdnn5x512 transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name transfer_nnet=$transfer_nnet_dir/model_ep0070.pth + +
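Both global_conf diffs above replace the long inline option strings (nnet_opt, opt_opt, lrs_opt) with a single pointer to a YAML training config, nnet_cfg=conf/train_lresnet34_xvec.yaml. That YAML file is not included in this patch, so the sketch below is only a guess at how the deleted shell variables might map onto it: every key name is an assumption, while the optimizer and scheduler values are exactly the ones the removed opt_opt and lrs_opt strings carried.

# Hypothetical sketch only: conf/train_lresnet34_xvec.yaml is not shown in this
# patch, so the key names below are assumed; the values come from the deleted
# opt_opt/lrs_opt shell variables.
cat <<'EOF' > conf/train_lresnet34_xvec.yaml.sketch
optim:
  opt_type: adam
  lr: 0.05
  beta1: 0.9
  beta2: 0.95
  weight_decay: 1.0e-5
  amsgrad: true
lrsched:
  lrsch_type: exp_lr
  decay_rate: 0.5
  decay_steps: 8000
  hold_steps: 40000
  min_lr: 1.0e-5
  warmup_steps: 1000
  update_lr_on_opt_step: true
use_amp: true
epochs: 70
EOF

One practical effect is visible in the names: ${feat_type}_lresnet34 replaces the old convention of encoding every hyperparameter into the network directory name.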
diff --git a/egs/voxceleb/adv.v1/local b/egs/voxceleb/adv.v1/local deleted file mode 120000 index ce1cbf90..00000000 --- a/egs/voxceleb/adv.v1/local +++ /dev/null @@ -1 +0,0 @@ -../v1/local \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/run_002_compute_evad.sh b/egs/voxceleb/adv.v1/run_002_compute_evad.sh index 98b0db7d..cc3d8296 100755 --- a/egs/voxceleb/adv.v1/run_002_compute_evad.sh +++ b/egs/voxceleb/adv.v1/run_002_compute_evad.sh @@ -9,7 +9,6 @@ set -e nodes=fs01 storage_name=$(date +'%m_%d_%H_%M') vaddir=`pwd`/exp/vad_e -vad_config=conf/vad_16k.yaml stage=1 config_file=default_config.sh diff --git a/egs/voxceleb/adv.v1/run_003_compute_fbank.sh b/egs/voxceleb/adv.v1/run_003_compute_fbank.sh deleted file mode 100755 index 7bd8b6a3..00000000 --- a/egs/voxceleb/adv.v1/run_003_compute_fbank.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. 
./path.sh -set -e -nodes=fs01 -storage_name=$(date +'%m_%d_%H_%M') -fbankdir=`pwd`/exp/fbank -vaddir=`pwd`/exp/fbank -vaddir_gt=`pwd`/exp/vad_gt - -stage=1 -config_file=default_config.sh -feat_vers="numpy" - -. parse_options.sh || exit 1; - -if [ "$feat_vers" == "kaldi" ];then - make_fbank=steps/make_fbank.sh - fbank_cfg=conf/fbank80_16k.conf -else - fbank_cfg=conf/fbank80_16k.yaml - if [ "$feat_vers" == "numpy" ];then - make_fbank=steps_pyfe/make_fbank.sh - else - make_fbank=steps_pyfe/make_torch_fbank.sh - fi -fi - -# Make filterbanks -if [ $stage -le 1 ]; then - # Prepare to distribute data over multiple machines - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $fbankdir/storage ]; then - dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/fbank/storage - if [ "$nodes" == "b0" ];then - utils/create_split_dir.pl \ - utils/create_split_dir.pl \ - /export/b{04,05,06,07}/$dir_name $fbankdir/storage - elif [ "$nodes" == "b1" ];then - utils/create_split_dir.pl \ - /export/b{14,15,16,17}/$dir_name $fbankdir/storage - elif [ "$nodes" == "c0" ];then - utils/create_split_dir.pl \ - /export/c{06,07,08,09}/$dir_name $fbankdir/storage - elif [ "$nodes" == "fs01" ];then - utils/create_split_dir.pl \ - /export/fs01/$dir_name $fbankdir/storage - else - echo "we don't distribute data between multiple machines" - fi - fi -fi - -#Train datasets -if [ $stage -le 2 ];then - for name in voxceleb2cat_train voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 40 ? $num_spk:40)) - $make_fbank --write-utt2num-frames true --fbank-config $fbank_cfg --nj $nj --cmd "$train_cmd" \ - data/${name} exp/make_fbank/$name $fbankdir - utils/fix_data_dir.sh data/${name} - done -fi - - diff --git a/egs/voxceleb/adv.v1/run_003_prepare_noises_rirs.sh b/egs/voxceleb/adv.v1/run_003_prepare_noises_rirs.sh new file mode 100755 index 00000000..a448af9a --- /dev/null +++ b/egs/voxceleb/adv.v1/run_003_prepare_noises_rirs.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +# We prepare the noise files and RIR for online speech augmentation + +if [ $stage -le 1 ]; then + + # Prepare the MUSAN corpus, which consists of music, speech, and noise + # suitable for augmentation. + local/make_musan.sh $musan_root 16 data + + for name in musan_noise musan_music + do + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ + --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_proc_audio exp/${name}_proc_audio + utils/fix_data_dir.sh data/${name}_proc_audio + done + +fi + +if [ $stage -le 2 ]; then + + # Create Babble noise from MUSAN speech files + for name in musan_speech + do + steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ + --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_babble exp/${name}_babble + # utils/fix_data_dir.sh data/${name}_babble + done +fi + +if [ $stage -le 3 ]; then + if [ ! 
-d "RIRS_NOISES" ]; then + if [ -d ../../sre19-cmn2/v1/RIRS_NOISES ];then + ln -s ../../sre19-cmn2/v1/RIRS_NOISES + else + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + fi + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/smallroom 16 data/rirs_smallroom + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/mediumroom 16 data/rirs_mediumroom + local/make_rirs_data.sh RIRS_NOISES/real_rirs_isotropic_noises 16 data/rirs_real + for rirs in rirs_smallroom rirs_mediumroom rirs_real + do + #pack all rirs in h5 files + steps_xvec/pack_rirs_for_nnet_train.sh data/$rirs data/$rirs exp/rirs/$rirs + done + +fi + + diff --git a/egs/voxceleb/adv.v1/run_004_prepare_augment.sh b/egs/voxceleb/adv.v1/run_004_prepare_augment.sh deleted file mode 100755 index 7d78ae92..00000000 --- a/egs/voxceleb/adv.v1/run_004_prepare_augment.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -# In this script, we augment the SWBD,SRE,MX6 and Voxceleb data with reverberation, -# noise, music, and babble, and combined it with the clean data. -# The combined list will be used to train the xvector DNN. - -frame_shift=0.01 - -if [ $stage -le 1 ]; then - - if [ ! -d "RIRS_NOISES" ]; then - if [ -d ../../sre19-cmn2/v1/RIRS_NOISES ];then - ln -s ../../sre19-cmn2/v1/RIRS_NOISES - else - # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises - wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip - unzip rirs_noises.zip - fi - fi - - # Prepare the MUSAN corpus, which consists of music, speech, and noise - # suitable for augmentation. - local/make_musan.sh $musan_root 16 data - - # Get the duration of the MUSAN recordings. This will be used by the - # script augment_data_dir.py. - for name in speech noise music; do - utils/data/get_utt2dur.sh data/musan_${name} - mv data/musan_${name}/utt2dur data/musan_${name}/reco2dur - done - -fi - - -if [ $stage -le 2 ]; then - - for name in voxceleb2cat_train - do - export TMPDIR=data/tmp - mkdir -p $TMPDIR - - awk -v frame_shift=$frame_shift '{print $1, $2*frame_shift;}' data/$name/utt2num_frames > data/$name/reco2dur - - # Make a reverberated version of the list. Note that we don't add any - # additive noise here. 
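A note on the rvb_opts block in the deleted script below: the leading number in each --rir-set-parameters value (0.2 for the real isotropic RIRs, 0.4 each for the simulated small and medium rooms) is the relative sampling weight of that RIR set, so the three weights are chosen to sum to 1. The stand-alone check below is not part of the recipe; it only illustrates how those weights can be validated before launching the reverberation job (the array layout is copied from the deleted script).

# Illustration only: verify that the RIR-set sampling weights sum to ~1.
rvb_opts=()
rvb_opts+=(--rir-set-parameters "0.2, RIRS_NOISES/real_rirs_isotropic_noises/rir_list")
rvb_opts+=(--rir-set-parameters "0.4, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
rvb_opts+=(--rir-set-parameters "0.4, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
printf '%s\n' "${rvb_opts[@]}" \
  | sed -n 's/^\([0-9.]*\),.*/\1/p' \
  | awk '{ s += $1 } END { if (s > 0.999 && s < 1.001) print "RIR weights ok"; else print "warning: RIR weights sum to " s }'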
- - # Make a version with reverberated speech - rvb_opts=() - rvb_opts+=(--rir-set-parameters "0.2, RIRS_NOISES/real_rirs_isotropic_noises/rir_list") - rvb_opts+=(--rir-set-parameters "0.4, RIRS_NOISES/simulated_rirs/smallroom/rir_list") - rvb_opts+=(--rir-set-parameters "0.4, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") - - python steps/data/reverberate_data_dir.py \ - "${rvb_opts[@]}" \ - --speech-rvb-probability 1 \ - --pointsource-noise-addition-probability 0 \ - --isotropic-noise-addition-probability 0 \ - --num-replications 1 \ - --source-sampling-rate 16000 \ - data/${name} data/${name}_reverb - cp data/${name}/vad.scp data/${name}_reverb/ - utils/copy_data_dir.sh --utt-suffix "-reverb" data/${name}_reverb data/${name}_reverb.new - rm -rf data/${name}_reverb - mv data/${name}_reverb.new data/${name}_reverb - - - # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0:13:8" --fg-noise-dir "data/musan_noise" data/${name} data/${name}_noise - # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/${name} data/${name}_music - # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13:10" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/${name} data/${name}_babble - - - awk '{ $1=$1"-reverb"; print $0}' data/${name}/reco2dur > data/${name}_reverb/reco2dur - - # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0:13:8" --fg-noise-dir "data/musan_noise" data/${name}_reverb data/${name}_reverb_noise - # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/${name}_reverb data/${name}_reverb_music - # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13:10" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/${name}_reverb data/${name}_reverb_babble - - - # Combine noise only - utils/combine_data.sh data/${name}_noise_all \ - data/${name}_noise data/${name}_music data/${name}_babble - - # Combine reverbs - utils/combine_data.sh data/${name}_reverb_all data/${name}_reverb \ - data/${name}_reverb_noise data/${name}_reverb_music data/${name}_reverb_babble - - # Combine reverb, noise, music, and babble into one directory. - utils/combine_data.sh data/${name}_aug data/${name}_reverb_all data/${name}_noise_all - unset TMPDIR - done - -fi - - -if [ $stage -le 3 ];then - # Take a random subset of the augmentations - utils/subset_data_dir.sh data/voxceleb2cat_train_aug \ - $(wc -l data/voxceleb2cat_train/utt2spk | awk '{ print int('$num_augs'*$1)}') \ - data/voxceleb2cat_train_augx${num_augs} - utils/fix_data_dir.sh data/voxceleb2cat_train_augx${num_augs} -fi - - -exit diff --git a/egs/voxceleb/adv.v1/run_004_prepare_victim_xvec_train_data.sh b/egs/voxceleb/adv.v1/run_004_prepare_victim_xvec_train_data.sh new file mode 100755 index 00000000..6939052e --- /dev/null +++ b/egs/voxceleb/adv.v1/run_004_prepare_victim_xvec_train_data.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. 
$config_file + +if [ $stage -le 2 ]; then + # This script preprocesses audio for x-vector training + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ + --storage_name voxceleb-adv.v1-$(date +'%m_%d_%H_%M') --use-bin-vad true \ + data/${nnet_data} data/${nnet_data}_proc_audio_no_sil exp/${nnet_data}_proc_audio_no_sil + hyp_utils/kaldi/utils/fix_data_dir.sh data/${nnet_data}_proc_audio_no_sil + +fi + +if [ $stage -le 3 ]; then + # Now, we remove files shorter than 4s + hyp_utils/remove_short_audios.sh --min-len 4 data/${nnet_data}_proc_audio_no_sil + + # We also want several utterances per speaker. Now we'll throw out speakers + # with fewer than 4 utterances. + hyp_utils/remove_spk_few_utts.sh --min-num-utts 4 data/${nnet_data}_proc_audio_no_sil + +fi + +if [ $stage -le 4 ]; then + # Prepare train and validation lists for x-vectors + local/make_train_lists_sup_embed_with_augm.sh \ + data/${nnet_data}_proc_audio_no_sil \ + data/${nnet_data}_proc_audio_no_sil/lists_xvec +fi + +exit diff --git a/egs/voxceleb/adv.v1/run_005_compute_fbank_augment.sh b/egs/voxceleb/adv.v1/run_005_compute_fbank_augment.sh deleted file mode 100755 index 10d13e03..00000000 --- a/egs/voxceleb/adv.v1/run_005_compute_fbank_augment.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e -fbankdir=`pwd`/exp/fbank - -stage=1 -config_file=default_config.sh -feat_vers="numpy" - -. parse_options.sh || exit 1; -. $config_file - -if [ "$feat_vers" == "kaldi" ];then - make_fbank=steps/make_fbank.sh - fbank_cfg=conf/fbank80_16k.conf -else - fbank_cfg=conf/fbank80_16k.yaml - if [ "$feat_vers" == "numpy" ];then - make_fbank=steps_pyfe/make_fbank.sh - else - make_fbank=steps_pyfe/make_torch_fbank.sh - fi -fi - -export TMPDIR=data/tmp -mkdir -p $TMPDIR - -if [ $stage -le 1 ];then - - # Make filterbanks for the augmented data. Note that we do not compute a new - # vad.scp file here. Instead, we use the vad.scp from the clean version of - # the list. - for name in voxceleb2cat_train_augx${num_augs} - do - $make_fbank --write-utt2num-frames true \ - --fbank-config $fbank_cfg --nj 120 --cmd "$train_cmd" \ - data/$name exp/make_fbank/$name $fbankdir - fix_data_dir.sh data/$name - done - -fi - - -if [ $stage -le 2 ];then - - # Combine the clean and augmented lists. - utils/combine_data.sh --extra-files "utt2num_frames" data/voxceleb2cat_train_combined data/voxceleb2cat_train_augx${num_augs} data/voxceleb2cat_train - -fi - -exit - diff --git a/egs/voxceleb/adv.v1/run_006_prepare_transfer_xvec_train_data.sh b/egs/voxceleb/adv.v1/run_006_prepare_transfer_xvec_train_data.sh new file mode 100755 index 00000000..f80d2924 --- /dev/null +++ b/egs/voxceleb/adv.v1/run_006_prepare_transfer_xvec_train_data.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. 
$config_file + +if [ "$transfer_nnet_data" == "$nnet_data" ];then + echo "Training data for victim and transfer model are the same" + echo "Skipping this step" + exit 0 +fi + +if [ $stage -le 2 ]; then + # This script preprocesses audio for x-vector training + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ + --storage_name voxceleb-adv.v1-$(date +'%m_%d_%H_%M') --use-bin-vad true \ + data/${transfer_nnet_data} data/${transfer_nnet_data}_proc_audio_no_sil exp/${transfer_nnet_data}_proc_audio_no_sil + hyp_utils/kaldi/utils/fix_data_dir.sh data/${transfer_nnet_data}_proc_audio_no_sil + +fi + +if [ $stage -le 3 ]; then + # Now, we remove files shorter than 4s + hyp_utils/remove_short_audios.sh --min-len 4 data/${transfer_nnet_data}_proc_audio_no_sil + + # We also want several utterances per speaker. Now we'll throw out speakers + # with fewer than 4 utterances. + hyp_utils/remove_spk_few_utts.sh --min-num-utts 4 data/${transfer_nnet_data}_proc_audio_no_sil + +fi + +if [ $stage -le 4 ]; then + # Prepare train and validation lists for x-vectors + local/make_train_lists_sup_embed_with_augm.sh \ + data/${transfer_nnet_data}_proc_audio_no_sil \ + data/${transfer_nnet_data}_proc_audio_no_sil/lists_xvec +fi + +exit diff --git a/egs/voxceleb/adv.v1/run_008_extract_xvectors_victim_model.sh b/egs/voxceleb/adv.v1/run_008_extract_xvectors_victim_model.sh new file mode 100755 index 00000000..03234eaa --- /dev/null +++ b/egs/voxceleb/adv.v1/run_008_extract_xvectors_victim_model.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=1 +config_file=default_config.sh +use_gpu=false +xvec_chunk_length=12800 +. parse_options.sh || exit 1; +. $config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" + xvec_cmd="$cuda_eval_cmd --mem 4G" +else + xvec_cmd="$train_cmd --mem 12G" +fi + +if [ $stage -le 2 ]; then + # Extracts x-vectors for evaluation + for name in voxceleb1_test + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 100 ? $num_spk:100)) + steps_xvec/extract_xvectors_from_wav.sh \ + --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ + --feat-config $feat_config \ + $nnet data/$name \ + $xvector_dir/$name + done +fi diff --git a/egs/voxceleb/adv.v1/run_010_prepare_victim_xvec_train_data.sh b/egs/voxceleb/adv.v1/run_010_prepare_victim_xvec_train_data.sh deleted file mode 100755 index 25a59571..00000000 --- a/egs/voxceleb/adv.v1/run_010_prepare_victim_xvec_train_data.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file - -# Now we prepare the features to generate examples for xvector training. -if [ $stage -le 2 ]; then - # This script applies CMVN and removes nonspeech frames. Note that this is somewhat - wasteful, as it roughly doubles the amount of training data on disk. After - creating training examples, this can be removed. 
- steps_xvec/prepare_feats_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ - --storage_name voxceleb-adv.v1-$(date +'%m_%d_%H_%M') \ - data/${nnet_data} data/${nnet_data}_no_sil exp/${nnet_data}_no_sil - utils/fix_data_dir.sh data/${nnet_data}_no_sil - -fi - - -if [ $stage -le 3 ]; then - # Now, we need to remove features that are too short after removing silence - # frames. We want atleast 4s (400 frames) per utterance. - hyp_utils/remove_short_utts.sh --min-len 400 data/${nnet_data}_no_sil - - # We also want several utterances per speaker. Now we'll throw out speakers - # with fewer than 8 utterances. - hyp_utils/remove_spk_few_utts.sh --min-num-utts 8 data/${nnet_data}_no_sil - -fi - -if [ $stage -le 4 ]; then - # Prepare train and validation lists for x-vectors - local/make_train_lists_sup_embed_with_augm.sh data/${nnet_data}_no_sil data/${nnet_data}_no_sil/lists_xvec -fi - -exit diff --git a/egs/voxceleb/adv.v1/run_011_train_victim_xvector.sh b/egs/voxceleb/adv.v1/run_011_train_victim_xvector.sh deleted file mode 100755 index 141afa62..00000000 --- a/egs/voxceleb/adv.v1/run_011_train_victim_xvector.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash -# Copyright -# 2019 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -ngpu=4 -config_file=default_config.sh -resume=false -interactive=false -num_workers=8 - -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -batch_size=$(($batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) -list_dir=data/${nnet_data}_no_sil - -args="" -if [ "$resume" == "true" ];then - args="--resume" -fi - -if [ "$interactive" == "true" ];then - export cuda_cmd=run.pl -fi - -# Network Training -if [ $stage -le 1 ]; then - - if [[ ${nnet_type} =~ resnet ]] || [[ ${nnet_type} =~ resnext ]]; then - train_exec=torch-train-resnet-xvec.py - elif [[ ${nnet_type} =~ efficientnet ]]; then - train_exec=torch-train-efficientnet-xvec.py - elif [[ ${nnet_type} =~ tdnn ]]; then - train_exec=torch-train-tdnn-xvec.py - elif [[ ${nnet_type} =~ transformer ]]; then - train_exec=torch-train-transformer-xvec-v1.py - else - echo "$nnet_type not supported" - exit 1 - fi - - mkdir -p $nnet_dir/log - $cuda_cmd --gpu $ngpu $nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - $train_exec \ - --data-rspec scp:$list_dir/feats.scp \ - --train-list $list_dir/lists_xvec/train.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --class-file $list_dir/lists_xvec/class2int \ - --num-frames-file $list_dir/utt2num_frames \ - --min-chunk-length $min_chunk --max-chunk-length $max_chunk \ - --iters-per-epoch $ipe \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --embed-dim $embed_dim $nnet_opt $opt_opt $lrs_opt \ - --epochs $nnet_num_epochs \ - --s $s --margin $margin --margin-warmup-epochs $margin_warmup \ - --dropout-rate $dropout \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $nnet_dir $args - -fi - - -exit diff --git a/egs/voxceleb/adv.v1/run_012_prepare_transfer_xvec_train_data.sh b/egs/voxceleb/adv.v1/run_012_prepare_transfer_xvec_train_data.sh deleted file mode 100755 index b622e992..00000000 --- a/egs/voxceleb/adv.v1/run_012_prepare_transfer_xvec_train_data.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. 
./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file - -if [ "$transfer_nnet_data" == "$nnet_data" ];then - echo "Training data for victim and transfer model are the same" - echo "Skipping this step" - exit 0 -fi - -# Now we prepare the features to generate examples for xvector training. -if [ $stage -le 2 ]; then - # This script applies CMVN and removes nonspeech frames. Note that this is somewhat - # wasteful, as it roughly doubles the amount of training data on disk. After - # creating training examples, this can be removed. - steps_xvec/prepare_feats_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ - --storage_name voxceleb-$(date +'%m_%d_%H_%M') \ - data/${transfer_nnet_data} data/${transfer_nnet_data}_no_sil \ - exp/${transfer_nnet_data}_no_sil - utils/fix_data_dir.sh data/${transfer_nnet_data}_no_sil - -fi - - -if [ $stage -le 3 ]; then - # Now, we need to remove features that are too short after removing silence - # frames. We want atleast 4s (400 frames) per utterance. - hyp_utils/remove_short_utts.sh --min-len 400 data/${transfer_nnet_data}_no_sil - - # We also want several utterances per speaker. Now we'll throw out speakers - # with fewer than 8 utterances. - hyp_utils/remove_spk_few_utts.sh --min-num-utts 8 data/${transfer_nnet_data}_no_sil - -fi - -if [ $stage -le 4 ]; then - # Prepare train and validation lists for x-vectors - local/make_train_lists_sup_embed_with_augm.sh data/${transfer_nnet_data}_no_sil \ - data/${transfer_nnet_data}_no_sil/lists_xvec -fi - -exit diff --git a/egs/voxceleb/adv.v1/run_013_train_transfer_xvector.sh b/egs/voxceleb/adv.v1/run_013_train_transfer_xvector.sh deleted file mode 100755 index ad2c0177..00000000 --- a/egs/voxceleb/adv.v1/run_013_train_transfer_xvector.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash -# Copyright -# 2019 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -ngpu=4 -config_file=default_config.sh -resume=false -interactive=false -num_workers=8 - -. parse_options.sh || exit 1; -. $config_file -. 
datapath.sh - -if [ "$nnet" == "$transfer_nnet" ];then - echo "Victim and transfer model are the same" - echo "Skipping this step" - exit 0 -fi - -nnet_data=$transfer_nnet_data -batch_size_1gpu=$transfer_batch_size_1gpu -eff_batch_size=$transfer_eff_batch_size -min_chunk=$transfer_min_chunk -max_chunk=$transfer_max_chunk -ipe=$transfer_ipe - -nnet_type=$transfer_nnet_type -dropout=$transfer_dropout -embed_dim=$transfer_embed_dim - -s=$transfer_s -margin_warmup=$transfer_margin_warmup -margin=$transfer_margin - -nnet_dir=$transfer_nnet_dir -nnet_opt=$transfer_nnet_opt -opt_opt=$transfer_opt_opt -lrs_opt=$transfer_lrs_opt - -batch_size=$(($batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) -list_dir=data/${nnet_data}_no_sil - -args="" -if [ "$resume" == "true" ];then - args="--resume" -fi - -if [ "$interactive" == "true" ];then - export cuda_cmd=run.pl -fi - -# Network Training -if [ $stage -le 1 ]; then - - if [[ ${nnet_type} =~ resnet ]] || [[ ${nnet_type} =~ resnext ]]; then - train_exec=torch-train-resnet-xvec.py - elif [[ ${nnet_type} =~ efficientnet ]]; then - train_exec=torch-train-efficientnet-xvec.py - elif [[ ${nnet_type} =~ tdnn ]]; then - train_exec=torch-train-tdnn-xvec.py - elif [[ ${nnet_type} =~ transformer ]]; then - train_exec=torch-train-transformer-xvec-v1.py - else - echo "$nnet_type not supported" - exit 1 - fi - - mkdir -p $nnet_dir/log - $cuda_cmd --gpu $ngpu $nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - $train_exec \ - --data-rspec scp:$list_dir/feats.scp \ - --train-list $list_dir/lists_xvec/train.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --class-file $list_dir/lists_xvec/class2int \ - --num-frames-file $list_dir/utt2num_frames \ - --min-chunk-length $min_chunk --max-chunk-length $max_chunk \ - --iters-per-epoch $ipe \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --embed-dim $embed_dim $nnet_opt $opt_opt $lrs_opt \ - --epochs $nnet_num_epochs \ - --s $s --margin $margin --margin-warmup-epochs $margin_warmup \ - --dropout-rate $dropout \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $nnet_dir $args - -fi - - -exit diff --git a/egs/voxceleb/adv.v1/run_030_extract_xvectors_victim_model.sh b/egs/voxceleb/adv.v1/run_030_extract_xvectors_victim_model.sh deleted file mode 100755 index 02eb78de..00000000 --- a/egs/voxceleb/adv.v1/run_030_extract_xvectors_victim_model.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false - -. parse_options.sh || exit 1; -. $config_file - -if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu true --chunk-length 12800" - xvec_cmd="$cuda_eval_cmd" -else - xvec_cmd="$train_cmd" -fi - -xvector_dir=exp/xvectors/$nnet_name - -if [ $stage -le 1 ]; then - # Extracts x-vectors for evaluation - for name in voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 100 ? 
$num_spk:100)) - steps_xvec/extract_xvectors.sh --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ - $nnet data/$name \ - $xvector_dir/$name - done -fi - -exit diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index 1ee9468f..b8a17dc6 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -95,9 +95,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.68 | 0.052 | 0.088 | | | | | Cosine + AS-Norm | 0.63 | 0.049 | 0.083 | | | | | Cosine + QMF | 0.57 | 0.037 | 0.071 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | || | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.56 | 0.040 | 0.065 | +| | | | Cosine + AS-Norm | 0.52 | 0.033 | 0.045 | +| | | | Cosine + QMF | 0.45 | 0.027 | 0.043 | ### VoxCeleb 1 Entire-Clean trial list @@ -109,9 +109,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.85 | 0.055 | 0.100 | | | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.087 | | | | | Cosine + QMF | 0.76 | 0.047 | 0.083 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.71 | 0.044 | 0.076 | +| | | | Cosine + AS-Norm | 0.66 | 0.040 | 0.069 | +| | | | Cosine + QMF | 0.63 | 0.037 | 0.067 | ### VoxCeleb 1 Hard-Clean trial list @@ -123,9 +123,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.66 | 0.103 | 0.168 | | | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.151 | | | | | Cosine + QMF | 1.44 | 0.087 | 0.145 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.30 | 0.076 | 0.125 | +| | | | Cosine + AS-Norm | 1.15 | 0.066 | 0.109 | +| | | | Cosine + QMF | 1.11 | 0.065 | 0.103 | ### VoxSRC2022 dev @@ -137,9 +137,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.33 | 0.156 | 0.260 | | | | | Cosine + AS-Norm | 2.19 | 0.144 | 0.263 | | | | | Cosine + QMF | 2.06 | 0.137 | 0.251 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | || | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.92 | 0.124 | 0.208 | +| | | | Cosine + AS-Norm | 1.71 | 0.109 | 0.212 | +| | | | Cosine + QMF | 1.62 | 0.103 | 0.192 | ## Results before 2023 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..9e302200 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml @@ -0,0 +1,72 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: fwseidrndresnet100 + in_channels: 1 + in_feats: 80 + conv_channels: 128 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.05 + se_r: 4 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..469e166b --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git 
a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.0.sh new file mode 100644 index 00000000..003bf978 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.0.sh @@ -0,0 +1,44 @@ +# IdRnd ResNet100 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_idrnd_resnet100.v3.0 + +nnet_s1_base_cfg=conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0029.pth + +nnet_s2_base_cfg=conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/README.md b/egs/voxceleb/v1.2/README.md new file mode 100644 index 00000000..1ee9468f --- /dev/null +++ b/egs/voxceleb/v1.2/README.md @@ -0,0 +1,263 @@ +# VoxCeleb V1.2 + +Recipe for the VoxCeleb Speaker Verification Task + +## Differences w.r.t VoxCeleb V1 recipe + +In recipe version V1: + - We compute speech augmentations and acoustic features offline and dump them to disk. + - Augmentation is performed using Kaldi scripts and the wav-reverberate tool + - Babble noise is created on-the-fly when computing features by mixing 3-7 single speaker files. + +In this recipe: + - We compute speech augmentations and acoustic features always on-the-fly, + we don't dump any features to disk. + - Augmentation is performed using the Hyperion SpeechAugment class. + - The behavior of this class is controlled + by the configuration file `conf/reverb_noise_aug.yaml`, + which mimics the proportions of noise and RIR types, and SNRs used in V1 of the recipe. + - Babble noise is created offline by mixing 3-10 single speaker files. + + +## Citing + +## Training Data + + - x-Vector network is trained on Voxceleb2 dev + test with augmentations + - MUSAN noise + - RIR reverberation + +## Test data + + - Test data is VoxCeleb 1 + - We evaluate 6 conditions: + - VoxCeleb-O (Original): Original Voxceleb test set with 40 speakers + - Voxceleb-O-cleaned: VoxCeleb-O cleaned-up of some errors + - VoxCeleb-E (Entire): List using all utterances of VoxCeleb1 + - Voxceleb-E-cleaned: VoxCeleb-E cleaned-up of some errors + - VoxCeleb-H (Hard): List of hard trials between all utterances of VoxCeleb1, same gender and nationality trials. 
+ - Voxceleb-H-cleaned: VoxCeleb-H cleaned-up of some errors + +## Usage + + - Run the run_0*.sh scripts in sequence + - By default it will use the Light ResNet (16 base channels) + - For better performance use the full ResNet (64 base channels) with the `config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh` file as follows: +```bash +run_011_train_xvector.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +run_030_extract_xvectors.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh --use-gpu true +run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +``` + + - To train with mixed precision, use the config file `config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh` + +## Recipe Steps: + + - `run_001_prepare_data.sh` + - Data preparation script to generate Kaldi style data directories for + - VoxCeleb2 train+test + - VoxCeleb1 O/E/H eval sets + + - `run_002_compute_evad.sh` + - Computes Energy VAD for all datasets + + - `run_003_prepare_noises_rirs.sh` + - Prepares MUSAN noise and music to be used by SpeechAugment class. + - Creates Babble noise from MUSAN speech to be used by SpeechAugment class. + - Prepares RIRs by compacting them into HDF5 files, to be used by SpeechAugment class. + + - `run_010_prepare_xvec_train_data.sh` + - Transforms all the audio files that we use to train the x-vector network into a common format, e.g., .flac. + - Removes silence from the audio files + - Removes utterances shorter than 4 seconds and speakers with fewer than 8 utterances. + - Creates training and validation lists for x-vector training + + - `run_011_train_xvector.sh` + - Trains the x-vector network + + - `run_030_extract_xvectors.sh` + - Extracts x-vectors for VoxCeleb2 or VoxCeleb2+augmentation for PLDA training + - Extracts x-vectors for VoxCeleb1 test sets + + - `run_040_eval_be.sh` + - Trains PLDA and evaluates PLDA and cosine scoring back-ends + + +## Results + +### VoxCeleb 1 Original-Clean trial list + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.11 | 0.069 | 0.126 | +| | | | Cosine + AS-Norm | 1.10 | 0.065 | 0.108 | +| | | | Cosine + QMF | 0.95 | 0.059 | 0.084 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.68 | 0.052 | 0.088 | +| | | | Cosine + AS-Norm | 0.63 | 0.049 | 0.083 | +| | | | Cosine + QMF | 0.57 | 0.037 | 0.071 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | || | + +### VoxCeleb 1 Entire-Clean trial list + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.16 | 0.073 | 0.130 | +| | | | Cosine + AS-Norm | 1.13 | 0.068 | 0.118 | +| | | | Cosine + QMF | 1.06 | 0.064 | 0.112 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.85 | 0.055 | 0.100 | +| | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.087 | +| | | | Cosine + QMF | 0.76 | 0.047 | 0.083 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | + +### VoxCeleb 1 Hard-Clean trial list + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.10 | 0.128 | 0.209 | +| | | | Cosine + AS-Norm | 1.99 | 0.118 | 0.190 | +| | | | Cosine + QMF | 1.84 | 0.111 | 0.184 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.66 | 0.103 | 0.168 | +| | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.151 | +| | | | Cosine + QMF | 1.44 | 0.087 | 0.145 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | + +### VoxSRC2022 dev + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.87 | 0.185 | 0.304 | +| | | | Cosine + AS-Norm | 2.84 | 0.182 | 0.304 | +| | | | Cosine + QMF | 2.61 | 0.172 | 0.283 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.33 | 0.156 | 0.260 | +| | | | Cosine + AS-Norm | 2.19 | 0.144 | 0.263 | +| | | | Cosine + QMF | 2.06 | 0.137 | 0.251 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | || | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | + +## Results before 2023 + +### VoxCeleb 1 Original-Clean trial list + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.00 | 0.129 | 0.216 | +| | | | Cosine | 2.04 | 0.138 | 0.210 | +| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.35 | 0.091 | 0.159 | +| | | | Cosine | 1.22 | 0.082 | 0.129 | +| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 1.19 | 0.074 | 0.124 | +| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 1.30 | 0.090 | 0.160 | +| | | | Cosine | 1.44 | 0.100 | 0.173 | +| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.091 | 0.143 | +| | | | Cosine | 1.17 | 0.081 | 0.110 | +| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | PLDA | 1.37 | 0.104 | 0.179 | +| | | | Cosine | 1.31 | 0.080 | 0.139 | +| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121&#10;
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | PLDA | 1.29 | 0.088 | 0.129 | +| | | | Cosine | 1.23 | 0.083 | 0.136 | +| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 1.20 | 0.095 | 0.156 | +| | | | Cosine | 1.29 | 0.089 | 0.146 | +| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.20 | 0.084 | 0.136 | +| | | | Cosine | 1.18 | 0.078 | 0.115 | +| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.11 | 0.084 | 0.145 | +| | | | Cosine | 1.12 | 0.073 | 0.131 | +| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16&#10;
ArcFace s=30/m=0.3 | PLDA | 1.53 | 0.104 | 0.189 | +| | | | Cosine | 1.31 | 0.084 | 0.132 | +| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 0.98 | 0.066 | 0.116 | +| | | | Cosine | 1.12 | 0.071 | 0.103 | +| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 1.05 | 0.077 | 0.123 | +| | | | Cosine | 0.96 | 0.065 | 0.110 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.04 | 0.071 | 0.118 | +| | | | Cosine | 0.93 | 0.067 | 0.108 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1_swa.sh | Res2Net50 width=26x8 | + SWA | PLDA | 0.90 | 0.067 | 0.118 | +| | | | Cosine | 0.85 | 0.060 | 0.094 | +| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.44 | 0.102 | 0.169 | +| | | | Cosine | 1.29 | 0.084 | 0.140 | +| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.071 | 0.116 | +| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.074 | 0.116 | +| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.09 | 0.081 | 0.150 | + + +### VoxCeleb 1 Entire-Clean trial list + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.86 | 0.124 | 0.210 | +| | | | Cosine | 1.93 | 0.122 | 0.201 | +| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.43 | 0.091 | 0.159 | +| | | | Cosine | 1.24 | 0.080 | 0.136 | +| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 1.19 | 0.077 | 0.132 | +| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 1.27 | 0.084 | 0.150 | +| | | | Cosine | 1.30 | 0.082 | 0.150 | +| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.30 | 0.083 | 0.146 | +| | | | Cosine | 1.09 | 0.071 | 0.124 | +| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | PLDA | 1.45 | 0.097 | 0.165 | +| | | | Cosine | 1.15 | 0.076 | 0.132 | +| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121&#10;
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | PLDA | 1.47 | 0.094 | 0.165 | +| | | | Cosine | 1.27 | 0.082 | 0.148 | +| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 1.31 | 0.086 | 0.149 | +| | | | Cosine | 1.22 | 0.079 | 0.134 | +| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.27 | 0.082 | 0.145 | +| | | | Cosine | 1.16 | 0.074 | 0.130 | +| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.077 | 0.136 | +| | | | Cosine | 1.11 | 0.071 | 0.125 | +| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16&#10;
ArcFace s=30/m=0.3 | PLDA | 1.46 | 0.097 | 0.173 | +| | | | Cosine | 1.24 | 0.080 | 0.140 | +| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 1.11 | 0.071 | 0.127 | +| | | | Cosine | 1.05 | 0.067 | 0.117 | +| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.078 | 0.134 | +| | | | Cosine | 1.05 | 0.069 | 0.121 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.18 | 0.075 | 0.131 | +| | | | Cosine | 0.98 | 0.063 | 0.110 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 1.17 | 0.072 | 0.123 | +| | | | Cosine | 0.94 | 0.061 | 0.107 | +| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.56 | 0.095 | 0.166 | +| | | | Cosine | 1.27 | 0.079 | 0.142 | +| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.19 | 0.077 | 0.137 | +| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.073 | 0.129 | +| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.068 | 0.120 | + + +### VoxCeleb 1 Hard-Clean trial list + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 3.29 | 0.195 | 0.318 | +| | | | Cosine | 3.27 | 0.188 | 0.303 | +| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.66 | 0.160 | 0.258 | +| | | | Cosine | 2.32 | 0.139 | 0.232 | +| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 2.19 | 0.133 | 0.215 | +| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 2.33 | 0.139 | 0.227 | +| | | | Cosine | 2.33 | 0.142 | 0.235 | +| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.46 | 0.142 | 0.237 | +| | | | Cosine | 2.14 | 0.126 | 0.203 | +| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | PLDA | 2.57 | 0.153 | 0.255 | +| | | | Cosine | 2.11 | 0.127 | 0.205 | +| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121&#10;
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | PLDA | 2.64 | 0.157 | 0.244 | +| | | | Cosine | 2.33 | 0.141 | 0.232 | +| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 2.42 | 0.144 | 0.245 | +| | | | Cosine | 2.26 | 0.133 | 0.224 | +| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 2.39 | 0.141 | 0.235 | +| | | | Cosine | 2.17 | 0.128 | 0.215 | +| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 2.28 | 0.131 | 0.225 | +| | | | Cosine | 2.11 | 0.124 | 0.204 | +| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16&#10;
ArcFace s=30/m=0.3 | PLDA | 2.77 | 0.172 | 0.271 | +| | | | Cosine | 2.45 | 0.141 | 0.225 | +| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 2.07 | 0.124 | 0.201 | +| | | | Cosine | 1.95 | 0.113 | 0.181 | +| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 2.34 | 0.136 | 0.230 | +| | | | Cosine | 1.99 | 0.119 | 0.196 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 2.18 | 0.127 | 0.211 | +| | | | Cosine | 1.89 | 0.112 | 0.184 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1_swa.sh | Res2Net50 width=26x8 | + SWA | PLDA | 2.14 | 0.125 | 0.209 | +| | | | Cosine | 1.84 | 0.110 | 0.186 | +| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 2.78 | 0.156 | 0.252 | +| | | | Cosine | 2.26 | 0.134 | 0.214 | +| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 2.24 | 0.134 | 0.221 | +| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.20 | 0.132 | 0.219 | +| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.02 | 0.123 | 0.203 | diff --git a/egs/voxceleb/v1.2/cmd.sh b/egs/voxceleb/v1.2/cmd.sh new file mode 100755 index 00000000..040f458b --- /dev/null +++ b/egs/voxceleb/v1.2/cmd.sh @@ -0,0 +1,28 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
+ +if [ "$(hostname -d)" == "cm.gemini" ];then + #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" + export cuda_eval_cmd="$train_cmd" +fi + + + diff --git a/egs/voxceleb/v1.2/conf/clsp.conf b/egs/voxceleb/v1.2/conf/clsp.conf new file mode 100644 index 00000000..4ed38246 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/voxceleb/v1.2/conf/coe_gpu_bigmem.conf b/egs/voxceleb/v1.2/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/voxceleb/v1.2/conf/coe_gpu_long.conf b/egs/voxceleb/v1.2/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/voxceleb/v1.2/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/voxceleb/v1.2/conf/coe_gpu_rtx.conf b/egs/voxceleb/v1.2/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/voxceleb/v1.2/conf/coe_gpu_short.conf b/egs/voxceleb/v1.2/conf/coe_gpu_short.conf new file mode 100644 index 00000000..81de5cb7 --- /dev/null +++ 
b/egs/voxceleb/v1.2/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/voxceleb/v1.2/conf/coe_gpu_v100.conf b/egs/voxceleb/v1.2/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/voxceleb/v1.2/conf/fbank80_specaug1_stmn_16k.yaml b/egs/voxceleb/v1.2/conf/fbank80_specaug1_stmn_16k.yaml new file mode 100644 index 00000000..8df42fc6 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/fbank80_specaug1_stmn_16k.yaml @@ -0,0 +1,24 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 16000 + frame_length: 25 + low_freq: 20 + high_freq: 7600 + num_filters: 80 + snip_edges: false + use_energy: false +spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. + freq_mask_min_width: 0 + freq_mask_max_width: 8 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean +mvn: + context: 150 + norm_var: false diff --git a/egs/voxceleb/v1.2/conf/fbank80_stmn_16k.yaml b/egs/voxceleb/v1.2/conf/fbank80_stmn_16k.yaml new file mode 100644 index 00000000..f4091f5d --- /dev/null +++ b/egs/voxceleb/v1.2/conf/fbank80_stmn_16k.yaml @@ -0,0 +1,12 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 16000 + frame_length: 25 + low_freq: 20 + high_freq: 7600 + num_filters: 80 + snip_edges: false + use_energy: false +mvn: + context: 150 + norm_var: false diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..1633f4a2 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml @@ -0,0 +1,95 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_enc: + in_feats: 80 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + 
se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + norm_before: false + dropout_rate: 0.2 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.2 + norm_before: false +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + #min_lr: 1.0e-05 + min_lr: 1.0e-06 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..877736b3 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml @@ -0,0 +1,70 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + resnet_enc: + override_dropouts: true + dropout_rate: 0.25 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..f15d453d --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml @@ -0,0 +1,93 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 
+ res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + dropout_rate: 0.002 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 40 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..45e55d97 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + resnet_enc: + override_dropouts: true + dropout_rate: 0. 
+trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + swa_start: 31 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.2/conf/vad_16k.yaml b/egs/voxceleb/v1.2/conf/vad_16k.yaml new file mode 100644 index 00000000..5fb0111c --- /dev/null +++ b/egs/voxceleb/v1.2/conf/vad_16k.yaml @@ -0,0 +1,8 @@ +sample_frequency: 16000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: 5.5 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 diff --git a/egs/voxceleb/v1.2/datapath.sh b/egs/voxceleb/v1.2/datapath.sh new file mode 100644 index 00000000..a7eb575c --- /dev/null +++ b/egs/voxceleb/v1.2/datapath.sh @@ -0,0 +1,23 @@ +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + # voxceleb1_root=/export/corpora5/VoxCeleb1_v1 #voxceleb1 v1 + voxceleb1_root=/export/corpora5/VoxCeleb1_v2 #voxceleb1 v2 + voxceleb2_root=/export/corpora5/VoxCeleb2 + musan_root=/export/corpora5/JHU/musan +elif [ "$(hostname --domain)" == "cm.gemini" ];then + # voxceleb1_root=/expscratch/dsnyder/VoxCeleb1 #voxceleb1 v1 + voxceleb1_root=/exp/jvillalba/corpora/voxceleb1 #voxceleb1 v2 + voxceleb2_root=/expscratch/dgromero/corpora-open/vox2 + voxsrc22_root=/exp/jvillalba/corpora/voxsrc22 + musan_root=/expscratch/dgromero/corpora-open/musan +else + echo "Put your database paths here" + exit 1 +fi + + diff --git a/egs/voxceleb/v1.2/default_config.sh b/egs/voxceleb/v1.2/default_config.sh new file mode 120000 index 00000000..fd0e1bb1 --- /dev/null +++ b/egs/voxceleb/v1.2/default_config.sh @@ -0,0 +1 @@ +global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh \ No newline at end of file diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh new file mode 100644 index 00000000..f2622b0e --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh @@ -0,0 +1,44 @@ +# ECAPA-TDNN large + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn2048x4.v3.0 + +nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh new file mode 100644 index 00000000..a3ad0c29 --- /dev/null +++ 
b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.0.sh @@ -0,0 +1,45 @@ +# ECAPA-TDNN small + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn512x3.v3.0 + +nnet_s1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0040.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0030.pth +nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/path.sh b/egs/voxceleb/v1.2/path.sh new file mode 100755 index 00000000..6994fdab --- /dev/null +++ b/egs/voxceleb/v1.2/path.sh @@ -0,0 +1,5 @@ + +export HYP_ROOT=$(readlink -f `pwd -P`/../../..) +export TOOLS_ROOT=$HYP_ROOT/tools + +. $TOOLS_ROOT/path.sh diff --git a/egs/voxceleb/v1.2/run_001_prepare_data.sh b/egs/voxceleb/v1.2/run_001_prepare_data.sh new file mode 100755 index 00000000..831eb1bc --- /dev/null +++ b/egs/voxceleb/v1.2/run_001_prepare_data.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. datapath.sh +. $config_file + +if [ $stage -le 1 ];then + # Prepare the VoxCeleb2 dataset for training. 
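[Editor's note] The swa_model_ep00xx checkpoints that the global configs point nnet_s2 at come from the swa_* fields in the stage-2 trainers (swa_start: 31, swa_lr: 1e-4, swa_anneal_epochs: 2 for the 512x3 model). A minimal sketch of the intended stochastic-weight-averaging semantics in terms of torch.optim.swa_utils; hyperion's Trainer wires this up internally, so this is an illustration rather than its actual code:

import torch
from torch.optim.swa_utils import AveragedModel, SWALR

model = torch.nn.Linear(10, 2)                        # stand-in for the x-vector net
opt = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
swa_model = AveragedModel(model)
swa_sched = SWALR(opt, swa_lr=1e-4, anneal_epochs=2)  # swa_lr / swa_anneal_epochs

for epoch in range(35):                               # epochs: 35
    ...                                               # one regular training epoch
    if epoch >= 31:                                   # swa_start: 31
        swa_model.update_parameters(model)            # accumulate the weight average
        swa_sched.step()

# the averaged weights are what gets saved and extracted from (swa_model_ep00xx);
# PyTorch recommends an update_bn pass over the training data before using them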
+ hyp_utils/conda_env.sh \ + prepare_data.py voxceleb2 --subset dev --corpus-dir $voxceleb2_root \ + --cat-videos --use-kaldi-ids \ + --output-dir data/voxceleb2cat_train + #local/make_voxceleb2cat.pl $voxceleb2_root dev 16 data/voxceleb2cat_train +fi +exit +if [ $stage -le 2 ];then + # prepare voxceleb1 for test + # This script is for the old version of the dataset + # local/make_voxceleb1_oeh.pl $voxceleb1_root data + # Use this for the newer version of voxceleb1: + local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data +fi + +if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then + local/prepare_voxsrc22_dev.py \ + --vox1-corpus-dir $voxceleb1_root \ + --voxsrc22-corpus-dir $voxsrc22_root \ + --output-dir data/voxsrc22_dev +fi + +# if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then +# local/prepare_voxsrc22_test.py \ +# --corpus-dir $voxsrc22_root \ +# --output-dir data/voxsrc22_test +# fi + +if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then + # # split vox2 into 2 parts, for cohort and qmf training + local/make_vox2_trials.py --data-dir data/voxceleb2cat_train +fi diff --git a/hyp_utils/adv/eval_cosine_scoring_from_adv_test_wav.sh b/hyp_utils/adv/eval_cosine_scoring_from_adv_test_wav.sh index 0d7e5d4c..7a97bb56 100755 --- a/hyp_utils/adv/eval_cosine_scoring_from_adv_test_wav.sh +++ b/hyp_utils/adv/eval_cosine_scoring_from_adv_test_wav.sh @@ -85,7 +85,7 @@ echo "$0: score $key_file to $output_dir" $cmd JOB=1:$nj $log_dir/${name}.JOB.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $num_gpus \ - torch-eval-xvec-cosine-scoring-from-adv-test-wav.py \ + eval_xvec_cosine_scoring_from_adv_test_wav.py \ --feats $feat_config ${args} \ --v-file scp:$vector_file \ --key-file $key_file \ diff --git a/hyp_utils/adv/eval_cosine_scoring_from_adv_test_wav_wavegan.sh b/hyp_utils/adv/eval_cosine_scoring_from_adv_test_wav_wavegan.sh index f083ecb8..5ad16f77 100755 --- a/hyp_utils/adv/eval_cosine_scoring_from_adv_test_wav_wavegan.sh +++ b/hyp_utils/adv/eval_cosine_scoring_from_adv_test_wav_wavegan.sh @@ -92,7 +92,7 @@ fi echo "$0: score $key_file to $output_dir" $cmd JOB=1:$nj $log_dir/${name}.JOB.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $num_gpus \ - torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py \ + eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py \ --feats $feat_config ${args} \ --v-file scp:$vector_file \ --key-file $key_file \ diff --git a/hyp_utils/adv/eval_cosine_scoring_from_art_test_wav.sh b/hyp_utils/adv/eval_cosine_scoring_from_art_test_wav.sh index 3abd289b..bca8266e 100755 --- a/hyp_utils/adv/eval_cosine_scoring_from_art_test_wav.sh +++ b/hyp_utils/adv/eval_cosine_scoring_from_art_test_wav.sh @@ -88,7 +88,7 @@ echo "$0: score $key_file to $output_dir" $cmd JOB=1:$nj $log_dir/${name}.JOB.log \ hyp_utils/conda_env.sh --conda-env $HYP_ART_ENV --num-gpus $num_gpus \ - torch-eval-xvec-cosine-scoring-from-art-test-wav.py \ + eval_xvec_cosine_scoring_from_art_test_wav.py \ --feats $feat_config ${args} \ --v-file scp:$vector_file \ --key-file $key_file \ diff --git a/hyp_utils/adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh b/hyp_utils/adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh index 51c248fd..008b6ccc 100755 --- a/hyp_utils/adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh +++ b/hyp_utils/adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh @@ -94,7 +94,7 @@ echo "$0: score $key_file to $output_dir" $cmd JOB=1:$nj $log_dir/${name}.JOB.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $num_gpus \ - 
torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py \ + eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py \ --feats $feat_config --transfer_feats $transfer_feat_config ${args} \ --v-file scp:$vector_file \ --key-file $key_file \ diff --git a/hyp_utils/adv/eval_cosine_scoring_from_transfer_art_test_wav.sh b/hyp_utils/adv/eval_cosine_scoring_from_transfer_art_test_wav.sh index 7f497d02..b60cdee4 100755 --- a/hyp_utils/adv/eval_cosine_scoring_from_transfer_art_test_wav.sh +++ b/hyp_utils/adv/eval_cosine_scoring_from_transfer_art_test_wav.sh @@ -96,7 +96,7 @@ echo "$0: score $key_file to $output_dir" $cmd JOB=1:$nj $log_dir/${name}.JOB.log \ hyp_utils/conda_env.sh --conda-env $HYP_ART_ENV --num-gpus $num_gpus \ - torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py \ + eval_xvec_cosine_scoring_from_transfer_art_test_wav.py \ --feats $feat_config --transfer_feats $transfer_feat_config ${args} \ --v-file scp:$vector_file \ --key-file $key_file \ diff --git a/hyp_utils/xvectors/eval_cosine_scoring_from_test_wav.sh b/hyp_utils/xvectors/eval_cosine_scoring_from_test_wav.sh index b17a3ea2..963fd91b 100755 --- a/hyp_utils/xvectors/eval_cosine_scoring_from_test_wav.sh +++ b/hyp_utils/xvectors/eval_cosine_scoring_from_test_wav.sh @@ -70,7 +70,7 @@ echo "$0: score $ndx_file to $output_dir" $cmd JOB=1:$nj $log_dir/${name}.JOB.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $num_gpus \ - torch-eval-xvec-cosine-scoring-from-test-wav.py \ + eval_xvec_cosine_scoring_from_test_wav.py \ --feats $feat_config ${args} \ --v-file scp:$vector_file \ --ndx-file $ndx_file \ diff --git a/hyp_utils/xvectors/eval_xvec_logits_from_wav.sh b/hyp_utils/xvectors/eval_xvec_logits_from_wav.sh index bdd53862..4765e809 100755 --- a/hyp_utils/xvectors/eval_xvec_logits_from_wav.sh +++ b/hyp_utils/xvectors/eval_xvec_logits_from_wav.sh @@ -84,7 +84,7 @@ fi if [ $stage -le 0 ];then $cmd JOB=1:$nj $output_dir/log/eval_logits.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - torch-eval-xvec-logits-from-wav.py \ + eval_xvec_logits_from_wav.py \ --feats $feat_config ${args} $write_num_frames_opt \ --part-idx JOB --num-parts $nj \ --input $data_dir/wav.scp \ diff --git a/hyperion/bin/adv_finetune_xvector_from_wav.py b/hyperion/bin/adv_finetune_xvector_from_wav.py new file mode 100755 index 00000000..f387c7ac --- /dev/null +++ b/hyperion/bin/adv_finetune_xvector_from_wav.py @@ -0,0 +1,482 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import multiprocessing +import os +import sys +import time +from pathlib import Path + +import numpy as np +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + +import torch +import torch.nn as nn +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.adv_attacks import AttackFactory +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import SegSamplerFactory +from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import ResNet1dXVector as R1dXVec +from hyperion.torch.models import ResNetXVector as RXVec +from hyperion.torch.models import SpineNetXVector as SpineXVec +from hyperion.torch.models import TDNNXVector as TDXVec 
+from hyperion.torch.models import TransformerXVectorV1 as TFXVec +from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorAdvTrainerFromWav as Trainer +from hyperion.torch.utils import ddp + +xvec_dict = { + "resnet": RXVec, + "resnet1d": R1dXVec, + "efficientnet": EXVec, + "tdnn": TDXVec, + "transformer": TFXVec, + "spinenet": SpineXVec, +} + + +def init_data(partition, rank, num_gpus, **kwargs): + + kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**kwargs["dataset"]) + sampler_args = kwargs["sampler"] + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = SegSamplerFactory.create(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_feats(rank, **kwargs): + feat_args = AF.filter_args(**kwargs["feats"]) + if rank == 0: + logging.info("feat args={}".format(feat_args)) + logging.info("initializing feature extractor") + feat_extractor = AF(trans=True, **feat_args) + if rank == 0: + logging.info("feat-extractor={}".format(feat_extractor)) + return feat_extractor + + +def init_xvector(num_classes, in_model_file, rank, xvec_class, **kwargs): + xvec_args = xvec_class.filter_finetune_args(**kwargs["model"]) + if rank == 0: + logging.info("xvector network ft args={}".format(xvec_args)) + xvec_args["num_classes"] = num_classes + model = TML.load(in_model_file) + model.change_config(**xvec_args) + if rank == 0: + logging.info("x-vector-model={}".format(model)) + return model + + +def init_hard_prototype_mining(model, train_loader, val_loader, rank): + try: + hard_prototype_mining = train_loader.batch_sampler.hard_prototype_mining + except: + hard_prototype_mining = False + + if not hard_prototype_mining: + return + + if rank == 0: + logging.info("setting hard prototypes") + + affinity_matrix = model.compute_prototype_affinity() + train_loader.batch_sampler.set_hard_prototypes(affinity_matrix) + + try: + hard_prototype_mining = val_loader.batch_sampler.hard_prototype_mining + except: + hard_prototype_mining = False + + if not hard_prototype_mining: + return + + val_loader.batch_sampler.set_hard_prototypes(affinity_matrix) + + +def init_attack(feat_extractor, model, wav_scale, **kwargs): + victim_model = nn.Sequential(feat_extractor, model) + attack_args = AttackFactory.filter_args(**kwargs["attack"]) + extra_args = { + "eps_scale": wav_scale, + "loss": nn.functional.cross_entropy, + "time_dim": 1, + } + attack_args.update(extra_args) + logging.info("attacks args={}".format(attack_args)) + attack = AttackFactory.create(victim_model, **attack_args) + return attack + + +def train_xvec(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, 
world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + feat_extractor = init_feats(**kwargs) + model = init_xvector(list(train_loader.dataset.num_classes.values())[0], **kwargs) + init_hard_prototype_mining(model, train_loader, val_loader, rank) + kwargs["wav_scale"] = train_loader.dataset.wav_scale + attack = init_attack(feat_extractor, model, **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, + feat_extractor, + attack, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(xvec_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + + train_parser = ArgumentParser(prog="") + + AD.add_class_args(train_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + "data.train.dataset.class_files", "data.val.dataset.class_files" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + + AF.add_class_args(parser, prefix="feats") + xvec_class.add_finetune_args(parser, prefix="model") + AttackFactory.add_class_args(parser, prefix="attack") + + parser.add_argument("--in-model-file", required=True) + Trainer.add_class_args( + parser, prefix="trainer", train_modes=xvec_class.valid_train_modes() + ) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="""Fine-tune x-vector model from audio files + with adversarial training""" + ) + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + for k, v in xvec_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + xvec_type = args.subcommand + args_sc = vars(args)[xvec_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.xvec_class = xvec_dict[xvec_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_xvec(gpu_id, args_sc) + + +# def init_data( +# audio_path, +# train_list, +# val_list, +# train_aug_cfg, 
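[Editor's note] Aside on init_hard_prototype_mining above: when the sampler is configured for hard-prototype mining (num_hard_prototypes: 8 in the stage-2 configs), the model's compute_prototype_affinity() supplies a class-by-class similarity matrix and the sampler draws batches from groups of confusable speakers. A rough sketch of the selection step, with assumed shapes only (not hyperion's implementation):

import torch

def hardest_prototypes(affinity, num_hard=8):
    # affinity: (num_classes, num_classes) similarity between class prototypes,
    # e.g. cosine similarity between classification-head weight vectors
    affinity = affinity.clone()
    affinity.fill_diagonal_(float("-inf"))          # a class is not its own negative
    return affinity.topk(num_hard, dim=-1).indices  # (num_classes, num_hard)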
+# val_aug_cfg, +# num_workers, +# num_gpus, +# rank, +# **kwargs +# ): + +# ad_args = AD.filter_args(**kwargs) +# sampler_args = Sampler.filter_args(**kwargs) +# if rank == 0: +# logging.info("audio dataset args={}".format(ad_args)) +# logging.info("sampler args={}".format(sampler_args)) +# logging.info("init datasets") + +# train_data = AD(audio_path, train_list, aug_cfg=train_aug_cfg, **ad_args) +# val_data = AD(audio_path, val_list, aug_cfg=val_aug_cfg, is_val=True, **ad_args) + +# if rank == 0: +# logging.info("init samplers") +# train_sampler = Sampler(train_data, **sampler_args) +# val_sampler = Sampler(val_data, **sampler_args) + +# num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) +# largs = ( +# {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} +# ) + +# train_loader = torch.utils.data.DataLoader( +# train_data, batch_sampler=train_sampler, **largs +# ) + +# test_loader = torch.utils.data.DataLoader( +# val_data, batch_sampler=val_sampler, **largs +# ) + +# return train_loader, test_loader + + +# def init_feats(rank, **kwargs): +# feat_args = AF.filter_args(**kwargs["feats"]) +# if rank == 0: +# logging.info("feat args={}".format(feat_args)) +# logging.info("initializing feature extractor") +# feat_extractor = AF(trans=True, **feat_args) +# if rank == 0: +# logging.info("feat-extractor={}".format(feat_extractor)) +# return feat_extractor + + +# def init_xvector(num_classes, in_model_path, rank, train_mode, **kwargs): +# xvec_args = XVec.filter_finetune_args(**kwargs) +# if rank == 0: +# logging.info("xvector network ft args={}".format(xvec_args)) +# xvec_args["num_classes"] = num_classes +# model = TML.load(in_model_path) +# model.rebuild_output_layer(**xvec_args) +# if train_mode == "ft-embed-affine": +# model.freeze_preembed_layers() +# if rank == 0: +# logging.info("x-vector-model={}".format(model)) +# return model + + +# def init_attack(feat_extractor, model, wav_scale, **kwargs): +# victim_model = nn.Sequential(feat_extractor, model) +# attack_args = AttackFactory.filter_args(**kwargs["attack"]) +# extra_args = { +# "eps_scale": wav_scale, +# "loss": nn.functional.cross_entropy, +# "time_dim": 1, +# } +# attack_args.update(extra_args) +# logging.info("attacks args={}".format(attack_args)) +# attack = AttackFactory.create(victim_model, **attack_args) +# return attack + + +# def train_xvec(gpu_id, args): + +# config_logger(args.verbose) +# del args.verbose +# logging.debug(args) + +# kwargs = namespace_to_dict(args) +# torch.manual_seed(args.seed) +# set_float_cpu("float32") + +# train_mode = kwargs["train_mode"] + +# ddp_args = ddp.filter_ddp_args(**kwargs) +# device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) +# kwargs["rank"] = rank + +# train_loader, test_loader = init_data(**kwargs) +# feat_extractor = init_feats(**kwargs) +# model = init_xvector(train_loader.dataset.num_classes, **kwargs) +# kwargs["wav_scale"] = train_loader.dataset.wav_scale +# attack = init_attack(feat_extractor, model, **kwargs) + +# trn_args = Trainer.filter_args(**kwargs) +# if rank == 0: +# logging.info("trainer args={}".format(trn_args)) +# metrics = {"acc": CategoricalAccuracy()} +# trainer = Trainer( +# model, +# feat_extractor, +# attack, +# device=device, +# metrics=metrics, +# ddp=world_size > 1, +# train_mode=train_mode, +# **trn_args +# ) +# if args.resume: +# trainer.load_last_checkpoint() +# trainer.fit(train_loader, test_loader) + +# ddp.ddp_cleanup() + + +# if __name__ == "__main__": + +# parser = ArgumentParser( +# 
description="Fine-tune x-vector model with adv attacks on wav domain" +# ) + +# parser.add_argument("--cfg", action=ActionConfigFile) +# parser.add_argument("--audio-path", required=True) +# parser.add_argument("--train-list", dest="train_list", required=True) +# parser.add_argument("--val-list", dest="val_list", required=True) + +# AD.add_argparse_args(parser) +# Sampler.add_argparse_args(parser) + +# parser.add_argument("--train-aug-cfg", default=None) +# parser.add_argument("--val-aug-cfg", default=None) + +# parser.add_argument( +# "--num-workers", type=int, default=5, help="num_workers of data loader" +# ) + +# AF.add_class_args(parser, prefix="feats") +# parser.add_argument("--in-model-path", required=True) + +# XVec.add_finetune_args(parser) +# AttackFactory.add_class_args(parser, prefix="attack") + +# Trainer.add_class_args(parser) +# ddp.add_ddp_args(parser) + +# # parser.add_argument('--num-gpus', type=int, default=1, +# # help='number of gpus, if 0 it uses cpu') +# parser.add_argument( +# "--seed", type=int, default=1123581321, help="random seed (default: 1)" +# ) +# parser.add_argument( +# "--resume", +# action="store_true", +# default=False, +# help="resume training from checkpoint", +# ) +# parser.add_argument( +# "--train-mode", +# default="ft-full", +# choices=["ft-full", "ft-embed-affine"], +# help=( +# "ft-full: adapt full x-vector network" +# "ft-embed-affine: adapt affine transform before embedding" +# ), +# ) + +# # parser.add_argument('--attack-eps', required=True, type=float, +# # help='epsilon adversarial attack') +# # parser.add_argument('--attack-eps-step', required=True, type=float, +# # help='eps step adversarial attack') +# # parser.add_argument('--attack-random-eps', default=False, +# # action='store_true', +# # help='use random eps in adv. 
attack') + +# # parser.add_argument('--attack-max-iter', default=10, type=int, +# # help='number of iterations for adversarial optimization') + +# # parser.add_argument('--p-attack', default=0.5, type=float, +# # help='ratio of batches with adv attack') + +# parser.add_argument( +# "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int +# ) +# parser.add_argument("--local_rank", default=0, type=int) + +# args = parser.parse_args() +# gpu_id = args.local_rank +# del args.local_rank + +# if gpu_id == 0: +# try: +# config_file = Path(args.exp_path) / "config.yaml" +# parser.save(args, str(config_file), format="yaml", overwrite=True) +# except: +# pass + +# # torch docs recommend using forkserver +# multiprocessing.set_start_method("forkserver") +# train_xvec(gpu_id, args) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py index 437127b2..f0a2d010 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py @@ -10,8 +10,12 @@ import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -223,9 +227,7 @@ def eval_cosine_scoring( vad = v_reader.read([key.seg_set[j]])[0] tot_frames = len(vad) speech_frames = np.sum(vad) - vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( - device - ) + vad = torch.tensor(vad, dtype=torch.bool).to(device) model.vad_t = vad logging.info( "utt %s detected %d/%d (%.2f %%) speech frames" @@ -244,7 +246,7 @@ def eval_cosine_scoring( for i in range(key.num_models): if key.tar[i, j] or key.non[i, j]: t3 = time.time() - model.x_e = x_e[i].to(device) + model.x_e = x_e[i : i + 1].to(device) if key.tar[i, j]: if attack.targeted: t = non diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py index aaa91214..23c9bf68 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py @@ -7,13 +7,18 @@ import os import sys import time + # [Added Sonal May21] from pathlib import Path import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -281,9 +286,7 @@ def eval_cosine_scoring_wavegan( vad = v_reader.read([key.seg_set[j]])[0] tot_frames = len(vad) speech_frames = np.sum(vad) - vad = torch.as_tensor( - vad.astype(np.bool, copy=False), dtype=torch.bool, device=device - ) + vad = torch.tensor(vad, dtype=torch.bool).to(device) model.vad_t = vad logging.info( "utt %s detected %d/%d (%.2f %%) speech frames" @@ -302,7 +305,7 @@ def eval_cosine_scoring_wavegan( for i in range(key.num_models): if key.tar[i, j] or key.non[i, j]: t3 = time.time() - model.x_e = x_e[i].to(device) + model.x_e = x_e[i : i + 1].to(device) if key.tar[i, j]: if attack.targeted: t = non diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py index 8d4add76..c204e218 100755 --- 
a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py @@ -11,9 +11,13 @@ import numpy as np import pandas as pd -from art.classifiers import PyTorchClassifier -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from art.estimators.classification import PyTorchClassifier +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -24,8 +28,9 @@ from hyperion.io import VADReaderFactory as VRF from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.adv_attacks.art_attack_factory import \ - ARTAttackFactory as AttackFactory +from hyperion.torch.adv_attacks.art_attack_factory import ( + ARTAttackFactory as AttackFactory, +) from hyperion.torch.layers import LinBinCalibrator as Calibrator from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device @@ -102,6 +107,10 @@ def __init__( self.threshold = threshold def forward(self, s_t): + if s_t.dim() == 4: + # this is for attacks that only work in 4D inputs + s_t = s_t[0, 0] + f_t = s_t f_t = self.feat_extractor(s_t) if self.vad_t is not None: @@ -116,6 +125,10 @@ def forward(self, s_t): f_t = f_t.transpose(1, 2).contiguous() x_t = self.xvector_model.extract_embed(f_t, embed_layer=self.embed_layer) + if self.x_e is None: + # this is for auto-pgd, when it runs a dummy evaluation + self.x_e = x_t + x_t = l2_norm(x_t) x_e = l2_norm(self.x_e) tar_score = torch.sum(x_e * x_t, dim=-1, keepdim=True) @@ -164,15 +177,15 @@ def eval_cosine_scoring( model.to(device) model.eval() - tar = np.asarray([1], dtype=np.int) - non = np.asarray([0], dtype=np.int) + tar = np.asarray([1], dtype=int) + non = np.asarray([0], dtype=int) logging.info("loading key and enrollment x-vectors") key, x_e = read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts) x_e = torch.as_tensor(x_e, dtype=torch.get_default_dtype()) audio_args = AR.filter_args(**kwargs) - audio_reader = AR(test_wav_file) + audio_reader = AR(test_wav_file, **audio_args) wav_scale = audio_reader.wav_scale if save_adv_wav: @@ -207,7 +220,7 @@ def eval_cosine_scoring( for j in range(key.num_tests): t1 = time.time() - logging.info("scoring test utt %s" % (key.seg_set[j])) + logging.info("scoring test utt %s", key.seg_set[j]) s, fs = audio_reader.read([key.seg_set[j]]) s = s[0] fs = fs[0] @@ -224,18 +237,14 @@ def eval_cosine_scoring( vad = v_reader.read([key.seg_set[j]])[0] tot_frames = len(vad) speech_frames = np.sum(vad) - vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( - device - ) + vad = torch.tensor(vad, dtype=torch.bool).to(device) model.vad_t = vad logging.info( - "utt %s detected %d/%d (%.2f %%) speech frames" - % ( - key.seg_set[j], - speech_frames, - tot_frames, - speech_frames / tot_frames * 100, - ) + "utt %s detected %d/%d (%.2f %%) speech frames", + key.seg_set[j], + speech_frames, + tot_frames, + speech_frames / tot_frames * 100, ) t2 = time.time() @@ -246,7 +255,7 @@ def eval_cosine_scoring( model=model, loss=nn.CrossEntropyLoss(), optimizer=None, - input_shape=[1, s.shape[1]], + input_shape=(s.shape[1],), nb_classes=2, clip_values=(-wav_scale, wav_scale), device_type=device_type, @@ -254,10 +263,11 @@ def eval_cosine_scoring( attack_args["num_samples"] = s.shape[-1] attack = AttackFactory.create(model_art, **attack_args) + # s = s[None, 
None, :, :] for i in range(key.num_models): if key.tar[i, j] or key.non[i, j]: t3 = time.time() - model.x_e = x_e[i].to(device) + model.x_e = x_e[i : i + 1].to(device) if key.tar[i, j]: if attack.targeted: t = non @@ -270,6 +280,7 @@ def eval_cosine_scoring( t = non s_adv = attack.generate(s, t) + # s_adv = s_adv[0, 0] s_adv = torch.from_numpy(s_adv).to(device) with torch.no_grad(): scores[i, j] = model(s_adv).cpu().numpy()[0, 1] @@ -327,13 +338,13 @@ def eval_cosine_scoring( if num_seg_parts > 1: score_file = "%s-%03d-%03d" % (score_file, 1, seg_part_idx) stats_file = "%s-%03d-%03d" % (stats_file, 1, seg_part_idx) - logging.info("saving scores to %s" % (score_file)) + logging.info("saving scores to %s", score_file) s = TrialScores( key.model_set, key.seg_set, scores, score_mask=np.logical_or(key.tar, key.non) ) s.save_txt(score_file) - logging.info("saving stats to %s" % (stats_file)) + logging.info("saving stats to %s", stats_file) attack_stats.to_csv(stats_file) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py index 0e9493c0..53349dc4 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py @@ -10,8 +10,12 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -128,7 +132,7 @@ def eval_cosine_scoring( with torch.no_grad(): for j in range(ndx.num_tests): t1 = time.time() - logging.info("scoring test utt %s" % (ndx.seg_set[j])) + logging.info("scoring test utt %s", ndx.seg_set[j]) s, fs = audio_reader.read([ndx.seg_set[j]]) s = s[0] fs = fs[0] @@ -144,21 +148,15 @@ def eval_cosine_scoring( t4 = time.time() tot_frames = x_t.shape[1] if vad_spec is not None: - vad = torch.as_tensor( - v_reader.read([ndx.seg_set[j]], num_frames=x_t.shape[1])[0].astype( - np.uint8, copy=False - ), - dtype=torch.uint8, - ).to(device) + vad = v_reader.read([ndx.seg_set[j]], num_frames=x_t.shape[1])[0] + vad = torch.tensor(vad, dtype=torch.bool).to(device) x_t = x_t[:, vad] logging.info( - "utt %s detected %d/%d (%.2f %%) speech frames" - % ( - ndx.seg_set[j], - x_t.shape[1], - tot_frames, - x_t.shape[1] / tot_frames * 100, - ) + "utt %s detected %d/%d (%.2f %%) speech frames", + ndx.seg_set[j], + x_t.shape[1], + tot_frames, + x_t.shape[1] / tot_frames * 100, ) t5 = time.time() @@ -169,9 +167,9 @@ def eval_cosine_scoring( for i in range(ndx.num_models): if ndx.trial_mask[i, j]: - y_e_i = torch.as_tensor(y_e[i], dtype=torch.get_default_dtype()).to( - device - ) + y_e_i = torch.as_tensor( + y_e[i : i + 1], dtype=torch.get_default_dtype() + ).to(device) y_e_i = l2_norm(y_e_i) scores_ij = torch.sum(y_e_i * y_t, dim=-1) if calibrator is None: @@ -213,9 +211,9 @@ def eval_cosine_scoring( ) parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--v-file", dest="v_file", required=True) - parser.add_argument("--ndx-file", dest="ndx_file", default=None) - parser.add_argument("--enroll-file", dest="enroll_file", required=True) + parser.add_argument("--v-file", required=True) + parser.add_argument("--ndx-file", default=None) + parser.add_argument("--enroll-file", required=True) parser.add_argument("--test-wav-file", required=True) AR.add_class_args(parser) @@ -241,7 +239,7 @@ def eval_cosine_scoring( ) parser.add_argument( - "--use-gpu", 
default=False, action="store_true", help="extract xvectors in gpu" + "--use-gpu", default=False, action="store_true", help="evaluate in gpu" ) parser.add_argument("--seg-part-idx", default=1, type=int, help=("test part index")) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py index e0754498..1636e23b 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py @@ -10,8 +10,12 @@ import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -260,8 +264,8 @@ def eval_cosine_scoring( for i in range(key.num_models): if key.tar[i, j] or key.non[i, j]: t3 = time.time() - model.x_e = x_e[i].to(device) - tmodel.x_e = t_x_e[i].to(device) + model.x_e = x_e[i : i + 1].to(device) + tmodel.x_e = t_x_e[i : i + 1].to(device) if key.tar[i, j]: if attack.targeted: t = non diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py index 0f9f375d..fd75ce7a 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py @@ -11,9 +11,13 @@ import numpy as np import pandas as pd -from art.classifiers import PyTorchClassifier -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from art.estimators.classification import PyTorchClassifier +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -24,8 +28,9 @@ from hyperion.io import VADReaderFactory as VRF from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.adv_attacks.art_attack_factory import \ - ARTAttackFactory as AttackFactory +from hyperion.torch.adv_attacks.art_attack_factory import ( + ARTAttackFactory as AttackFactory, +) from hyperion.torch.layers import LinBinCalibrator as Calibrator from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device @@ -185,8 +190,8 @@ def eval_cosine_scoring( tmodel.to(device) tmodel.eval() - tar = np.asarray([1], dtype=np.int) - non = np.asarray([0], dtype=np.int) + tar = np.asarray([1], dtype=int) + non = np.asarray([0], dtype=int) logging.info("loading key and enrollment x-vectors") key, x_e = read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts) @@ -249,9 +254,7 @@ def eval_cosine_scoring( vad = v_reader.read([key.seg_set[j]])[0] tot_frames = len(vad) speech_frames = np.sum(vad) - vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( - device - ) + vad = torch.tensor(vad, dtype=torch.bool).to(device) model.vad_t = vad tmodel.vad_t = vad logging.info( @@ -283,8 +286,8 @@ def eval_cosine_scoring( for i in range(key.num_models): if key.tar[i, j] or key.non[i, j]: t3 = time.time() - model.x_e = x_e[i].to(device) - tmodel.x_e = t_x_e[i].to(device) + model.x_e = x_e[i : i + 1].to(device) + tmodel.x_e = t_x_e[i : i + 1].to(device) if key.tar[i, j]: if attack.targeted: t = non diff --git a/hyperion/bin/prepare_data.py b/hyperion/bin/prepare_data.py new 
file mode 100755 index 00000000..b7370b9b --- /dev/null +++ b/hyperion/bin/prepare_data.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from pathlib import Path + +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + +from hyperion.data_prep import DataPrep + + +def make_parser(data_prep_class): + parser = ArgumentParser() + data_prep_class.add_class_args(parser) + return parser + + +if __name__ == "__main__": + parser = ArgumentParser( + description="""Prepares a dataset into relational database tables""" + ) + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + for k, v in DataPrep.registry.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + data_prep_class = DataPrep.registry[args.subcommand] + args = namespace_to_dict(args)[args.subcommand] + + data_prep = data_prep_class(**args) + data_prep.prepare() diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py index 57a33b56..da8ebc3f 100755 --- a/hyperion/bin/train_xvector_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -10,12 +10,15 @@ import time from pathlib import Path -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch from hyperion.hyp_defs import config_logger, set_float_cpu -# from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy diff --git a/hyperion/data_prep/__init__.py b/hyperion/data_prep/__init__.py new file mode 100644 index 00000000..7caae8c4 --- /dev/null +++ b/hyperion/data_prep/__init__.py @@ -0,0 +1,8 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +# from .data_prep import data_prep_registry +from .data_prep import DataPrep +from .voxceleb2 import VoxCeleb2DataPrep diff --git a/hyperion/data_prep/data_prep.py b/hyperion/data_prep/data_prep.py new file mode 100644 index 00000000..966adeef --- /dev/null +++ b/hyperion/data_prep/data_prep.py @@ -0,0 +1,56 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ActionYesNo +from pathlib import Path + + +class DataPrep: + """Base class for data preparation classes. + + Attributes: + corpus_dir: input data directory + output_dir: output data directory + use_kaldi_ids: puts speaker-id in front of segment id like kaldi + target_sample_freq: target sampling frequency to convert the audios to. 
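[Editor's note] prepare_data.py above builds one CLI subcommand per entry in DataPrep.registry and dispatches on args.subcommand. A self-contained sketch of that jsonargparse pattern with a toy registry (the real flags come from each class's add_class_args):

from jsonargparse import ArgumentParser

registry = {"voxceleb2": object}        # stands in for DataPrep.registry

parser = ArgumentParser()
subcommands = parser.add_subcommands()
for name, cls in registry.items():
    sub = ArgumentParser()
    sub.add_argument("--corpus-dir", required=True)
    subcommands.add_subcommand(name, sub)

args = parser.parse_args(["voxceleb2", "--corpus-dir", "/data/vox2"])
print(args.subcommand)                  # "voxceleb2": selects the class to run
print(args.voxceleb2.corpus_dir)        # nested namespace, cf. namespace_to_dict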
+ """ + + registry = {} + + def __init__(self, corpus_dir, output_dir, use_kaldi_ids, target_sample_freq): + self.corpus_dir = Path(corpus_dir) + self.output_dir = Path(output_dir) + self.use_kaldi_ids = use_kaldi_ids + self.target_sample_freq = target_sample_freq + + self.output_dir.mkdir(exist_ok=True, parents=True) + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + cls.registry[cls.dataset_name()] = cls + + @staticmethod + def dataset_name(): + raise NotImplementedError() + + @staticmethod + def add_class_args(parser): + parser.add_argument( + "--corpus-dir", required=True, help="""input data directory""", + ) + parser.add_argument( + "--output-dir", required=True, help="""output data directory""", + ) + parser.add_argument( + "--use-kaldi-ids", + default=False, + action=ActionYesNo, + help="""put speaker-id in front of segment id like kaldi""", + ) + parser.add_argument( + "--target-sample-freq", + default=None, + type=int, + help="""target sampling frequency to convert the audios to""", + ) diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py new file mode 100644 index 00000000..25692349 --- /dev/null +++ b/hyperion/data_prep/voxceleb2.py @@ -0,0 +1,169 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ActionYesNo +from pathlib import Path +import re + +import pandas as pd +import numpy as np + +from ..utils.misc import urlretrieve_progress +from ..utils import RecordingSet, SegmentSet, ClassInfo +from .data_prep import DataPrep + + +class VoxCeleb2DataPrep(DataPrep): + """Class for preparing VoxCeleb2 database into tables + + Attributes: + corpus_dir: input data directory + subset: subset of the data dev or test + cat_videos: concatenate utterances from the same video. + output_dir: output data directory + use_kaldi_ids: puts speaker-id in front of segment id like kaldi + target_sample_freq: target sampling frequency to convert the audios to. 
+ """ + + def __init__( + self, + corpus_dir, + subset, + cat_videos, + output_dir, + use_kaldi_ids, + target_sample_freq, + ): + super().__init__(corpus_dir, output_dir, use_kaldi_ids, target_sample_freq) + self.subset = subset + self.cat_videos = cat_videos + + @staticmethod + def dataset_name(): + return "voxceleb2" + + @staticmethod + def add_class_args(parser): + DataPrep.add_class_args(parser) + parser.add_argument( + "--subset", + default="dev", + choices=["dev", "test"], + help="""vox2 subset in [dev, test]""", + ) + parser.add_argument( + "--cat-videos", + default=False, + action=ActionYesNo, + help="""concatenate utterances from the same video.""", + ) + + def _get_metadata(self): + file_name = "vox2_meta.csv" + file_path = self.corpus_dir / file_name + if not file_path.exists(): + file_path = self.output_dir / file_name + if not file_path.exists(): + url = "https://www.openslr.org/resources/49/vox2_meta.csv" + file_path, _ = urlretrieve_progress(url, file_path, desc=file_name) + + df_meta = pd.read_csv(file_path, sep="\t") + print(df_meta.head()) + df_meta.set_index("VoxCeleb2 ID") + return df_meta + + def _get_langs_est(self): + file_name = "lang_vox2_final.csv" + file_path = self.corpus_dir / file_name + if not file_path.exists(): + file_path = self.output_dir / file_name + if not file_path.exists(): + url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox2_final.csv" + file_path, _ = urlretrieve_progress(url, file_path, desc=file_name) + + df_lang = pd.read_csv(file_path, sep=",") + + def get_video(x): + x = re.sub("/.*.wav$", "", x) + x = re.sub("^.*/", "", x) + return x + + df_lang["video"] = df_lang["filename"].apply(get_video) + df_lang["filename"].drop(["filename"], axis=1, inplace=True) + df_lang.drop_duplicates(inplace=True) + df_lang.set_index("video") + return df_lang + + def prepare(self): + df_meta = self._get_metadata() + df_lang = self._get_langs_est() + rec_dir = self.corpus_dir / self.subset + rec_files = list(rec_dir.glob("**/*.m4a")) + speakers = [f.parents[1].name for f in rec_files] + video_ids = [f.parent.name for f in rec_files] + if self.concat_videos: + lists_cat_dir = self.output_dir / "lists_cat" + lists_cat_dir.mkdir(exist_ok=True, parents=True) + uniq_video_ids, uniq_video_idx, video_idx = np.unique( + video_ids, return_index=True, return_inverse=True + ) + rec_ids = uniq_video_ids + speakers = speakers[uniq_video_idx] + if self.use_kaldi_ids: + rec_ids = [f"{s}-{v}" for s, v in zip(speakers, uniq_video_ids)] + else: + rec_ids = uniq_video_ids + + file_paths = [] + for i, video_id in enumerate(uniq_video_ids): + list_file = lists_cat_dir / f"{video_id}.txt" + with open(list_file, "w") as fw: + rec_mask = video_idx == i + recs_i = rec_files[rec_mask] + for rec in recs_i: + fw.write(f"{rec}\n") + + file_path = f"ffmpeg -v 8 -f concat -safe 0 -i {list_file} -f wav -acodec pcm_s16le -|" + file_paths.append(file_path) + + video_ids = uniq_video_ids + + else: + file_names = [f.name for f in rec_files] + if self.use_kaldi_ids: + rec_ids = [ + f"{s}-{v}-{f}" for s, v, f in zip(speakers, video_ids, file_names) + ] + else: + rec_ids = [f"{v}-{f}" for v, f in zip(video_ids, file_names)] + + file_paths = [] + for rec_file in rec_files: + file_path = f"ffmpeg -v 8 -i {rec_file} -f wav -acodec pcm_s16le - |" + file_paths.append(file_path) + + recs = pd.DataFrame({"id": rec_ids, "file_path": file_paths}) + recs = RecordingSet(recs) + segments = pd.DataFrame( + { + "id": rec_ids, + "video_ids": video_ids, + "speaker": speakers, + 
"gender": df_meta.loc[speakers, "Gender"], + } + ) + segments = SegmentSet(segments) + uniq_speakers = np.unique(speakers) + speakers = pd.DataFrame( + { + "id": uniq_speakers, + "vgg_id": df_meta.loc[uniq_speakers, "VGGFace2 ID"], + "gender": df_meta.loc[uniq_speakers, "Gender"], + } + ) + speakers = ClassInfo(speakers) + + print(recs) + print(segments) + print(speakers) diff --git a/hyperion/torch/adv_attacks/art_attack_factory.py b/hyperion/torch/adv_attacks/art_attack_factory.py index ba103acf..801ba948 100644 --- a/hyperion/torch/adv_attacks/art_attack_factory.py +++ b/hyperion/torch/adv_attacks/art_attack_factory.py @@ -4,13 +4,24 @@ """ import numpy as np -from jsonargparse import ActionParser, ArgumentParser +from jsonargparse import ActionParser, ArgumentParser, ActionYesNo try: from art.attacks import evasion as attacks except ImportError: pass +from ...utils.misc import filter_func_args + + +def make_4d_hook(func): + def wrapper(x, *args, **kwargs): + x = x[None, None] + y = func(x, *args, **kwargs) + return y[0, 0] + + return wrapper + class ARTAttackFactory(object): @staticmethod @@ -28,11 +39,10 @@ def create( num_random_init=0, minimal=False, random_eps=False, - min_eps=None, + min_eps=1e-6, beta=0.001, theta=0.1, gamma=1.0, - etha=0.01, confidence=0.0, lr=1e-2, lr_decay=0.5, @@ -42,9 +52,12 @@ def create( max_iter=10, overshoot=1.1, num_grads=10, - c=1e-3, max_halving=5, max_doubling=5, + tau_decr_factor=0.9, + initial_c=1e-5, + largest_c=20.0, + c_incr_factor=2.0, decision_rule="EN", init_eval=100, max_eval=10000, @@ -53,31 +66,44 @@ def create( use_importance=False, abort_early=True, th=None, + es: int = 0, sigma=0.5, lambda_tv=0.3, - labmda_c=1.0, + lambda_c=1.0, lambda_s=0.5, reg=3000, kernel_size=5, eps_factor=1.1, eps_iter=10, + p_wassertein=2, conj_sinkhorn_iter=400, proj_sinkhorn_iter=400, + sub_dim: int = 10, + bin_search_tol: float = 0.1, + lambda_geoda: float = 0.6, + sigma_geoda: float = 0.0002, + lambda_fadv=0.0, + layers_fadv=[1], + thr_lowpro: float = 0.5, + lambda_lowpro: float = 1.5, + eta_lowpro: float = 0.2, + eta_lowpro_decay: float = 0.98, + eta_lowpro_min: float = 1e-7, + eta_newton: float = 0.01, targeted=False, num_samples=1, eps_scale=1, batch_size=1, ): - eps = eps * eps_scale - eps_step = eps_step * eps_scale - if min_eps is not None: + if attack_type not in ["feature-adv"]: + eps = eps * eps_scale + eps_step = eps_step * eps_scale min_eps = min_eps * eps_scale + delta = delta * eps_scale - attack_set = set( - ["fgm", "pgd", "auto-pgd", "boundary", "cw-linf", "wasserstein"] - ) - if attack_type in attack_set: + attack_l12 = set(["fgm", "pgd", "auto-pgd", "wasserstein"]) + if attack_type in attack_l12: if norm == 1: eps = eps * num_samples eps_step = eps_step * num_samples @@ -98,14 +124,14 @@ def create( epsilon=eps, step_adapt=step_adapt, max_iter=max_iter, - num_trials=num_trials, + num_trial=num_trial, sample_size=sample_size, init_size=init_size, min_epsilon=min_eps, ) - if attack_type == "hop-skin-jump": - return attacks.HopSkinJump( + if attack_type == "hop-skip-jump": + return attacks.HopSkipJump( model, targeted=targeted, norm=norm, @@ -132,7 +158,7 @@ def create( ) if attack_type == "deepfool": - attacks.DeepFool( + return attacks.DeepFool( model, max_iter=max_iter, epsilon=eps, @@ -141,7 +167,7 @@ def create( ) if attack_type == "elasticnet": - attacks.ElasticNet( + return attacks.ElasticNet( model, confidence=confidence, targeted=targeted, @@ -149,13 +175,25 @@ def create( binary_search_steps=binary_search_steps, max_iter=max_iter, 
beta=beta, - initial_const=c, + initial_const=initial_c, batch_size=batch_size, decision_rule=decision_rule, ) + if attack_type == "feature-adv": + return attacks.FeatureAdversariesPyTorch( + model, + delta=delta, + lambda_=lambda_fadv, + layer=tuple(layers_fadv), + max_iter=max_iter, + batch_size=batch_size, + step_size=eps_step, + random_start=num_random_init > 0, + ) + if attack_type == "threshold": - attacks.ThresholdAttack(model, th=th, es=es, targeted=targeted) + return attacks.ThresholdAttack(model, th=th, es=es, targeted=targeted) if attack_type == "fgm": return attacks.FastGradientMethod( @@ -193,15 +231,48 @@ def create( ) if attack_type == "auto-pgd": - return attacks.AutoProjectedGradientDescent( + if len(model.input_shape) == 1: + # autopgd only works with image kind shape + model._input_shape = (1, 1, model.input_shape[0]) + attack = attacks.AutoProjectedGradientDescent( model, norm=norm, eps=eps, eps_step=eps_step, max_iter=max_iter, targeted=targeted, - nb_random_init=num_random_init, - random_eps=random_eps, + nb_random_init=max(1, num_random_init), + batch_size=batch_size, + ) + attack.generate = make_4d_hook(attack.generate) + return attack + + if attack_type == "auto-cgd": + if len(model.input_shape) == 1: + # autopgd only works with image kind shape + model._input_shape = (1, 1, model.input_shape[0]) + attack = attacks.AutoConjugateGradient( + model, + norm=norm, + eps=eps, + eps_step=eps_step, + max_iter=max_iter, + targeted=targeted, + nb_random_init=max(1, num_random_init), + batch_size=batch_size, + ) + attack.generate = make_4d_hook(attack.generate) + return attack + + if attack_type == "geoda": + return attacks.GeoDA( + model, + norm=norm, + sub_dim=sub_dim, + max_iter=max_iter, + bin_search_tol=bin_search_tol, + lambda_param=lambda_geoda, + sigma=sigma_geoda, batch_size=batch_size, ) @@ -210,14 +281,21 @@ def create( model, theta=theta, gamma=gamma, batch_size=batch_size ) - if attack_type == "newtonfool": - return attacks.NewtonFool( - model, eta=eta, max_iter=max_iter, batch_size=batch_size + if attack_type == "low-pro-fool": + return attacks.LowProFool( + model, + n_steps=max_iter, + threshold=thr_lowpro, + lambd=lambda_lowpro, + eta=eta_lowpro, + eta_decay=eta_lowpro_decay, + eta_min=eta_lowpro_min, + norm=norm, ) - if attack_type == "threshold": + if attack_type == "newtonfool": return attacks.NewtonFool( - model, eta=eta, max_iter=max_iter, batch_size=batch_size + model, eta=eta_newton, max_iter=max_iter, batch_size=batch_size ) if attack_type == "cw-l2": @@ -227,8 +305,8 @@ def create( learning_rate=lr, binary_search_steps=binary_search_steps, max_iter=max_iter, - initial_const=c, targeted=targeted, + initial_const=initial_c, max_halving=max_halving, max_doubling=max_doubling, batch_size=batch_size, @@ -241,19 +319,20 @@ def create( learning_rate=lr, max_iter=max_iter, targeted=targeted, - max_halving=max_halving, - max_doubling=max_doubling, - eps=eps, + decrease_factor=tau_decr_factor, + initial_const=initial_c, + largest_const=largest_c, + const_factor=c_incr_factor, batch_size=batch_size, ) if attack_type == "zoo": - return attacks.ZooMethod( + return attacks.ZooAttack( model, confidence, learning_rate=lr, max_iter=max_iter, - initial_const=c, + initial_const=initial_c, targeted=targeted, binary_search_steps=binary_search_steps, abort_early=abort_early, @@ -265,22 +344,33 @@ def create( ) if attack_type == "shadow": - return attacks.ShadowAttack( + if len(model.input_shape) == 1: + # autopgd only works with image kind shape + model._input_shape = (1, 
1, model.input_shape[0]) + + attack = attacks.ShadowAttack( model, sigma=sigma, - num_steps=num_iters, + nb_steps=max_iter, learning_rate=lr, lambda_tv=lambda_tv, lambda_c=lambda_c, lambda_s=lambda_s, - batch_norm=batch_norm, + batch_size=batch_size, targeted=targeted, ) + attack.generate = make_4d_hook(attack.generate) + return attack if attack_type == "wasserstein": - return attacks.Wasserstein( + if len(model.input_shape) == 1: + # autopgd only works with image kind shape + model._input_shape = (1, 1, model.input_shape[0]) + + attack = attacks.Wasserstein( model, targeted=targeted, + p=p_wassertein, regularization=reg, kernel_size=kernel_size, eps=eps, @@ -292,6 +382,8 @@ def create( projected_sinkhorn_max_iter=proj_sinkhorn_iter, batch_size=batch_size, ) + attack.generate = make_4d_hook(attack.generate) + return attack raise Exception("%s is not a valid attack type" % (attack_type)) @@ -307,59 +399,7 @@ def filter_args(**kwargs): else: kwargs["norm"] = int(kwargs["norm"]) - valid_args = ( - "attack_type", - "eps", - "delta", - "step_adapt", - "num_trial", - "sample_size", - "init_size", - "norm", - "eps_step", - "num_random_init", - "minimal", - "random_eps", - "min_eps", - "beta", - "theta", - "gamma", - "etha", - "confidence", - "decision_rule", - "lr", - "lr_decay", - "lr_num_decay", - "momentum", - "binary_search_steps", - "max_iter", - "init_eval", - "max_eval", - "overshoot", - "num_grads", - "c", - "max_halving", - "max_doubling", - "variable_h", - "abort_early", - "num_parallel", - "use_importance", - "th", - "sigma", - "lambda_tv", - "labmda_c", - "lambda_s", - "reg", - "kernel_size", - "eps_factor", - "eps_iter", - "conj_sinkhorn_iter", - "proj_sinkhorn_iter", - "targeted", - ) - - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - + args = filter_func_args(ARTAttackFactory.create, kwargs) return args @staticmethod @@ -371,7 +411,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--attack-type", type=str.lower, - default="fgsm", + default="fgm", choices=[ "boundary", "brendel", @@ -380,12 +420,15 @@ def add_class_args(parser, prefix=None): "bim", "pgd", "auto-pgd", + "auto-cgd", + "feature-adv", + "low-pro-fool", "jsma", "newtonfool", "cw-l2", "cw-linf", "elasticnet", - "hop-skin-jump", + "hop-skip-jump", "zoo", "threshold", "shadow", @@ -571,7 +614,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--min-eps", - default=None, + default=1e-6, type=float, help=("Stop attack if perturbation is smaller than min_eps."), ) @@ -614,12 +657,33 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - "--c", + "--initial-c", default=1e-2, type=float, help=("Initial weight of constraint function f in carlini-wagner attack"), ) + parser.add_argument( + "--largest-c", + default=20.0, + type=float, + help=("largest weight of constraint function f in carlini-wagner attack"), + ) + + parser.add_argument( + "--c-incr-factor", + default=2, + type=float, + help=("factor to increment c in carline-wagner-l0/inf attack"), + ) + + parser.add_argument( + "--tau-decr-factor", + default=0.9, + type=float, + help=("factor to reduce tau in carline-wagner-linf attack"), + ) + parser.add_argument( "--max-halving", default=5, @@ -635,10 +699,10 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - "--no-abort", - default=False, - action="store_true", - help=("do not abort early in optimizer iterations"), + "--abort-early", + default=True, + action=ActionYesNo, + help=("abort early in optimizer iterations"), ) parser.add_argument( 
@@ -670,6 +734,14 @@ def add_class_args(parser, prefix=None): "Threshold for threshold attack, None indicates finding the minimum threshold" ), ) + parser.add_argument( + "--es", + default=0, + type=int, + help=( + "Indicates whether the attack uses CMAES (0) or DE (1) as Evolutionary Strategy" + ), + ) parser.add_argument( "--sigma", @@ -704,6 +776,19 @@ def add_class_args(parser, prefix=None): "Scalar penalty weight for similarity of color channels in perturbation" ), ) + parser.add_argument( + "--lambda-fadv", + default=0.0, + type=float, + help=("Regularization parameter of the L-inf soft constraint"), + ) + parser.add_argument( + "--layers-fadv", + default=[1], + type=int, + nargs="+", + help=("indices of the representation layers"), + ) parser.add_argument( "--reg", @@ -730,6 +815,12 @@ def add_class_args(parser, prefix=None): type=int, help=("Number of iterations to increase the epsilon."), ) + parser.add_argument( + "--p-wassertein", + default=2, + type=int, + help=("Lp distance for the Wasserstein distance"), + ) parser.add_argument( "--conj-sinkhorn-iter", default=400, @@ -743,6 +834,65 @@ def add_class_args(parser, prefix=None): help=("maximum number of iterations for the projected sinkhorn optimizer"), ) + parser.add_argument( + "--thr-lowpro", + type=float, + default=0.5, + help="""Lowest prediction probability of a valid adversary for low-pro-fool""", + ) + parser.add_argument( + "--lambda-lowpro", + type=float, + default=1.5, + help="""Amount of lp-norm impact on objective function for low-pro-fool""", + ) + parser.add_argument( + "--eta-lowpro", + type=float, + default=0.2, + help="""Rate of updating the perturbation vectors for low-pro-fool""", + ) + parser.add_argument( + "--eta-lowpro-decay", + type=float, + default=0.98, + help="""Step-by-step decrease of eta for low-pro-fool""", + ) + parser.add_argument( + "--eta-lowpro-min", type=float, default=1e-7, help="""Minimal eta value""" + ) + parser.add_argument( + "--eta-newton", type=float, default=0.01, help="""eta for newtonfool""" + ) + # parser.add_argument( + # "--sub-dim", + # default=10, + # type=int, + # help="Dimensionality of 2D frequency space (DCT).", + # ) + + # parser.add_argument( + # "--bin-search-tol", + # default=0.1, + # type=float, + # help="""Maximum remaining L2 perturbation defining binary search + # convergence""", + # ) + # parser.add_argument( + # "--lambda-geoda", + # default=0.6, + # type=float, + # help="""The lambda of equation 19 with lambda_param=0 corresponding to a + # single iteration and lambda_param=1 to a uniform distribution of + # iterations per step.""", + # ) + # parser.add_argument( + # "--sigma-geoda", + # default=0.0002, + # type=float, + # help="""Variance of the Gaussian perturbation.""", + # ) parser.add_argument( "--targeted", default=False, diff --git a/hyperion/torch/adv_attacks/attack_factory.py b/hyperion/torch/adv_attacks/attack_factory.py index 5d53f6bc..ca89a794 100644 --- a/hyperion/torch/adv_attacks/attack_factory.py +++ b/hyperion/torch/adv_attacks/attack_factory.py @@ -30,7 +30,7 @@ def create( binary_search_steps=9, max_iter=10, abort_early=True, - c=1e-3, + initial_c=1e-3, reduce_c=False, c_incr_factor=2, tau_decr_factor=0.9, @@ -47,6 +47,7 @@ def create( eps = eps * eps_scale alpha = alpha * eps_scale + norm = float(norm) if attack_type == "fgsm": return FGSMAttack( @@ -98,7 +99,7 @@ def create( binary_search_steps, max_iter, abort_early, - c, + initial_c, norm_time=norm_time, time_dim=time_dim, use_snr=use_snr, @@ -114,7 +115,7 @@ def create( lr, max_iter,
abort_early, - c, + initial_c, reduce_c, c_incr_factor, indep_channels, @@ -130,7 +131,7 @@ def create( lr, max_iter, abort_early, - c, + initial_c, reduce_c, c_incr_factor, tau_decr_factor, @@ -219,9 +220,8 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--norm", - type=float, - default=float("inf"), - choices=[float("inf"), 1, 2], + default="inf", + choices=["inf", "1", "2"], help=("Attack perturbation norm"), ) @@ -284,7 +284,7 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - "--c", + "--initial-c", default=1e-2, type=float, help=( diff --git a/hyperion/utils/misc.py b/hyperion/utils/misc.py index 6813c6b7..369962fd 100644 --- a/hyperion/utils/misc.py +++ b/hyperion/utils/misc.py @@ -115,3 +115,53 @@ def filter_func_args(func, kwargs, skip=set()): args = sig.bind_partial(**my_kwargs).arguments return args + + +from tqdm import tqdm + + +def tqdm_urlretrieve_hook(t): + """Wraps tqdm instance. + Don't forget to close() or __exit__() + the tqdm instance once you're done with it (easiest using `with` syntax). + Example + ------- + >>> from urllib.request import urlretrieve + >>> with tqdm(...) as t: + ... reporthook = tqdm_urlretrieve_hook(t) + ... urlretrieve(..., reporthook=reporthook) + Source: https://github.com/tqdm/tqdm/blob/master/examples/tqdm_wget.py + """ + last_b = [0] + + def update_to(b=1, bsize=1, tsize=None): + """ + b : int, optional + Number of blocks transferred so far [default: 1]. + bsize : int, optional + Size of each block (in tqdm units) [default: 1]. + tsize : int, optional + Total size (in tqdm units). If [default: None] or -1, + remains unchanged. + """ + if tsize not in (None, -1): + t.total = tsize + displayed = t.update((b - last_b[0]) * bsize) + last_b[0] = b + return displayed + + return update_to + + +def urlretrieve_progress(url, filename=None, data=None, desc=None): + """ + Works exactly like urllib.request.urlretrieve, but attaches a tqdm hook to display + a progress bar of the download. + Use "desc" argument to display a user-readable string that informs what is being downloaded. 
+ Taken from lhotse: https://github.com/lhotse-speech/lhotse/blob/master/lhotse/utils.py + """ + from urllib.request import urlretrieve + + with tqdm(unit="B", unit_scale=True, unit_divisor=1024, miniters=1, desc=desc) as t: + reporthook = tqdm_urlretrieve_hook(t) + return urlretrieve(url=url, filename=filename, reporthook=reporthook, data=data) From 947047d3b7641125d4c0ee527db7e51d48cd8d8d Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 14 Apr 2023 11:05:21 -0400 Subject: [PATCH 093/154] deleted adv.v1 and vae.v1 recipes --- egs/voxceleb/adv.v1/README.md | 164 ----- egs/voxceleb/adv.v1/cmd.sh | 25 - egs/voxceleb/adv.v1/datapath.sh | 21 - egs/voxceleb/adv.v1/default_config.sh | 1 - ...g_victim_lresnet34_transfer_resetdnn.v1.sh | 32 - ...fig_victim_resnet34_transfer_lresnet.v1.sh | 61 -- ...ig_victim_resnet34_transfer_resetdnn.v1.sh | 94 --- egs/voxceleb/adv.v1/hyp_utils | 1 - egs/voxceleb/adv.v1/path.sh | 6 - egs/voxceleb/adv.v1/run_001_prepare_data.sh | 26 - egs/voxceleb/adv.v1/run_002_compute_evad.sh | 56 -- .../adv.v1/run_003_prepare_noises_rirs.sh | 67 -- .../run_004_prepare_victim_xvec_train_data.sh | 42 -- ...un_006_prepare_transfer_xvec_train_data.sh | 48 -- .../run_008_extract_xvectors_victim_model.sh | 37 -- ...run_031_extract_xvectors_transfer_model.sh | 41 -- .../adv.v1/run_040_eval_be_victim_model.sh | 57 -- .../adv.v1/run_041_eval_be_transfer_model.sh | 58 -- .../adv.v1/run_042_eval_victim_from_wav.sh | 54 -- .../adv.v1/run_043_eval_whitebox_attacks.sh | 346 ---------- .../run_044_eval_transfer_blackbox_attacks.sh | 481 -------------- ...hitebox_attacks_with_randsmooth_defense.sh | 544 --------------- .../run_053_eval_art_whitebox_attacks.sh | 536 --------------- ..._054_eval_art_transfer_blackbox_attacks.sh | 626 ------------------ egs/voxceleb/adv.v1/steps | 1 - egs/voxceleb/adv.v1/steps_adv | 1 - egs/voxceleb/adv.v1/steps_be | 1 - egs/voxceleb/adv.v1/steps_fe | 1 - egs/voxceleb/adv.v1/steps_pyfe | 1 - egs/voxceleb/adv.v1/steps_xvec | 1 - egs/voxceleb/adv.v1/utils | 1 - egs/voxceleb/vae.v1/README.md | 89 --- egs/voxceleb/vae.v1/cmd.sh | 25 - egs/voxceleb/vae.v1/conf | 1 - egs/voxceleb/vae.v1/datapath.sh | 21 - egs/voxceleb/vae.v1/default_config.sh | 1 - ...e_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh | 30 - ..._resnet2d_b16c64_z80_c0.8.opt.lr0.01.v1.sh | 29 - ...ig_vae_dc1d_b4d256_z80_c8.opt.lr0.01.v1.sh | 29 - ...ig_vae_dc1d_b9d256_z80_c8.opt.lr0.01.v1.sh | 30 - ...g_vae_dc2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh | 30 - ...g_vae_dc2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh | 30 - ...e_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh | 30 - ...ae_resnet1d_b4d256_z80_c8.opt.lr0.01.v1.sh | 30 - ...ae_resnet1d_b8d256_z80_c8.opt.lr0.01.v1.sh | 30 - ...e_resnet2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh | 31 - ...e_resnet2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh | 31 - ...q_z512cb512x8_c36_radam.opt.lr0.0025.v6.sh | 45 -- ...meansvq_z256cb512x16_c142.opt.lr0.01.v1.sh | 33 - ...kmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh | 33 - ...kmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh | 34 - ...kmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh | 33 - ...meansvq_z256cb512x16_c142.opt.lr0.01.v1.sh | 33 - ...svq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | 43 -- ...vq_z512cb512x8_c36_radam.opt.lr0.005.v6.sh | 43 -- ...svq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | 43 -- ...akmeansvq_z256cb512_c2275.opt.lr0.01.v1.sh | 33 - ...meansvq_z256cb512x128_c18.opt.lr0.01.v1.sh | 33 - ...meansvq_z256cb512x16_c142.opt.lr0.01.v1.sh | 33 - ...kmeansvq_z256cb512x256_c9.opt.lr0.01.v1.sh | 33 - ...meansvq_z256cb512x2_c1138.opt.lr0.01.v1.sh | 33 - 
...kmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh | 33 - ...kmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh | 33 - ..._z256cb512x4_c569_predvar.opt.lr0.01.v1.sh | 34 - ...kmeansvq_z256cb512x64_c36.opt.lr0.01.v1.sh | 33 - ...kmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh | 33 - ...akmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh | 46 -- ...svq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | 42 -- ...akmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh | 45 -- ...svq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | 45 -- ...svq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | 43 -- egs/voxceleb/vae.v1/hyp_utils | 1 - egs/voxceleb/vae.v1/local | 1 - egs/voxceleb/vae.v1/path.sh | 5 - egs/voxceleb/vae.v1/run_001_prepare_data.sh | 28 - egs/voxceleb/vae.v1/run_002_compute_evad.sh | 57 -- egs/voxceleb/vae.v1/run_003_compute_fbank.sh | 70 -- .../vae.v1/run_004_prepare_augment.sh | 123 ---- .../vae.v1/run_005_compute_fbank_augment.sh | 57 -- .../run_010_prepare_gen_model_train_data.sh | 45 -- egs/voxceleb/vae.v1/run_011_train_model.sh | 137 ---- egs/voxceleb/vae.v1/run_012_eval_recons.sh | 44 -- .../vae.v1/run_013_eval_xvector_asv.sh | 63 -- egs/voxceleb/vae.v1/steps | 1 - egs/voxceleb/vae.v1/steps_be | 1 - egs/voxceleb/vae.v1/steps_fe | 1 - egs/voxceleb/vae.v1/steps_pyfe | 1 - egs/voxceleb/vae.v1/steps_xvec | 1 - egs/voxceleb/vae.v1/utils | 1 - 89 files changed, 5427 deletions(-) delete mode 100644 egs/voxceleb/adv.v1/README.md delete mode 100755 egs/voxceleb/adv.v1/cmd.sh delete mode 100644 egs/voxceleb/adv.v1/datapath.sh delete mode 120000 egs/voxceleb/adv.v1/default_config.sh delete mode 100644 egs/voxceleb/adv.v1/global_conf/config_victim_lresnet34_transfer_resetdnn.v1.sh delete mode 100644 egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_lresnet.v1.sh delete mode 100644 egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh delete mode 120000 egs/voxceleb/adv.v1/hyp_utils delete mode 100755 egs/voxceleb/adv.v1/path.sh delete mode 100755 egs/voxceleb/adv.v1/run_001_prepare_data.sh delete mode 100755 egs/voxceleb/adv.v1/run_002_compute_evad.sh delete mode 100755 egs/voxceleb/adv.v1/run_003_prepare_noises_rirs.sh delete mode 100755 egs/voxceleb/adv.v1/run_004_prepare_victim_xvec_train_data.sh delete mode 100755 egs/voxceleb/adv.v1/run_006_prepare_transfer_xvec_train_data.sh delete mode 100755 egs/voxceleb/adv.v1/run_008_extract_xvectors_victim_model.sh delete mode 100755 egs/voxceleb/adv.v1/run_031_extract_xvectors_transfer_model.sh delete mode 100755 egs/voxceleb/adv.v1/run_040_eval_be_victim_model.sh delete mode 100755 egs/voxceleb/adv.v1/run_041_eval_be_transfer_model.sh delete mode 100755 egs/voxceleb/adv.v1/run_042_eval_victim_from_wav.sh delete mode 100755 egs/voxceleb/adv.v1/run_043_eval_whitebox_attacks.sh delete mode 100755 egs/voxceleb/adv.v1/run_044_eval_transfer_blackbox_attacks.sh delete mode 100755 egs/voxceleb/adv.v1/run_045_eval_whitebox_attacks_with_randsmooth_defense.sh delete mode 100755 egs/voxceleb/adv.v1/run_053_eval_art_whitebox_attacks.sh delete mode 100755 egs/voxceleb/adv.v1/run_054_eval_art_transfer_blackbox_attacks.sh delete mode 120000 egs/voxceleb/adv.v1/steps delete mode 120000 egs/voxceleb/adv.v1/steps_adv delete mode 120000 egs/voxceleb/adv.v1/steps_be delete mode 120000 egs/voxceleb/adv.v1/steps_fe delete mode 120000 egs/voxceleb/adv.v1/steps_pyfe delete mode 120000 egs/voxceleb/adv.v1/steps_xvec delete mode 120000 egs/voxceleb/adv.v1/utils delete mode 100644 egs/voxceleb/vae.v1/README.md delete mode 100755 egs/voxceleb/vae.v1/cmd.sh delete mode 120000 
egs/voxceleb/vae.v1/conf delete mode 100644 egs/voxceleb/vae.v1/datapath.sh delete mode 120000 egs/voxceleb/vae.v1/default_config.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_dvae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_dvae_resnet2d_b16c64_z80_c0.8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_dc1d_b4d256_z80_c8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_dc1d_b9d256_z80_c8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_dc2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_dc2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b4d256_z80_c8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b8d256_z80_c8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_resnet2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vae_resnet2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqdvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.0025.v6.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256swish_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqdvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqdvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.005.v6.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512_c2275.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x128_c18.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x256_c9.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x2_c1138.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569_predvar.opt.lr0.01.v1.sh delete mode 100644 
egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x64_c36.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh delete mode 100644 egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh delete mode 120000 egs/voxceleb/vae.v1/hyp_utils delete mode 120000 egs/voxceleb/vae.v1/local delete mode 100755 egs/voxceleb/vae.v1/path.sh delete mode 100755 egs/voxceleb/vae.v1/run_001_prepare_data.sh delete mode 100755 egs/voxceleb/vae.v1/run_002_compute_evad.sh delete mode 100755 egs/voxceleb/vae.v1/run_003_compute_fbank.sh delete mode 100755 egs/voxceleb/vae.v1/run_004_prepare_augment.sh delete mode 100755 egs/voxceleb/vae.v1/run_005_compute_fbank_augment.sh delete mode 100755 egs/voxceleb/vae.v1/run_010_prepare_gen_model_train_data.sh delete mode 100755 egs/voxceleb/vae.v1/run_011_train_model.sh delete mode 100755 egs/voxceleb/vae.v1/run_012_eval_recons.sh delete mode 100755 egs/voxceleb/vae.v1/run_013_eval_xvector_asv.sh delete mode 120000 egs/voxceleb/vae.v1/steps delete mode 120000 egs/voxceleb/vae.v1/steps_be delete mode 120000 egs/voxceleb/vae.v1/steps_fe delete mode 120000 egs/voxceleb/vae.v1/steps_pyfe delete mode 120000 egs/voxceleb/vae.v1/steps_xvec delete mode 120000 egs/voxceleb/vae.v1/utils diff --git a/egs/voxceleb/adv.v1/README.md b/egs/voxceleb/adv.v1/README.md deleted file mode 100644 index cace8c2c..00000000 --- a/egs/voxceleb/adv.v1/README.md +++ /dev/null @@ -1,164 +0,0 @@ -# VoxCeleb Adversarial Attacks Version 1 - -Last update 2021/04/22 - -Recipe to evaluate Adversarial Attacks on x-Vector Speaker Verification Systems - -## Threat Model - -Speaker verification pipeline where: - - Enrollment side is not under attack, x-vectors for enrollment utterances are - pre-computed and stored on disk - - Test side is under Adversarial Attacks. - The attack adds an imperceptible perturbation to the - test waveform to make the system: - - Classify target trials as non-targets - - Classify non-target trials as targets - -As attacks happen in the waveform domain, test x-vectors cannot be precomputed and -need to be recomputed for each trial. -Also, the speaker verification pipeline needs to be fully differentiable from wave to score, -so the attack algorithm can optimize the perturbation noise. - -However, to train the x-vector network, this recipe computes acoustic features and speech augmentations off-line. -See version adv.v1.1 for a newer recipe, which computes features -and augmentations on the fly. - -Two broad types of attacks: - - White-box: the attacker has access to the x-vector model under attack - - Transfer-based Black-box: the attacker doesn't have access to the x-vector model under attack (black-box model), - but has access to another x-vector model (white-box). Perturbation is obtained from the white-box model - and used to attack the black-box model.
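(Editor's illustration, not part of the original README.) In both settings the attack differentiates the cosine score with respect to the test waveform; a minimal per-trial FGSM-style sketch, assuming a differentiable PyTorch x-vector extractor `xvec_model` and a precomputed enrollment embedding `enroll_xvec` (both names are illustrative):

```python
import torch.nn.functional as F

def fgsm_on_cosine_score(xvec_model, enroll_xvec, test_wav, eps, is_target_trial):
    test_wav = test_wav.clone().requires_grad_(True)
    # score = cosine similarity between enrollment and test x-vectors
    score = F.cosine_similarity(enroll_xvec, xvec_model(test_wav), dim=-1)
    # push target-trial scores down and non-target-trial scores up
    loss = score if is_target_trial else -score
    loss.backward()
    return (test_wav - eps * test_wav.grad.sign()).detach()
```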
- -Multiple attack algorithms: FGSM, Iter-FGSM, PGD, Carlini-Wagner. - -## Citing - - If you use this recipe, please cite: -``` -@inproceedings{Villalba2020, -address = {Shanghai, China}, -author = {Villalba, Jes{\'{u}}s and Zhang, Yuekai and Dehak, Najim}, -booktitle = {Interspeech 2020}, -month = {sep}, -title = {{x-Vectors Meet Adversarial Attacks : Benchmarking Adversarial Robustness in Speaker Verification}}, -year = {2020} -} -``` - -## Training Data - - - x-Vector network is trained on Voxceleb2 dev + test with augmentations - - MUSAN noise - - RIR reverberation - -## Test Data - - - Test data is VoxCeleb 1 Original Clean trial list. - - We don't use the larger Entire and Hard lists because of the high computing cost - of these experiments. - -## Usage - - - Run the run_0*.sh scripts in sequence - - By default it will use ResNet34 as victim model and Residual E-TDNN as transfer model - - You can change that by modifying the configuration script. - - For example, to use LResNet34 as transfer model use `config_victim_resnet34_transfer_lresnet.v1.sh` - when calling each of the steps as -```bash -run_0*.sh --config-file global_conf/config_victim_resnet34_transfer_lresnet.v1.sh -``` - -## Recipe Steps: - - - `run_001_prepare_data.sh` - - Data preparation script to generate Kaldi style data directories for - - VoxCeleb2 train+test - - VoxCeleb1 Original eval sets - - - `run_002_compute_evad.sh` - - Computes Energy VAD for all datasets - - - `run_003_compute_fbank.sh` - - Computes log-filter-bank acoustic features for all datasets - - - `run_004_prepare_augment.sh` - - Prepares Kaldi style data directories for augmented training data with MUSAN noise and RIR reverberation. - - - `run_005_compute_fbank_augment.sh` - - Computes log-filter-banks for augmented datasets - - - `run_010_prepare_victim_xvec_train_data.sh` - - Prepares features to train the victim x-vector model - - Applies short-time mean normalization and removes silence frames - - Removes utterances shorter than 4 secs and speakers with fewer than 8 utterances. - - Creates training and validation lists for x-vector training - - - `run_011_train_victim_xvector.sh` - - Trains the victim x-vector network - - - `run_012_prepare_transfer_xvec_train_data.sh` - - Prepares features to train the transfer white-box x-vector model - - If training data for victim and transfer models is the same, it does nothing - - - `run_013_train_transfer_xvector.sh` - - Trains the transfer white-box x-vector network - - - `run_030_extract_xvectors_victim_model.sh` - - Extracts x-vectors for VoxCeleb1 test set using the victim model - - - `run_031_extract_xvectors_transfer_model.sh` - - Extracts x-vectors for VoxCeleb1 test set using the transfer model - - - `run_040_eval_be_victim_model.sh` - - Eval cosine scoring back-end without attack on victim model x-vectors - - Trains calibration for the victim model scores - - Results are left in `exp/scores/$nnet_name/cosine/voxceleb1_o_clean_results` - - - `run_041_eval_be_transfer_model.sh` - - Eval cosine scoring back-end without attack on transfer model x-vectors - - Trains calibration for the transfer model scores - - Results are left in `exp/scores/$transfer_nnet_name/cosine/voxceleb1_o_clean_results` - - - `run_042_eval_victim_from_wav.sh` - - Eval cosine scoring back-end without attack on victim model x-vectors - from the test waveform, computing features and x-vectors on the fly.
- - This script is just to check that we get the same result as in step 40. - - You don't need to run it. - - Results are left in `exp/scores/$nnet_name/cosine_from_wav/voxceleb1_o_clean_results` - - - `run_043_eval_whitebox_attacks.sh` - - Eval white box attacks implemented in the Hyperion toolkit: FGSM, Iter-FGSM, PGD, Carlini-Wagner - - Results are left in `exp/scores/$nnet_name/cosine_${attack_related_label}/voxceleb1_o_clean_results` - - When using option `--do-analysis true` it calculates curves: SNR vs EER, SNR vs actual DCF, Linf vs EER, Linf vs actual DCF - - Curves are left in `exp/scores/$nnet_name/cosine_${attack_related_label}_eall/` - - When using `--save-wav true`, it writes adversarial wavs of successful attacks to disk - - Wavs are saved to `exp/scores/$nnet_name/cosine_${attack_related_label}/wav` - - - `run_044_eval_transfer_blackbox_attacks.sh` - - Eval transfer black box attacks implemented in the Hyperion toolkit: FGSM, Iter-FGSM, PGD, Carlini-Wagner - - Results are left in `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_${attack_related_label}/voxceleb1_o_clean_results` - - When using option `--do-analysis true` it calculates curves: SNR vs EER, SNR vs actual DCF, Linf vs EER, Linf vs actual DCF - - Curves are left in `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_${attack_related_label}_eall/` - - When using `--save-wav true`, it writes adversarial wavs of successful attacks to disk - - Wavs are saved to `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_${attack_related_label}/wav` - - - `run_045_eval_whitebox_attacks_with_randsmooth_defense.sh` - - Eval white box attacks with Gaussian randomized smoothing defense. - - Results are left in `exp/scores/$nnet_name/cosine_${attack_related_label}_randsmooth${smooth_sigma}/voxceleb1_o_clean_results` - - - `run_053_eval_art_whitebox_attacks.sh` - - Eval white box attacks implemented in IBM's Adversarial Robustness Toolbox (ART): FGSM, Iter-FGSM, PGD, Carlini-Wagner - - Results are left in `exp/scores/$nnet_name/cosine_art_${attack_related_label}/voxceleb1_o_clean_results` - - When using option `--do-analysis true` it calculates curves: SNR vs EER, SNR vs actual DCF, Linf vs EER, Linf vs actual DCF - - Curves are left in `exp/scores/$nnet_name/cosine_art_${attack_related_label}_eall/` - - When using `--save-wav true`, it writes adversarial wavs of successful attacks to disk - - Wavs are saved to `exp/scores/$nnet_name/cosine_art_${attack_related_label}/wav` - - - `run_054_eval_art_transfer_blackbox_attacks.sh` - - Eval transfer black box attacks implemented in IBM's Adversarial Robustness Toolbox (ART): FGSM, Iter-FGSM, PGD, Carlini-Wagner - - Results are left in `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_art_${attack_related_label}/voxceleb1_o_clean_results` - - When using option `--do-analysis true` it calculates curves: SNR vs EER, SNR vs actual DCF, Linf vs EER, Linf vs actual DCF - - Curves are left in `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_art_${attack_related_label}_eall/` - - When using `--save-wav true`, it writes adversarial wavs of successful attacks to disk - - Wavs are saved to `exp/scores/$nnet_name/transfer.$transfer_nnet/cosine_art_${attack_related_label}/wav`
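(Editor's note, not in the original README.) Combining the options documented above, a typical white-box evaluation with analysis curves and saved adversarial wavs would be launched as:

```bash
run_043_eval_whitebox_attacks.sh \
  --config-file global_conf/config_victim_resnet34_transfer_lresnet.v1.sh \
  --do-analysis true --save-wav true
```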
diff --git a/egs/voxceleb/adv.v1/cmd.sh b/egs/voxceleb/adv.v1/cmd.sh deleted file mode 100755 index 9fb941ae..00000000 --- a/egs/voxceleb/adv.v1/cmd.sh +++ /dev/null @@ -1,25 +0,0 @@ -# you can change cmd.sh depending on what type of queue you are using. -# If you have no queueing system and want to run on a local machine, you -# can change all instances 'queue.pl' to run.pl (but be careful and run -# commands one by one: most recipes will exhaust the memory on your -# machine). queue.pl works with GridEngine (qsub). slurm.pl works -# with slurm. Different queues are configured differently, with different -# queue names and different ways of specifying things like memory; -# to account for these differences you can create and edit the file -# conf/queue.conf to match your queue's configuration. Search for -# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, -# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. - -if [ "$(hostname -d)" == "cm.gemini" ];then - #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" - export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" - export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" - export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" -else - export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\"" - export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\"" - export cuda_eval_cmd="$train_cmd" -fi - - - diff --git a/egs/voxceleb/adv.v1/datapath.sh b/egs/voxceleb/adv.v1/datapath.sh deleted file mode 100644 index 6d48a66d..00000000 --- a/egs/voxceleb/adv.v1/datapath.sh +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Paths to the databases used in the experiment - -#paths to databases - -if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then - voxceleb1_root=/export/corpora5/VoxCeleb1_v1 - voxceleb2_root=/export/corpora5/VoxCeleb2 - musan_root=/export/corpora5/JHU/musan -elif [ "$(hostname --domain)" == "cm.gemini" ];then - voxceleb1_root=/expscratch/dsnyder/VoxCeleb1 - voxceleb2_root=/expscratch/dgromero/corpora-open/vox2 - musan_root=/expscratch/dgromero/corpora-open/musan -else - echo "Put your database paths here" - exit 1 -fi - - diff --git a/egs/voxceleb/adv.v1/default_config.sh b/egs/voxceleb/adv.v1/default_config.sh deleted file mode 120000 index c91ded65..00000000 --- a/egs/voxceleb/adv.v1/default_config.sh +++ /dev/null @@ -1 +0,0 @@ -global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/global_conf/config_victim_lresnet34_transfer_resetdnn.v1.sh b/egs/voxceleb/adv.v1/global_conf/config_victim_lresnet34_transfer_resetdnn.v1.sh deleted file mode 100644 index 39016679..00000000 --- a/egs/voxceleb/adv.v1/global_conf/config_victim_lresnet34_transfer_resetdnn.v1.sh +++ /dev/null @@ -1,32 +0,0 @@ -# Victim model Light ResNet34 x-vector -# For the black-box attacks we use Residual E-TDNN to generate the attacks and transfer them to the Light ResNet34 -# Both models use the same features: 80 fbanks -# Both models use the same training data.
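(Editor's sketch, not part of the original file.) The run_0*.sh recipe steps consume configs like this one by sourcing them, roughly as the scripts later in this patch do:

```bash
config_file=default_config.sh
. parse_options.sh || exit 1   # allows overriding with --config-file
. $config_file                 # defines $nnet_name, $nnet, $transfer_nnet, ...
echo $nnet   # -> exp/xvector_nnets/fbank80_stmn_lresnet34/model_ep0070.pth
```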
- -# acoustic features -feat_config=conf/fbank80_stmn_16k.yaml -feat_type=fbank80_stmn - -#vad -vad_config=conf/vad_16k.yaml - -# victim x-vector training -nnet_data=voxceleb2cat_train - -# victim x-vector cfg -nnet_type=resnet -nnet_name=${feat_type}_lresnet34 -nnet_cfg=conf/train_lresnet34_xvec.yaml -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth - -# transfer model training -transfer_nnet_data=voxceleb2cat_train #this can be voxceleb2cat or voxceleb2cat_combined - -transfer_nnet_type=resetdnn -transfer_nnet_cfg=train_resetdnn_xvec.yaml -transfer_nnet_name=${feat_type}_resetdnn5x512 -transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name -transfer_nnet=$transfer_nnet_dir/model_ep0070.pth - - diff --git a/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_lresnet.v1.sh b/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_lresnet.v1.sh deleted file mode 100644 index 97f4283e..00000000 --- a/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_lresnet.v1.sh +++ /dev/null @@ -1,61 +0,0 @@ -# Victim model ResNet34 x-vector -# For the black-box attacks we use Light ResNet34 to generate the attacks and transfer them to the ResNet34 -# Both models use the same features: 80 fbanks -# Both models use the same training data. - -# victim x-vector training -nnet_data=voxceleb2cat_train_combined - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.05 - -nnet_type=resnet34 -dropout=0 -embed_dim=256 - -s=30 -margin_warmup=20 -margin=0.3 - -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=70 -num_augs=5 -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth - - -# transfer model training -transfer_nnet_data=voxceleb2cat_train_combined #this can be voxceleb2cat or voxceleb2cat_combined - -transfer_batch_size_1gpu=128 -transfer_eff_batch_size=512 # effective batch size -transfer_min_chunk=400 -transfer_max_chunk=400 -transfer_ipe=1 -transfer_lr=0.05 - -transfer_nnet_type=lresnet34 -transfer_dropout=0 -transfer_embed_dim=256 - -transfer_s=30 -transfer_margin_warmup=20 -transfer_margin=0.3 - -transfer_nnet_opt="--resnet-type $transfer_nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" -transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -transfer_nnet_name=${transfer_nnet_type}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1 -transfer_nnet_num_epochs=70 - -transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name -transfer_nnet=$transfer_nnet_dir/model_ep0070.pth - -
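(Editor's note, not in the original file.) Substituting the values above, the composed experiment names expand to:

```bash
# nnet_name          -> resnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
# transfer_nnet_name -> lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
```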
diff --git a/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh b/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh deleted file mode 100644 index 81f78c60..00000000 --- a/egs/voxceleb/adv.v1/global_conf/config_victim_resnet34_transfer_resetdnn.v1.sh +++ /dev/null @@ -1,94 +0,0 @@ -# Victim model ResNet34 x-vector -# For the black-box attacks we use Residual E-TDNN to generate the attacks and transfer them to the ResNet34 -# Both models use the same features: 80 fbanks -# Both models use the same training data. - -# acoustic features -feat_config=conf/fbank80_stmn_16k.yaml -feat_type=fbank80_stmn - -#vad -vad_config=conf/vad_16k.yaml - -# victim x-vector training -nnet_data=voxceleb2cat_train - -# victim x-vector cfg -nnet_type=resnet -nnet_name=${feat_type}_lresnet34 - -nnet_cfg=conf/train_lresnet34_xvec.yaml -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth - -# transfer model training -transfer_nnet_data=voxceleb2cat_train #this can be voxceleb2cat or voxceleb2cat_combined - -transfer_nnet_type=resetdnn -transfer_nnet_name=${feat_type}_resetdnn5x512 -transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name -transfer_nnet=$transfer_nnet_dir/model_ep0070.pth - - - -# # victim x-vector training -# nnet_data=voxceleb2cat_train_combined - -# batch_size_1gpu=32 -# eff_batch_size=512 # effective batch size -# min_chunk=400 -# max_chunk=400 -# ipe=1 -# lr=0.05 - -# nnet_type=resnet34 -# dropout=0 -# embed_dim=256 - -# s=30 -# margin_warmup=20 -# margin=0.3 - -# nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool" -# opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -# lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -# nnet_name=${nnet_type}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -# nnet_num_epochs=70 -# num_augs=5 -# nnet_dir=exp/xvector_nnets/$nnet_name -# nnet=$nnet_dir/model_ep0070.pth - - -# # transfer model training -# transfer_nnet_data=voxceleb2cat_train_combined #this can be voxceleb2cat or voxceleb2cat_combined - -# transfer_batch_size_1gpu=128 -# transfer_eff_batch_size=512 # effective batch size -# transfer_min_chunk=400 -# transfer_max_chunk=400 -# transfer_ipe=1 -# transfer_lr=0.05 - -# transfer_nnet_type=resetdnn -# transfer_num_layers=5 -# transfer_layer_dim=512 -# transfer_expand_dim=1536 -# transfer_dilation="1 2 3 4 1" -# transfer_kernel_sizes="5 3 3 3 1" -# transfer_dropout=0.1 -# transfer_embed_dim=256 - -# transfer_s=30 -# transfer_margin_warmup=20 -# transfer_margin=0.3 - -# transfer_nnet_opt="--tdnn-type $transfer_nnet_type --in-feats 80 --num-enc-blocks $transfer_num_layers --enc-hid-units $transfer_layer_dim --enc-expand-units $transfer_expand_dim --kernel-size $transfer_kernel_sizes --dilation $transfer_dilation" -# transfer_opt_opt="--optim.opt-type adam --optim.lr $transfer_lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp" -# transfer_lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -#
transfer_nnet_name=${transfer_nnet_type}_nl${transfer_num_layers}ld${transfer_layer_dim}_e${transfer_embed_dim}_arcs${transfer_s}m${transfer_margin}_do${transfer_dropout}_adam_lr${transfer_lr}_b${transfer_eff_batch_size}_amp.v1 -# transfer_nnet_num_epochs=70 - -# transfer_nnet_dir=exp/xvector_nnets/$transfer_nnet_name -# transfer_nnet=$transfer_nnet_dir/model_ep0070.pth - - diff --git a/egs/voxceleb/adv.v1/hyp_utils b/egs/voxceleb/adv.v1/hyp_utils deleted file mode 120000 index f6d1eb7a..00000000 --- a/egs/voxceleb/adv.v1/hyp_utils +++ /dev/null @@ -1 +0,0 @@ -../../../hyp_utils \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/path.sh b/egs/voxceleb/adv.v1/path.sh deleted file mode 100755 index 42bfa7be..00000000 --- a/egs/voxceleb/adv.v1/path.sh +++ /dev/null @@ -1,6 +0,0 @@ - -export HYP_ROOT=$(readlink -f `pwd -P`/../../..) -export TOOLS_ROOT=$HYP_ROOT/tools - -. $TOOLS_ROOT/path.sh -HYP_ART_ENV=$HYP_ENV diff --git a/egs/voxceleb/adv.v1/run_001_prepare_data.sh b/egs/voxceleb/adv.v1/run_001_prepare_data.sh deleted file mode 100755 index 8af0f353..00000000 --- a/egs/voxceleb/adv.v1/run_001_prepare_data.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. datapath.sh - - -if [ $stage -le 1 ];then - - # Prepare the VoxCeleb2 dataset for training. - local/make_voxceleb2cat.pl $voxceleb2_root dev 16 data/voxceleb2cat_train -fi - -if [ $stage -le 2 ];then - # prepare voxceleb1 for test - local/make_voxceleb1_o.pl $voxceleb1_root data -fi diff --git a/egs/voxceleb/adv.v1/run_002_compute_evad.sh b/egs/voxceleb/adv.v1/run_002_compute_evad.sh deleted file mode 100755 index cc3d8296..00000000 --- a/egs/voxceleb/adv.v1/run_002_compute_evad.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e -nodes=fs01 -storage_name=$(date +'%m_%d_%H_%M') -vaddir=`pwd`/exp/vad_e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file - - -if [ $stage -le 1 ]; then - # Prepare to distribute data over multiple machines - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $fbankdir/storage ]; then - dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage - if [ "$nodes" == "b0" ];then - utils/create_split_dir.pl \ - utils/create_split_dir.pl \ - /export/b{04,05,06,07}/$dir_name $fbankdir/storage - elif [ "$nodes" == "b1" ];then - utils/create_split_dir.pl \ - /export/b{14,15,16,17}/$dir_name $fbankdir/storage - elif [ "$nodes" == "c0" ];then - utils/create_split_dir.pl \ - /export/c{06,07,08,09}/$dir_name $fbankdir/storage - elif [ "$nodes" == "fs01" ];then - utils/create_split_dir.pl \ - /export/fs01/$dir_name $fbankdir/storage - else - echo "we don't distribute data between multiple machines" - fi - fi -fi - -#Train datasets -if [ $stage -le 2 ];then - for name in voxceleb2cat_train voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 40 ? 
$num_spk:40)) - hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ - --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ - data/${name} exp/make_vad/$name $vaddir - utils/fix_data_dir.sh data/${name} - done -fi - - diff --git a/egs/voxceleb/adv.v1/run_003_prepare_noises_rirs.sh b/egs/voxceleb/adv.v1/run_003_prepare_noises_rirs.sh deleted file mode 100755 index a448af9a..00000000 --- a/egs/voxceleb/adv.v1/run_003_prepare_noises_rirs.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -# Copyright -# 2020 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -# We prepare the noise files and RIR for online speech augmentation - -if [ $stage -le 1 ]; then - - # Prepare the MUSAN corpus, which consists of music, speech, and noise - # suitable for augmentation. - local/make_musan.sh $musan_root 16 data - - for name in musan_noise musan_music - do - steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ - --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \ - data/${name} data/${name}_proc_audio exp/${name}_proc_audio - utils/fix_data_dir.sh data/${name}_proc_audio - done - -fi - -if [ $stage -le 2 ]; then - - # Create Babble noise from MUSAN speech files - for name in musan_speech - do - steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ - --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \ - data/${name} data/${name}_babble exp/${name}_babble - # utils/fix_data_dir.sh data/${name}_babble - done -fi - -if [ $stage -le 3 ]; then - if [ ! -d "RIRS_NOISES" ]; then - if [ -d ../../sre19-cmn2/v1/RIRS_NOISES ];then - ln -s ../../sre19-cmn2/v1/RIRS_NOISES - else - # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises - wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip - unzip rirs_noises.zip - fi - fi - local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/smallroom 16 data/rirs_smallroom - local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/mediumroom 16 data/rirs_mediumroom - local/make_rirs_data.sh RIRS_NOISES/real_rirs_isotropic_noises 16 data/rirs_real - for rirs in rirs_smallroom rirs_mediumroom rirs_real - do - #pack all rirs in h5 files - steps_xvec/pack_rirs_for_nnet_train.sh data/$rirs data/$rirs exp/rirs/$rirs - done - -fi - - diff --git a/egs/voxceleb/adv.v1/run_004_prepare_victim_xvec_train_data.sh b/egs/voxceleb/adv.v1/run_004_prepare_victim_xvec_train_data.sh deleted file mode 100755 index 6939052e..00000000 --- a/egs/voxceleb/adv.v1/run_004_prepare_victim_xvec_train_data.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# Copyright -# 2020 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. 
$config_file - -if [ $stage -le 2 ]; then - # This script preprocess audio for x-vector training - steps_xvec/preprocess_audios_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ - --storage_name voxceleb-adv.v1-$(date +'%m_%d_%H_%M') --use-bin-vad true \ - data/${nnet_data} data/${nnet_data}_proc_audio_no_sil exp/${nnet_data}_proc_audio_no_sil - hyp_utils/kaldi/utils/fix_data_dir.sh data/${nnet_data}_proc_audio_no_sil - -fi - -if [ $stage -le 3 ]; then - # Now, we remove files with less than 4s - hyp_utils/remove_short_audios.sh --min-len 4 data/${nnet_data}_proc_audio_no_sil - - # We also want several utterances per speaker. Now we'll throw out speakers - # with fewer than 4 utterances. - hyp_utils/remove_spk_few_utts.sh --min-num-utts 4 data/${nnet_data}_proc_audio_no_sil - -fi - -if [ $stage -le 4 ]; then - # Prepare train and validation lists for x-vectors - local/make_train_lists_sup_embed_with_augm.sh \ - data/${nnet_data}_proc_audio_no_sil \ - data/${nnet_data}_proc_audio_no_sil/lists_xvec -fi - -exit diff --git a/egs/voxceleb/adv.v1/run_006_prepare_transfer_xvec_train_data.sh b/egs/voxceleb/adv.v1/run_006_prepare_transfer_xvec_train_data.sh deleted file mode 100755 index f80d2924..00000000 --- a/egs/voxceleb/adv.v1/run_006_prepare_transfer_xvec_train_data.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -# Copyright -# 2020 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file - -if [ "$transfer_nnet_data" == "$nnet_data" ];then - echo "Training data for victim and transfer model are the same" - echo "Skipping this step" - exit 0 -fi - -if [ $stage -le 2 ]; then - # This script preprocess audio for x-vector training - steps_xvec/preprocess_audios_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ - --storage_name voxceleb-adv.v1-$(date +'%m_%d_%H_%M') --use-bin-vad true \ - data/${transfer_nnet_data} data/${transfer_nnet_data}_proc_audio_no_sil exp/${transfer_nnet_data}_proc_audio_no_sil - hyp_utils/kaldi/utils/fix_data_dir.sh data/${transfer_nnet_data}_proc_audio_no_sil - -fi - -if [ $stage -le 3 ]; then - # Now, we remove files with less than 4s - hyp_utils/remove_short_audios.sh --min-len 4 data/${transfer_nnet_data}_proc_audio_no_sil - - # We also want several utterances per speaker. Now we'll throw out speakers - # with fewer than 4 utterances. - hyp_utils/remove_spk_few_utts.sh --min-num-utts 4 data/${transfer_nnet_data}_proc_audio_no_sil - -fi - -if [ $stage -le 4 ]; then - # Prepare train and validation lists for x-vectors - local/make_train_lists_sup_embed_with_augm.sh \ - data/${transfer_nnet_data}_proc_audio_no_sil \ - data/${transfer_nnet_data}_proc_audio_no_sil/lists_xvec -fi - -exit diff --git a/egs/voxceleb/adv.v1/run_008_extract_xvectors_victim_model.sh b/egs/voxceleb/adv.v1/run_008_extract_xvectors_victim_model.sh deleted file mode 100755 index 03234eaa..00000000 --- a/egs/voxceleb/adv.v1/run_008_extract_xvectors_victim_model.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Copyright -# 2020 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -nnet_stage=1 -config_file=default_config.sh -use_gpu=false -xvec_chunk_length=12800 -. parse_options.sh || exit 1; -. 
$config_file - -if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" - xvec_cmd="$cuda_eval_cmd --mem 4G" -else - xvec_cmd="$train_cmd --mem 12G" -fi - -if [ $stage -le 2 ]; then - # Extracts x-vectors for evaluation - for name in voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 100 ? $num_spk:100)) - steps_xvec/extract_xvectors_from_wav.sh \ - --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ - --feat-config $feat_config \ - $nnet data/$name \ - $xvector_dir/$name - done -fi diff --git a/egs/voxceleb/adv.v1/run_031_extract_xvectors_transfer_model.sh b/egs/voxceleb/adv.v1/run_031_extract_xvectors_transfer_model.sh deleted file mode 100755 index 5daf2ec8..00000000 --- a/egs/voxceleb/adv.v1/run_031_extract_xvectors_transfer_model.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false - -. parse_options.sh || exit 1; -. $config_file - -if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu true --chunk-length 12800" - xvec_cmd="$cuda_eval_cmd" -else - xvec_cmd="$train_cmd" -fi - -nnet_name=$transfer_nnet_name -nnet=$transfer_nnet - -xvector_dir=exp/xvectors/$nnet_name - -if [ $stage -le 1 ]; then - # Extracts x-vectors for evaluation - for name in voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 100 ? $num_spk:100)) - steps_xvec/extract_xvectors.sh --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ - $nnet data/$name \ - $xvector_dir/$name - done -fi - -exit diff --git a/egs/voxceleb/adv.v1/run_040_eval_be_victim_model.sh b/egs/voxceleb/adv.v1/run_040_eval_be_victim_model.sh deleted file mode 100755 index ac8c8a24..00000000 --- a/egs/voxceleb/adv.v1/run_040_eval_be_victim_model.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name -score_plda_dir=$score_dir/cosine - -if [ $stage -le 1 ];then - - echo "Eval Voxceleb 1 with Cosine scoring" - steps_be/eval_be_cos.sh --cmd "$train_cmd" \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $score_plda_dir/voxceleb1_scores - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - -fi - - -if [ $stage -le 2 ];then - local/calibrate_voxceleb1_o_clean.sh --cmd "$train_cmd" $score_plda_dir - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test ${score_plda_dir}_cal_v1 - - for f in $(ls ${score_plda_dir}_cal_v1/*_results); - do - echo $f - cat $f - echo "" - done - - -fi diff --git a/egs/voxceleb/adv.v1/run_041_eval_be_transfer_model.sh b/egs/voxceleb/adv.v1/run_041_eval_be_transfer_model.sh deleted file mode 100755 index b9451768..00000000 --- a/egs/voxceleb/adv.v1/run_041_eval_be_transfer_model.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. 
./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - - -nnet_name=$transfer_nnet_name -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name -score_plda_dir=$score_dir/cosine - -if [ $stage -le 1 ];then - - echo "Eval Voxceleb 1 with Cosine scoring" - steps_be/eval_be_cos.sh --cmd "$train_cmd" \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $score_plda_dir/voxceleb1_scores - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - -fi - -if [ $stage -le 2 ];then - local/calibrate_voxceleb1_o_clean.sh --cmd "$train_cmd" $score_plda_dir - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test ${score_plda_dir}_cal_v1 - - for f in $(ls ${score_plda_dir}_cal_v1/*_results); - do - echo $f - cat $f - echo "" - done - - -fi diff --git a/egs/voxceleb/adv.v1/run_042_eval_victim_from_wav.sh b/egs/voxceleb/adv.v1/run_042_eval_victim_from_wav.sh deleted file mode 100755 index b8ee5ada..00000000 --- a/egs/voxceleb/adv.v1/run_042_eval_victim_from_wav.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false - -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -if [ "$use_gpu" == "true" ];then - eval_args="--use-gpu true" - eval_cmd="$cuda_eval_cmd" -else - eval_cmd="$train_cmd" -fi - -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name -cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 -score_plda_dir=$score_dir/cosine_from_wav - -if [ $stage -le 1 ];then - - echo "Eval Voxceleb 1 with Cosine scoring" - steps_xvec/eval_cosine_scoring_from_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 200 \ - --feat-config conf/fbank80_stmn_16k.yaml \ - --cal-file $cal_file \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - -fi - - diff --git a/egs/voxceleb/adv.v1/run_043_eval_whitebox_attacks.sh b/egs/voxceleb/adv.v1/run_043_eval_whitebox_attacks.sh deleted file mode 100755 index 55500abd..00000000 --- a/egs/voxceleb/adv.v1/run_043_eval_whitebox_attacks.sh +++ /dev/null @@ -1,346 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false -do_analysis=false -save_wav=false -feat_config=conf/fbank80_stmn_16k.yaml - -. parse_options.sh || exit 1; -. $config_file -. 
datapath.sh - -if [ "$use_gpu" == "true" ];then - eval_args="--use-gpu true" - eval_cmd="$cuda_eval_cmd" -else - eval_cmd="$train_cmd" -fi - -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name - -score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 - -#thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -thr005=2.94 -thr001=4.60 -thr0001=6.90 -declare -a score_array -declare -a stats_array - -if [ $stage -le 1 ];then - - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/cosine_fgsm_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type fgsm --attack.eps $eps" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_fgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 2 ];then - score_array=() - stats_array=() - for snr in 30 20 10 0 - do - score_plda_dir=$score_dir/cosine_fgsm_snr${snr} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack snr=$snr" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type snr-fgsm --attack.snr $snr" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_fgsm_snrall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 3 ];then - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_randfgsm_e${eps}_a${alpha} - echo "Eval Voxceleb 1 with Cosine scoring with Rand-FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 
\ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type rand-fgsm --attack.eps $eps --attack.alpha $alpha" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_randfgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 4 ];then - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_iterfgsm_e${eps}_a${alpha} - echo "Eval Voxceleb 1 with Cosine scoring with Iterative FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type iter-fgsm --attack.eps $eps --attack.alpha $alpha" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_iterfgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 5 ];then - - for confidence in 0 #1 - do - for lr in 0.001 - do - for it in 10 - do - - score_plda_dir=$score_dir/cosine_cwl2_conf${confidence}_lr${lr}_noabort_it$it - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L2 attack confidence=$confidence lr=$lr num-its=$it" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 100 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for 
f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - done - done - done - -fi - - -if [ $stage -le 6 ];then - - for confidence in 0 #1 - do - for lr in 0.001 - do - for it in 10 - do - score_plda_dir=$score_dir/cosine_cwrms_conf${confidence}_lr${lr}_noabort_it$it - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner RMS attack confidence=$confidence lr=$lr num_its=$it" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd -tc 15" $eval_args --nj 100 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.norm-time --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - done - done - -fi - - -if [ $stage -le 7 ];then - - for confidence in 0 #1 - do - for lr in 0.001 - do - for it in 10 - do - score_plda_dir=$score_dir/cosine_cwsnr_conf${confidence}_lr${lr}_noabort_it$it - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner SNR attack confidence=$confidence lr=$lr num_its=$it" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd -tc 15" $eval_args --nj 100 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.use-snr --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - done - done -fi - - -exit - diff --git a/egs/voxceleb/adv.v1/run_044_eval_transfer_blackbox_attacks.sh b/egs/voxceleb/adv.v1/run_044_eval_transfer_blackbox_attacks.sh deleted file mode 100755 index 937b4b6b..00000000 --- 
a/egs/voxceleb/adv.v1/run_044_eval_transfer_blackbox_attacks.sh +++ /dev/null @@ -1,481 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false -do_analysis=false -save_wav=false -feat_config=conf/fbank80_stmn_16k.yaml - -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -transfer_feat_config=$feat_config - -if [ "$use_gpu" == "true" ];then - eval_args="--use-gpu true" - eval_cmd="$cuda_eval_cmd" -else - eval_cmd="$train_cmd" -fi - -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name - -score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 - -transfer_xvector_dir=exp/xvectors/$transfer_nnet_name -transfer_score_dir=exp/scores/$transfer_nnet_name -transfer_cal_file=$transfer_score_dir/cosine_cal_v1/cal_tel.h5 - -#thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -thr005=2.94 -thr001=4.60 -thr0001=6.90 -declare -a score_array -declare -a stats_array - -if [ $stage -le 1 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_fgsm_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgsm --attack.eps $eps" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_fgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 2 ];then - - score_array=() - stats_array=() - - for snr in 30 20 10 0 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_fgsm_snr${snr} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack snr=$snr" - steps_adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type snr-fgsm --attack.snr $snr" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - 
$transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_fgsm_snrall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 3 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_randfgsm_e${eps}_a${alpha} - echo "Eval Voxceleb 1 with Cosine scoring with Rand-FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type rand-fgsm --attack.eps $eps --attack.alpha $alpha" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_randfgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 4 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_iterfgsm_e${eps}_a${alpha} - echo "Eval Voxceleb 1 with Cosine scoring with Iterative FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type iter-fgsm --attack.eps $eps --attack.alpha $alpha" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -
local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_iterfgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 5 ];then - - for confidence in 0 1 - do - for lr in 0.001 - do - for it in 10 - do - - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_cwl2_conf${confidence}_lr${lr}_noabort_it$it - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L2 attack confidence=$confidence lr=$lr num-its=$it" - steps_adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 100 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - done - done - -fi - - -if [ $stage -le 6 ];then - - for confidence in 0 1 - do - for lr in 0.001 - do - for it in 10 - do - - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_cwrms_conf${confidence}_lr${lr}_noabort_it$it - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner RMS attack confidence=$confidence lr=$lr num-its=$it" - steps_adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 200 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.norm-time --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - 
done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - done - done - -fi - - -if [ $stage -le 7 ];then - - for confidence in 0 1 - do - for lr in 0.001 - do - for it in 10 - do - - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_cwsnr_conf${confidence}_lr${lr}_noabort_it$it - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner SNR attack confidence=$confidence lr=$lr num-its=$it" - steps_adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 100 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.use-snr --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - done - done - -fi - - -# if [ $stage -le -8 ];then - -# for confidence in 0 1 -# do -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_cwl0_conf${confidence} -# echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L0 attack confidence=$confidence" -# steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 1000 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type cw-l0 --confidence $confidence --c-factor 10 \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# data/voxceleb1_test/trials_o_clean \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_plda_dir -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# data/voxceleb1_test/trials_o_clean $score_clean \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ -# $score_analysis_dir/voxceleb1 & -# fi -# done - -# fi
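The cwl2, cwrms and cwsnr stages above run the same Carlini-Wagner style optimization and differ only in how the perturbation size is penalized: plain squared L2 for cw-l2, and, as the flag names suggest, a duration-normalized (RMS) penalty for --attack.norm-time and a signal-energy-relative (SNR) penalty for --attack.use-snr. A minimal sketch of one gradient step under those assumptions; embed_fn, cw_step and the exact penalty formulas are illustrative, not hyperion's actual attack API:

    import torch
    import torch.nn.functional as F

    def cw_step(x, delta, enroll_emb, embed_fn, threshold,
                confidence=0.0, c=1.0, lr=1e-3, penalty="l2"):
        """One CW-style step against a cosine-scoring verifier (sketch)."""
        delta = delta.detach().requires_grad_(True)
        score = F.cosine_similarity(embed_fn(x + delta), enroll_emb, dim=-1)
        # hinge loss: keep pushing until the target trial scores below the
        # calibrated threshold by at least `confidence`
        attack_loss = torch.clamp(score - threshold + confidence, min=0.0).sum()
        if penalty == "l2":      # --attack.attack-type cw-l2
            dist = delta.pow(2).sum()
        elif penalty == "rms":   # assumed meaning of --attack.norm-time
            dist = delta.pow(2).mean()
        else:                    # "snr": assumed meaning of --attack.use-snr
            dist = delta.pow(2).sum() / x.pow(2).sum().clamp_min(1e-8)
        (dist + c * attack_loss).backward()
        return (delta - lr * delta.grad).detach()

Under this reading, --attack.no-abort would simply run all --attack.max-iter steps instead of stopping at the first step that crosses the threshold.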
- - -# if [ $stage -le 9 ];then - -# for confidence in 0 1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_cwlinf_conf${confidence} -# echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner LInf attack confidence=$confidence" -# steps_adv/eval_cosine_scoring_from_transfer_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 40 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --transfer-feat-config $transfer_feat_conf --transfer-audio-feat $transfer_feat \ -# --attack-type cw-linf --confidence $confidence --c-factor 2 \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ -# --threshold $thr005 \ -# data/voxceleb1_test/trials_o_clean \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet \ -# $transfer_xvector_dir/voxceleb1_test/xvector.scp \ -# $transfer_nnet \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_plda_dir -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# data/voxceleb1_test/trials_o_clean $score_clean \ -# $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ -# $score_analysis_dir/voxceleb1 & -# fi - -# done - -# fi - -wait - diff --git a/egs/voxceleb/adv.v1/run_045_eval_whitebox_attacks_with_randsmooth_defense.sh b/egs/voxceleb/adv.v1/run_045_eval_whitebox_attacks_with_randsmooth_defense.sh deleted file mode 100755 index ad2e4cdf..00000000 --- a/egs/voxceleb/adv.v1/run_045_eval_whitebox_attacks_with_randsmooth_defense.sh +++ /dev/null @@ -1,544 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false -do_analysis=false -save_wav=false -feat_config=conf/fbank80_stmn_16k.yaml -sigmas="0.001 0.01" -. parse_options.sh || exit 1; -. $config_file -. 
datapath.sh - -if [ "$use_gpu" == "true" ];then - eval_args="--use-gpu true" - eval_cmd="$cuda_eval_cmd" -else - eval_cmd="$train_cmd" -fi - -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name - -score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 - -#thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -thr005=2.94 -thr001=4.60 -thr0001=6.90 -declare -a score_array -declare -a stats_array - -if [ $stage -le 1 ];then - - for sigma in $sigmas - do - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/cosine_fgsm_e${eps}_randsmooth${sigma} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type fgsm --attack.eps $eps" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_fgsm_eall_randsmooth${sigma} - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - done -fi - - -if [ $stage -le 2 ];then - - for sigma in $sigmas - do - score_array=() - stats_array=() - for snr in 30 20 10 0 - do - score_plda_dir=$score_dir/cosine_fgsm_snr${snr}_randsmooth${sigma} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack snr=$snr" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type snr-fgsm --attack.snr $snr" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_fgsm_snrall_randsmooth${sigma} - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - done -fi
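What distinguishes this script from the plain whitebox recipe is the --smooth-sigma option passed to every stage: the randomized-smoothing defence perturbs the test waveform with Gaussian noise of standard deviation sigma before embedding, so that small adversarial perturbations are partially averaged out. A minimal sketch of the idea, assuming a hypothetical embed_fn and that several noisy draws are averaged (a single draw is also possible):

    import torch

    def smoothed_embedding(x, embed_fn, sigma, n_draws=8):
        """Average x-vectors over Gaussian-noised copies of the input (sketch)."""
        embs = [embed_fn(x + sigma * torch.randn_like(x)) for _ in range(n_draws)]
        return torch.stack(embs).mean(dim=0)

Larger sigma buys more robustness at the cost of clean accuracy, which is why the recipe sweeps the two values in sigmas.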
- - -if [ $stage -le 3 ];then - for sigma in $sigmas - do - - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_randfgsm_e${eps}_a${alpha}_randsmooth${sigma} - echo "Eval Voxceleb 1 with Cosine scoring with Rand-FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type rand-fgsm --attack.eps $eps --attack.alpha $alpha" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_randfgsm_eall_randsmooth${sigma} - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - done -fi - - -if [ $stage -le 4 ];then - for sigma in $sigmas - do - - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_iterfgsm_e${eps}_a${alpha}_randsmooth${sigma} - echo "Eval Voxceleb 1 with Cosine scoring with Iterative FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type iter-fgsm --attack.eps $eps --attack.alpha $alpha" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav --smooth-sigma $sigma \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_iterfgsm_eall_randsmooth${sigma} - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - done -fi - - -if [ $stage -le 5 ];then - - for sigma in $sigmas - do - - for confidence in 0 #1 - do - for lr in 0.001 - do - for it in 10 - do - - score_plda_dir=$score_dir/cosine_cwl2_conf${confidence}_lr${lr}_noabort_it${it}_randsmooth${sigma} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L2 attack confidence=$confidence lr=$lr num-its=$it" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 100 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -
--cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - done - done - done - done -fi - - -if [ $stage -le 6 ];then - for sigma in $sigmas - do - - for confidence in 0 #1 - do - for lr in 0.001 - do - for it in 10 - do - score_plda_dir=$score_dir/cosine_cwrms_conf${confidence}_lr${lr}_noabort_it${it}_randsmooth${sigma} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner RMS attack confidence=$confidence lr=$lr num_its=$it" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd -tc 15" $eval_args --nj 100 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.norm-time --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - done - done - done -fi - - -if [ $stage -le 7 ];then - for sigma in $sigmas - do - - for confidence in 0 #1 - do - for lr in 0.001 - do - for it in 10 - do - score_plda_dir=$score_dir/cosine_cwsnr_conf${confidence}_lr${lr}_noabort_it${it}_randsmooth${sigma} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner SNR attack confidence=$confidence lr=$lr num_its=$it" - steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd -tc 15" $eval_args --nj 100 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence --attack.lr $lr --attack.no-abort --attack.use-snr --attack.max-iter $it" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - 
echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - done - done - done -fi - - -exit - - - -# #!/bin/bash -# # Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# # -# # Apache 2.0. -# # -# . ./cmd.sh -# . ./path.sh -# set -e - -# stage=1 -# config_file=default_config.sh -# use_gpu=false -# do_analysis=false -# save_wav=false - -# . parse_options.sh || exit 1; -# . $config_file -# . datapath.sh - -# if [ "$use_gpu" == "true" ];then -# eval_args="--use-gpu true" -# eval_cmd="$cuda_eval_cmd" -# else -# eval_cmd="$train_cmd" -# fi - -# xvector_dir=exp/xvectors/$nnet_name -# score_dir=exp/scores/$nnet_name - -# score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -# cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 - -# #thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -# thr005=2.94 -# thr001=4.60 -# thr0001=6.90 -# declare -a score_array -# declare -a stats_array - -# if [ $stage -le 1 ];then - -# for sigma in 0.001 0.01 -# do -# score_array=() -# stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# score_plda_dir=$score_dir/cosine_fgsm_e${eps}_randsmooth${sigma} -# echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type fgsm --eps $eps \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ -# data/voxceleb1_test/trials_o_clean \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_fgsm_eall_randsmooth$sigma -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi -# done - -# fi - - - - -# if [ $stage -le 3 ];then -# for sigma in 0.001 0.01 -# do -# score_array=() -# stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/cosine_randfgsm_e${eps}_a${alpha}_randsmooth$sigma -# echo "Eval Voxceleb 1 with Cosine scoring with Rand-FGSM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type rand-fgsm --eps $eps --alpha $alpha --smooth-sigma $sigma\ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 \ -# data/voxceleb1_test/trials_o_clean \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores 
$score_plda_dir/voxceleb1_stats - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# done - -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_randfgsm_eall_randsmooth$sigma -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi -# done -# fi - - -# if [ $stage -le 4 ];then -# for sigma in 0.001 0.01 -# do -# score_array=() -# stats_array=() -# for eps in 0.00001 0.0001 0.001 0.01 0.1 -# do -# alpha=$(echo $eps | awk '{ print $0/5.}') -# score_plda_dir=$score_dir/cosine_iterfgsm_e${eps}_a${alpha}_randsmooth$sigma -# echo "Eval Voxceleb 1 with Cosine scoring with Iterative FGSM attack eps=$eps" -# steps_adv/eval_cosine_scoring_from_adv_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ -# --feat-config conf/fbank80_16k.pyconf --audio-feat logfb \ -# --attack-type iter-fgsm --eps $eps --alpha $alpha \ -# --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ -# --cal-file $cal_file --threshold $thr005 --smooth-sigma $sigma \ -# data/voxceleb1_test/trials_o_clean \ -# data/voxceleb1_test/utt2model \ -# data/voxceleb1_test \ -# $xvector_dir/voxceleb1_test/xvector.scp \ -# $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - -# score_array+=($score_plda_dir/voxceleb1_scores) -# stats_array+=($score_plda_dir/voxceleb1_stats) - -# $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ -# local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - -# for f in $(ls $score_plda_dir/*_results); -# do -# echo $f -# cat $f -# echo "" -# done - -# done -# if [ "${do_analysis}" == "true" ];then -# score_analysis_dir=$score_dir/cosine_iterfgsm_eall_randsmooth$sigma -# local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ -# data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ -# $score_analysis_dir/voxceleb1 & -# fi -# done -# fi - -# wait diff --git a/egs/voxceleb/adv.v1/run_053_eval_art_whitebox_attacks.sh b/egs/voxceleb/adv.v1/run_053_eval_art_whitebox_attacks.sh deleted file mode 100755 index 3d01fbfa..00000000 --- a/egs/voxceleb/adv.v1/run_053_eval_art_whitebox_attacks.sh +++ /dev/null @@ -1,536 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false -do_analysis=false -save_wav=false -feat_config=conf/fbank80_stmn_16k.yaml - -. parse_options.sh || exit 1; -. $config_file -. 
datapath.sh - -if [ "$use_gpu" == "true" ];then - eval_args="--use-gpu true" - eval_cmd="$cuda_eval_cmd" -else - eval_cmd="$train_cmd" -fi - -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name - -score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 - -#thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -thr005=2.94 -thr001=4.60 -thr0001=6.90 - -declare -a score_array -declare -a stats_array - -if [ $stage -le 1 ];then - - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/cosine_art_fgsm_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_fgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi -fi - -if [ $stage -le 2 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_art_fgsm_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM minimal attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.eps-step $alpha --attack.minimal" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_fgsm_minimal_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - -if [ $stage -le 3 ];then - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/cosine_art_fgml1_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM-L1 attack eps=$eps" - 
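# The stages in this script sweep ART's fast gradient method (FGM) under
# different norms via --attack.norm: with no norm given, the step is
# eps * sign(gradient) (the usual L-inf FGSM); with --attack.norm 1 or 2 the
# gradient is first normalized by its L1 or L2 norm and then scaled by eps.
# The *_minimal variants add --attack.minimal, which in ART's
# FastGradientMethod searches in increments of --attack.eps-step for the
# smallest perturbation (up to eps) that already changes the decision.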
steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_fgml1_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi -fi - -if [ $stage -le 4 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_art_fgml1_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM-L1 minimal attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.eps-step $alpha --attack.minimal --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_fgml1_minimal_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 5 ];then - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/cosine_art_fgml2_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM-L2 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls 
$score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_fgml2_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi -fi - -if [ $stage -le 6 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_art_fgml2_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM-L2 minimal attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.eps-step $alpha --attack.minimal --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_fgml2_minimal_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 7 ];then - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_art_iterfgsm_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with IterFGM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type bim --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_iterfgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi -fi - -if [ $stage -le 8 ];then - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo 
$eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_art_pgdlinf_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD Linf attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_pgdlinf_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi -fi - - -if [ $stage -le 9 ];then - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_art_pgdl1_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD L1 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_pgdl1_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi -fi - -if [ $stage -le 10 ];then - score_array=() - stats_array=() - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/cosine_art_pgdl2_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD L2 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - 
$xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/cosine_art_pgdl2_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi -fi - -if [ $stage -le 11 ];then - - for confidence in 0 #1 - do - score_plda_dir=$score_dir/cosine_art_cwl2_conf${confidence} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L2 attack confidence=$confidence" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - -fi - - -if [ $stage -le 12 ];then - - for confidence in 0 #1 - do - score_plda_dir=$score_dir/cosine_art_cwlinf_conf${confidence} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner Linf attack confidence=$confidence" - steps_adv/eval_cosine_scoring_from_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 400 \ - --feat-config $feat_config \ - --attack-opts "--attack.attack-type cw-linf --attack.confidence $confidence --attack.eps 0.3" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - -fi - - diff --git a/egs/voxceleb/adv.v1/run_054_eval_art_transfer_blackbox_attacks.sh b/egs/voxceleb/adv.v1/run_054_eval_art_transfer_blackbox_attacks.sh deleted file mode 100755 index 254cef78..00000000 --- 
a/egs/voxceleb/adv.v1/run_054_eval_art_transfer_blackbox_attacks.sh +++ /dev/null @@ -1,626 +0,0 @@ -#!/bin/bash -# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false -do_analysis=false -save_wav=false -feat_config=conf/fbank80_stmn_16k.yaml - -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -transfer_feat_config=$feat_config - -if [ "$use_gpu" == "true" ];then - eval_args="--use-gpu true" - eval_cmd="$cuda_eval_cmd" -else - eval_cmd="$train_cmd" -fi - -xvector_dir=exp/xvectors/$nnet_name -score_dir=exp/scores/$nnet_name - -score_clean=$score_dir/cosine_cal_v1/voxceleb1_scores -cal_file=$score_dir/cosine_cal_v1/cal_tel.h5 - -transfer_xvector_dir=exp/xvectors/$transfer_nnet_name -transfer_score_dir=exp/scores/$transfer_nnet_name -transfer_cal_file=$transfer_score_dir/cosine_cal_v1/cal_tel.h5 - -#thresholds for p=(0.05,0.01,0.001) -> thr=(2.94, 4.60, 6.90) -thr005=2.94 -thr001=4.60 -thr0001=6.90 -declare -a score_array -declare -a stats_array - -if [ $stage -le 1 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 2 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - 
$transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgsm_minimal_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - - -if [ $stage -le 3 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM L1 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 4 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM minimal L1 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh 
data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml1_minimal_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - - -if [ $stage -le 5 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with FGM L2 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 6 ];then - - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_minimal_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring FGM minimal L2 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type fgm --attack.eps $eps --attack.minimal --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == 
"true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_fgml2_minimal_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 7 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_iterfgsm_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with iter FGSM attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type bim --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_iterfgsm_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - - -if [ $stage -le 8 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdlinf_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD Linf attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdlinf_eall - local/attack_analysis.sh --cmd 
"$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 9 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl1_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD L1 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm 1" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl1_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - $score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 10 ];then - score_array=() - stats_array=() - - for eps in 0.00001 0.0001 0.001 0.01 0.1 - do - alpha=$(echo $eps | awk '{ print $0/5.}') - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl2_e${eps} - echo "Eval Voxceleb 1 with Cosine scoring with PGD L2 attack eps=$eps" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 80 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type pgd --attack.eps $eps --attack.eps-step $alpha --attack.max-iter 10 --attack.norm 2" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - score_array+=($score_plda_dir/voxceleb1_scores) - stats_array+=($score_plda_dir/voxceleb1_stats) - - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_pgdl2_eall - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean "${score_array[*]}" "${stats_array[*]}" \ - 
$score_analysis_dir/voxceleb1 & - fi - -fi - - -if [ $stage -le 11 ];then - - for confidence in 0 #1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_cwl2_conf${confidence} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner L2 attack confidence=$confidence" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 20 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type cw-l2 --attack.confidence $confidence" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - -fi - - -if [ $stage -le 12 ];then - - for confidence in 0 #1 - do - score_plda_dir=$score_dir/transfer.${transfer_nnet_name}/cosine_art_cwlinf_conf${confidence} - echo "Eval Voxceleb 1 with Cosine scoring with Carlini-Wagner Linf attack confidence=$confidence" - steps_adv/eval_cosine_scoring_from_transfer_art_test_wav.sh --cmd "$eval_cmd" $eval_args --nj 40 \ - --feat-config $feat_config \ - --transfer-feat-config $transfer_feat_config \ - --attack-opts "--attack.attack-type cw-linf --attack.confidence $confidence --attack.eps 0.3" \ - --save-wav $save_wav --save-wav-path $score_plda_dir/wav \ - --cal-file $cal_file --transfer-cal-file $transfer_cal_file \ - --threshold $thr005 \ - data/voxceleb1_test/trials_o_clean \ - data/voxceleb1_test/utt2model \ - data/voxceleb1_test \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $nnet \ - $transfer_xvector_dir/voxceleb1_test/xvector.scp \ - $transfer_nnet \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats - - $train_cmd --mem 10G $score_plda_dir/log/score_voxceleb1.log \ - local/score_voxceleb1_o_clean.sh data/voxceleb1_test $score_plda_dir - - for f in $(ls $score_plda_dir/*_results); - do - echo $f - cat $f - echo "" - done - if [ "${do_analysis}" == "true" ];then - score_analysis_dir=$score_plda_dir - local/attack_analysis.sh --cmd "$train_cmd --mem 10G" \ - data/voxceleb1_test/trials_o_clean $score_clean \ - $score_plda_dir/voxceleb1_scores $score_plda_dir/voxceleb1_stats \ - $score_analysis_dir/voxceleb1 & - fi - - done - -fi - -wait - diff --git a/egs/voxceleb/adv.v1/steps b/egs/voxceleb/adv.v1/steps deleted file mode 120000 index aede39fe..00000000 --- a/egs/voxceleb/adv.v1/steps +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/kaldi/steps \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/steps_adv b/egs/voxceleb/adv.v1/steps_adv deleted file mode 120000 index fa9be351..00000000 --- a/egs/voxceleb/adv.v1/steps_adv +++ /dev/null @@ -1 
+0,0 @@ -hyp_utils/adv \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/steps_be b/egs/voxceleb/adv.v1/steps_be deleted file mode 120000 index b2098c2a..00000000 --- a/egs/voxceleb/adv.v1/steps_be +++ /dev/null @@ -1 +0,0 @@ -../v1/steps_be \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/steps_fe b/egs/voxceleb/adv.v1/steps_fe deleted file mode 120000 index 73ccc1eb..00000000 --- a/egs/voxceleb/adv.v1/steps_fe +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/kaldi/vad \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/steps_pyfe b/egs/voxceleb/adv.v1/steps_pyfe deleted file mode 120000 index 7b9d122a..00000000 --- a/egs/voxceleb/adv.v1/steps_pyfe +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/feats \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/steps_xvec b/egs/voxceleb/adv.v1/steps_xvec deleted file mode 120000 index af66a94d..00000000 --- a/egs/voxceleb/adv.v1/steps_xvec +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/xvectors \ No newline at end of file diff --git a/egs/voxceleb/adv.v1/utils b/egs/voxceleb/adv.v1/utils deleted file mode 120000 index 3d590a1d..00000000 --- a/egs/voxceleb/adv.v1/utils +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/kaldi/utils \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/README.md b/egs/voxceleb/vae.v1/README.md deleted file mode 100644 index 1514fae4..00000000 --- a/egs/voxceleb/vae.v1/README.md +++ /dev/null @@ -1,89 +0,0 @@ -# VoxCeleb Version 3 - -Last update 2020/07/09 - -This recipe is a work in progress. - -Recipe to evaluate generative models on VoxCeleb. -We train models on VoxCeleb2 and evaluate on full VoxCeleb1. -The goal is to evaluate the ability of generative models to -reconstruct VoxCeleb1 data or to generate data from scratch. - -## Models included: - - The following models can be evaluated with this recipe: - - Basic Autoencoders (AE) - - Variational Autoencoders (VAE) - - VQ-VAE - - Denoising AE, VAE, VQ-VAE - -## Training Data - - - Autoencoders, VAE, VQ-VAE, GAN are trained on - - VoxCeleb2 dev+test - - Denoising versions are trained on - - VoxCeleb2 dev+test + augmentation with - - MUSAN noise - - RIR reverberation - -## Test Data - - - Test data is the full VoxCeleb 1 - -## Usage - - - Run the run_stepnumber_*.sh scripts in sequence - - Depending on the model that you are testing, you can skip some steps - - if not running denoising versions, skip steps 3 and 4 - - Run only the train/eval steps corresponding to the model that you are using - -## Results - -We compute the average of the metrics across VoxCeleb1; values in parentheses are std. -We report EER on the VoxCeleb1 Test Original Clean Task using reconstructed log-filter-banks and the LResNet34 x-vector trained in recipe v1.1. -Baseline EER=1.94% when using original log-filter-banks. - -### Models trained without augmentation - -| Config | Model Type | Architecture | Latent-channels | Compression (bits x/bits z) | ELBO/dim (std) | MSE (std) | L1 (std) | codebook size | EER(%) | -| ------ | ---------- | ------------ | :--------: | :-------: | :----: | :----: | :----: | :----: | :----: | -| config_vae_dc1d_b4d256_z80_c8.opt.lr0.01.v1.sh | VAE | DC1d Enc-Dec
dc-blocks=4 / hid-channels=256 | 80 | 8 | -1.96 (0.62) | 1.57 (0.91) | 0.90 (0.24) | | 16.36 | -| config_vae_dc1d_b9d256_z80_c8.opt.lr0.01.v1.sh | VAE | DC1d Enc-Dec
dc-blocks=9 / hid-channels=256 | 80 | 8 | -1.95 (0.62) | 1.56 (0.91) | 0.89 (0.24) | -| config_vae_resnet1d_b4d256_z80_c8.opt.lr0.01.v1.sh | VAE | ResNet1d Enc-Dec
res-blocks=4/ hid-channels=256 | 80 | 8 | -1.97 (0.65) | 1.55 (0.93) | 0.89 (0.25) | | 15.05 | -| config_vae_resnet1d_b8d256_z80_c8.opt.lr0.01.v1.sh | VAE | ResNet1d Enc-Dec
res-blocks=8/ hid-channels=256 | 80 | 8 | -1.98 (0.65) | 1.55 (0.93) | 0.88 (0.25) | | 13.45 | -| config_vae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh | VAE | ResNet1d Enc-Dec
res-blocks=16/ hid-channels=256 | 80 | 8 | -1.98 (0.69) | 1.54 (0.94) | 0.88 (0.25) | | 13.45 | -| config_vae_dc2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh | VAE | DC2d Enc-Dec
dc-blocks=4 / hid-channels=64 | 80 | 0.8 | -2.25 (1.00) | 1.49 (1.06) | 0.84 (0.29) | | 10.04 | -| config_vae_dc2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh | VAE | DC2d Enc-Dec
dc-blocks=8 / hid-channels=64 | 80 | 0.8 | -2.23 (1.00) | 1.49 (1.06) | 0.84 (0.29) | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512_c2275.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 2275 | -1.84 (0.21) | 2.20 (0.71) | 1.12 (0.16) | 512 | 28.42 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x2_c1138.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 1138 | -1.79 (0.32) | 1.86 (0.78) | 1.01 (0.19) | 512x2 | 22.08 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 569 | -1.40 (0.43) | 1.69 (0.83) | 0.95 (0.21) | 512x4 | 19.18 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569_predvar.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 569 | -1.78 (0.42) | 1.70 (0.83) | 0.95 (0.21) | 512x4 | 18.16 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 284 | -1.87 (0.59) | 1.56 (0.89) | 0.89 (0.23) | 512x8 | 15.48 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 142 | -2.04 (0.83) | 1.46 (0.96) | 0.84 (0.27) | 512x16 | 11.77 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 71 | -2.15 (1.4) | 1.43 (1.08) | 0.80 (0.32) | 512x32 | 8.13 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x64_c36.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 36 | -9.27 (8.31) | 1.49 (1.22) | 0.79 (0.36) | 512x64 | 6.41 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x128_c18.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 18 | -20.97 (20.62) | 1.46 (1.24) | 0.77 (0.38) | 512x128 | 5.67 | -| config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x256_c9.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 9 | -27.91 (26.00) | 1.49 (1.27) | 0.78 (0.39) | 512x256 | 5.41 | -| config_vqvae_transformer_b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh | VQ-VAE | Xformer Enc
blocks=6/ d_model=512 / heads=8 / d_ff=2048 | 512 | 36 | -1.74(0.31) | 0.48 (0.15) | 0.52 (0.08) | 512x8 | 10.49 | -| config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh | VQ-VAE | Xformer Enc
blocks=6 / d_model=512 / heads=8 / att-context=25 / d_ff=2048 | 512 | 36 | -1.61(0.15) | 0.42 (0.08) | 0.49 (0.05) | 512x8 | 4.26 | -| config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | VQ-VAE | Xformer Enc
blocks=6 / d_model=512 / heads=8 / att-context=25 / d_ff=2048
RAdam Opt. | 512 | 36 | -1.33(0.15) | 0.28 (0.05) | 0.40 (0.03) | 512x8 | 4.06 | -| config_vqvae_transformer_b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | VQ-VAE | Xformer Enc
blocks=6 / d_model=512 / heads=8 / d_ff=2048
Rel. Pos Enc.
RAdam Opt. | 512 | 36 | -1.29(0.10) | 0.27 (0.05) | 0.39 (0.03) | 512x8 | 4.21 | -| config_vqvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | VQ-VAE | Xformer Enc
blocks=6 / d_model=512 / heads=8 / att-context=25 / d_ff=2048
Rel. Pos Enc.
RAdam Opt. | 512 | 36 | -1.30(0.09) | 0.27 (0.04) | 0.39 (0.03) | 512x8 | 4.02 | -| config_vqvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | VQ-VAE | Conformer Enc
blocks=6 / d_model=512 / heads=8 / att-context=25 / d_ff=2048
RAdam Opt. | 512 | 36 | -1.26(0.10) | 0.28 (0.04) | 0.39 (0.03) | 512x8 | 4.06 | - - -### Models trained with augmentation (Denoising versions) - -| Config | Model Type | Architecture | Latent-channels | Compression (bits x/bits z) | ELBO/dim (std) | MSE (std) | L1 (std) | codebook size | EER(%) | -| ------ | ---------- | ------------ | :--------: | :-------: | :----: | :----: | :----: | :----: | :----: | -| config_dvae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh | VAE | ResNet1d Enc-Dec
res-blocks=16 / hid-channels=256 | 80 | 8 | -1.77 (0.33) | 1.67 (0.87) | 0.94 (0.22) | | 16.70 | -| config_dvae_resnet2d_b16c64_z80_c0.8.opt.lr0.01.v1.sh | VAE | ResNet2d Enc-Dec
res-blocks=16 / base-channels=64 | 80 | 0.8 | -1.77 (0.39) | 1.57 (0.92) | 0.89 (0.25) | | 12.40 | -| config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 569 | -1.75 (0.29) | 1.78 (0.84) | 0.98 (0.21) | 512x4 | 18.37 | -| config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 284 | -1.80 (0.42) | 1.69 (0.83) | 0.95 (0.21) | 512x8 | 15.19 | -| config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 142 | -1.81 (0.42) | 1.55 (0.97) | 0.87 (0.26) | 512x16 | 11.37 | -| config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh | VQ-VAE | ResNet1d Enc-Dec
res-blocks=8 / hid-channels=256 | 256 | 71 | -1.95 (0.49) | 1.47 (1.03) | 0.83 (0.30) | 512x32 | 8.75 | -| config_vqdvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh | VQ-VAE | Xformer Enc
blocks=6/ d_model=512 / heads=8 / att-context=25 / d_ff=2048
Radam Opt. | 512 | 36 | -1.85 (0.13) | 0.56 (0.31) | 0.57 (0.11) | 512x8 | 5.3 | -| config_vqdvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.005.v6.sh | VQ-VAE | Xformer Enc
blocks=6/ d_model=512 / heads=8 / d_ff=2048
Rel. Pos. Enc
Radam Opt. | 512 | 36 | -1.77 (0.05) | 0.43 (0.10) | 0.51 (0.04) | 512x8 | 4.56 | -| config_vqdvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.0025.v6.sh | VQ-VAE | Conformer Enc
blocks=6/ d_model=512 / heads=8 / d_ff=2048
Rel. Pos. Enc
Radam Opt. | 512 | 36 | -1.83 (0.05) | 0.59 (0.11) | 0.59 (0.04) | 512x8 | 6.56 | - - diff --git a/egs/voxceleb/vae.v1/cmd.sh b/egs/voxceleb/vae.v1/cmd.sh deleted file mode 100755 index fe9c55b0..00000000 --- a/egs/voxceleb/vae.v1/cmd.sh +++ /dev/null @@ -1,25 +0,0 @@ -# you can change cmd.sh depending on what type of queue you are using. -# If you have no queueing system and want to run on a local machine, you -# can change all instances 'queue.pl' to run.pl (but be careful and run -# commands one by one: most recipes will exhaust the memory on your -# machine). queue.pl works with GridEngine (qsub). slurm.pl works -# with slurm. Different queues are configured differently, with different -# queue names and different ways of specifying things like memory; -# to account for these differences you can create and edit the file -# conf/queue.conf to match your queue's configuration. Search for -# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, -# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. - -if [ "$(hostname -d)" == "cm.gemini" ];then - #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" - export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" - export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" - export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" -else - export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" - export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" - export cuda_eval_cmd="$train_cmd" -fi - - - diff --git a/egs/voxceleb/vae.v1/conf b/egs/voxceleb/vae.v1/conf deleted file mode 120000 index 7dfe9dce..00000000 --- a/egs/voxceleb/vae.v1/conf +++ /dev/null @@ -1 +0,0 @@ -../../sre19-cmn2/v1/conf \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/datapath.sh b/egs/voxceleb/vae.v1/datapath.sh deleted file mode 100644 index 632362a7..00000000 --- a/egs/voxceleb/vae.v1/datapath.sh +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# -# Paths to the databases used in the experiment - -#paths to databases - -if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then - voxceleb1_root=/export/corpora5/VoxCeleb1 - voxceleb2_root=/export/corpora5/VoxCeleb2 - musan_root=/export/corpora5/JHU/musan -elif [ "$(hostname --domain)" == "cm.gemini" ];then - voxceleb1_root=/expscratch/dsnyder/VoxCeleb1 - voxceleb2_root=/expscratch/dgromero/corpora-open/vox2 - musan_root=/expscratch/dgromero/corpora-open/musan -else - echo "Put your database paths here" - exit 1 -fi - - diff --git a/egs/voxceleb/vae.v1/default_config.sh b/egs/voxceleb/vae.v1/default_config.sh deleted file mode 120000 index 5755326d..00000000 --- a/egs/voxceleb/vae.v1/default_config.sh +++ /dev/null @@ -1 +0,0 @@ -global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/global_conf/config_dvae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_dvae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh deleted file mode 100644 index 19b1cedf..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_dvae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,30 +0,0 @@ -# Denoising VAE with symmetric ResNet1D encoder-decoder with -# 16 residual blocks, 256 dim per block, latent_dim=80, compression factor=8 - -nnet_data=voxceleb2cat_train_combined -batch_size_1gpu=128 -eff_batch_size=512 # effective batch size 
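-# (sketch, assumption: the trainer reaches eff_batch_size via gradient
-#  accumulation and/or multiple GPUs, e.g. 512 = batch_size_1gpu(128) x 4)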
-min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=dvae -narch=resnet1d -vae_opt="--in-feats 80" -enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 3 4 6 3 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 3 4 6 3 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b16d256_z${latent_dim}_c8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=90 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0090.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_dvae_resnet2d_b16c64_z80_c0.8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_dvae_resnet2d_b16c64_z80_c0.8.opt.lr0.01.v1.sh deleted file mode 100644 index 68fbba13..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_dvae_resnet2d_b16c64_z80_c0.8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,29 +0,0 @@ -# Denoising VAE with symmetric ResNet2D encoder-decoder with -# 16 residual blocks, 64 base channels, latent_dim=80, compression factor=0.8 - -nnet_data=voxceleb2cat_train_combined -batch_size_1gpu=16 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=dvae -narch=resnet2d -enc_opt="--enc.in-conv-channels 64 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 2 2 2 2 --enc.resb-channels 64 128 256 512 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 512 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 2 2 2 2 --dec.resb-channels 512 256 128 64 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b16c64_z${latent_dim}_c0.8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=100 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0100.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_dc1d_b4d256_z80_c8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_dc1d_b4d256_z80_c8.opt.lr0.01.v1.sh deleted file mode 100644 index 3dc324ae..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_dc1d_b4d256_z80_c8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,29 +0,0 @@ -# VAE with 
symmetric DC1 encoder-decoder with 4 layers, 256 dim per layer, latent_dim=80, compression factor=8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=512 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=dc1d -vae_opt="--in-feats 80" -enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.conv-repeats 1 1 1 1 --enc.conv-channels 256 --enc.conv-kernel-sizes 3 --enc.conv-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.conv-repeats 1 1 1 1 --dec.conv-channels 256 --dec.conv-kernel-sizes 3 --dec.conv-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b4d256_z${latent_dim}_c8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=540 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0540.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_dc1d_b9d256_z80_c8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_dc1d_b9d256_z80_c8.opt.lr0.01.v1.sh deleted file mode 100644 index 11d79a6b..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_dc1d_b9d256_z80_c8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,30 +0,0 @@ -# VAE with symmetric DC1 encoder-decoder with 9 layers, 256 dim per layer, latent_dim=80, compression factor=8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=512 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=dc1d -vae_opt="--in-feats 80" -enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.conv-repeats 2 2 3 2 --enc.conv-channels 256 --enc.conv-kernel-sizes 3 --enc.conv-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.conv-repeats 2 2 3 2 --dec.conv-channels 256 --dec.conv-kernel-sizes 3 --dec.conv-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" - -nnet_name=${model_type}_${narch}_b9d256_z${latent_dim}_c8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=550 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0550.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_dc2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_dc2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh deleted file mode 100644 index 
6de722df..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_dc2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,30 +0,0 @@ -# VAE with symmetric deep conv 2D encoder-decoder with -# 4 residual blocks, 64 base channels , latent_channels=80, compression factor=0.8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=64 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=dc2d -vae_opt="" -enc_opt="--enc.in-conv-channels 64 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.conv-repeats 1 1 1 1 --enc.conv-channels 64 128 256 512 --enc.conv-kernel-sizes 3 --enc.conv-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 512 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.conv-repeats 1 1 1 1 --dec.conv-channels 512 256 128 64 --dec.conv-kernel-sizes 3 --dec.conv-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b4c64_z${latent_dim}_c0.8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=500 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0440.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_dc2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_dc2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh deleted file mode 100644 index 879ce269..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_dc2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,30 +0,0 @@ -# VAE with symmetric deep conv 2D encoder-decoder with -# 8 residual blocks, 64 base channels , latent_channels=80, compression factor=0.8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=dc2d -vae_opt="" -enc_opt="--enc.in-conv-channels 64 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.conv-repeats 2 2 2 2 --enc.conv-channels 64 128 256 512 --enc.conv-kernel-sizes 3 --enc.conv-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 512 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.conv-repeats 2 2 2 2 --dec.conv-channels 512 256 128 64 --dec.conv-kernel-sizes 3 --dec.conv-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b8c64_z${latent_dim}_c0.8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=400 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0400.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth 
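The compression factors baked into these config names can be sanity-checked with a little arithmetic (a minimal sketch, assuming 80-dim float32 input features, the x8 temporal downsampling implied by the strides 1 2 2 2 in these encoders, and log2(512)=9 bits per codebook index):

# bits-in / bits-out for the VQ-VAE ResNet1d configs
for g in 4 8 16 32 64 128 256; do
  awk -v g=$g 'BEGIN{ printf "512x%d -> c=%.0f\n", g, 80*32*8/(g*9) }'
done
# -> 569, 284, 142, 71, 36, 18, 9, matching the c569 ... c9 name suffixes

The same arithmetic gives c8 for the plain VAEs (80 float latents at 1/8 frame rate) and, if the 2d latents keep a 1/8-downsampled frequency axis, c0.8 for the dc2d/resnet2d models.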
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh deleted file mode 100644 index aca516a1..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b16d256_z80_c8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,30 +0,0 @@ -# VAE with symmetric ResNet1D encoder-decoder with -# 16 residual blocks, 256 dim per block, latent_dim=80, compression factor=8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=128 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=resnet1d -vae_opt="--in-feats 80" -enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 3 4 6 3 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 3 4 6 3 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b16d256_z${latent_dim}_c8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=410 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0410.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b4d256_z80_c8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b4d256_z80_c8.opt.lr0.01.v1.sh deleted file mode 100644 index be0a00b6..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b4d256_z80_c8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,30 +0,0 @@ -# VAE with symmetric ResNet1D encoder-decoder with -# 16 residual blocks, 256 dim per block, latent_dim=80, compression factor=8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=128 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=resnet1d -vae_opt="--in-feats 80" -enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 1 1 1 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 1 1 1 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b4d256_z${latent_dim}_c8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=370 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0370.pth - -# xvector network trained with 
recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b8d256_z80_c8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b8d256_z80_c8.opt.lr0.01.v1.sh deleted file mode 100644 index 167b3837..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet1d_b8d256_z80_c8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,30 +0,0 @@ -# VAE with symmetric ResNet1D encoder-decoder with -# 8 residual blocks, 256 dim per block, latent_dim=80, compression factor=8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=128 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=resnet1d -vae_opt="--in-feats 80" -enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b8d256_z${latent_dim}_c8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=420 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0420.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_resnet2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh deleted file mode 100644 index 0240d1d0..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet2d_b4c64_z80_c0.8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,31 +0,0 @@ -# VAE with symmetric ResNet2D encoder-decoder with -# 4 residual blocks, 64 base channels , latent_channels=80, compression factor=0.8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=resnet2d -vae_opt="" -enc_opt="--enc.in-conv-channels 64 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 1 1 1 --enc.resb-channels 64 128 256 512 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 512 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 1 1 1 --dec.resb-channels 512 256 128 64 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" 
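-# (sketch, assumption: exp_lr holds the lr for hold-steps after warmup, then
-#  multiplies it by decay-rate every decay-steps, i.e. the lr halves every 16k
-#  optimizer steps until it floors at min-lr=1e-5)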
-nnet_name=${model_type}_${narch}_b4c64_z${latent_dim}_c0.8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=600 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0205.pth - - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vae_resnet2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh deleted file mode 100644 index ff503162..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vae_resnet2d_b8c64_z80_c0.8.opt.lr0.01.v1.sh +++ /dev/null @@ -1,31 +0,0 @@ -# VAE with symmetric ResNet2D encoder-decoder with -# 8 residual blocks, 64 base channels, latent_channels=80, compression factor=0.8 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=16 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=80 -model_type=vae -narch=resnet2d -vae_opt="" -enc_opt="--enc.in-conv-channels 64 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 2 2 2 2 --enc.resb-channels 64 128 256 512 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 80 --dec.in-conv-channels 512 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 2 2 2 2 --dec.resb-channels 512 256 128 64 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b8c64_z${latent_dim}_c0.8_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=205 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0205.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth - diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.0025.v6.sh b/egs/voxceleb/vae.v1/global_conf/config_vqdvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.0025.v6.sh deleted file mode 100644 index 98af99a2..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.0025.v6.sh +++ /dev/null @@ -1,45 +0,0 @@ -# VQ-VAE with Conformer Encoder for Enc and Dec with -# 6 conformer blocks, relative pos encoder, d_model=512, heads=8, d_ff=2048, -# latent_dim=512, codebook=512x8, compression factor=36, att-context=25 - -nnet_data=voxceleb2cat_train_combined -batch_size_1gpu=16 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.0025 - -model_type=vq-dvae - -dropout=0 -narch=conformer-enc-v1 -blocks=6 -d_model=512 -heads=8 -d_ff=2048 -att_context=25 -conv_kernel=31 - -latent_dim=512 -vq_type=multi-ema-k-means-vq -vq_clusters=512 -num_groups=8 - -vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups" 
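-# (sketch: 8 groups x log2(512)=9 bits = 72 bits per latent frame vs
-#  80 feats x 32 bits = 2560 bits per input frame, so 2560/72 ~ 36, the c36
-#  in the config name, assuming the conformer keeps the input frame rate)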
-enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type local-scaled-dot-prod-v1 --enc.att-context $att_context --enc.conv-kernel-sizes $conv_kernel" -dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type local-scaled-dot-prod-v1 --dec.att-context $att_context --dec.conv-kernel-sizes $conv_kernel" - -opt_opt="--optim.opt-type radam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 10000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 10000 --lrsched.update-lr-on-opt-step" - -nnet_name=${model_type}_${narch}_lac${att_context}b${blocks}d${d_model}h${heads}cbk${conv_kernel}linff${d_ff}_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv6_radam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=40 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0040.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh deleted file mode 100644 index 841207ea..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh +++ /dev/null @@ -1,33 +0,0 @@ -# VQ-VAE with symmetric ResNet1D encoder-decoder with -# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=142 - -nnet_data=voxceleb2cat_train_combined -batch_size_1gpu=256 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=256 -vq_clusters=512 -num_groups=16 -narch=resnet1d -model_type=vq-dvae -vq_type=multi-ema-k-means-vq -vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups" -enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=100 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0100.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name 
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh
deleted file mode 100644
index 795a8d4f..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=71
-
-nnet_data=voxceleb2cat_train_combined
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=32
-narch=resnet1d
-model_type=vq-dvae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=100
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0100.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh
deleted file mode 100644
index da17dc19..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=569
-# Trained for denoising
-
-nnet_data=voxceleb2cat_train_combined
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=4
-narch=resnet1d
-model_type=vq-dvae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=90
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0090.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh
deleted file mode 100644
index a2d8005e..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=284
-
-nnet_data=voxceleb2cat_train_combined
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=8
-narch=resnet1d
-model_type=vq-dvae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=100
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0100.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256swish_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256swish_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh
deleted file mode 100644
index 435460c2..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_resnet1d_b8d256swish_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=142
-
-nnet_data=voxceleb2cat_train_combined
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=16
-narch=resnet1d
-model_type=vq-dvae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2 --enc.hid-act swish"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2 --dec.hid-act swish"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256swish_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=100
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0100.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh b/egs/voxceleb/vae.v1/global_conf/config_vqdvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
deleted file mode 100644
index f99031d1..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-# VQ-VAE with Transformer Encoder for Enc and Dec with
-# 6 transformer blocks, d_model=512, heads=8, d_ff=2048, latent_dim=512, codebook=512x8, compression factor=36
-
-nnet_data=voxceleb2cat_train_combined
-batch_size_1gpu=32
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-
-model_type=vq-dvae
-
-dropout=0
-narch=transformer-enc-v1
-blocks=6
-d_model=512
-heads=8
-d_ff=2048
-att_context=25
-
-latent_dim=512
-vq_type=multi-ema-k-means-vq
-vq_clusters=512
-num_groups=8
-
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type local-scaled-dot-prod-v1 --enc.att-context $att_context"
-dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type local-scaled-dot-prod-v1 --dec.att-context $att_context"
-
-opt_opt="--optim.opt-type radam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 2000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-
-nnet_name=${model_type}_${narch}_lac${att_context}b${blocks}d${d_model}h${heads}linff${d_ff}_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv4_radam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=40
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0040.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.005.v6.sh b/egs/voxceleb/vae.v1/global_conf/config_vqdvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.005.v6.sh
deleted file mode 100644
index 03fe5a33..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqdvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.005.v6.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-# VQ-VAE with Transformer Encoder for Enc and Dec with
-# 6 transformer blocks, relative pos encoder, d_model=512, heads=8, d_ff=2048, latent_dim=512, codebook=512x8, compression factor=36
-
-nnet_data=voxceleb2cat_train_combined
-batch_size_1gpu=32
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.005
-
-model_type=vq-dvae
-
-dropout=0
-narch=transformer-enc-v1
-blocks=6
-d_model=512
-heads=8
-d_ff=2048
-att_context=25
-
-latent_dim=512
-vq_type=multi-ema-k-means-vq
-vq_clusters=512
-num_groups=8
-
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type local-scaled-dot-prod-v1 --enc.att-context $att_context --enc.rel-pos-enc"
-dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type local-scaled-dot-prod-v1 --dec.att-context $att_context --dec.rel-pos-enc"
-
-opt_opt="--optim.opt-type radam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 10000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 10000 --lrsched.update-lr-on-opt-step"
-
-nnet_name=${model_type}_${narch}_lac${att_context}b${blocks}d${d_model}h${heads}linff${d_ff}rpe_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv6_radam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=40
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0040.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
deleted file mode 100644
index e4962443..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_conformer_lac25b6d512h8cbk31ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-# VQ-VAE with Conformer Encoder for Enc and Dec with
-# 6 conformer blocks, d_model=512, heads=8, d_ff=2048, latent_dim=512, codebook=512x8, compression factor=36
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=16
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-
-model_type=vq-vae
-
-dropout=0
-narch=conformer-enc-v1
-blocks=6
-d_model=512
-heads=8
-d_ff=2048
-att_context=25
-conv_kernel=31
-
-latent_dim=512
-vq_type=multi-ema-k-means-vq
-vq_clusters=512
-num_groups=8
-
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type local-scaled-dot-prod-v1 --enc.att-context $att_context --enc.conv-kernel-sizes $conv_kernel"
-dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type local-scaled-dot-prod-v1 --dec.att-context $att_context --dec.conv-kernel-sizes $conv_kernel"
-
-opt_opt="--optim.opt-type radam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 2000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_lac${att_context}b${blocks}d${d_model}h${heads}cbk${conv_kernel}linff${d_ff}_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv4_radam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=120
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0120.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512_c2275.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512_c2275.opt.lr0.01.v1.sh
deleted file mode 100644
index 31487e05..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512_c2275.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=2275
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=1
-narch=resnet1d
-model_type=vq-vae
-vq_type=ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
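# A note on the vq_type naming in these configs: the single-group file above
# sets vq_type=ema-k-means-vq with num_groups=1, while every config with
# num_groups>1 switches to multi-ema-k-means-vq. The names suggest that the
# multi-group quantizer splits the latent vector into num_groups sub-vectors,
# each quantized against its own codebook of vq_clusters entries; this reading
# is inferred from the names and the quoted compression factors, not stated in
# the deleted files themselves.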
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=370 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0370.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x128_c18.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x128_c18.opt.lr0.01.v1.sh deleted file mode 100644 index 56deb6c8..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x128_c18.opt.lr0.01.v1.sh +++ /dev/null @@ -1,33 +0,0 @@ -# VQ-VAE with symmetric ResNet1D encoder-decoder with -# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=18 - -nnet_data=voxceleb2cat_train -batch_size_1gpu=256 -eff_batch_size=512 # effective batch size -min_chunk=400 -max_chunk=400 -ipe=1 -lr=0.01 -dropout=0 -latent_dim=256 -vq_clusters=512 -num_groups=128 -narch=resnet1d -model_type=vq-vae -vq_type=multi-ema-k-means-vq -vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups" -enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2" -dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=550 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0550.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh deleted file mode 100644 index f5b56dc2..00000000 --- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x16_c142.opt.lr0.01.v1.sh +++ /dev/null @@ -1,33 +0,0 @@ -# VQ-VAE with symmetric ResNet1D encoder-decoder with -# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=142 - 
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=16
-narch=resnet1d
-model_type=vq-vae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=440
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0440.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x256_c9.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x256_c9.opt.lr0.01.v1.sh
deleted file mode 100644
index 7998a6c3..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x256_c9.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=9
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=256
-narch=resnet1d
-model_type=vq-vae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=360
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0360.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x2_c1138.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x2_c1138.opt.lr0.01.v1.sh
deleted file mode 100644
index 1252c9e4..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x2_c1138.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=1138
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=2
-narch=resnet1d
-model_type=vq-vae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=510
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0510.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh
deleted file mode 100644
index 59327eb4..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x32_c71.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=71
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=32
-narch=resnet1d
-model_type=vq-vae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=440
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0440.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh
deleted file mode 100644
index 2082dd74..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=569
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=4
-narch=resnet1d
-model_type=vq-vae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=370
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0370.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569_predvar.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569_predvar.opt.lr0.01.v1.sh
deleted file mode 100644
index 6ce2b144..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x4_c569_predvar.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=569
-# P(x|z) with sample dependent variances predicted by nnet
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=4
-narch=resnet1d
-model_type=vq-vae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups --px-pdf normal-diag-cov"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_predvar_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=400
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0400.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x64_c36.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x64_c36.opt.lr0.01.v1.sh
deleted file mode 100644
index 8ef652f3..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x64_c36.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=36
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=64
-narch=resnet1d
-model_type=vq-vae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
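# The compression factors quoted in these headers are consistent with simple
# codebook arithmetic, assuming 32-bit input features and the 8x time
# downsampling implied by the encoder strides 1 2 2 2:
#   factor ~ (80 feats x 32 bit x 8 frames) / (num_groups x log2(512) bit)
# e.g. for the num_groups=64 config above: 20480 / (64 x 9) = 35.6 ~ 36 (c36),
# and for num_groups=1: 20480 / 9 = 2275 (c2275).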
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=460
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0460.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh
deleted file mode 100644
index 56498b78..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_resnet1d_b8d256_emakmeansvq_z256cb512x8_c284.opt.lr0.01.v1.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# VQ-VAE with symmetric ResNet1D encoder-decoder with
-# 8 residual blocks, 256 dim per block, latent_dim=256, codebook=512, compression factor=284
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=256
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-dropout=0
-latent_dim=256
-vq_clusters=512
-num_groups=8
-narch=resnet1d
-model_type=vq-vae
-vq_type=multi-ema-k-means-vq
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.in-conv-channels 256 --enc.in-kernel-size 5 --enc.in-stride 1 --enc.resb-repeats 1 2 3 2 --enc.resb-channels 256 --enc.resb-kernel-sizes 3 --enc.resb-strides 1 2 2 2"
-dec_opt="--dec.in-channels 256 --dec.in-conv-channels 256 --dec.in-kernel-size 3 --dec.in-stride 1 --dec.resb-repeats 1 2 3 2 --dec.resb-channels 256 --dec.resb-kernel-sizes 3 --dec.resb-strides 1 2 2 2"
-
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 16000 --lrsched.hold-steps 16000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 8000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b8d256_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv1_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=430
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0430.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh
deleted file mode 100644
index 3c193e06..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-# VQ-VAE with Transformer Encoder for Enc and Dec with
-# 6 transformer blocks, d_model=512, heads=8, d_ff=2048, latent_dim=512, codebook=512x8, compression factor=36
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=16
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-
-model_type=vq-vae
-
-dropout=0
-narch=transformer-enc-v1
-blocks=6
-d_model=512
-heads=8
-d_ff=2048
-
-latent_dim=512
-vq_type=multi-ema-k-means-vq
-vq_clusters=512
-num_groups=8
-
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type scaled-dot-prod-v1"
-dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type scaled-dot-prod-v1"
-
-
-#opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp"
-#lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 12000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 2000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b${blocks}d${d_model}h${heads}linff${d_ff}_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv4_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=160
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0160.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
-
-
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
deleted file mode 100644
index ba68e597..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-# VQ-VAE with Transformer Encoder for Enc and Dec with
-# 6 transformer blocks, d_model=512, heads=8, d_ff=2048, latent_dim=512, codebook=512x8, compression factor=36
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=16
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-
-model_type=vq-vae
-
-dropout=0
-narch=transformer-enc-v1
-blocks=6
-d_model=512
-heads=8
-d_ff=2048
-
-latent_dim=512
-vq_type=multi-ema-k-means-vq
-vq_clusters=512
-num_groups=8
-
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type scaled-dot-prod-v1 --enc.rel-pos-enc"
-dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type scaled-dot-prod-v1 --dec.rel-pos-enc"
-
-
-opt_opt="--optim.opt-type radam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 2000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_b${blocks}d${d_model}h${heads}linff${d_ff}rpe_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv4_radam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=150
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0150.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh
deleted file mode 100644
index f02db8e9..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36.opt.lr0.01.v4.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-# VQ-VAE with Transformer Encoder for Enc and Dec with
-# 6 transformer blocks, d_model=512, heads=8, d_ff=2048, latent_dim=512, codebook=512x8, compression factor=36
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=32
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-
-model_type=vq-vae
-
-dropout=0
-narch=transformer-enc-v1
-blocks=6
-d_model=512
-heads=8
-d_ff=2048
-att_context=25
-
-latent_dim=512
-vq_type=multi-ema-k-means-vq
-vq_clusters=512
-num_groups=8
-
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type local-scaled-dot-prod-v1 --enc.att-context $att_context"
-dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type local-scaled-dot-prod-v1 --dec.att-context $att_context"
-
-
-#opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp"
-#lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 12000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 2000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_lac${att_context}b${blocks}d${d_model}h${heads}linff${d_ff}_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv4_adam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=170
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0170.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
deleted file mode 100644
index 59a8843d..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-# VQ-VAE with Transformer Encoder for Enc and Dec with
-# 6 transformer blocks, d_model=512, heads=8, d_ff=2048, latent_dim=512, codebook=512x8, compression factor=36
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=32
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-
-model_type=vq-vae
-
-dropout=0
-narch=transformer-enc-v1
-blocks=6
-d_model=512
-heads=8
-d_ff=2048
-att_context=25
-
-latent_dim=512
-vq_type=multi-ema-k-means-vq
-vq_clusters=512
-num_groups=8
-
-vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups"
-enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type local-scaled-dot-prod-v1 --enc.att-context $att_context"
-dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type local-scaled-dot-prod-v1 --dec.att-context $att_context"
-
-
-#opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp"
-#lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 12000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-opt_opt="--optim.opt-type radam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5"
-lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 2000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
-nnet_name=${model_type}_${narch}_lac${att_context}b${blocks}d${d_model}h${heads}linff${d_ff}_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv4_radam_lr${lr}_b${eff_batch_size}.$nnet_data
-nnet_num_epochs=170
-num_augs=5
-nnet_dir=exp/vae_nnets/$nnet_name
-nnet=$nnet_dir/model_ep0170.pth
-
-# xvector network trained with recipe v1.1
-xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1
-xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name
-xvec_nnet=$xvec_nnet_dir/model_ep0070.pth
diff --git a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh b/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
deleted file mode 100644
index a04f4b58..00000000
--- a/egs/voxceleb/vae.v1/global_conf/config_vqvae_transformer_lac25b6d512h8ff2048rpe_emakmeansvq_z512cb512x8_c36_radam.opt.lr0.01.v4.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-# VQ-VAE with Transformer Encoder for Enc and Dec with
-# 6 transformer blocks, d_model=512, heads=8, d_ff=2048, latent_dim=512, codebook=512x8, compression factor=36
-
-nnet_data=voxceleb2cat_train
-batch_size_1gpu=32
-eff_batch_size=512 # effective batch size
-min_chunk=400
-max_chunk=400
-ipe=1
-lr=0.01
-
-model_type=vq-vae
-
-dropout=0
-narch=transformer-enc-v1
-blocks=6
-d_model=512
-heads=8
-d_ff=2048
-att_context=25
-
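# batch_size_1gpu and eff_batch_size are reconciled at train time by gradient
# accumulation in run_011_train_model.sh (deleted further below); a rough
# sketch of that arithmetic for this config:
#   batch_size=$((32 * ngpu))      # e.g. 128 with ngpu=4
#   grad_acc_steps=4               # round(512 / 128)
# The run script computes grad_acc_steps with awk as int(eff/batch + 0.5),
# clamped to at least 1.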
-latent_dim=512 -vq_type=multi-ema-k-means-vq -vq_clusters=512 -num_groups=8 - -vae_opt="--in-feats 80 --z-dim $latent_dim --vq-type $vq_type --vq-clusters $vq_clusters --vq-groups $num_groups" -enc_opt="--enc.num-blocks $blocks --enc.d-model $d_model --enc.num-heads $heads --enc.ff-type linear --enc.d-ff $d_ff --enc.in-layer-type linear --enc.att-type local-scaled-dot-prod-v1 --enc.att-context $att_context --enc.rel-pos-enc" -dec_opt="--dec.in-feats $latent_dim --dec.num-blocks $blocks --dec.d-model $d_model --dec.num-heads $heads --dec.ff-type linear --dec.d-ff $d_ff --dec.in-layer-type linear --dec.att-type local-scaled-dot-prod-v1 --dec.att-context $att_context --dec.rel-pos-enc" - - -opt_opt="--optim.opt-type radam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 2000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" -nnet_name=${model_type}_${narch}_lac${att_context}b${blocks}d${d_model}h${heads}linff${d_ff}rpe_${vq_type}_z${latent_dim}c${vq_clusters}x${num_groups}_do${dropout}_optv4_radam_lr${lr}_b${eff_batch_size}.$nnet_data -nnet_num_epochs=160 -num_augs=5 -nnet_dir=exp/vae_nnets/$nnet_name -nnet=$nnet_dir/model_ep0160.pth - -# xvector network trained with recipe v1.1 -xvec_nnet_name=fbank80_stmn_lresnet34_e256_arcs30m0.3_do0_adam_lr0.05_b512_amp.v1 -xvec_nnet_dir=../v1.1/exp/xvector_nnets/$xvec_nnet_name -xvec_nnet=$xvec_nnet_dir/model_ep0070.pth diff --git a/egs/voxceleb/vae.v1/hyp_utils b/egs/voxceleb/vae.v1/hyp_utils deleted file mode 120000 index f6d1eb7a..00000000 --- a/egs/voxceleb/vae.v1/hyp_utils +++ /dev/null @@ -1 +0,0 @@ -../../../hyp_utils \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/local b/egs/voxceleb/vae.v1/local deleted file mode 120000 index ce1cbf90..00000000 --- a/egs/voxceleb/vae.v1/local +++ /dev/null @@ -1 +0,0 @@ -../v1/local \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/path.sh b/egs/voxceleb/vae.v1/path.sh deleted file mode 100755 index 6994fdab..00000000 --- a/egs/voxceleb/vae.v1/path.sh +++ /dev/null @@ -1,5 +0,0 @@ - -export HYP_ROOT=$(readlink -f `pwd -P`/../../..) -export TOOLS_ROOT=$HYP_ROOT/tools - -. $TOOLS_ROOT/path.sh diff --git a/egs/voxceleb/vae.v1/run_001_prepare_data.sh b/egs/voxceleb/vae.v1/run_001_prepare_data.sh deleted file mode 100755 index 65ff18d0..00000000 --- a/egs/voxceleb/vae.v1/run_001_prepare_data.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. datapath.sh - - -if [ $stage -le 1 ];then - - # Prepare the VoxCeleb2 dataset for training. - local/make_voxceleb2cat.pl $voxceleb2_root dev 16 data/voxceleb2cat_train - #local/make_voxceleb2cat.pl $voxceleb2_root test 16 data/voxceleb2cat_test - #utils/combine_data.sh data/voxceleb2cat data/voxceleb2cat_train data/voxceleb2cat_test -fi - -if [ $stage -le 2 ];then - # prepare voxceleb1 for test - local/make_voxceleb1_oeh.pl $voxceleb1_root data -fi diff --git a/egs/voxceleb/vae.v1/run_002_compute_evad.sh b/egs/voxceleb/vae.v1/run_002_compute_evad.sh deleted file mode 100755 index eeae00ac..00000000 --- a/egs/voxceleb/vae.v1/run_002_compute_evad.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. 
./cmd.sh -. ./path.sh -set -e -nodes=fs01 -storage_name=$(date +'%m_%d_%H_%M') -vaddir=`pwd`/exp/vad_e -vad_config=conf/vad_16k.yaml - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file - - -if [ $stage -le 1 ]; then - # Prepare to distribute data over multiple machines - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $vaddir/storage ]; then - dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage - if [ "$nodes" == "b0" ];then - utils/create_split_dir.pl \ - utils/create_split_dir.pl \ - /export/b{04,05,06,07}/$dir_name $vaddir/storage - elif [ "$nodes" == "b1" ];then - utils/create_split_dir.pl \ - /export/b{14,15,16,17}/$dir_name $vaddir/storage - elif [ "$nodes" == "c0" ];then - utils/create_split_dir.pl \ - /export/c{06,07,08,09}/$dir_name $vaddir/storage - elif [ "$nodes" == "fs01" ];then - utils/create_split_dir.pl \ - /export/fs01/$dir_name $vaddir/storage - else - echo "we don't distribute data between multiple machines" - fi - fi -fi - -#Train datasets -if [ $stage -le 2 ];then - for name in voxceleb2cat_train voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 40 ? $num_spk:40)) - hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ - --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ - data/${name} exp/make_vad/$name $vaddir - utils/fix_data_dir.sh data/${name} - done -fi - - diff --git a/egs/voxceleb/vae.v1/run_003_compute_fbank.sh b/egs/voxceleb/vae.v1/run_003_compute_fbank.sh deleted file mode 100755 index 713a34cb..00000000 --- a/egs/voxceleb/vae.v1/run_003_compute_fbank.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e -nodes=fs01 -storage_name=$(date +'%m_%d_%H_%M') -fbankdir=`pwd`/exp/fbank -vaddir=`pwd`/exp/fbank -vaddir_gt=`pwd`/exp/vad_gt - -stage=1 -config_file=default_config.sh -feat_vers="numpy" - -. parse_options.sh || exit 1; - -if [ "$feat_vers" == "kaldi" ];then - make_fbank=steps/make_fbank.sh - fbank_cfg=conf/fbank80_16k.conf -else - fbank_cfg=conf/fbank80_16k.yaml - if [ "$feat_vers" == "numpy" ];then - make_fbank=steps_pyfe/make_fbank.sh - else - make_fbank=steps_pyfe/make_torch_fbank.sh - fi -fi - -# Make filterbanks -if [ $stage -le 1 ]; then - # Prepare to distribute data over multiple machines - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $fbankdir/storage ]; then - dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/fbank/storage - if [ "$nodes" == "b0" ];then - utils/create_split_dir.pl \ - utils/create_split_dir.pl \ - /export/b{04,05,06,07}/$dir_name $fbankdir/storage - elif [ "$nodes" == "b1" ];then - utils/create_split_dir.pl \ - /export/b{14,15,16,17}/$dir_name $fbankdir/storage - elif [ "$nodes" == "c0" ];then - utils/create_split_dir.pl \ - /export/c{06,07,08,09}/$dir_name $fbankdir/storage - elif [ "$nodes" == "fs01" ];then - utils/create_split_dir.pl \ - /export/fs01/$dir_name $fbankdir/storage - else - echo "we don't distribute data between multiple machines" - fi - fi -fi - -#Train datasets -if [ $stage -le 2 ];then - for name in voxceleb2cat_train voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 40 ? 
$num_spk:40)) - $make_fbank --write-utt2num-frames true --fbank-config $fbank_cfg --nj $nj --cmd "$train_cmd" \ - data/${name} exp/make_fbank/$name $fbankdir - utils/fix_data_dir.sh data/${name} - done - -fi - - diff --git a/egs/voxceleb/vae.v1/run_004_prepare_augment.sh b/egs/voxceleb/vae.v1/run_004_prepare_augment.sh deleted file mode 100755 index 7d78ae92..00000000 --- a/egs/voxceleb/vae.v1/run_004_prepare_augment.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -# In this script, we augment the SWBD,SRE,MX6 and Voxceleb data with reverberation, -# noise, music, and babble, and combined it with the clean data. -# The combined list will be used to train the xvector DNN. - -frame_shift=0.01 - -if [ $stage -le 1 ]; then - - if [ ! -d "RIRS_NOISES" ]; then - if [ -d ../../sre19-cmn2/v1/RIRS_NOISES ];then - ln -s ../../sre19-cmn2/v1/RIRS_NOISES - else - # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises - wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip - unzip rirs_noises.zip - fi - fi - - # Prepare the MUSAN corpus, which consists of music, speech, and noise - # suitable for augmentation. - local/make_musan.sh $musan_root 16 data - - # Get the duration of the MUSAN recordings. This will be used by the - # script augment_data_dir.py. - for name in speech noise music; do - utils/data/get_utt2dur.sh data/musan_${name} - mv data/musan_${name}/utt2dur data/musan_${name}/reco2dur - done - -fi - - -if [ $stage -le 2 ]; then - - for name in voxceleb2cat_train - do - export TMPDIR=data/tmp - mkdir -p $TMPDIR - - awk -v frame_shift=$frame_shift '{print $1, $2*frame_shift;}' data/$name/utt2num_frames > data/$name/reco2dur - - # Make a reverberated version of the list. Note that we don't add any - # additive noise here. 
- - # Make a version with reverberated speech - rvb_opts=() - rvb_opts+=(--rir-set-parameters "0.2, RIRS_NOISES/real_rirs_isotropic_noises/rir_list") - rvb_opts+=(--rir-set-parameters "0.4, RIRS_NOISES/simulated_rirs/smallroom/rir_list") - rvb_opts+=(--rir-set-parameters "0.4, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") - - python steps/data/reverberate_data_dir.py \ - "${rvb_opts[@]}" \ - --speech-rvb-probability 1 \ - --pointsource-noise-addition-probability 0 \ - --isotropic-noise-addition-probability 0 \ - --num-replications 1 \ - --source-sampling-rate 16000 \ - data/${name} data/${name}_reverb - cp data/${name}/vad.scp data/${name}_reverb/ - utils/copy_data_dir.sh --utt-suffix "-reverb" data/${name}_reverb data/${name}_reverb.new - rm -rf data/${name}_reverb - mv data/${name}_reverb.new data/${name}_reverb - - - # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0:13:8" --fg-noise-dir "data/musan_noise" data/${name} data/${name}_noise - # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/${name} data/${name}_music - # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13:10" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/${name} data/${name}_babble - - - awk '{ $1=$1"-reverb"; print $0}' data/${name}/reco2dur > data/${name}_reverb/reco2dur - - # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0:13:8" --fg-noise-dir "data/musan_noise" data/${name}_reverb data/${name}_reverb_noise - # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/${name}_reverb data/${name}_reverb_music - # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13:10" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/${name}_reverb data/${name}_reverb_babble - - - # Combine noise only - utils/combine_data.sh data/${name}_noise_all \ - data/${name}_noise data/${name}_music data/${name}_babble - - # Combine reverbs - utils/combine_data.sh data/${name}_reverb_all data/${name}_reverb \ - data/${name}_reverb_noise data/${name}_reverb_music data/${name}_reverb_babble - - # Combine reverb, noise, music, and babble into one directory. - utils/combine_data.sh data/${name}_aug data/${name}_reverb_all data/${name}_noise_all - unset TMPDIR - done - -fi - - -if [ $stage -le 3 ];then - # Take a random subset of the augmentations - utils/subset_data_dir.sh data/voxceleb2cat_train_aug \ - $(wc -l data/voxceleb2cat_train/utt2spk | awk '{ print int('$num_augs'*$1)}') \ - data/voxceleb2cat_train_augx${num_augs} - utils/fix_data_dir.sh data/voxceleb2cat_train_augx${num_augs} -fi - - -exit diff --git a/egs/voxceleb/vae.v1/run_005_compute_fbank_augment.sh b/egs/voxceleb/vae.v1/run_005_compute_fbank_augment.sh deleted file mode 100755 index 10d13e03..00000000 --- a/egs/voxceleb/vae.v1/run_005_compute_fbank_augment.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e -fbankdir=`pwd`/exp/fbank - -stage=1 -config_file=default_config.sh -feat_vers="numpy" - -. parse_options.sh || exit 1; -. 
$config_file - -if [ "$feat_vers" == "kaldi" ];then - make_fbank=steps/make_fbank.sh - fbank_cfg=conf/fbank80_16k.conf -else - fbank_cfg=conf/fbank80_16k.yaml - if [ "$feat_vers" == "numpy" ];then - make_fbank=steps_pyfe/make_fbank.sh - else - make_fbank=steps_pyfe/make_torch_fbank.sh - fi -fi - -export TMPDIR=data/tmp -mkdir -p $TMPDIR - -if [ $stage -le 1 ];then - - # Make filterbanks for the augmented data. Note that we do not compute a new - # vad.scp file here. Instead, we use the vad.scp from the clean version of - # the list. - for name in voxceleb2cat_train_augx${num_augs} - do - $make_fbank --write-utt2num-frames true \ - --fbank-config $fbank_cfg --nj 120 --cmd "$train_cmd" \ - data/$name exp/make_fbank/$name $fbankdir - fix_data_dir.sh data/$name - done - -fi - - -if [ $stage -le 2 ];then - - # Combine the clean and augmented lists. - utils/combine_data.sh --extra-files "utt2num_frames" data/voxceleb2cat_train_combined data/voxceleb2cat_train_augx${num_augs} data/voxceleb2cat_train - -fi - -exit - diff --git a/egs/voxceleb/vae.v1/run_010_prepare_gen_model_train_data.sh b/egs/voxceleb/vae.v1/run_010_prepare_gen_model_train_data.sh deleted file mode 100755 index c2f5c832..00000000 --- a/egs/voxceleb/vae.v1/run_010_prepare_gen_model_train_data.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh - -. parse_options.sh || exit 1; -. $config_file - -# Now we prepare the features to generate examples for xvector training. -if [ $stage -le 2 ]; then - # This script applies CMVN and removes nonspeech frames. Note that this is somewhat - # wasteful, as it roughly doubles the amount of training data on disk. After - # creating training examples, this can be removed. - steps_xvec/prepare_feats_for_nnet_train.sh --nj 40 --cmd "$train_cmd" \ - --storage_name voxceleb-vae.v1-$(date +'%m_%d_%H_%M') \ - data/${nnet_data} data/${nnet_data}_no_sil exp/${nnet_data}_no_sil - utils/fix_data_dir.sh data/${nnet_data}_no_sil - -fi - - -if [ $stage -le 3 ]; then - # Now, we need to remove features that are too short after removing silence - # frames. We want at least 4s (400 frames) per utterance. - hyp_utils/remove_short_utts.sh --min-len 400 data/${nnet_data}_no_sil - - # We also want several utterances per speaker. Now we'll throw out speakers - # with fewer than 8 utterances. - hyp_utils/remove_spk_few_utts.sh --min-num-utts 8 data/${nnet_data}_no_sil - -fi - -if [ $stage -le 4 ]; then - # Prepare train and validation lists for x-vectors - local/make_train_lists_sup_embed_with_augm.sh data/${nnet_data}_no_sil data/${nnet_data}_no_sil/lists_xvec -fi - -exit diff --git a/egs/voxceleb/vae.v1/run_011_train_model.sh b/egs/voxceleb/vae.v1/run_011_train_model.sh deleted file mode 100755 index 8c9bb4d4..00000000 --- a/egs/voxceleb/vae.v1/run_011_train_model.sh +++ /dev/null @@ -1,137 +0,0 @@ -#!/bin/bash -# Copyright -# 2019 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -ngpu=1 -config_file=default_config.sh -resume=false -interactive=false -num_workers=8 - -. parse_options.sh || exit 1; -. $config_file -. 
datapath.sh - -batch_size=$(($batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $eff_batch_size | awk '{ x=int($2/$1+0.5); if(x==0){ x=1 }; print x }') -log_interval=$(echo 100*$grad_acc_steps | bc) -list_dir=data/${nnet_data}_no_sil - -args="" -if [ "$resume" == "true" ];then - args="--resume" -fi - -if [ "$interactive" == "true" ];then - export cuda_cmd=run.pl -fi - - - -# Network Training -if [ $stage -le 1 ]; then - mkdir -p $nnet_dir/log - - if [ "$model_type" == "vae" ] || [ "$model_type" == "vq-vae" ];then - # Train VAE - train_exec=torch-train-${model_type}.py - $cuda_cmd --gpu $ngpu $nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --num-gpus $ngpu \ - $train_exec $narch:$narch \ - --data-rspec scp:$list_dir/feats.scp \ - --train-list $list_dir/lists_xvec/train.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --num-frames-file $list_dir/utt2num_frames \ - --min-chunk-length $min_chunk --max-chunk-length $max_chunk \ - --iters-per-epoch $ipe \ - --batch-size $batch_size \ - --num-workers $num_workers $opt_opt $lrs_opt \ - --grad-acc-steps $grad_acc_steps \ - --epochs $nnet_num_epochs \ - --z-dim $latent_dim $enc_opt $dec_opt $vae_opt \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $nnet_dir $args - - # train_exec=torch-train-${narch}-${model_type}.py - - # $cuda_cmd --gpu $ngpu $nnet_dir/log/train.log \ - # hyp_utils/conda_env.sh --num-gpus $ngpu \ - # $train_exec \ - # --data-rspec scp:$list_dir/feats.scp \ - # --train-list $list_dir/lists_xvec/train.scp \ - # --val-list $list_dir/lists_xvec/val.scp \ - # --num-frames-file $list_dir/utt2num_frames \ - # --min-chunk-length $min_chunk --max-chunk-length $max_chunk \ - # --iters-per-epoch $ipe \ - # --batch-size $batch_size \ - # --num-workers $num_workers $opt_opt $lrs_opt \ - # --grad-acc-steps $grad_acc_steps \ - # --epochs $nnet_num_epochs \ - # --z-dim $latent_dim $enc_opt $dec_opt $vae_opt \ - # --num-gpus $ngpu \ - # --log-interval $log_interval \ - # --exp-path $nnet_dir $args - - elif [[ "$model_type" =~ "dvae" ]];then - # Train Denoising VAE - train_exec=torch-train-${model_type}.py - - $cuda_cmd --gpu $ngpu $nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --num-gpus $ngpu \ - $train_exec $narch:$narch\ - --data-rspec scp:$list_dir/feats.scp \ - --train-list $list_dir/lists_xvec/train.scp \ - --train-pair-list $list_dir/lists_xvec/augm2clean.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --val-pair-list $list_dir/lists_xvec/augm2clean.scp \ - --num-frames-file $list_dir/utt2num_frames \ - --min-chunk-length $min_chunk --max-chunk-length $max_chunk \ - --iters-per-epoch $ipe \ - --batch-size $batch_size \ - --num-workers $num_workers $opt_opt $lrs_opt \ - --grad-acc-steps $grad_acc_steps \ - --epochs $nnet_num_epochs \ - --z-dim $latent_dim $enc_opt $dec_opt $vae_opt \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $nnet_dir $args - - # train_exec=torch-train-${narch}-${model_type}.py - - # $cuda_cmd --gpu $ngpu $nnet_dir/log/train.log \ - # hyp_utils/conda_env.sh --num-gpus $ngpu \ - # $train_exec \ - # --data-rspec scp:$list_dir/feats.scp \ - # --train-list $list_dir/lists_xvec/train.scp \ - # --train-pair-list $list_dir/lists_xvec/augm2clean.scp \ - # --val-list $list_dir/lists_xvec/val.scp \ - # --val-pair-list $list_dir/lists_xvec/augm2clean.scp \ - # --num-frames-file $list_dir/utt2num_frames \ - # --min-chunk-length $min_chunk --max-chunk-length $max_chunk \ - # --iters-per-epoch $ipe \ - # --batch-size $batch_size \ - # --num-workers $num_workers $opt_opt 
$lrs_opt \ - # --grad-acc-steps $grad_acc_steps \ - # --epochs $nnet_num_epochs \ - # --z-dim $latent_dim $enc_opt $dec_opt $vae_opt \ - # --num-gpus $ngpu \ - # --log-interval $log_interval \ - # --exp-path $nnet_dir $args - - else - echo "unknown model type $model_type" - exit 1 - - fi - -fi - - -exit diff --git a/egs/voxceleb/vae.v1/run_012_eval_recons.sh b/egs/voxceleb/vae.v1/run_012_eval_recons.sh deleted file mode 100755 index 961ae68d..00000000 --- a/egs/voxceleb/vae.v1/run_012_eval_recons.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# Copyright -# 2020 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false -#xvec_chunk_length=12800 -. parse_options.sh || exit 1; -. $config_file - -if [ "$use_gpu" == "true" ];then - eval_args="--use-gpu true" - eval_cmd="$cuda_eval_cmd" -else - eval_cmd="$train_cmd" -fi - -output_dir=exp/recons_output/$nnet_name -if [[ "$model_type" =~ "vae" ]];then - eval_script=hyp_utils/generative/eval_vae.sh -else - echo "unknown model type $model_type" - exit 1 -fi - -if [ $stage -le 1 ]; then - for name in voxceleb1_test - do - num_utt=$(wc -l data/$name/utt2spk | awk '{ print $1}') - nj=$(($num_utt < 100 ? $num_utt:100)) - $eval_script --cmd "$eval_cmd --mem 6G" --nj $nj ${eval_args} \ - $nnet data/$name $output_dir/$name - done -fi - - - - diff --git a/egs/voxceleb/vae.v1/run_013_eval_xvector_asv.sh b/egs/voxceleb/vae.v1/run_013_eval_xvector_asv.sh deleted file mode 100755 index 11932091..00000000 --- a/egs/voxceleb/vae.v1/run_013_eval_xvector_asv.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -# Copyright -# 2018 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -config_file=default_config.sh -use_gpu=false -xvec_chunk_length=12800 -. parse_options.sh || exit 1; -. $config_file - -if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" - xvec_cmd="$cuda_eval_cmd" -else - xvec_cmd="$train_cmd" -fi - -xvector_dir=exp/xvectors/$nnet_name/$xvec_nnet_name -score_be_dir=exp/scores/$nnet_name/$xvec_nnet_name/cosine - - -if [ $stage -le 1 ]; then - # Extracts x-vectors for evaluation - for name in voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 100 ? 
$num_spk:100)) - steps_xvec/extract_xvectors_with_vae_preproc.sh \ - --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ - $xvec_nnet $nnet data/$name \ - $xvector_dir/$name - done -fi - - -if [ $stage -le 2 ];then - - echo "Eval Voxceleb 1 with Cosine scoring" - steps_be/eval_be_cos.sh --cmd "$train_cmd" \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $score_be_dir/voxceleb1_scores - - $train_cmd --mem 10G --num-threads 6 $score_be_dir/log/score_voxceleb1.log \ - local/score_voxceleb1.sh data/voxceleb1_test $score_be_dir - - for f in $(ls $score_be_dir/*_results); - do - echo $f - cat $f - echo "" - done - -fi - -exit diff --git a/egs/voxceleb/vae.v1/steps b/egs/voxceleb/vae.v1/steps deleted file mode 120000 index aede39fe..00000000 --- a/egs/voxceleb/vae.v1/steps +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/kaldi/steps \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/steps_be b/egs/voxceleb/vae.v1/steps_be deleted file mode 120000 index 4958fae7..00000000 --- a/egs/voxceleb/vae.v1/steps_be +++ /dev/null @@ -1 +0,0 @@ -../v1.1/steps_be \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/steps_fe b/egs/voxceleb/vae.v1/steps_fe deleted file mode 120000 index 73ccc1eb..00000000 --- a/egs/voxceleb/vae.v1/steps_fe +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/kaldi/vad \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/steps_pyfe b/egs/voxceleb/vae.v1/steps_pyfe deleted file mode 120000 index 7b9d122a..00000000 --- a/egs/voxceleb/vae.v1/steps_pyfe +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/feats \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/steps_xvec b/egs/voxceleb/vae.v1/steps_xvec deleted file mode 120000 index af66a94d..00000000 --- a/egs/voxceleb/vae.v1/steps_xvec +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/xvectors \ No newline at end of file diff --git a/egs/voxceleb/vae.v1/utils b/egs/voxceleb/vae.v1/utils deleted file mode 120000 index 3d590a1d..00000000 --- a/egs/voxceleb/vae.v1/utils +++ /dev/null @@ -1 +0,0 @@ -hyp_utils/kaldi/utils \ No newline at end of file From 083b210a0acfe60750d398f595eb68f61a7928d4 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 14 Apr 2023 15:52:15 -0400 Subject: [PATCH 094/154] started adapt adv.v2 --- egs/voxceleb/adv.v1.1/run_002_compute_evad.sh | 2 +- egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml | 98 +++++++++++------ .../adv.v2/conf/lresnet34_spknet.yaml | 100 +++++++++++------ egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml | 101 ++++++++++++------ .../adv.v2/run_011_train_victim_xvector.sh | 56 +++++----- .../generate_adv_attacks_xvector_classif.sh | 2 +- .../adv/generate_adv_attacks_xvector_verif.sh | 2 +- .../generate_adv_attacks_xvector_classif.py} | 0 .../generate_adv_attacks_xvector_verif.py} | 0 9 files changed, 233 insertions(+), 128 deletions(-) rename hyperion/{bin_deprec2/torch-generate-adv-attacks-xvector-classif.py => bin/generate_adv_attacks_xvector_classif.py} (100%) rename hyperion/{bin_deprec2/torch-generate-adv-attacks-xvector-verif.py => bin/generate_adv_attacks_xvector_verif.py} (100%) diff --git a/egs/voxceleb/adv.v1.1/run_002_compute_evad.sh b/egs/voxceleb/adv.v1.1/run_002_compute_evad.sh index e854b393..f6b8e62f 100755 --- a/egs/voxceleb/adv.v1.1/run_002_compute_evad.sh +++ b/egs/voxceleb/adv.v1.1/run_002_compute_evad.sh @@ -20,7 +20,7 @@ config_file=default_config.sh if [ $stage -le 1 ]; then # Prepare to distribute data over multiple machines if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $vaddir/storage ]; then - dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage + dir_name=$USER/hyp-data/voxceleb/adv.v1.1/$storage_name/vad/storage if [ "$nodes" == "b0" ];then utils/create_split_dir.pl \ utils/create_split_dir.pl \ diff --git a/egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml b/egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml index 79f33282..d07a2126 100644 --- a/egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml +++ b/egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml @@ -1,32 +1,68 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 feats: fbank80_stmn_16k.yaml -min_chunk_length: 4 -max_chunk_length: 4 -iters_per_epoch: 6 -resnet_type: lresnet34 -in_feats: 80 -in_channels: 1 -in_kernel_size: 3 -in_stride: 1 -no_maxpool: true -dropout_rate: 0.0 -embed_dim: 10 -margin: 0.2 -margin_warmup_epochs: 6.0 -s: 30 -epochs: 20 -optim: - amsgrad: true - beta1: 0.9 - beta2: 0.95 - lr: 0.01 - opt_type: adam - weight_decay: 1.0e-05 -lrsched: - decay_rate: 0.5 - decay_steps: 8000 - eps: 1.0e-08 - hold_steps: 16000 - lrsch_type: exp_lr - min_lr: 1.0e-05 - update_lr_on_opt_step: true - warmup_steps: 1000 +model: + resnet_type: lresnet34 + in_feats: 80 + in_channels: 1 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + dropout_rate: 0.0 + embed_dim: 10 + margin: 0.2 + margin_warmup_epochs: 6.0 + s: 30 +trainer: + epochs: 20 + eff_batch_size: 512 + optim: + amsgrad: true + beta1: 0.9 + beta2: 0.95 + lr: 0.01 + opt_type: adam + weight_decay: 1.0e-05 + lrsched: + decay_rate: 0.5 + decay_steps: 8000 + eps: 1.0e-08 + hold_steps: 16000 + lrsch_type: exp_lr + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 1000 + diff --git a/egs/voxceleb/adv.v2/conf/lresnet34_spknet.yaml b/egs/voxceleb/adv.v2/conf/lresnet34_spknet.yaml index 0a78edb5..a12487ee 100644 --- a/egs/voxceleb/adv.v2/conf/lresnet34_spknet.yaml +++ b/egs/voxceleb/adv.v2/conf/lresnet34_spknet.yaml @@ -1,34 +1,68 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 feats: fbank80_stmn_16k.yaml -train_aug_cfg: conf/reverb_noise_aug.yaml -val_aug_cfg: conf/reverb_noise_aug.yaml -min_chunk_length: 4 -max_chunk_length: 4 -iters_per_epoch: 6 -resnet_type: lresnet34 -in_feats: 80 -in_channels: 1 -in_kernel_size: 3 -in_stride: 1 -no_maxpool: true 
-dropout_rate: 0.0 -embed_dim: 256 -margin: 0.3 -margin_warmup_epochs: 20.0 -s: 30 -epochs: 70 -optim: - amsgrad: true - beta1: 0.9 - beta2: 0.95 - lr: 0.05 - opt_type: adam - weight_decay: 1.0e-05 -lrsched: - decay_rate: 0.5 - decay_steps: 8000 - eps: 1.0e-08 - hold_steps: 40000 - lrsch_type: exp_lr - min_lr: 1.0e-05 - update_lr_on_opt_step: true - warmup_steps: 1000 +model: + resnet_type: lresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +trainer: + optim: + opt_type: adam + lr: 0.05 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: lrsched_exp_default.yaml + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 8000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 1000 + use_amp: true + log_interval: 1000 + epochs: 70 + eff_batch_size: 512 diff --git a/egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml b/egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml index 4754206d..94e26f24 100644 --- a/egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml +++ b/egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml @@ -1,34 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + data_loader: + num_workers: 8 feats: fbank80_stmn_16k.yaml -min_chunk_length: 4 -max_chunk_length: 4 -iters_per_epoch: 6 -resnet_type: res2net50 -in_feats: 80 -in_channels: 1 -in_kernel_size: 3 -in_stride: 1 -no_maxpool: true -res2net_width_factor: 1.625 -res2net_scale: 4 -dropout_rate: 0.0 -embed_dim: 10 -margin: 0.2 -margin_warmup_epochs: 6.0 -s: 30 -epochs: 20 -optim: - amsgrad: true - beta1: 0.9 - beta2: 0.95 - lr: 0.01 - opt_type: adam - weight_decay: 1.0e-05 -lrsched: - decay_rate: 0.5 - decay_steps: 8000 - eps: 1.0e-08 - hold_steps: 16000 - lrsch_type: exp_lr - min_lr: 1.0e-05 - update_lr_on_opt_step: true - warmup_steps: 1000 +model: + resnet_type: res2net50 + in_feats: 80 + in_channels: 1 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 1.625 + res2net_scale: 4 + dropout_rate: 0.0 + embed_dim: 10 + margin: 0.2 + margin_warmup_epochs: 6.0 + s: 30 +trainer: + epochs: 20 + eff_batch_size: 256 + optim: + amsgrad: true + beta1: 0.9 + beta2: 0.95 + lr: 0.01 + opt_type: adam + weight_decay: 1.0e-05 + lrsched: + decay_rate: 0.5 + decay_steps: 8000 + eps: 1.0e-08 + hold_steps: 16000 + lrsch_type: exp_lr + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 1000 diff --git a/egs/voxceleb/adv.v2/run_011_train_victim_xvector.sh b/egs/voxceleb/adv.v2/run_011_train_victim_xvector.sh index c5d03ee2..971b88a3 100755 --- a/egs/voxceleb/adv.v2/run_011_train_victim_xvector.sh +++ b/egs/voxceleb/adv.v2/run_011_train_victim_xvector.sh @@ -10,45 +10,45 @@ set -e stage=1 ngpu=4 config_file=default_config.sh -resume=false interactive=false -num_workers=8 +num_workers="" +use_tb=false +use_wandb=false . 
parse_options.sh || exit 1; . $config_file . datapath.sh -batch_size=$(($spknet_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $spknet_eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) -list_dir=data/${spknet_data}_proc_audio_no_sil - -args="" -if [ "$resume" == "true" ];then - args="--resume" -fi +nnet_type=$spknet_command +nnet_data=$spknet_data +nnet_dir=$spknet_dir +nnet_cfg=$spknet_config +list_dir=data/${nnet_data}_proc_audio_no_sil if [ "$interactive" == "true" ];then export cuda_cmd=run.pl fi + + # Network Training if [ $stage -le 1 ]; then - - mkdir -p $spknet_dir/log - $cuda_cmd --gpu $ngpu $spknet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-train-xvec-from-wav.py $spknet_command --cfg $spknet_config \ - --audio-path $list_dir/wav.scp \ - --time-durs-file $list_dir/utt2dur \ - --train-list $list_dir/lists_xvec/train.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --class-file $list_dir/lists_xvec/class2int \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $spknet_dir $args - + + mkdir -p $nnet_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_xvector_from_wav.py $nnet_type --cfg $nnet_cfg \ + --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ + --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ + --trainer.exp-path $nnet_dir \ + --num-gpus $ngpu + fi + diff --git a/hyp_utils/adv/generate_adv_attacks_xvector_classif.sh b/hyp_utils/adv/generate_adv_attacks_xvector_classif.sh index 29d762af..f0401c3a 100755 --- a/hyp_utils/adv/generate_adv_attacks_xvector_classif.sh +++ b/hyp_utils/adv/generate_adv_attacks_xvector_classif.sh @@ -75,7 +75,7 @@ echo "$0: generate attacks for $data_dir to $output_dir" if [ $stage -le 1 ];then $cmd JOB=1:$nj $log_dir/generate_attack.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - torch-generate-adv-attacks-xvector-classif.py \ + generate_adv_attacks_xvector_classif.py \ --feats $feat_config ${args} $attacks_opts \ --wav-file $wav \ --list-file $list \ diff --git a/hyp_utils/adv/generate_adv_attacks_xvector_verif.sh b/hyp_utils/adv/generate_adv_attacks_xvector_verif.sh index 4cf99518..e20b03ff 100755 --- a/hyp_utils/adv/generate_adv_attacks_xvector_verif.sh +++ b/hyp_utils/adv/generate_adv_attacks_xvector_verif.sh @@ -73,7 +73,7 @@ echo "$0: generate attacks for $data_dir to $output_dir" if [ $stage -le 1 ];then $cmd JOB=1:$nj $log_dir/generate_attack.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - torch-generate-adv-attacks-xvector-verif.py \ + generate_adv_attacks_xvector_verif.py \ --feats $feat_config ${args} $attacks_opts \ --v-file scp:$vector_file \ --key-file $key_file \ diff --git 
a/hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py b/hyperion/bin/generate_adv_attacks_xvector_verif.py similarity index 100% rename from hyperion/bin_deprec2/torch-generate-adv-attacks-xvector-verif.py rename to hyperion/bin/generate_adv_attacks_xvector_verif.py From df273d2a6d6dd294e875db86642334a616f42701 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 3 May 2023 12:42:24 -0400 Subject: [PATCH 095/154] fixed block and local attention with mask in conformers --- .../v1/conf/reverb_noise20dB_aug.yaml | 34 + ...2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml | 74 + ...2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml | 77 + ...base_conf_rnnt_k2_pruned_stage1_v3.10.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml | 76 + ...ase_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml | 76 + ...ase_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml | 76 + ...2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml | 76 + ...2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml | 75 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh | 32 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh | 30 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh | 32 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh | 32 + ...wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh | 32 + ...wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh | 32 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh | 32 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh | 32 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh | 32 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh | 32 + ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh | 32 + ...g_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh | 32 + hyperion/bin/train_wav2vec2rnn_transducer.py | 5 +- hyperion/np/augment/speed_augment.py | 17 +- .../layer_blocks/conformer_encoder_v1.py | 76 +- .../layer_blocks/transducer_predictor.py | 20 +- .../layer_blocks/transformer_encoder_v1.py | 45 +- hyperion/torch/layers/__init__.py | 2 +- hyperion/torch/layers/attention.py | 1366 ++++++++++------- hyperion/torch/layers/pos_encoder.py | 68 +- hyperion/torch/lr_schedulers/factory.py | 86 +- hyperion/torch/models/__init__.py | 1 + hyperion/torch/models/transducer/__init__.py | 1 + .../transducer/conformer_v1_rnn_transducer.py | 87 ++ .../models/transducer/lstm_rnn_transducer.py | 151 -- .../models/transducer/rnn_rnn_transducer.py | 2 +- .../torch/models/transducer/rnn_transducer.py | 2 + .../torch/models/wav2transducer/__init__.py | 2 + .../wav2transducer/hf_wav2rnn_transducer.py | 2 - .../hf_wav2vec2conformer_v1_rnn_transducer.py | 105 ++ .../hf_wav2vec2rnn_rnn_transducer.py | 7 +- hyperion/torch/narchs/conformer_encoder_v1.py | 238 +-- hyperion/torch/narchs/rnn_encoder.py | 17 +- .../torch/narchs/rnn_transducer_decoder.py | 13 +- hyperion/torch/torch_model.py | 7 +- hyperion/torch/utils/masking.py | 7 +- 52 files changed, 2923 insertions(+), 958 deletions(-) create mode 100644 egs/librispeech/v1/conf/reverb_noise20dB_aug.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml create mode 100644 
egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml create mode 100644 egs/librispeech/v1/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh create mode 100644 egs/librispeech/v1/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh create mode 100644 hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py delete mode 100644 hyperion/torch/models/transducer/lstm_rnn_transducer.py create mode 100644 hyperion/torch/models/wav2transducer/hf_wav2vec2conformer_v1_rnn_transducer.py diff --git a/egs/librispeech/v1/conf/reverb_noise20dB_aug.yaml b/egs/librispeech/v1/conf/reverb_noise20dB_aug.yaml new file mode 100644 index 00000000..23086ecb --- /dev/null +++ b/egs/librispeech/v1/conf/reverb_noise20dB_aug.yaml @@ -0,0 +1,34 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 10 + max_snr: 20 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 10 + max_snr: 20 + babble: + weight: 1 + 
noise_path: data/musan_speech_babble/wav.scp + min_snr: 10 + max_snr: 20 diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml new file mode 100644 index 00000000..fc5b833a --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml @@ -0,0 +1,74 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml new file mode 100644 index 00000000..c16a9e6d --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: local-scaled-dot-prod-v1 + att_context: 64 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml new file mode 100644 index 00000000..9dd6a944 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml @@ -0,0 +1,77 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: local-scaled-dot-prod-v1 + att_context: 64 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + pos_enc_type: abs + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml new file mode 100644 index 00000000..43c2063d --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: block-scaled-dot-prod-v1 + att_context: 1 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml new file mode 100644 index 00000000..3b3a83b4 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: local-scaled-dot-prod-v1 + att_context: 32 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml new file mode 100644 index 00000000..9286657b --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: local-scaled-dot-prod-v1 + att_context: 16 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml new file mode 100644 index 00000000..b4869ed3 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: local-scaled-dot-prod-v1 + att_context: 4 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml new file mode 100644 index 00000000..645f784c --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: local-scaled-dot-prod-v1 + att_context: 2 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml new file mode 100644 index 00000000..fbbac0c2 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: local-scaled-dot-prod-v1 + att_context: 8 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml new file mode 100644 index 00000000..f1f8c414 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: block-scaled-dot-prod-v1 + att_context: 32 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml new file mode 100644 index 00000000..44cb9642 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: block-scaled-dot-prod-v1 + att_context: 16 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml new file mode 100644 index 00000000..031061f9 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: block-scaled-dot-prod-v1 + att_context: 8 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml new file mode 100644 index 00000000..6cb61718 --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: block-scaled-dot-prod-v1 + att_context: 4 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml new file mode 100644 index 00000000..4b5e0e4d --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml @@ -0,0 +1,76 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. 
+ min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + att_type: block-scaled-dot-prod-v1 + att_context: 2 + d_model: 512 + num_heads: 8 + num_blocks: 1 + d_ff: 2048 + in_layer_type: linear + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml b/egs/librispeech/v1/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml new file mode 100644 index 00000000..91b5fccb --- /dev/null +++ b/egs/librispeech/v1/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml @@ -0,0 +1,75 @@ +data: + train: + dataset: + wav_scale: 1 + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: false + data_loader: + num_workers: 4 + val: + dataset: + aug_cfgs: + - conf/reverb_noise20dB_aug.yaml + wav_scale: 1 + return_segment_info: + - text + sampler: + sampler_type: bucketing_seg_sampler + max_batch_length: 28. + min_batch_size: 1 + drop_last: true + data_loader: + num_workers: 4 +model: + hf_feats: + pretrained_model_path: facebook/wav2vec2-base-960h + transducer: + encoder: + rnn_type: lstm + num_layers: 1 + hid_feats: 512 + proj_feats: 0 + out_feats: 512 + decoder: + rnnt_loss: k2_pruned + simple_loss_scale: 0.2 + predictor: + embed_dim: 1024 + num_layers: 2 + hid_feats: 512 + embed_dropout_rate: 0.4 + rnn_dropout_rate: 0.4 + rnn_type: lstm + joiner: + hid_feats: 512 + feat_fusion_method: weighted-avg + feat_fusion_start: 2 +trainer: + optim: + opt_type: adamw + lr: 0.001 + beta1: 0.9 + beta2: 0.99 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4200 + hold_steps: 1500 + min_lr: 4e-5 + warmup_steps: 1500 + update_lr_on_opt_step: true + grad_clip: 100 + use_amp: true + log_interval: 1000 + epochs: 120 + # eff_batch_size: 1024 + eff_batch_size: 128 + train_mode: hf-feats-frozen-nograd diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh new file mode 100644 index 00000000..a0e4f1a9 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v1.2 +nnet_s1_name=$nnet_name.s1 
+ +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0120.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh new file mode 100644 index 00000000..823f50b1 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh @@ -0,0 +1,30 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0115.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh new file mode 100644 index 00000000..16971bcc --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.2 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0110.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh new file mode 100644 index 00000000..d4b45852 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + 
+nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.3 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0110.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh new file mode 100644 index 00000000..3c98fc9b --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.4.1 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0110.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh new file mode 100644 index 00000000..187ad022 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.4.2 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0100.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh new file mode 100644 index 00000000..1538a7d1 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base 
+#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.4 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0104.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh new file mode 100644 index 00000000..0ce9fd99 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.5 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0110.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh new file mode 100644 index 00000000..81702305 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.6 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0110.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh new file mode 100644 index 00000000..83f7682d --- /dev/null +++ 
b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.7 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0110.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh new file mode 100644 index 00000000..beb92d39 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2conformer_v1_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_conf_rnnt_k2_pruned.v3.9 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0050.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0100.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh b/egs/librispeech/v1/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh new file mode 100644 index 00000000..8e15e372 --- /dev/null +++ b/egs/librispeech/v1/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh @@ -0,0 +1,32 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wav2vec2base +#vad +# vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=train_clean_100 +dev_data=dev_clean + +bpe_model=data/lang_bpe_1000/bpe.model +# x-vector cfg + +nnet_type=hf_wav2vec2rnn_rnn_transducer + +nnet_s1_base_cfg=conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_lstm_rnnt_k2_pruned.v1.2 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/transducer_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0120.pth +nnet_s1=$nnet_s1_dir/model_ep0070.pth +nnet_s1=$nnet_s1_dir/model_ep0105.pth + +nnet_s2_base_cfg=conf/train_wav2vec2base_transducer_stage2_v1.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/transducer_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0020.pth diff --git 
a/hyperion/bin/train_wav2vec2rnn_transducer.py b/hyperion/bin/train_wav2vec2rnn_transducer.py index 67f5c6ba..7018c406 100755 --- a/hyperion/bin/train_wav2vec2rnn_transducer.py +++ b/hyperion/bin/train_wav2vec2rnn_transducer.py @@ -17,7 +17,8 @@ from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory -from hyperion.torch.models import (HFWav2Vec2RNNRNNTransducer, +from hyperion.torch.models import (HFWav2Vec2ConformerV1RNNTransducer, + HFWav2Vec2RNNRNNTransducer, HFWav2Vec2RNNTransducer) from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp @@ -28,6 +29,8 @@ model_dict = { "hf_wav2vec2rnn_transducer": HFWav2Vec2RNNTransducer, "hf_wav2vec2rnn_rnn_transducer": HFWav2Vec2RNNRNNTransducer, + "hf_wav2vec2conformer_v1_rnn_transducer": + HFWav2Vec2ConformerV1RNNTransducer, # "hf_hubert2rnn_transducer": HFWav2Vec2RNNTransducer, # "hf_hubert2rnn_rnn_transducer": Hubert2RNNRNNTransducer, # "hf_wavlm2rnn_transducer": HFHubert2RNNTransducer, diff --git a/hyperion/np/augment/speed_augment.py b/hyperion/np/augment/speed_augment.py index 4400a4b4..18a15651 100644 --- a/hyperion/np/augment/speed_augment.py +++ b/hyperion/np/augment/speed_augment.py @@ -34,10 +34,8 @@ def __init__( rng=None, ): logging.info( - "init speed augment with prob={}, speed_ratios={}, keep_length={}".format( - speed_prob, speed_ratios, keep_length - ) - ) + "init speed augment with prob={}, speed_ratios={}, keep_length={}". + format(speed_prob, speed_ratios, keep_length)) self.speed_prob = speed_prob self.speed_ratios = speed_ratios self.keep_length = keep_length @@ -63,12 +61,12 @@ def create(cls, cfg, random_seed=112358, rng=None): with open(cfg, "r") as f: cfg = yaml.load(f, Loader=yaml.FullLoader) - assert isinstance(cfg, dict), "wrong object type for cfg={}".format(cfg) + assert isinstance(cfg, dict), f"wrong object type for cfg={cfg}" return cls( speed_prob=cfg["speed_prob"], speed_ratios=cfg["speed_ratios"], - keep_length=cfg["keep_length"], + keep_length=cfg["keep_length"] if "keep_length" in cfg else False, random_seed=random_seed, rng=rng, ) @@ -100,11 +98,12 @@ def forward(self, x): # print(f"1 r={r} {x.shape} {y.shape}", flush=True) if self.keep_length: if r > 1: - dither = np.max(x) / 2 ** 15 # we add some dither in the padding - pad_y = dither * np.ones((x.shape[-1] - y.shape[-1],), dtype=y.dtype) + dither = np.max(x) / 2**15 # we add some dither in the padding + pad_y = dither * np.ones( + (x.shape[-1] - y.shape[-1], ), dtype=y.dtype) y = np.concatenate((y, pad_y), axis=-1) elif r < 1: - y = y[: x.shape[-1]] + y = y[:x.shape[-1]] # print(f"2 r={r} {x.shape} {y.shape}", flush=True) return y, info diff --git a/hyperion/torch/layer_blocks/conformer_encoder_v1.py b/hyperion/torch/layer_blocks/conformer_encoder_v1.py index b2eab352..5764c85e 100644 --- a/hyperion/torch/layer_blocks/conformer_encoder_v1.py +++ b/hyperion/torch/layer_blocks/conformer_encoder_v1.py @@ -94,14 +94,14 @@ def __init__( self.ff_macaron = ff_macaron if ff_macaron: self.ff_scale = 0.5 - self.feed_forward_macaron = self._make_ff( - feed_forward, num_feats, d_ff, ff_kernel_size, hid_act, dropout_rate - ) + self.feed_forward_macaron = self._make_ff(feed_forward, num_feats, + d_ff, ff_kernel_size, + hid_act, dropout_rate) self.norm_ff_macaron = nn.LayerNorm(num_feats) - self.feed_forward = self._make_ff( - feed_forward, num_feats, d_ff, ff_kernel_size, hid_act, dropout_rate - ) + self.feed_forward = 
self._make_ff(feed_forward, num_feats, d_ff, + ff_kernel_size, hid_act, + dropout_rate) conv_blocks = [] for i in range(conv_repeats): @@ -145,7 +145,7 @@ def _make_att( """Creates multihead attention block from att_type string Args: - att_type: string in ['scaled-dot-prod-att-v1', 'local-scaled-dot-prod-att-v1'] + att_type: string in ['scaled-dot-prod-v1', 'local-scaled-dot-prod-v1', 'block-scaled-dot-prod-v1'] num_feats: input/output feat. dimension (aka d_model) num_heads: number of heads dropout_rate: dropout rate for attention block @@ -170,11 +170,15 @@ def _make_att( d_k, causal_pos_enc, dropout_rate, - time_dim=1, ) return ScaledDotProdAttV1( - num_feats, num_feats, num_heads, d_k, d_k, dropout_rate, time_dim=1 + num_feats, + num_feats, + num_heads, + d_k, + d_k, + dropout_rate, ) if att_type == "local-scaled-dot-prod-v1": @@ -188,7 +192,6 @@ def _make_att( context, causal_pos_enc, dropout_rate, - time_dim=1, ) return LocalScaledDotProdAttV1( @@ -199,11 +202,34 @@ def _make_att( d_k, context, dropout_rate, - time_dim=1, + ) + + if att_type == "block-scaled-dot-prod-v1": + if pos_enc_type == "rel": + return BlockScaledDotProdAttRelPosEncV1( + num_feats, + num_feats, + num_heads, + d_k, + d_k, + context, + causal_pos_enc, + dropout_rate, + ) + + return BlockScaledDotProdAttV1( + num_feats, + num_feats, + num_heads, + d_k, + d_k, + context, + dropout_rate, ) @staticmethod - def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, dropout_rate): + def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, + dropout_rate): """Creates position-wise feed forward block from ff_type string Args: @@ -219,19 +245,27 @@ def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, dropout_rat """ if ff_type == "linear": - return PositionwiseFeedForward( - num_feats, hid_feats, activation, dropout_rate, time_dim=1 - ) + return PositionwiseFeedForward(num_feats, + hid_feats, + activation, + dropout_rate, + time_dim=1) if ff_type == "conv1dx2": - return Conv1dx2( - num_feats, hid_feats, kernel_size, activation, dropout_rate, time_dim=1 - ) + return Conv1dx2(num_feats, + hid_feats, + kernel_size, + activation, + dropout_rate, + time_dim=1) if ff_type == "conv1d-linear": - return Conv1dLinear( - num_feats, hid_feats, kernel_size, activation, dropout_rate, time_dim=1 - ) + return Conv1dLinear(num_feats, + hid_feats, + kernel_size, + activation, + dropout_rate, + time_dim=1) def forward(self, x, pos_emb=None, mask=None): """Forward pass function diff --git a/hyperion/torch/layer_blocks/transducer_predictor.py b/hyperion/torch/layer_blocks/transducer_predictor.py index 00339fe7..6f43343a 100644 --- a/hyperion/torch/layer_blocks/transducer_predictor.py +++ b/hyperion/torch/layer_blocks/transducer_predictor.py @@ -85,7 +85,7 @@ def __init__(self, def get_config(self): config = { - "pred_type": "conv", + "pred_type": "rnn", "vocab_size": self.vocab_size, "embed_dim": self.embed_dim, "num_layers": self.num_layers, @@ -187,7 +187,7 @@ def __init__( out_feats = embed_dim self.out_feats = out_feats - if out_feats != embed_feats: + if out_feats != embed_dim: self.output_proj = nn.Linear(embed_dim, out_feats) else: self.output_proj = None @@ -210,7 +210,7 @@ def forward( self, y: torch.Tensor, states: Optional[torch.Tensor] = None, - ) -> Tuple[torch.Tensor, None]: + ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]: """ Args: y: @@ -223,19 +223,21 @@ def forward( """ y = y.to(torch.int64) embed = self.embedding(y) - if self.context > 1: + if self.context_size > 1: embed = 
embed.transpose(1, 2) if states is None: - embed = F.pad(embedding_out, pad=(self.context_size - 1, 0)) + embed = nn.functional.pad(embed, + pad=(self.context_size - 1, 0)) else: - raise NotImplementedError() - embed = self.conv(embed).transpose(1, 2) + embed = torch.cat((states[0], embed), dim=-1) + + out = self.conv(embed).transpose(1, 2) - out = self.hid_act(embed) + out = self.hid_act(out) if self.output_proj: out = self.output_proj(out) - return out, None + return out, (embed[:, :, -self.context_size + 1:], ) # # this stuff about clamp() is a temporary fix for a mismatch # # at utterance start, we use negative ids in beam_search.py diff --git a/hyperion/torch/layer_blocks/transformer_encoder_v1.py b/hyperion/torch/layer_blocks/transformer_encoder_v1.py index c8eaaa1b..cfb843b6 100644 --- a/hyperion/torch/layer_blocks/transformer_encoder_v1.py +++ b/hyperion/torch/layer_blocks/transformer_encoder_v1.py @@ -67,9 +67,9 @@ def __init__( self.self_attn = self_attn if isinstance(feed_forward, str): - self.feed_forward = self._make_ff( - feed_forward, num_feats, d_ff, ff_kernel_size, ff_act, ff_dropout_rate - ) + self.feed_forward = self._make_ff(feed_forward, num_feats, d_ff, + ff_kernel_size, ff_act, + ff_dropout_rate) else: self.feed_forward = feed_forward @@ -122,11 +122,15 @@ def _make_att( d_k, causal_pos_enc, dropout_rate, - time_dim=1, ) return ScaledDotProdAttV1( - num_feats, num_feats, num_heads, d_k, d_k, dropout_rate, time_dim=1 + num_feats, + num_feats, + num_heads, + d_k, + d_k, + dropout_rate, ) if att_type == "local-scaled-dot-prod-v1": @@ -140,7 +144,6 @@ def _make_att( context, causal_pos_enc, dropout_rate, - time_dim=1, ) return LocalScaledDotProdAttV1( @@ -151,11 +154,11 @@ def _make_att( d_k, context, dropout_rate, - time_dim=1, ) @staticmethod - def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, dropout_rate): + def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, + dropout_rate): """Creates position-wise feed forward block from ff_type string Args: @@ -171,19 +174,27 @@ def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, dropout_rat """ if ff_type == "linear": - return PositionwiseFeedForward( - num_feats, hid_feats, activation, dropout_rate, time_dim=1 - ) + return PositionwiseFeedForward(num_feats, + hid_feats, + activation, + dropout_rate, + time_dim=1) if ff_type == "conv1dx2": - return Conv1dx2( - num_feats, hid_feats, kernel_size, activation, dropout_rate, time_dim=1 - ) + return Conv1dx2(num_feats, + hid_feats, + kernel_size, + activation, + dropout_rate, + time_dim=1) if ff_type == "conv1d-linear": - return Conv1dLinear( - num_feats, hid_feats, kernel_size, activation, dropout_rate, time_dim=1 - ) + return Conv1dLinear(num_feats, + hid_feats, + kernel_size, + activation, + dropout_rate, + time_dim=1) def forward(self, x, pos_emb=None, mask=None): """Forward pass function diff --git a/hyperion/torch/layers/__init__.py b/hyperion/torch/layers/__init__.py index 42b40303..b2aa1692 100644 --- a/hyperion/torch/layers/__init__.py +++ b/hyperion/torch/layers/__init__.py @@ -17,6 +17,6 @@ from .mvn import MeanVarianceNorm from .norm_layer_factory import NormLayer1dFactory, NormLayer2dFactory from .pool_factory import GlobalPool1dFactory -from .pos_encoder import NoPosEncoder, PosEncoder, RelPosEncoder +from .pos_encoder import NoPosEncoder, PosEncoder, RelPosEncoder, ConvPosEncoder from .spec_augment import AxisMasker, SpecAugment, SpecWarper from .subpixel_convs import ICNR1d, ICNR2d, SubPixelConv1d, SubPixelConv2d 
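Before the attention.py diff below: that diff introduces BlockScaledDotProdAttV1, the layer selected by att_type: block-scaled-dot-prod-v1 in the conformer YAML configs above. As a rough mental model, here is a minimal single-head sketch of block-diagonal attention; the helper name is mine, not the hyperion API, and unlike the real layer it does not mask the padded frames. It chops the time axis into roughly context-sized chunks and runs ordinary softmax attention independently inside each chunk:

import math
import torch

def block_diag_attention(q, k, v, context=25):
    # q, k, v: (batch, time, dim); same block partition as the real layer:
    # num_blocks = max(1, time // context), block length ctx = ceil(time / num_blocks)
    b, t, d = q.shape
    num_blocks = max(1, t // context)
    ctx = math.ceil(t / num_blocks)
    pad = ctx * num_blocks - t
    if pad > 0:
        # zero-pad the time axis so it splits evenly into blocks
        q, k, v = (torch.nn.functional.pad(x, (0, 0, 0, pad)) for x in (q, k, v))
    # (batch, blocks, ctx, dim): queries in block i only see keys in block i
    q = q.reshape(b, num_blocks, ctx, d)
    k = k.reshape(b, num_blocks, ctx, d)
    v = v.reshape(b, num_blocks, ctx, d)
    scores = q @ k.transpose(-2, -1) / math.sqrt(d)  # (batch, blocks, ctx, ctx)
    probs = torch.softmax(scores, dim=-1)
    return (probs @ v).reshape(b, -1, d)[:, :t]  # drop the padding frames

This brings the score matrix down from O(time^2) to O(time * ctx) memory, which is what makes the encoder practical on long utterances. LocalScaledDotProdAttV1 additionally computes a second score matrix shifted by half a block so neighboring blocks can exchange information; the new block variant keeps only the main diagonal.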
diff --git a/hyperion/torch/layers/attention.py b/hyperion/torch/layers/attention.py index 8ab75111..3e53cec9 100644 --- a/hyperion/torch/layers/attention.py +++ b/hyperion/torch/layers/attention.py @@ -20,20 +20,23 @@ class ScaledDotProdAttV1(nn.Module): d_k: key/query projection dimension d_v: value projection dimension dropout_rate: dropout rate - time_dim: time dimension in the input, default=1 meaning input - dimensions are (batch, time, in_feats) """ def __init__( - self, in_feats, out_feats, num_heads, d_k, d_v, dropout_rate=0, time_dim=1 + self, + in_feats, + out_feats, + num_heads, + d_k, + d_v, + dropout_rate=0, ): super().__init__() - # We assume d_v always equals d_k + # We assume d_q always equals d_k self.d_v = d_v self.d_k = d_k self.num_heads = num_heads self.dropout_rate = dropout_rate - self.time_dim = time_dim self.linear_q = nn.Linear(in_feats, num_heads * d_k) self.linear_k = nn.Linear(in_feats, num_heads * d_k) self.linear_v = nn.Linear(in_feats, num_heads * d_v) @@ -54,7 +57,7 @@ def __repr__(self): return self.__str__() def __str__(self): - s = "{}(in_feats={}, out_feats={}, num_heads={}, d_k={}, d_v={}, dropout_rate={}, time_dim={})".format( + s = "{}(in_feats={}, out_feats={}, num_heads={}, d_k={}, d_v={}, dropout_rate={})".format( self.__class__.__name__, self.in_feats, self.out_feats, @@ -62,17 +65,11 @@ def __str__(self): self.d_k, self.d_v, self.dropout_rate, - self.time_dim, ) return s def _compute_qkv(self, query, key, value): batch_size = value.size(0) - if self.time_dim != 1: - query = query.transpose(1, self.time_dim) - key = key.transpose(1, self.time_dim) - value = value.transpose(1, self.time_dim) - q = self.linear_q(query).view(batch_size, -1, self.num_heads, self.d_k) k = self.linear_k(key).view(batch_size, -1, self.num_heads, self.d_k) v = self.linear_v(value).view(batch_size, -1, self.num_heads, self.d_v) @@ -85,8 +82,7 @@ def _compute_qkv(self, query, key, value): def _compute_softmax(self, scores, mask): if mask is not None: mask = mask.unsqueeze(1).eq( - 0 - ) # (batch, 1, time1, time2) or (batch, 1, time) + 0) # (batch, 1, time1, time2) or (batch, 1, time) if scores.dtype == torch.half: min_value = -65504 else: @@ -95,14 +91,14 @@ def _compute_softmax(self, scores, mask): if mask.dim() == 4: scores = scores.masked_fill(mask, min_value) return torch.softmax(scores, dim=-1).masked_fill( - mask, 0.0 - ) # (batch, head, time1, time2) + mask, 0.0) # (batch, head, time1, time2) else: - mask1 = mask.unsqueze(2) + mask1 = mask.unsqueeze(2) mask2 = mask.unsqueeze(-1) scores = scores.masked_fill(mask1, min_value) scores = scores.masked_fill(mask2, min_value) - return torch.softmax(scores, dim=-1) # (batch, head, time1, time2) + return torch.softmax(scores, + dim=-1) # (batch, head, time1, time2) return torch.softmax(scores, dim=-1) # (batch, head, time1, time2) @@ -114,15 +110,13 @@ def _apply_attn(self, v): p_attn = self.attn x = torch.matmul(p_attn, v) # (batch, head, time1, d_k) - x = ( - x.transpose(1, 2) - .contiguous() - .view(batch_size, -1, self.num_heads * self.d_v) - ) # (batch, time1, d_model) + x = (x.transpose(1, 2).contiguous().view(batch_size, -1, + self.num_heads * self.d_v) + ) # (batch, time1, d_model) return self.linear_out(x) # (batch, time1, d_model) - ___compute_softmax = _compute_softmax - ___apply_attn = _apply_attn + _base_compute_softmax = _compute_softmax + _base_apply_attn = _apply_attn def forward(self, query, key, value, mask=None): """Computes 'Scaled Dot Product Attention'. 
@@ -141,10 +135,9 @@ def forward(self, query, key, value, mask=None): """ q, k, v = self._compute_qkv(query, key, value) scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt( - self.d_k - ) # (batch, head, time1, time2) - self.attn = self.___compute_softmax(scores, mask) - return self.___apply_attn(v) + self.d_k) # (batch, head, time1, time2) + self.attn = self._base_compute_softmax(scores, mask) + return self._base_apply_attn(v) class LocalScaledDotProdAttV1(ScaledDotProdAttV1): @@ -160,8 +153,6 @@ class LocalScaledDotProdAttV1(ScaledDotProdAttV1): d_v: value projection dimension context: maximum attention temporal context. dropout_rate: dropout rate - time_dim: time dimension in the input, default=1 meaning input - dimensions are (batch, time, in_feats) """ def __init__( @@ -173,85 +164,39 @@ def __init__( d_v, context=25, dropout_rate=0, - time_dim=1, ): """Construct an MultiHeadedAttention object.""" - super().__init__( - in_feats, out_feats, num_heads, d_k, d_v, dropout_rate, time_dim - ) + super().__init__(in_feats, out_feats, num_heads, d_k, d_v, + dropout_rate) self.context = context def __repr__(self): return self.__str__() def __str__(self): - s = ( - "{}(in_feats={}, out_feats={}, num_heads={}, d_k={}, d_v={}, " - "context={}, dropout_rate={}, time_dim={})".format( - self.__class__.__name__, - self.in_feats, - self.out_feats, - self.num_heads, - self.d_k, - self.d_v, - self.context, - self.dropout_rate, - self.time_dim, - ) - ) + s = ("{}(in_feats={}, out_feats={}, num_heads={}, d_k={}, d_v={}, " + "context={}, dropout_rate={})".format( + self.__class__.__name__, + self.in_feats, + self.out_feats, + self.num_heads, + self.d_k, + self.d_v, + self.context, + self.dropout_rate, + )) return s - def _compute_qkv00(self, query, key, value): - batch_size = query.size(0) - t1 = query.size(self.time_dim) - t2 = key.size(self.time_dim) - if self.time_dim != 1: - query = query.transpose(1, self.time_dim) - key = key.transpose(1, self.time_dim) - value = value.transpose(1, self.time_dim) - - context_k = self.context - num_blocks = math.ceil(t2 / context_k) # (t2 + context_k//2)//context_k - context_q = math.ceil(t1 / num_blocks) - num_blocks_q = math.ceil(t1 / context_q) # (t1 + context_q//2)//context_q - assert ( - num_blocks == num_blocks_q - ), "num_blocks_k({})!=num_blocks_q({}), context_k={}, context_q={}, t1={}, t2={}".format( - num_blocks, num_blocks_q, context_k, context_q, t1, t2 - ) - pad1 = context_q * num_blocks - t1 - pad2 = context_k * num_blocks - t2 - # print('1',query.shape,key.shape,value.shape,pad1,pad2, context_q, context_k) - if pad1 > 0: - query = nn.functional.pad(query, (0, 0, 0, pad1)) - - if pad2 > 0: - key = nn.functional.pad(key, (0, 0, 0, pad2)) - value = nn.functional.pad(value, (0, 0, 0, pad2)) - - # print('2',query.shape,key.shape,value.shape) - q0 = self.linear_q(query) # (batch, time1, head*d_k) - k0 = self.linear_k(key) # (batch, time2, head*d_k) - v0 = self.linear_v(value) # (batch, time2, head*d_v) - - return q0, k0, v0, context_q, context_k, num_blocks - def _compute_qkv0(self, query, key, value): batch_size = query.size(0) - t1 = query.size(self.time_dim) - t2 = key.size(self.time_dim) - if self.time_dim != 1: - query = query.transpose(1, self.time_dim) - key = key.transpose(1, self.time_dim) - value = value.transpose(1, self.time_dim) - - num_blocks = round(t2 / self.context) - # print(num_blocks, t2, self.context) + t1 = query.size(1) + t2 = key.size(1) + + num_blocks = max(1, round(t2 / self.context)) context_k = math.ceil(t2 / num_blocks) 
context_q = math.ceil(t1 / num_blocks) pad1 = context_q * num_blocks - t1 pad2 = context_k * num_blocks - t2 - # print('1',query.shape,key.shape,value.shape,pad1,pad2, context_q, context_k) if pad1 > 0: query = nn.functional.pad(query, (0, 0, 0, pad1)) @@ -259,17 +204,16 @@ def _compute_qkv0(self, query, key, value): key = nn.functional.pad(key, (0, 0, 0, pad2)) value = nn.functional.pad(value, (0, 0, 0, pad2)) - # print('2',query.shape,key.shape,value.shape) q0 = self.linear_q(query) # (batch, time1, head*d_k) k0 = self.linear_k(key) # (batch, time2, head*d_k) v0 = self.linear_v(value) # (batch, time2, head*d_v) return q0, k0, v0, context_q, context_k, num_blocks - def _compute_scores( - self, q0, k0, num_blocks, context_q, context_k, q_left_shift, k_left_shift - ): - + def _compute_scores(self, q0, k0, num_blocks, context_q, context_k, + q_left_shift, k_left_shift): + # q0 (batch, time1, head*d_k) + # k0 (batch, time2, head*d_k) batch_size = q0.size(0) if q_left_shift > 0: # we are computing the shifted block-diag score matrix @@ -278,22 +222,14 @@ def _compute_scores( q0 = q0[:, q_left_shift:-q_right_shift] k0 = k0[:, k_left_shift:-k_right_shift] - q = ( - q0.view(batch_size, -1, self.num_heads, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks, -1, self.d_k) - ) + q = (q0.view(batch_size, -1, self.num_heads, + self.d_k).transpose(1, 2).contiguous().view( + batch_size, self.num_heads, num_blocks, -1, self.d_k)) # (batch, head, blocks, time1, d_k) - k = ( - k0.view(batch_size, -1, self.num_heads, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks, -1, self.d_k) - ) + k = (k0.view(batch_size, -1, self.num_heads, + self.d_k).transpose(1, 2).contiguous().view( + batch_size, self.num_heads, num_blocks, -1, self.d_k)) # (batch, head, blocks time2, d_k) - # print('4',q.shape,k.shape) - return torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) @staticmethod @@ -331,7 +267,7 @@ def _softmax(scores1, scores2, shift1, shift2, t1, t2): context2 = scores1.size(4) # set elements in scores2 that overlap with elements in scores1 to -inf - scores2[:, :, :, : context1 - shift1, : context2 - shift2] = min_val + scores2[:, :, :, :context1 - shift1, :context2 - shift2] = min_val scores2[:, :, :, shift1:, shift2:] = min_val # set the padding time steps that we had to add to make integer block-number to -inf @@ -371,9 +307,9 @@ def _softmax(scores1, scores2, shift1, shift2, t1, t2): scores2 = scores2.view(batch_size, num_heads, -1, context2) # print('aa', scores1.shape, scores2.shape) # pad scores2 to have the same size as scores1 - scores2 = nn.functional.pad( - scores2, (0, 0, shift1, context1 - shift1), mode="constant", value=min_val - ) + scores2 = nn.functional.pad(scores2, (0, 0, shift1, context1 - shift1), + mode="constant", + value=min_val) # print('bb', scores1.shape, scores2.shape) # concat scores1, scores2 and do softmax in time2 dimension # (batch, heads, blocks*time1, 2*time2) @@ -381,17 +317,13 @@ def _softmax(scores1, scores2, shift1, shift2, t1, t2): # now we separate back probs into probs1, and probs2 # probs1 - probs1 = ( - probs[:, :, :, :context2] - .contiguous() - .view(batch_size, num_heads, num_blocks, -1, context2) - ) + probs1 = (probs[:, :, :, :context2].contiguous().view( + batch_size, num_heads, num_blocks, -1, context2)) # probs2 - probs2 = ( - probs[:, :, shift1 : -(context1 - shift1), context2:] - .contiguous() - .view(batch_size, num_heads, num_blocks - 1, -1, context2) - ) + probs2 = 
(probs[:, :, shift1:-(context1 - shift1), + context2:].contiguous().view(batch_size, num_heads, + num_blocks - 1, -1, + context2)) return probs1, probs2 @@ -406,9 +338,9 @@ def _mask_scores_1d(self, scores, mask, shift1, shift2): context1 = scores.size(3) context2 = scores.size(4) mask_blocks = torch.ones_like(scores, dtype=mask.dtype) - mask_single_block = torch.zeros( - (batch_size, context1, context2), dtype=mask.dtype - ) + mask_single_block = torch.zeros((batch_size, context1, context2), + dtype=mask.dtype, + device=mask.device) t1_start = shift1 t2_start = shift2 @@ -416,9 +348,11 @@ t1_end = t1_start + context1 t2_end = t2_start + context2 mask_single_block.fill_(False) - mask_single_block.masked_fill_(mask[:, 0, t1_start:t1_end], True) - mask_single_block.masked_fill_(mask[:, :, t2_start:t2_end], True) - mask_blocks[:, block] = mask_single_block + mask_single_block.masked_fill_(mask[:, t1_start:t1_end, None], + True) + mask_single_block.masked_fill_(mask[:, None, t2_start:t2_end], + True) + mask_blocks[:, :, block] = mask_single_block.unsqueeze(1) t1_start += context1 t2_start += context2 @@ -437,23 +371,24 @@ mask_blocks = torch.ones_like(scores, dtype=mask.dtype) t1_start = shift1 t2_start = shift2 + mask = mask.unsqueeze(1) for block in range(num_blocks): t1_end = min(t1_start + context1, mask.size(1)) t2_end = min(t2_start + context2, mask.size(2)) - mask_blocks[:, block, : (t1_end - t1_start), : (t2_end - t2_start)] = mask[ - :, t1_start:t1_end, t2_start:t2_end - ] + mask_blocks[:, :, block, :(t1_end - t1_start), :( + t2_end - t2_start)] = mask[:, :, t1_start:t1_end, + t2_start:t2_end] t1_start += context1 t2_start += context2 return scores.masked_fill(mask_blocks, min_value) - def _compute_softmax( - self, scores1, scores2, mask, q_left_shift, k_left_shift, t1, t2 - ): + def _compute_softmax(self, scores1, scores2, mask, q_left_shift, + k_left_shift, t1, t2): + if mask is not None: # put to -inf scores in points where mask==0 - if mask.dim() == 4: + if mask.dim() == 3: # case when mask is 2d matrix per batch element mask = mask.eq(0) # (batch, time1, time2) # first, we mask block diagonal blocks scores1 = self._mask_scores_2d(scores1, mask, 0, 0) # second, we mask shifted block diagonal blocks - scores2 = self._mask_scores_2d( - scores2, mask, q_left_shift, k_left_shift - ) + scores2 = self._mask_scores_2d(scores2, mask, q_left_shift, + k_left_shift) - else: + elif mask.dim() == 2: # case when mask is 1d vector per batch element, # meaning that time1 and time2 are the same, so mask is symmetric - pad2 = 0 # fix this + pad2 = scores1.size(2) * scores1.size(3) - mask.size(-1) mask = nn.functional.pad(mask, (0, pad2)) - mask = mask.squeeze(1).eq(0) # (batch, 1, time) + mask = mask.eq(0) # (batch, time) # first, we mask block diagonal blocks scores1 = self._mask_scores_1d(scores1, mask, 0, 0) # second, we mask shifted block diagonal blocks - scores2 = self._mask_scores_1d( - scores2, mask, q_left_shift, k_left_shift - ) + scores2 = self._mask_scores_1d(scores2, mask, q_left_shift, + k_left_shift) + else: + raise ValueError() - self.attn1, self.attn2 = self._softmax( - scores1, scores2, q_left_shift, k_left_shift, t1, t2 - ) + self.attn1, self.attn2 = self._softmax(scores1, scores2, q_left_shift, + k_left_shift, t1, t2) def _apply_attn(self, v0, t1): if self.dropout_rate > 0: @@ -501,51 +435,43 @@ q_right_shift = context_q - 
q_left_shift k_right_shift = context_k - k_left_shift - v = ( - v0.view(batch_size, -1, self.num_heads, self.d_v) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks, -1, self.d_k) - ) + v = (v0.view(batch_size, -1, self.num_heads, + self.d_v).transpose(1, 2).contiguous().view( + batch_size, self.num_heads, num_blocks, -1, self.d_k)) # (batch, heads, blocks, time2, d_v) # print('8',p_attn1.shape,p_attn2.shape, v.shape) # (batch, head, blocks, time1, time2) x (batch, head, blocks, time2, d_v) x = torch.matmul(p_attn1, v) # (batch, heads, blocks, time1, d_k) # print('9',x.shape) - x = ( - x.view(batch_size, self.num_heads, -1, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, -1, self.num_heads * self.d_v) - ) + x = (x.view(batch_size, self.num_heads, -1, + self.d_k).transpose(1, 2).contiguous().view( + batch_size, -1, self.num_heads * self.d_v)) # (batch, time1, d_model) # print('10',x.shape) - v = ( - v0[:, k_left_shift:-k_right_shift] - .view(batch_size, -1, self.num_heads, self.d_v) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks - 1, -1, self.d_v) - ) + v = (v0[:, k_left_shift:-k_right_shift].view( + batch_size, -1, self.num_heads, + self.d_v).transpose(1, + 2).contiguous().view(batch_size, + self.num_heads, + num_blocks - 1, -1, + self.d_v)) # (batch, blocks-1, head, time2, d_v) # print('11',p_attn1.shape,p_attn2.shape, v.shape) # (batch, blocks-1, head, time1, time2) x (batch, blocks-1, head, time2, d_v) x2 = torch.matmul(p_attn2, v) # (batch, heads, blocks-1, time1, d_k) # print('12',x2.shape) - x2 = ( - x2.view(batch_size, self.num_heads, -1, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, -1, self.num_heads * self.d_v) - ) + x2 = (x2.view(batch_size, self.num_heads, -1, self.d_k).transpose( + 1, 2).contiguous().view(batch_size, -1, self.num_heads * self.d_v)) # (batch, time1, d_model) # print('12',x2.shape) - x[:, q_left_shift:-q_right_shift:] = x[:, q_left_shift:-q_right_shift:] + x2 + x[:, + q_left_shift:-q_right_shift:] = x[:, + q_left_shift:-q_right_shift:] + x2 x = x[:, :t1] return self.linear_out(x) # (batch, time1, d_model) - def forward1(self, query, key, value, mask): + def forward(self, query, key, value, mask): """Computes 'Local Scaled Dot Product Attention'. 
Args: @@ -561,170 +487,91 @@ def forward1(self, query, key, value, mask): Attention weigthed average of the values with size=(batch, time1, out_feats) """ batch_size = query.size(0) - t1 = query.size(self.time_dim) - t2 = key.size(self.time_dim) - if t2 <= self.context: + t1 = query.size(1) + t2 = key.size(1) + if t2 <= 2 * self.context: return super().forward(query, key, value, mask) q0, k0, v0, context_q, context_k, num_blocks = self._compute_qkv0( - query, key, value - ) - # q0 size=(batch, time1, head * d_k) - # k0 size=(batch, time2, head * d_k) - # v0 size=(batch, time2, head * d_v) + query, key, value) + # q0 size=(batch, time1, head*d_k) + # k0 size=(batch, time2, head*d_k) + # v0 size=(batch, time2, head*d_v) # compute block diagonal affinity matrix - # # print('3',q0.shape,k0.shape,v0.shape) - # q = q0.view( - # batch_size, -1, self.num_heads, self.d_k).transpose( - # 1, 2).contiguous().view( - # batch_size, self.num_heads, num_blocks, -1, self.d_k) - # # (batch, head, blocks, time1, d_k) - # k = k0.view( - # batch_size, -1, self.num_heads, self.d_k).transpose( - # 1, 2).contiguous().view( - # batch_size, self.num_heads, num_blocks, -1, self.d_k) - # # (batch, head, blocks time2, d_k) - # # print('4',q.shape,k.shape) - - # scores1 = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - scores1 = self._compute_scores(q0, k0, num_blocks, context_q, context_k, 0, 0) + scores1 = self._compute_scores(q0, k0, num_blocks, context_q, + context_k, 0, 0) # (batch, head, blocks context_q, context_k) - # print('5',scores1.shape) # compute shifted block diagonal affinity matrix q_left_shift = context_q // 2 k_left_shift = context_k // 2 - # q_right_shift = context_q - q_left_shift - # k_right_shift = context_k - k_left_shift - # q = q0[:,q_left_shift:-q_right_shift].view( - # batch_size, -1, self.num_heads, self.d_k).transpose( - # 1, 2).contiguous().view( - # batch_size, self.num_heads, num_blocks-1, -1, self.d_k) - # # (batch, blocks-1, head, time1, d_k) - # k = k0[:,k_left_shift:-k_right_shift].view( - # batch_size, -1, self.num_heads, self.d_k).transpose( - # 1, 2).contiguous().view( - # batch_size, self.num_heads, num_blocks-1, -1, self.d_k) - # # (batch, blocks-1, head, d_k) - # # print('6',q.shape,k.shape) - - # scores2 = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - scores2 = self._compute_scores( - q0, k0, num_blocks - 1, context_q, context_k, q_left_shift, k_left_shift - ) + scores2 = self._compute_scores(q0, k0, num_blocks - 1, context_q, + context_k, q_left_shift, k_left_shift) # (batch, head, blocks-1 context_q, context_k) - # print('7',scores2.shape) # combine both block diagonal affinity matrix to do the softmax - # if mask is not None: - # # put to -inf scores in points where mask==0 - # if mask.dim() == 4: - # # case when mask is 2d matrix per batch element - # mask = mask.eq(0) # (batch, time1, time2) - - # # first, we mask block diagonal blocks - # scores1 = self._mask_scores_2d(scores1, mask, 0, 0) - - # # second, we mask shifted block diagonal blocks - # scores2 = self._mask_scores_2d(scores2, mask, q_left_shift, k_left_shift) + self._compute_softmax(scores1, scores2, mask, q_left_shift, + k_left_shift, t1, t2) + return self._apply_attn(v0, t1) - # else: - # # case when mask is 1d vector per batch element, - # # meaning that time1 and time2 are the same, so mask is symmetric - # mask = nn.functional.pad(mask, (0, pad2)) - # mask = mask.squeeze(1).eq(0) # (batch, 1, time) - # # first, we mask block diagonal blocks - # scores1 = 
self._mask_scores_1d(scores1, mask, 0, 0) +class BlockScaledDotProdAttV1(ScaledDotProdAttV1): + """Block Scaled dot product multihead attention layer + It calculates self-attention with block diagonal mask - # # second, we mask shifted block diagonal blocks - # scores2 = self._mask_scores_1d(scores2, mask, q_left_shift, k_left_shift) + Attributes: + in_feats: input feature dimension + out_feats: output feature dimension + num_heads: number of heads + d_k: key/query projection dimension + d_v: value projection dimension + context: maximum attention temporal context. + dropout_rate: dropout rate + """ - # self.attn1, self.attn2 = self._softmax( - # scores1, scores2, q_left_shift, k_left_shift, t1, t2) + def __init__( + self, + in_feats, + out_feats, + num_heads, + d_k, + d_v, + context=25, + dropout_rate=0, + ): + """Construct an MultiHeadedAttention object.""" + super().__init__(in_feats, out_feats, num_heads, d_k, d_v, + dropout_rate) + self.context = context - self._compute_softmax( - scores1, scores2, mask, q_left_shift, k_left_shift, t1, t2 - ) - return self._apply_attn(v0, t1) + def __repr__(self): + return self.__str__() - # if self.dropout_rate > 0: - # p_attn1 = self.dropout(self.attn1) - # p_attn2 = self.dropout(self.attn2) - # else: - # p_attn1 = self.attn1 - # p_attn2 = self.attn2 - - # v = v0.view( - # batch_size, -1, self.num_heads, self.d_v).transpose( - # 1, 2).contiguous().view( - # batch_size, self.num_heads, num_blocks, -1, self.d_k) - # # (batch, heads, blocks, time2, d_v) - # # print('8',p_attn1.shape,p_attn2.shape, v.shape) - # # (batch, blocks, head, time1, time2) x (batch, blocks, head, time2, d_v) - # x = torch.matmul(p_attn1, v) # (batch, heads, blocks, time1, d_k) - # # print('9',x.shape) - # x = x.view(batch_size, self.num_heads, -1, self.d_k).transpose( - # 1, 2).contiguous().view(batch_size, -1, self.num_heads * self.d_v) - # # (batch, time1, d_model) - # # print('10',x.shape) - - # v = v0[:,k_left_shift:-k_right_shift].view( - # batch_size, -1, self.num_heads, self.d_v).transpose( - # 1, 2).contiguous().view( - # batch_size, self.num_heads, num_blocks-1, -1, self.d_v) - # # (batch, blocks-1, head, time2, d_v) - # # print('11',p_attn1.shape,p_attn2.shape, v.shape) - # # (batch, blocks-1, head, time1, time2) x (batch, blocks-1, head, time2, d_v) - # x2 = torch.matmul(p_attn2, v) # (batch, heads, blocks-1, time1, d_k) - # # print('12',x2.shape) - # x2 = x2.view(batch_size, self.num_heads, -1, self.d_k).transpose( - # 1, 2).contiguous().view(batch_size, -1, self.num_heads * self.d_v) - # # (batch, time1, d_model) - # # print('12',x2.shape) - # x[:,q_left_shift:-q_right_shift:] = x[:,q_left_shift:-q_right_shift:] + x2 - # x = x[:,:t1] - # return self.linear_out(x) # (batch, time1, d_model) - - def forward2(self, query, key, value, mask): - """Computes 'Local Scaled Dot Product Attention'. + def __str__(self): + s = ("{}(in_feats={}, out_feats={}, num_heads={}, d_k={}, d_v={}, " + "context={}, dropout_rate={})".format( + self.__class__.__name__, + self.in_feats, + self.out_feats, + self.num_heads, + self.d_k, + self.d_v, + self.context, + self.dropout_rate, + )) + return s - Args: - query: query with size=(batch, time1, in_feats), - where time1 is the output time dimension - key: key with size=(batch, time2, in_feats) - where time1 is the input time dimension - value: value with size=(batch, time2, in_feats) - mask: optional mask with size=(batch, time1, time2), - to zero attention between some time steps. 
- or (batch, time) if time1=time2 - Returns: - Attention weigthed average of the values with size=(batch, time1, out_feats) - """ + def _compute_qkv0(self, query, key, value): batch_size = query.size(0) - t1 = query.size(self.time_dim) - t2 = key.size(self.time_dim) - if t2 <= self.context: - return super().forward(query, key, value, mask) - - if self.time_dim != 1: - query = query.transpose(1, self.time_dim) - key = key.transpose(1, self.time_dim) - value = value.transpose(1, self.time_dim) + t1 = query.size(1) + t2 = key.size(1) - context_k = self.context - num_blocks = math.ceil(t2 / context_k) # (t2 + context_k//2)//context_k + num_blocks = max(1, t2 // self.context) + context_k = math.ceil(t2 / num_blocks) context_q = math.ceil(t1 / num_blocks) - num_blocks_q = math.ceil(t1 / context_q) # (t1 + context_q//2)//context_q - assert ( - num_blocks == num_blocks_q - ), "num_blocks_k({})!=num_blocks_q({}), context_k={}, context_q={}, t1={}, t2={}".format( - num_blocks, num_blocks_q, context_k, context_q, t1, t2 - ) pad1 = context_q * num_blocks - t1 pad2 = context_k * num_blocks - t2 - # print('1',query.shape,key.shape,value.shape,pad1,pad2, context_q, context_k) if pad1 > 0: query = nn.functional.pad(query, (0, 0, 0, pad1)) @@ -732,152 +579,185 @@ def forward2(self, query, key, value, mask): key = nn.functional.pad(key, (0, 0, 0, pad2)) value = nn.functional.pad(value, (0, 0, 0, pad2)) - # print('2',query.shape,key.shape,value.shape) q0 = self.linear_q(query) # (batch, time1, head*d_k) k0 = self.linear_k(key) # (batch, time2, head*d_k) v0 = self.linear_v(value) # (batch, time2, head*d_v) - # # q0, k0, v0, context_q, context_k, num_blocks = self._compute_qkv0( - # # query, key, value) - # # # q0 size=(batch, time1, head*d_k) - # # # k0 size=(batch, time2, head*d_k) - # # # v0 size=(batch, time2, head*d_v) + return q0, k0, v0, context_q, context_k, num_blocks - # compute block diagonal affinity matrix - # # print('3',q0.shape,k0.shape,v0.shape) - q = ( - q0.view(batch_size, -1, self.num_heads, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks, -1, self.d_k) - ) + def _compute_scores(self, q0, k0, num_blocks, context_q, context_k): + # q0 (batch, time1, head*d_k) + # k0 (batch, time2, head*d_k) + batch_size = q0.size(0) + q = (q0.view(batch_size, -1, self.num_heads, + self.d_k).transpose(1, 2).contiguous().view( + batch_size, self.num_heads, num_blocks, -1, self.d_k)) # (batch, head, blocks, time1, d_k) - k = ( - k0.view(batch_size, -1, self.num_heads, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks, -1, self.d_k) - ) + k = (k0.view(batch_size, -1, self.num_heads, + self.d_k).transpose(1, 2).contiguous().view( + batch_size, self.num_heads, num_blocks, -1, self.d_k)) # (batch, head, blocks time2, d_k) - # # print('4',q.shape,k.shape) - scores1 = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - # # scores1 = self._compute_scores( - # # q0, k0, num_blocks, context_q, context_k, 0, 0) - # (batch, head, blocks context_q, context_k) - # print('5',scores1.shape) + return torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - # compute shifted block diagonal affinity matrix - q_left_shift = context_q // 2 - k_left_shift = context_k // 2 - q_right_shift = context_q - q_left_shift - k_right_shift = context_k - k_left_shift - q = ( - q0[:, q_left_shift:-q_right_shift] - .view(batch_size, -1, self.num_heads, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks - 1, 
-1, self.d_k) ) - # (batch, blocks-1, head, time1, d_k) - k = ( - k0[:, k_left_shift:-k_right_shift] - .view(batch_size, -1, self.num_heads, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks - 1, -1, self.d_k) - ) - # # (batch, blocks-1, head, d_k) - # # print('6',q.shape,k.shape) + @staticmethod + def _softmax(scores, t1, t2): - scores2 = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k) - # scores2 = self._compute_scores( - # q0, k0, num_blocks-1, context_q, context_k, - # q_left_shift, k_left_shift) - # (batch, head, blocks-1 context_q, context_k) - # print('7',scores2.shape) + """Computes softmax for block diagonal attention maps - # combine both block diagonal affinity matrix to do the softmax - # if mask is not None: - # # put to -inf scores in points where mask==0 - # if mask.dim() == 4: - # # case when mask is 2d matrix per batch element - # mask = mask.eq(0) # (batch, time1, time2) + Args: + scores: attention scores from block-diagonal score matrix + with size=(batch, heads, blocks, t1, t2) + t1: length of time dimension 1 (output time dimension) + t2: length of time dimension 2 (input time dimension), with self-att t1=t2. - # # first, we mask block diagonal blocks - # scores1 = self._mask_scores_2d(scores1, mask, 0, 0) + Returns: + probs: posterior attention scores for the block-diagonal att. matrix + with size=(batch, heads, blocks, t1, t2) + + """ + if scores.dtype == torch.half: + min_val = -65504 + else: + min_val = -1e20 + + batch_size = scores.size(0) + num_heads = scores.size(1) + num_blocks = scores.size(2) + context1 = scores.size(3) + context2 = scores.size(4) + + # set the padding time steps that we had to add to make integer block-number to -inf + # in scores - # # first, we mask block diagonal blocks - # scores1 = self._mask_scores_2d(scores1, mask, 0, 0) + dt1 = max(0, scores.size(2) * scores.size(3) - t1) + if dt1 > 0: + scores[:, :, -1, -dt1:, :] = min_val - # # second, we mask shifted block diagonal blocks - # scores2 = self._mask_scores_2d(scores2, mask, q_left_shift, k_left_shift) + dt2 = max(0, scores.size(2) * scores.size(4) - t2) + if dt2 > 0: + scores[:, :, -1, :, -dt2:] = min_val - # else: - # # case when mask is 1d vector per batch element, - # # meaning that time1 and time2 are the same, so mask is symmetric - # mask = nn.functional.pad(mask, (0, pad2)) - # mask = mask.squeeze(1).eq(0) # (batch, 1, time) + # flatten blocks and time1 dimensions scores = scores.view(batch_size, num_heads, -1, context2) + # softmax over the time2 dimension within each block - # # first, we mask block diagonal blocks - # scores1 = self._mask_scores_1d(scores1, mask, 0, 0) + # (batch, heads, blocks*time1, time2) + probs = torch.softmax(scores, dim=-1).contiguous().view( + batch_size, num_heads, num_blocks, -1, context2) - # # second, we mask shifted block diagonal blocks - # scores2 = self._mask_scores_1d(scores2, mask, q_left_shift, k_left_shift) + return probs - self.attn1, self.attn2 = self._softmax( - scores1, scores2, q_left_shift, k_left_shift, t1, t2 - ) + def _mask_scores_1d(self, scores, mask): + if scores.dtype == torch.half: + min_value = -65504 + else: + min_value = -1e20 + + batch_size = scores.size(0) + num_blocks = scores.size(2) + context1 = scores.size(3) + context2 = scores.size(4) + mask_blocks = torch.ones_like(scores, dtype=mask.dtype) + mask_single_block = torch.zeros((batch_size, context1, context2), + dtype=mask.dtype, + device=mask.device) + + t1_start = 0 + t2_start = 0 + for block in range(num_blocks): + t1_end = t1_start + context1 + t2_end = t2_start + context2 + mask_single_block.fill_(False) + mask_single_block.masked_fill_(mask[:, t1_start:t1_end, None], + True) + mask_single_block.masked_fill_(mask[:, None, t2_start:t2_end], + True) + mask_blocks[:, :, block] = mask_single_block.unsqueeze(1) + t1_start += context1 + t2_start += context2 + + return scores.masked_fill(mask_blocks, min_value) + + def _mask_scores_2d(self, scores, mask): + if scores.dtype == torch.half: + min_value = -65504 + else: + min_value = -1e20 + + batch_size = scores.size(0) + num_blocks = scores.size(2) + context1 = scores.size(3) + context2 = scores.size(4) + mask_blocks = torch.ones_like(scores, dtype=mask.dtype) + t1_start = 0 + t2_start = 0 + mask = mask.unsqueeze(1) + for block in range(num_blocks): + t1_end = min(t1_start + context1, mask.size(1)) + t2_end = min(t2_start + context2, mask.size(2)) + mask_blocks[:, :, block, :(t1_end - t1_start), :( + t2_end - t2_start)] = mask[:, :, t1_start:t1_end, + t2_start:t2_end] + t1_start += context1 + t2_start += context2 + + return scores.masked_fill(mask_blocks, min_value) + + def _compute_softmax(self, scores, mask, t1, t2): + + if mask is not None: + # put to -inf scores in points where mask==0 + if mask.dim() == 3: + # case when mask is 2d matrix per batch element + mask = mask.eq(0) # (batch, time1, time2) + + # first, we mask block diagonal blocks + scores = self._mask_scores_2d(scores, mask) + + elif mask.dim() == 2: + # case when mask is 1d vector per batch element, + # meaning that time1 and time2 are the same, so mask is symmetric + pad2 = scores.size(2) * scores.size(3) - mask.size(-1) + mask = nn.functional.pad(mask, (0, pad2)) + mask = mask.eq(0) # (batch, time) + + # first, we mask block diagonal blocks + scores = self._mask_scores_1d(scores, mask) + + else: + raise ValueError() - # # self._compute_softmax(scores1, scores2, mask, - # # q_left_shift, k_left_shift, t1, t2) - # # return self._apply_attn(v0, t1) + self.attn = self._softmax(scores, t1, t2) + def _apply_attn(self, v0, t1): if self.dropout_rate > 0: - p_attn1 = self.dropout(self.attn1) - p_attn2 = self.dropout(self.attn2) + p_attn = self.dropout(self.attn) else: - p_attn1 = self.attn1 - p_attn2 = self.attn2 + p_attn = self.attn - v = ( - v0.view(batch_size, -1, self.num_heads, self.d_v) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks, -1, self.d_k) - ) + batch_size = p_attn.size(0) + num_blocks = p_attn.size(2) + context_q = p_attn.size(3) + context_k = p_attn.size(4) + q_left_shift = context_q // 2 + k_left_shift = context_k // 2 + q_right_shift = context_q - q_left_shift + k_right_shift = context_k - k_left_shift + + v = (v0.view(batch_size, -1, self.num_heads, + self.d_v).transpose(1, 2).contiguous().view( + batch_size, self.num_heads, num_blocks, -1, self.d_k)) # (batch, heads, blocks, time2, d_v) - # print('8',p_attn1.shape,p_attn2.shape, v.shape) # (batch, head, blocks, time1, time2) x (batch, head, blocks, time2, d_v) x = torch.matmul(p_attn, v) # (batch, heads, blocks, time1, d_k) x = (x.view(batch_size, self.num_heads, -1, + self.d_k).transpose(1, 2).contiguous().view( + batch_size, -1, self.num_heads * self.d_v)) # (batch, time1, d_model) - # print('10',x.shape) - v = ( - v0[:, k_left_shift:-k_right_shift] - .view(batch_size, -1, self.num_heads, self.d_v) - .transpose(1, 2) - .contiguous() - .view(batch_size, self.num_heads, num_blocks - 1, -1, self.d_v) - ) - # (batch, blocks-1, 
head, time2, d_v) - # print('11',p_attn1.shape,p_attn2.shape, v.shape) - # (batch, blocks-1, head, time1, time2) x (batch, blocks-1, head, time2, d_v) - x2 = torch.matmul(p_attn2, v) # (batch, heads, blocks-1, time1, d_k) - # print('12',x2.shape) - x2 = ( - x2.view(batch_size, self.num_heads, -1, self.d_k) - .transpose(1, 2) - .contiguous() - .view(batch_size, -1, self.num_heads * self.d_v) - ) + # (batch, head, blocks, time1, time2) x (batch, head, blocks, time2, d_v) + x = torch.matmul(p_attn, v) # (batch, heads, blocks, time1, d_k) + x = (x.view(batch_size, self.num_heads, -1, + self.d_k).transpose(1, 2).contiguous().view( + batch_size, -1, self.num_heads * self.d_v)) # (batch, time1, d_model) - # print('12',x2.shape) - x[:, q_left_shift:-q_right_shift:] = x[:, q_left_shift:-q_right_shift:] + x2 + x = x[:, :t1] return self.linear_out(x) # (batch, time1, d_model) @@ -897,35 +777,24 @@ def forward(self, query, key, value, mask): Attention weigthed average of the values with size=(batch, time1, out_feats) """ batch_size = query.size(0) - t1 = query.size(self.time_dim) - t2 = key.size(self.time_dim) + t1 = query.size(1) + t2 = key.size(1) - if t2 <= 2 * self.context: + if t2 < 2 * self.context: return super().forward(query, key, value, mask) q0, k0, v0, context_q, context_k, num_blocks = self._compute_qkv0( - query, key, value - ) + query, key, value) # q0 size=(batch, time1, head*d_k) # k0 size=(batch, time2, head*d_k) # v0 size=(batch, time2, head*d_v) # compute block diagonal affinity matrix - scores1 = self._compute_scores(q0, k0, num_blocks, context_q, context_k, 0, 0) + scores = self._compute_scores(q0, k0, num_blocks, context_q, context_k) # (batch, head, blocks context_q, context_k) - # compute shifted block diagonal affinity matrix - q_left_shift = context_q // 2 - k_left_shift = context_k // 2 - scores2 = self._compute_scores( - q0, k0, num_blocks - 1, context_q, context_k, q_left_shift, k_left_shift - ) - # (batch, head, blocks-1 context_q, context_k) - # combine both block diagonal affinity matrix to do the softmax - self._compute_softmax( - scores1, scores2, mask, q_left_shift, k_left_shift, t1, t2 - ) + self._compute_softmax(scores, mask, t1, t2) return self._apply_attn(v0, t1) @@ -942,8 +811,6 @@ class ScaledDotProdAttRelPosEncV1(ScaledDotProdAttV1): d_v: value projection dimension causal_pos_enc: positional encoder is 0 for attending future frames. 
dropout_rate: dropout rate - time_dim: time dimension in the input, default=1 meaning input - dimensions are (batch, time, in_feats) """ def __init__( @@ -955,7 +822,6 @@ def __init__( d_v, causal_pos_enc=False, dropout_rate=0, - time_dim=1, ): super().__init__( in_feats, @@ -964,7 +830,6 @@ def __init__( d_k, d_v, dropout_rate=dropout_rate, - time_dim=time_dim, ) self.linear_pos = nn.Linear(in_feats, num_heads * d_k) @@ -992,19 +857,17 @@ def _apply_tril(self, x): 1 1 1 1 ] """ diag = x.size(3) - x.size(2) - if ( - self._tril is None - or self._tril.size(2) < x.size(2) - or self._tril.size(3) < x.size(3) - or self._tril_diag != diag - ): + if (self._tril is None or self._tril.size(2) < x.size(2) + or self._tril.size(3) < x.size(3) or self._tril_diag != diag): # in these cases we need to recompute the lower triangular mask - ones = torch.ones((x.size(2), x.size(3)), dtype=x.dtype, device=x.device) + ones = torch.ones((x.size(2), x.size(3)), + dtype=x.dtype, + device=x.device) self._tril = torch.tril(ones, diag)[None, None, :, :] self._tril_diag = diag tril = self._tril else: - tril = self._tril[:, :, : x.size(2), : x.size(3)] + tril = self._tril[:, :, :x.size(2), :x.size(3)] return x * tril @@ -1019,19 +882,17 @@ def _apply_triu(self, x): """ # we add 1 to put the diagonal to 0 so we don't count the R_0 embedding twice diag = x.size(3) - x.size(2) + 1 - if ( - self._triu is None - or self._triu.size(2) < x.size(2) - or self._triu.size(3) < x.size(3) - or self._triu_diag != diag - ): + if (self._triu is None or self._triu.size(2) < x.size(2) + or self._triu.size(3) < x.size(3) or self._triu_diag != diag): # in these cases we need to recompute the lower triangular mask - ones = torch.ones((x.size(2), x.size(3)), dtype=x.dtype, device=x.device) + ones = torch.ones((x.size(2), x.size(3)), + dtype=x.dtype, + device=x.device) self._triu = torch.triu(ones, diag)[None, None, :, :] self._triu_diag = diag triu = self._triu else: - triu = self._triu[:, :, -x.size(2) :, -x.size(3) :] + triu = self._triu[:, :, -x.size(2):, -x.size(3):] return x * triu @@ -1095,7 +956,8 @@ def forward(self, query, key, value, pos_emb=None, mask=None): q, k, v = self._compute_qkv(query, key, value) pos_batch_size = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(pos_batch_size, -1, self.num_heads, self.d_k) + p = self.linear_pos(pos_emb).view(pos_batch_size, -1, self.num_heads, + self.d_k) p = p.transpose(1, 2) # (batch, head, time2, d_k) q = q.transpose(1, 2) # (batch, time1, head, d_k) @@ -1103,13 +965,14 @@ def forward(self, query, key, value, pos_emb=None, mask=None): q_plus_v = (q + self.v).transpose(1, 2) # (batch, head, time1, d_k) # compute A(a) + A(c) in Sec3.3, 2nd Eq. - AC = torch.matmul(q_plus_u, k.transpose(-2, -1)) # (batch, head, time1, time2) + AC = torch.matmul(q_plus_u, + k.transpose(-2, -1)) # (batch, head, time1, time2) # compute A(b) + A(d) in Sec3.3, 2nd Eq. 
for the causal part # This is the sum of Btilde and Dtilde in the Appendix of the paper - BDtilde = torch.matmul( - q_plus_v, p.transpose(-2, -1) - ) # (batch, head, time1, time2) + BDtilde = torch.matmul(q_plus_v, + p.transpose(-2, + -1)) # (batch, head, time1, time2) # apply left shift as indicated in the Appendix to geth B+D BD = self._left_shift(BDtilde) @@ -1119,19 +982,15 @@ def forward(self, query, key, value, pos_emb=None, mask=None): # we assume that t2 >= t1 dt = key.size(1) - query.size(1) pos_emb_noncausal = pos_emb[:, dt:].flip( - dims=(1,) - ) # we flip to get R_0, ..., R_{L-1} - pos_emb_noncausal[ - :, :, 0::2 - ] *= -1 # we multiply sin emb by -1 to get R_0, R_{-1}, ..., R_{-(L-1)} + dims=(1, )) # we flip to get R_0, ..., R_{L-1} + pos_emb_noncausal[:, :, 0:: + 2] *= -1 # we multiply sin emb by -1 to get R_0, R_{-1}, ..., R_{-(L-1)} assert pos_emb[0, -2, 0] == -pos_emb_noncausal[0, 1, 0] p = self.linear_pos(pos_emb_noncausal).view( - pos_batch_size, -1, self.num_heads, self.d_k - ) + pos_batch_size, -1, self.num_heads, self.d_k) p = p.transpose(1, 2) # (batch, head, time2-dt, d_k) - BDtilde = torch.matmul( - q_plus_v, p.transpose(-2, -1) - ) # (batch, head, time1, time2-dt) + BDtilde = torch.matmul(q_plus_v, p.transpose( + -2, -1)) # (batch, head, time1, time2-dt) BD_noncausal = self._right_shift(BDtilde) BD[:, :, :, dt:] += BD_noncausal @@ -1158,8 +1017,6 @@ class LocalScaledDotProdAttRelPosEncV1(LocalScaledDotProdAttV1): context: maximum attention temporal context. causal_pos_enc: positional encoder is 0 for attending future frames. dropout_rate: dropout rate - time_dim: time dimension in the input, default=1 meaning input - dimensions are (batch, time, in_feats) """ def __init__( @@ -1172,7 +1029,6 @@ def __init__( context=25, causal_pos_enc=False, dropout_rate=0, - time_dim=1, ): super().__init__( in_feats, @@ -1182,7 +1038,6 @@ def __init__( d_v, context, dropout_rate=dropout_rate, - time_dim=time_dim, ) self.linear_pos = nn.Linear(in_feats, num_heads * d_k) @@ -1210,19 +1065,17 @@ def _apply_tril(self, x): 1 1 1 1 ] """ diag = x.size(4) - x.size(3) - if ( - self._tril is None - or self._tril.size(3) < x.size(3) - or self._tril.size(4) < x.size(4) - or self._tril_diag != diag - ): + if (self._tril is None or self._tril.size(3) < x.size(3) + or self._tril.size(4) < x.size(4) or self._tril_diag != diag): # in these cases we need to recompute the lower triangular mask - ones = torch.ones((x.size(3), x.size(4)), dtype=x.dtype, device=x.device) + ones = torch.ones((x.size(3), x.size(4)), + dtype=x.dtype, + device=x.device) self._tril = torch.tril(ones, diag)[None, None, None, :, :] self._tril_diag = diag tril = self._tril else: - tril = self._tril[:, :, :, : x.size(3), : x.size(4)] + tril = self._tril[:, :, :, :x.size(3), :x.size(4)] return x * tril @@ -1237,19 +1090,17 @@ def _apply_triu(self, x): """ # we add 1 to put the diagonal to 0 so we don't count the R_0 embedding twice diag = x.size(4) - x.size(3) + 1 - if ( - self._triu is None - or self._triu.size(3) < x.size(3) - or self._triu.size(4) < x.size(4) - or self._triu_diag != diag - ): + if (self._triu is None or self._triu.size(3) < x.size(3) + or self._triu.size(4) < x.size(4) or self._triu_diag != diag): # in these cases we need to recompute the lower triangular mask - ones = torch.ones((x.size(3), x.size(4)), dtype=x.dtype, device=x.device) + ones = torch.ones((x.size(3), x.size(4)), + dtype=x.dtype, + device=x.device) self._triu = torch.triu(ones, diag)[None, None, None, :, :] self._triu_diag = diag triu = 
self._triu else: - triu = self._triu[:, :, :, -x.size(3) :, -x.size(4) :] + triu = self._triu[:, :, :, -x.size(3):, -x.size(4):] return x * triu @@ -1320,27 +1171,52 @@ def forward(self, query, key, value, pos_emb=None, mask=None): Attention weigthed average of the value with size=(batch, time1, out_feats) """ batch_size = query.size(0) - t1 = query.size(self.time_dim) - t2 = key.size(self.time_dim) - q0, k0, v0, context_q, context_k, num_blocks = self._compute_qkv0( - query, key, value - ) - # q0 size=(batch, time1, head*d_k) - # k0 size=(batch, time2, head*d_k) - # v0 size=(batch, time2, head*d_v) - - q_plus_u0 = q0 + self.u.view(-1, q0.size(-1)) # (batch, time1, head*d_k) - - # q = q.transpose(1, 2) # (batch, time1, head, d_k) - # q_plus_u = (q + self.u).transpose(1, 2) #(batch, head, time1, d_k) - # q_plus_v = (q + self.v).transpose(1, 2) #(batch, head, time1, d_k) + t1 = query.size(1) + t2 = key.size(1) + if round(t2 / self.context) > 1: + return self._forward_nblocks(query, key, value, pos_emb, mask) + else: + return self._forward_1block(query, key, value, pos_emb, mask) - # compute A(a) + A(c) in Sec3.3, 2nd Eq. block diagonals - # 1) compute block diagonal affinity matrix - AC1 = self._compute_scores( - q_plus_u0, k0, num_blocks, context_q, context_k, 0, 0 - ) - # (batch, head, blocks, context_q, context_k) + def _forward_nblocks(self, query, key, value, pos_emb=None, mask=None): + """Computes 'Scaled Dot Product Attention' for the case that we have + more than 1block in the block diagonal attention matrix. + + Args: + query: query with size=(batch, time1, in_feats), + where time1 is the output time dimension + key: key with size=(batch, time2, in_feats) + where time1 is the input time dimension + value: value with size=(batch, time2, in_feats) + pos_emb: positional embedding size=(batch, time2, in_feats) as R_{L-1}, ..., R_0 + mask: optional mask with size=(batch, time1, time2), + to zero attention between some time steps + or size=(batch, time) to make time1=time2 + Returns: + Attention weigthed average of the value with size=(batch, time1, out_feats) + """ + batch_size = query.size(0) + t1 = query.size(1) + t2 = key.size(1) + + q0, k0, v0, context_q, context_k, num_blocks = self._compute_qkv0( + query, key, value) + # q0 size=(batch, time1, head*d_k) + # k0 size=(batch, time2, head*d_k) + # v0 size=(batch, time2, head*d_v) + + q_plus_u0 = q0 + self.u.view(-1, + q0.size(-1)) # (batch, time1, head*d_k) + + # q = q.transpose(1, 2) # (batch, time1, head, d_k) + # q_plus_u = (q + self.u).transpose(1, 2) #(batch, head, time1, d_k) + # q_plus_v = (q + self.v).transpose(1, 2) #(batch, head, time1, d_k) + + # compute A(a) + A(c) in Sec3.3, 2nd Eq. 
block diagonals + # 1) compute block diagonal affinity matrix + AC1 = self._compute_scores(q_plus_u0, k0, num_blocks, context_q, + context_k, 0, 0) + # (batch, head, blocks, context_q, context_k) # 2) compute shifted block diagonal matrix q_left_shift = context_q // 2 @@ -1359,28 +1235,27 @@ def forward(self, query, key, value, pos_emb=None, mask=None): pos_emb = pos_emb[:, -context_k:] # (1, context_k, d_model) pos_batch_size = pos_emb.size(0) - p = self.linear_pos(pos_emb).view(pos_batch_size, -1, self.num_heads, self.d_k) + p = self.linear_pos(pos_emb).view(pos_batch_size, -1, self.num_heads, + self.d_k) p = p.transpose(1, 2) # (1, head, context_k, d_k) - q = q0.view( - batch_size, -1, self.num_heads, self.d_k - ) # (batch, time1, head, d_k) + q = q0.view(batch_size, -1, self.num_heads, + self.d_k) # (batch, time1, head, d_k) q_plus_v = (q + self.v).transpose(1, 2) # (batch, head, time1, d_k) # compute A(b) + A(d) in Sec3.3, 2nd Eq. for the causal part # This is the sum of Btilde and Dtilde in the Appendix of the paper BDtilde = torch.matmul(q_plus_v, p.transpose(-2, -1)) / math.sqrt( - self.d_k - ) # (batch, head, time1, context_k) + self.d_k) # (batch, head, time1, context_k) # apply left shift as indicated in the Appendix to geth B+D # 1) block-diagonal part of BD: BD1 BD1 = self._left_shift( - BDtilde, context_q, 0 - ) # (batch, head, blocks, context_q, context_k) + BDtilde, context_q, + 0) # (batch, head, blocks, context_q, context_k) # 2) shifted block diagonal part of BD: BD2 BD2 = self._left_shift( - BDtilde, context_q, q_left_shift - ) # (batch, head, blocks-1, context_q, context_k) + BDtilde, context_q, + q_left_shift) # (batch, head, blocks-1, context_q, context_k) # print('BD\n',BD1[0,0,0,:10,:10]) # print(BD2[0,0,0,:10,:10]) @@ -1391,22 +1266,18 @@ def forward(self, query, key, value, pos_emb=None, mask=None): # we assume that t2 >= t1, and therefore context_k >= context_q dt = context_k - context_q pos_emb_noncausal = pos_emb[:, dt:].flip( - dims=(1,) - ) # we flip to get R_0, ..., R_{L-1} - pos_emb_noncausal[ - :, :, 0::2 - ] *= -1 # we multiply sin emb by -1 to get R_0, R_{-1}, ..., R_{-(L-1)} + dims=(1, )) # we flip to get R_0, ..., R_{L-1} + pos_emb_noncausal[:, :, 0:: + 2] *= -1 # we multiply sin emb by -1 to get R_0, R_{-1}, ..., R_{-(L-1)} assert pos_emb[0, -2, 0] == -pos_emb_noncausal[0, 1, 0] p = self.linear_pos(pos_emb_noncausal).view( - pos_batch_size, -1, self.num_heads, self.d_k - ) + pos_batch_size, -1, self.num_heads, self.d_k) p = p.transpose(1, 2) # (batch, head, context_k-dt, d_k) BDtilde = torch.matmul(q_plus_v, p.transpose(-2, -1)) / math.sqrt( - self.d_k - ) # (batch, head, time1, context_k-dt) + self.d_k) # (batch, head, time1, context_k-dt) BD_noncausal1 = self._right_shift( - BDtilde, context_q, 0 - ) # (batch, head, blocks, context_q, context_k-dt) + BDtilde, context_q, + 0) # (batch, head, blocks, context_q, context_k-dt) BD_noncausal2 = self._right_shift( BDtilde, context_q, q_left_shift ) # (batch, head, blocks-1, context_q, context_k-dt) @@ -1422,7 +1293,394 @@ def forward(self, query, key, value, pos_emb=None, mask=None): # add AC and BD for block-diag s scores1 = AC1 + BD1 # (batch, head, blocks, context_q, context_k) scores2 = AC2 + BD2 # (batch, head, blocks-1, context_q, context_k) - self._compute_softmax( - scores1, scores2, mask, q_left_shift, k_left_shift, t1, t2 + self._compute_softmax(scores1, scores2, mask, q_left_shift, + k_left_shift, t1, t2) + return self._apply_attn(v0, t1) + + def _forward_1block(self, query, key, value, 
pos_emb=None, mask=None): + """Computes 'Scaled Dot Product Attention' for the case that + there is only one block in the block-diagonal attention matrix. + + Args: + query: query with size=(batch, time1, in_feats), + where time1 is the output time dimension + key: key with size=(batch, time2, in_feats) + where time1 is the input time dimension + value: value with size=(batch, time2, in_feats) + pos_emb: positional embedding size=(batch, time2, in_feats) as R_{L-1}, ..., R_0 + mask: optional mask with size=(batch, time1, time2), + to zero attention between some time steps + or size=(batch, time) to make time1=time2 + Returns: + Attention weigthed average of the value with size=(batch, time1, out_feats) + """ + batch_size = value.size(0) + q, k, v = self._compute_qkv(query, key, value) + context_q = query.size(1) + + pos_batch_size = pos_emb.size(0) + p = self.linear_pos(pos_emb).view(pos_batch_size, -1, self.num_heads, + self.d_k) + p = p.transpose(1, 2) # (batch, head, time2, d_k) + + q = q.transpose(1, 2) # (batch, time1, head, d_k) + q_plus_u = (q + self.u).transpose(1, 2) # (batch, head, time1, d_k) + q_plus_v = (q + self.v).transpose(1, 2) # (batch, head, time1, d_k) + + # compute A(a) + A(c) in Sec3.3, 2nd Eq. + AC = torch.matmul(q_plus_u, k.transpose(-2, -1)) + # AC = (batch, head, time1, time2) + + # compute A(b) + A(d) in Sec3.3, 2nd Eq. for the causal part + # This is the sum of Btilde and Dtilde in the Appendix of the paper + BDtilde = torch.matmul(q_plus_v, p.transpose(-2, -1)) + # BDtilde = (batch, head, time1, time2) + # apply left shift as indicated in the Appendix to geth B+D + BD = self._left_shift(BDtilde, context_q, 0).squeeze(2) + + if not self.causal_pos_enc: + # compute A(b) + A(d) for the non-causal part, + # this is not included in the paper because it doesn't allow to attent to future postions + # we assume that t2 >= t1 + dt = key.size(1) - query.size(1) + pos_emb_noncausal = pos_emb[:, dt:].flip( + dims=(1, )) # we flip to get R_0, ..., R_{L-1} + pos_emb_noncausal[:, :, 0:: + 2] *= -1 # we multiply sin emb by -1 to get R_0, R_{-1}, ..., R_{-(L-1)} + assert pos_emb[0, -2, 0] == -pos_emb_noncausal[0, 1, 0] + p = self.linear_pos(pos_emb_noncausal).view( + pos_batch_size, -1, self.num_heads, self.d_k) + p = p.transpose(1, 2) # (batch, head, time2-dt, d_k) + BDtilde = torch.matmul(q_plus_v, p.transpose(-2, -1)) + # BDtilde = (batch, head, time1, time2-dt) + BD_noncausal = self._right_shift(BDtilde, context_q, 0).squeeze(2) + BD[:, :, :, dt:] += BD_noncausal + + # add and normalize + scores = (AC + BD) / math.sqrt(self.d_k) # (batch, head, time1, time2) + self.attn = self._base_compute_softmax(scores, mask) + return self._base_apply_attn(v) + + +class BlockScaledDotProdAttRelPosEncV1(BlockScaledDotProdAttV1): + """Block Scaled dot product multihead attention layer + It calculates self-attention with block diagonal mask + + It uses relative positional encoders as defined in + https://arxiv.org/pdf/1901.02860.pdf + + Attributes: + in_feats: input feature dimension + out_feats: output feature dimension + num_heads: number of heads + d_k: key/query projection dimension + d_v: value projection dimension + context: maximum attention temporal context. + causal_pos_enc: positional encoder is 0 for attending future frames. 
+      dropout_rate: dropout rate
+    """
+
+    def __init__(
+        self,
+        in_feats,
+        out_feats,
+        num_heads,
+        d_k,
+        d_v,
+        context=25,
+        causal_pos_enc=False,
+        dropout_rate=0,
+    ):
+        super().__init__(
+            in_feats,
+            out_feats,
+            num_heads,
+            d_k,
+            d_v,
+            context,
+            dropout_rate=dropout_rate,
+        )
+
+        self.linear_pos = nn.Linear(in_feats, num_heads * d_k)
+        # u, v in paper, Sec 3.3, 2nd eq.
+        self.u = nn.Parameter(torch.Tensor(num_heads, d_k))
+        self.v = nn.Parameter(torch.Tensor(num_heads, d_k))
+        # we use same init as in espnet
+        nn.init.xavier_uniform_(self.u)
+        nn.init.xavier_uniform_(self.v)
+
+        self.causal_pos_enc = causal_pos_enc
+
+        self._tril = None
+        self._tril_diag = 0
+        self._triu = None
+        self._triu_diag = 0
+
+    def _apply_tril(self, x):
+        """Applies lower triangular mask to (Q + v^T) W R_{i-j} attention matrix
+        to keep causal attention points, i.e., i-j >= 0
+        E.g.,
+        if t1=3, t2=4 this will apply a mask
+        [1 1 0 0;
+         1 1 1 0;
+         1 1 1 1 ]
+        """
+        diag = x.size(4) - x.size(3)
+        if (self._tril is None or self._tril.size(3) < x.size(3)
+                or self._tril.size(4) < x.size(4) or self._tril_diag != diag):
+            # in these cases we need to recompute the lower triangular mask
+            ones = torch.ones((x.size(3), x.size(4)),
+                              dtype=x.dtype,
+                              device=x.device)
+            self._tril = torch.tril(ones, diag)[None, None, None, :, :]
+            self._tril_diag = diag
+            tril = self._tril
+        else:
+            tril = self._tril[:, :, :, :x.size(3), :x.size(4)]
+
+        return x * tril
+
+    def _apply_triu(self, x):
+        """Applies upper triangular mask to (Q + v^T) W R_{i-j} attention matrix
+        to keep non-causal attention points, i.e., i-j < 0
+        E.g.,
+        if t1=3, t2=4 this will apply a mask
+        [0 0 1 1;
+         0 0 0 1;
+         0 0 0 0 ]
+        """
+        # we add 1 to put the diagonal to 0 so we don't count the R_0 embedding twice
+        diag = x.size(4) - x.size(3) + 1
+        if (self._triu is None or self._triu.size(3) < x.size(3)
+                or self._triu.size(4) < x.size(4) or self._triu_diag != diag):
+            # in these cases we need to recompute the upper triangular mask
+            ones = torch.ones((x.size(3), x.size(4)),
+                              dtype=x.dtype,
+                              device=x.device)
+            self._triu = torch.triu(ones, diag)[None, None, None, :, :]
+            self._triu_diag = diag
+            triu = self._triu
+        else:
+            triu = self._triu[:, :, :, -x.size(3):, -x.size(4):]
+
+        return x * triu
+
+    def _left_shift(self, x, context):
+        """Applies left shifts to the rows of x
+        to get scores with relative pos encodings R_{i-j}
+        i-j >=0, causal attention
+
+        E.g.
+        [q0 R3, q0 R2, q0 R1, q0 R0;
+         q1 R3, q1 R2, q1 R1, q1 R0;
+         q2 R3, q2 R2, q2 R1, q2 R0]
+
+        becomes:
+        [q0 R1, q0 R0, 0    , 0    ;
+         q1 R2, q1 R1, q1 R0, 0    ;
+         q2 R3, q2 R2, q2 R1, q2 R0]
+        """
+        x = x.view(x.size(0), x.size(1), -1, context, x.size(-1))
+        x_pad = nn.functional.pad(x, (1, 0), mode="constant", value=0)
+        x_pad = x_pad.view(*x.size()[:3], x.size(4) + 1, x.size(3))
+        x = x_pad[:, :, :, 1:].view_as(x)
+        return self._apply_tril(x)
+
+    def _right_shift(self, x, context):
+        """Applies right shifts to the rows of x
+        to get scores with relative pos encodings R_{i-j}
+        i-j < 0, non-causal attention
+
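+        Editorial note: row i is shifted right by i positions, so that
+        column j ends up holding q_i R_{i-j}; _apply_triu then keeps only
+        the non-causal entries (i-j < 0), since _left_shift already covers
+        i-j >= 0.
+
+        E.g.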
+        [q0 R_0, q0 R_{-1}, q0 R_{-2};
+         q1 R_0, q1 R_{-1}, q1 R_{-2};
+         q2 R_0, q2 R_{-1}, q2 R_{-2}]
+
+        becomes:
+        [ 0, q0 R_{-1}, q0 R_{-2};
+          0, 0        , q1 R_{-1};
+          0, 0        , 0        ]
+        """
+        x = x.view(x.size(0), x.size(1), -1, context, x.size(-1))
+        x_pad = nn.functional.pad(x, (0, 1), mode="constant", value=0)
+        x_pad = x_pad.view(*x.size()[:3], x.size(4) + 1, x.size(3))
+        x = x_pad[:, :, :, :-1].view_as(x)
+        return self._apply_triu(x)
+
+    def forward(self, query, key, value, pos_emb=None, mask=None):
+        """Computes 'Scaled Dot Product Attention'.
+
+        Args:
+          query: query with size=(batch, time1, in_feats),
+                 where time1 is the output time dimension
+          key: key with size=(batch, time2, in_feats)
+               where time2 is the input time dimension
+          value: value with size=(batch, time2, in_feats)
+          pos_emb: positional embedding size=(batch, time2, in_feats) as R_{L-1}, ..., R_0
+          mask: optional mask with size=(batch, time1, time2),
+                to zero attention between some time steps
+                or size=(batch, time) to make time1=time2
+        Returns:
+          Attention weighted average of the value with size=(batch, time1, out_feats)
+        """
+        batch_size = query.size(0)
+        t1 = query.size(1)
+        t2 = key.size(1)
+        if t2 // self.context > 1:
+            return self._forward_nblocks(query, key, value, pos_emb, mask)
+        else:
+            return self._forward_1block(query, key, value, pos_emb, mask)
+
+    def _forward_nblocks(self, query, key, value, pos_emb=None, mask=None):
+        """Computes 'Scaled Dot Product Attention' for the case that we have
+        more than one block in the block-diagonal attention matrix.
+
+        Args:
+          query: query with size=(batch, time1, in_feats),
+                 where time1 is the output time dimension
+          key: key with size=(batch, time2, in_feats)
+               where time2 is the input time dimension
+          value: value with size=(batch, time2, in_feats)
+          pos_emb: positional embedding size=(batch, time2, in_feats) as R_{L-1}, ..., R_0
+          mask: optional mask with size=(batch, time1, time2),
+                to zero attention between some time steps
+                or size=(batch, time) to make time1=time2
+        Returns:
+          Attention weighted average of the value with size=(batch, time1, out_feats)
+        """
+        batch_size = query.size(0)
+        t1 = query.size(1)
+        t2 = key.size(1)
+
+        q0, k0, v0, context_q, context_k, num_blocks = self._compute_qkv0(
+            query, key, value)
+        # q0 size=(batch, time1, head*d_k)
+        # k0 size=(batch, time2, head*d_k)
+        # v0 size=(batch, time2, head*d_v)
+
+        q_plus_u0 = q0 + self.u.view(-1,
+                                     q0.size(-1))  # (batch, time1, head*d_k)
+
+        # q = q.transpose(1, 2)  # (batch, time1, head, d_k)
+        # q_plus_u = (q + self.u).transpose(1, 2)  #(batch, head, time1, d_k)
+        # q_plus_v = (q + self.v).transpose(1, 2)  #(batch, head, time1, d_k)
+
+        # compute A(a) + A(c) in Sec3.3, 2nd Eq. block diagonals
+        # 1) compute block diagonal affinity matrix
+        AC1 = self._compute_scores(q_plus_u0, k0, num_blocks, context_q,
+                                   context_k)
+        # (batch, head, blocks, context_q, context_k)
+
+        # AC = torch.matmul(q_plus_u, k.transpose(-2, -1))  # (batch, head, time1, time2)
+
+        pos_emb = pos_emb[:, -context_k:]  # (1, context_k, d_model)
+        pos_batch_size = pos_emb.size(0)
+        p = self.linear_pos(pos_emb).view(pos_batch_size, -1, self.num_heads,
+                                          self.d_k)
+        p = p.transpose(1, 2)  # (1, head, context_k, d_k)
+
+        q = q0.view(batch_size, -1, self.num_heads,
+                    self.d_k)  # (batch, time1, head, d_k)
+        q_plus_v = (q + self.v).transpose(1, 2)  # (batch, head, time1, d_k)
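+
+        # Editorial note: u and v are the learned global bias vectors of
+        # Transformer-XL (Sec 3.3, 2nd Eq. of
+        # https://arxiv.org/pdf/1901.02860.pdf): q+u scores content (terms
+        # A(a)+A(c) above), while q+v scores relative positions (terms
+        # A(b)+A(d) below).
+
+        # compute A(b) + A(d) in Sec3.3, 2nd Eq. for the causal part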
+        # This is the sum of Btilde and Dtilde in the Appendix of the paper
+        BDtilde = torch.matmul(q_plus_v, p.transpose(-2, -1)) / math.sqrt(
+            self.d_k)
+        # BDtilde = (batch, head, time1, context_k)
+        # apply left shift as indicated in the Appendix to get B+D
+        # 1) block-diagonal part of BD: BD1
+        BD1 = self._left_shift(BDtilde, context_q)
+        # BD = (batch, head, blocks, context_q, context_k)
+        # print('BD\n',BD1[0,0,0,:10,:10])
+
+        if not self.causal_pos_enc:
+            # compute A(b) + A(d) for the non-causal part,
+            # this is not included in the paper because it doesn't allow attending to future positions
+            # we assume that t2 >= t1, and therefore context_k >= context_q
+            dt = context_k - context_q
+            pos_emb_noncausal = pos_emb[:, dt:].flip(
+                dims=(1, ))  # we flip to get R_0, ..., R_{L-1}
+            pos_emb_noncausal[:, :, 0::
+                              2] *= -1  # we multiply sin emb by -1 to get R_0, R_{-1}, ..., R_{-(L-1)}
+            assert pos_emb[0, -2, 0] == -pos_emb_noncausal[0, 1, 0]
+            p = self.linear_pos(pos_emb_noncausal).view(
+                pos_batch_size, -1, self.num_heads, self.d_k)
+            p = p.transpose(1, 2)  # (batch, head, context_k-dt, d_k)
+            BDtilde = torch.matmul(q_plus_v, p.transpose(-2, -1)) / math.sqrt(
+                self.d_k)  # (batch, head, time1, context_k-dt)
+            BD_noncausal1 = self._right_shift(BDtilde, context_q)
+            # BD = (batch, head, blocks, context_q, context_k-dt)
+            # print(BD_noncausal1[0,0,0,:10,:10])
+            BD1[:, :, :, :, dt:] += BD_noncausal1
+
+        # print(BD1[0,0,0,:10,:10])
+
+        # add AC and BD for the block-diagonal scores
+        scores = AC1 + BD1  # (batch, head, blocks, context_q, context_k)
+        self._compute_softmax(scores, mask, t1, t2)
+        return self._apply_attn(v0, t1)
+
+    def _forward_1block(self, query, key, value, pos_emb=None, mask=None):
+        """Computes 'Scaled Dot Product Attention' for the case that
+        there is only one block in the block-diagonal attention matrix.
+
+        Args:
+          query: query with size=(batch, time1, in_feats),
+                 where time1 is the output time dimension
+          key: key with size=(batch, time2, in_feats)
+               where time2 is the input time dimension
+          value: value with size=(batch, time2, in_feats)
+          pos_emb: positional embedding size=(batch, time2, in_feats) as R_{L-1}, ..., R_0
+          mask: optional mask with size=(batch, time1, time2),
+                to zero attention between some time steps
+                or size=(batch, time) to make time1=time2
+        Returns:
+          Attention weighted average of the value with size=(batch, time1, out_feats)
+        """
+        batch_size = value.size(0)
+        q, k, v = self._compute_qkv(query, key, value)
+        context_q = query.size(1)
+
+        pos_batch_size = pos_emb.size(0)
+        p = self.linear_pos(pos_emb).view(pos_batch_size, -1, self.num_heads,
+                                          self.d_k)
+        p = p.transpose(1, 2)  # (batch, head, time2, d_k)
+
+        q = q.transpose(1, 2)  # (batch, time1, head, d_k)
+        q_plus_u = (q + self.u).transpose(1, 2)  # (batch, head, time1, d_k)
+        q_plus_v = (q + self.v).transpose(1, 2)  # (batch, head, time1, d_k)
+
+        # compute A(a) + A(c) in Sec3.3, 2nd Eq.
+        AC = torch.matmul(q_plus_u, k.transpose(-2, -1))
+        # AC = (batch, head, time1, time2)
+
+        # compute A(b) + A(d) in Sec3.3, 2nd Eq. for the causal part
+        # This is the sum of Btilde and Dtilde in the Appendix of the paper
+        BDtilde = torch.matmul(q_plus_v, p.transpose(-2, -1))
+        # BDtilde = (batch, head, time1, time2)
+        # apply left shift as indicated in the Appendix to get B+D
+        BD = self._left_shift(BDtilde, context_q).squeeze(2)
+
+        if not self.causal_pos_enc:
+            # compute A(b) + A(d) for the non-causal part,
+            # this is not included in the paper because it doesn't allow attending to future positions
+            # we assume that t2 >= t1
+            dt = key.size(1) - query.size(1)
+            pos_emb_noncausal = pos_emb[:, dt:].flip(
+                dims=(1, ))  # we flip to get R_0, ..., R_{L-1}
+            pos_emb_noncausal[:, :, 0::
+                              2] *= -1  # we multiply sin emb by -1 to get R_0, R_{-1}, ..., R_{-(L-1)}
+            assert pos_emb[0, -2, 0] == -pos_emb_noncausal[0, 1, 0]
+            p = self.linear_pos(pos_emb_noncausal).view(
+                pos_batch_size, -1, self.num_heads, self.d_k)
+            p = p.transpose(1, 2)  # (batch, head, time2-dt, d_k)
+            BDtilde = torch.matmul(q_plus_v, p.transpose(-2, -1))
+            # BDtilde = (batch, head, time1, time2-dt)
+            BD_noncausal = self._right_shift(BDtilde, context_q).squeeze(2)
+            BD[:, :, :, dt:] += BD_noncausal
+
+        # add and normalize
+        scores = (AC + BD) / math.sqrt(self.d_k)  # (batch, head, time1, time2)
+        self.attn = self._base_compute_softmax(scores, mask)
+        return self._base_apply_attn(v)
diff --git a/hyperion/torch/layers/pos_encoder.py b/hyperion/torch/layers/pos_encoder.py
index f3aa17e9..b6f3672e 100644
--- a/hyperion/torch/layers/pos_encoder.py
+++ b/hyperion/torch/layers/pos_encoder.py
@@ -3,10 +3,13 @@
  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
 """
 import math
+from typing import Union
 
 import torch
 from torch import nn
 
+from .activation_factory import ActivationFactory as AF
+
 
 class PosEncoder(nn.Module):
     """Positional encoding.
@@ -16,7 +19,7 @@ class PosEncoder(nn.Module):
       dropout_rate: dropout rate
     """
 
-    def __init__(self, num_feats, dropout_rate=0):
+    def __init__(self, num_feats: int, dropout_rate: float = 0):
         super().__init__()
         self.num_feats = num_feats
         self.dropout_rate = dropout_rate
@@ -29,9 +32,9 @@ def __repr__(self):
         return self.__str__()
 
     def __str__(self):
-        s = "{}(num_feats={}, dropout_rate={})".format(
-            self.__class__.__name__, self.num_feats, self.dropout_rate
-        )
+        s = "{}(num_feats={}, dropout_rate={})".format(self.__class__.__name__,
+                                                       self.num_feats,
+                                                       self.dropout_rate)
         return s
 
     def _pe(self, x, relative=False):
@@ -45,22 +48,21 @@ def _pe(self, x, relative=False):
         pe = torch.zeros(x.size(1), self.num_feats)
         if relative:
             # this is for relative positional encoders
-            position = torch.arange(
-                x.size(1) - 1, -1, -1, dtype=torch.float32
-            ).unsqueeze(1)
+            position = torch.arange(x.size(1) - 1, -1, -1,
+                                    dtype=torch.float32).unsqueeze(1)
         else:
-            position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1)
+            position = torch.arange(0, x.size(1),
+                                    dtype=torch.float32).unsqueeze(1)
         div_term = torch.exp(
-            torch.arange(0, self.num_feats, 2, dtype=torch.float32)
-            * -(math.log(10000.0) / self.num_feats)
-        )
+            torch.arange(0, self.num_feats, 2, dtype=torch.float32) *
+            -(math.log(10000.0) / self.num_feats))
         pe[:, 0::2] = torch.sin(position * div_term)
         pe[:, 1::2] = torch.cos(position * div_term)
         pe = pe.unsqueeze(0)
         self.pe = pe.to(device=x.device, dtype=x.dtype)
         return self.pe
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor):
         """Add positional encoding.
 
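         The input is first scaled by self.xscale, and the encodings of the
         first x.size(1) positions are added (see the forward body below).
 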
Args: @@ -70,7 +72,7 @@ def forward(self, x): x-scaled + pos-encoder """ pe = self._pe(x) - x = x * self.xscale + pe[:, : x.size(1)] + x = x * self.xscale + pe[:, :x.size(1)] if self.dropout_rate > 0: return self.dropout(x) return x @@ -88,10 +90,10 @@ class RelPosEncoder(PosEncoder): dropout_rate: dropout rate """ - def __init__(self, num_feats, dropout_rate=0): + def __init__(self, num_feats: int, dropout_rate: float = 0): super().__init__(num_feats, dropout_rate) - def forward(self, x): + def forward(self, x: torch.Tensor): """Add positional encoding. Args: @@ -105,7 +107,7 @@ def forward(self, x): x = x * self.xscale # we want embedding [R_L,..., R_0] # while in non relative we want [R_0, ..., R_L] - pos_emb = self.pe[:, -x.size(1) :] + pos_emb = self.pe[:, -x.size(1):] # this pos_emb is matrix Q in # https://arxiv.org/pdf/1901.02860.pdf Appendix B # I think it should have been denoted as R, @@ -126,7 +128,7 @@ class NoPosEncoder(nn.Module): def __init__(self): super().__init__() - def forward(self, x): + def forward(self, x: torch.Tensor): """Identity map Args: @@ -136,3 +138,35 @@ def forward(self, x): x """ return x + + +class ConvPosEncoder(nn.Module): + """Convolutional positional encoder like the one used in wav2vec2 + + Attributes: + num_feats: number of input/output features + kernel_size: kernel size of convolution + num_groups: number of groups of the convolution + activation: hidden activation + """ + + def __init__(self, num_feats: int, kernel_size: int, num_groups: int, + activation: Union[str, nn.Module]): + super().__init__() + self.conv = nn.Conv1d(num_feats, + num_feats, + kernel_size=kernel_size, + padding=kernel_size // 2, + groups=num_groups) + self.activation = AF.create(activation) + self.num_pad_remove = 1 if kernel_size % 2 == 0 else 0 + + def forward(self, x: torch.Tensor): + x = x.transpose(1, 2) + x = self.conv(x) + if self.num_pad_remove > 0: + x = x[:, :, :-self.num_pad_remove] + + x = self.activation(x).transpose(1, 2) + + return x diff --git a/hyperion/torch/lr_schedulers/factory.py b/hyperion/torch/lr_schedulers/factory.py index 4bd086ad..ab518ad4 100644 --- a/hyperion/torch/lr_schedulers/factory.py +++ b/hyperion/torch/lr_schedulers/factory.py @@ -14,6 +14,7 @@ class LRSchedulerFactory(object): + def create( optimizer, lrsch_type, @@ -168,6 +169,8 @@ def create( eps=eps, ) + raise ValueError(f"invalid lrsch_type={lrsch_type}") + @staticmethod def filter_args(**kwargs): @@ -218,11 +221,9 @@ def add_class_args(parser, prefix=None): "noam_lr", "triangular_lr", ], - help=( - "Learning rate schedulers: None, Exponential," - "Cosine Annealing, Cosine Annealing for Adam," - "Reduce on Plateau" - ), + help=("Learning rate schedulers: None, Exponential," + "Cosine Annealing, Cosine Annealing for Adam," + "Reduce on Plateau"), ) parser.add_argument( @@ -231,22 +232,29 @@ def add_class_args(parser, prefix=None): type=float, help=("LR decay rate in exp lr"), ) - parser.add_argument( - "--decay-steps", default=100, type=int, help=("LR decay steps in exp lr") - ) - parser.add_argument( - "--power", default=0.5, type=float, help=("power in inverse power lr") - ) - - parser.add_argument( - "--hold-steps", default=10, type=int, help=("LR hold steps in exp lr") - ) - parser.add_argument("--t", default=10, type=int, help=("Period in cos lr")) + parser.add_argument("--decay-steps", + default=100, + type=int, + help=("LR decay steps in exp lr")) + parser.add_argument("--power", + default=0.5, + type=float, + help=("power in inverse power lr")) + + 
parser.add_argument("--hold-steps", + default=10, + type=int, + help=("LR hold steps in exp lr")) + parser.add_argument("--t", + default=10, + type=int, + help=("Period in cos lr")) parser.add_argument( "--t-mul", default=1, type=int, - help=("Period multiplicator for each restart in cos/triangular lr"), + help=( + "Period multiplicator for each restart in cos/triangular lr"), ) parser.add_argument( "--gamma", @@ -262,9 +270,9 @@ def add_class_args(parser, prefix=None): help=("Do warm restarts in cos lr"), ) - parser.add_argument( - "--monitor", default="val_loss", help=("Monitor metric to reduce lr") - ) + parser.add_argument("--monitor", + default="val_loss", + help=("Monitor metric to reduce lr")) parser.add_argument( "--mode", default="min", @@ -276,21 +284,24 @@ def add_class_args(parser, prefix=None): "--factor", default=0.1, type=float, - help=("Factor by which the learning rate will be reduced on plateau"), + help=( + "Factor by which the learning rate will be reduced on plateau" + ), ) parser.add_argument( "--patience", default=10, type=int, - help=( - "Number of epochs with no improvement after which learning rate will be reduced" - ), + help= + ("Number of epochs with no improvement after which learning rate will be reduced" + ), ) - parser.add_argument( - "--threshold", default=1e-4, type=float, help=("Minimum metric improvement") - ) + parser.add_argument("--threshold", + default=1e-4, + type=float, + help=("Minimum metric improvement")) parser.add_argument( "--threshold_mode", @@ -303,16 +314,20 @@ def add_class_args(parser, prefix=None): "--cooldown", default=0, type=int, - help=( - "Number of epochs to wait before resuming normal operation after lr has been reduced" - ), + help= + ("Number of epochs to wait before resuming normal operation after lr has been reduced" + ), ) - parser.add_argument( - "--eps", default=1e-8, type=float, help=("Minimum decay applied to lr") - ) + parser.add_argument("--eps", + default=1e-8, + type=float, + help=("Minimum decay applied to lr")) - parser.add_argument("--min-lr", default=0, type=float, help=("Minimum lr")) + parser.add_argument("--min-lr", + default=0, + type=float, + help=("Minimum lr")) parser.add_argument( "--warmup-steps", @@ -341,7 +356,8 @@ def add_class_args(parser, prefix=None): ) if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) # help='learning rate scheduler options') add_argparse_args = add_class_args diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index 2df4e047..0ab63adf 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -8,6 +8,7 @@ from .vae.vq_vae import VQVAE from .transducer import RNNTransducer, RNNRNNTransducer from .wav2transducer import (HFWav2Vec2RNNRNNTransducer, + HFWav2Vec2ConformerV1RNNTransducer, HFWav2Vec2RNNTransducer, HFWav2Vec2Transducer) from .wav2xvectors import (HFHubert2ResNet1dXVector, HFWav2Vec2ResNet1dXVector, HFWavLM2ResNet1dXVector) diff --git a/hyperion/torch/models/transducer/__init__.py b/hyperion/torch/models/transducer/__init__.py index ee3c85f5..984e15ec 100644 --- a/hyperion/torch/models/transducer/__init__.py +++ b/hyperion/torch/models/transducer/__init__.py @@ -4,6 +4,7 @@ """ +from .conformer_v1_rnn_transducer import ConformerV1RNNTransducer from .rnn_rnn_transducer import RNNRNNTransducer from .rnn_transducer import RNNTransducer, RNNTransducerOutput from .transducer import 
Transducer
diff --git a/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py b/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py
new file mode 100644
index 00000000..05a82103
--- /dev/null
+++ b/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py
@@ -0,0 +1,87 @@
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import logging
+from typing import Dict, Optional, Tuple, Union
+
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
+
+try:
+    import k2
+except ModuleNotFoundError:
+    from ...utils import dummy_k2 as k2
+
+import torch
+
+from ...narchs import ConformerEncoderV1
+from .rnn_transducer import RNNTransducer
+
+
+class ConformerV1RNNTransducer(RNNTransducer):
+    """RNN-T with Conformer Encoder
+
+    Attributes:
+      encoder: dictionary of options to initialize a ConformerEncoderV1 class
+               or a ConformerEncoderV1 object
+      decoder: RNN-T Decoder config. dictionary or module.
+
+    """
+
+    def __init__(self, encoder, decoder):
+        if isinstance(encoder, dict):
+            encoder = ConformerEncoderV1(**encoder)
+        else:
+            assert isinstance(encoder, ConformerEncoderV1)
+
+        super().__init__(encoder, decoder)
+
+    @staticmethod
+    def filter_args(**kwargs):
+        args = RNNTransducer.filter_args(**kwargs)
+        encoder_args = ConformerEncoderV1.filter_args(**kwargs["encoder"])
+        args["encoder"] = encoder_args
+        return args
+
+    @staticmethod
+    def add_class_args(parser, prefix=None, skip=set()):
+
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        ConformerEncoderV1.add_class_args(parser, prefix="encoder", skip=skip)
+        RNNTransducer.add_class_args(parser)
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))
+
+    def change_config(
+        self,
+        encoder,
+        decoder,
+    ):
+        logging.info("changing transducer encoder config")
+        self.encoder.change_config(**encoder)
+        super().change_config(**decoder)
+
+    @staticmethod
+    def filter_finetune_args(**kwargs):
+        args = RNNTransducer.filter_finetune_args(**kwargs)
+        encoder_args = ConformerEncoderV1.filter_finetune_args(
+            **kwargs["encoder"])
+        args["encoder"] = encoder_args
+        return args
+
+    @staticmethod
+    def add_finetune_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        ConformerEncoderV1.add_finetune_args(parser, prefix="encoder")
+        RNNTransducer.add_finetune_args(parser)
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix,
+                                      action=ActionParser(parser=parser))
diff --git a/hyperion/torch/models/transducer/lstm_rnn_transducer.py b/hyperion/torch/models/transducer/lstm_rnn_transducer.py
deleted file mode 100644
index 983334d4..00000000
--- a/hyperion/torch/models/transducer/lstm_rnn_transducer.py
+++ /dev/null
@@ -1,151 +0,0 @@
-"""
- Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
- Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
-"""
-
-import logging
-from typing import Dict, Optional, Union
-
-from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
-
-try:
-    import k2
-except ModuleNotFoundError:
-    from ...utils import dummy_k2 as k2
-
-import torch
-
-from ...torch_model import TorchModel
-from ..narchs import RNNTransducerDecoder
-
-
-class RNNTransducer(TorchModel):
-    """ Base-class for RNN-T in
-    "Sequence Transduction with Recurrent Neural Networks"
-    https://arxiv.org/pdf/1211.3711.pdf
-
-    Attributes:
-      encoder: Encoder network module
-      decoder: RNN-T Decoder 
config. dictionary or module. - """ - - def __init__( - self, - encoder: TorchModel, - decoder: Union[Dict, RNNTransducerDecoder], - ): - super().__init__() - assert isinstance(encoder, TorchModel) - if isinstance(decoder, dict): - decoder = RNNTransducerDecoder(**decoder) - else: - assert isinstance(decoder, RNNTransducerDecoder) - - self.encoder = encoder - self.decoder = decoder - - def forward( - self, - x: torch.Tensor, - x_lengths: torch.Tensor, - y: k2.RaggedTensor, - ) -> torch.Tensor: - """ - Args: - x: input features with shape = (N, T, C) - x_lengths: feature number for frames with shape = (N,) - y: ragged tensor with 2 axes [utt][label]. It contains labels of each - utterance. - Returns: - - Token logits with shape = (N, vocab_size) - - RNN-T loss. - """ - assert x.ndim == 3, x.shape - assert x_lengths.ndim == 1, x_lengths.shape - assert y.num_axes == 2, y.num_axes - - assert x.size(0) == x_lengths.size(0) == y.dim0 - - x, x_lengths = self.encoder(x, x_lengths) - assert torch.all(x_lengths > 0) - - logits, loss = self.decoder(x, x_lengths, y) - return logits, loss - - def set_train_mode(self, mode): - if mode == self._train_mode: - return - - if mode == "full": - self.unfreeze() - elif mode == "frozen": - self.freeze() - else: - raise ValueError(f"invalid train_mode={mode}") - - self._train_mode = mode - - def _train(self, train_mode: str): - if train_mode in ["full", "frozen"]: - super()._train(train_mode) - else: - raise ValueError(f"invalid train_mode={train_mode}") - - @staticmethod - def valid_train_modes(): - return ["full", "frozen"] - - def get_config(self): - dec_cfg = self.decoder.get_config() - config = { - "decoder": dec_cfg, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @staticmethod - def filter_args(**kwargs): - - # get arguments for pooling - decoder_args = RNNTransducerDecoder.filter_args(**kwargs["decoder"]) - args["decoder"] = decoder_args - return args - - @staticmethod - def add_class_args(parser, prefix=None, skip=set()): - - if prefix is not None: - outer_parser = parser - parser = ArgumentParser(prog="") - - RNNTransducerDecoder.add_class_args(parser, prefix="decoder") - - if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) - - def change_config( - self, - decoder, - ): - logging.info("changing transducer config") - self.decoder.change_config(**decoder) - - @staticmethod - def filter_finetune_args(**kwargs): - # get arguments for pooling - decoder_args = Decoder.filter_finetune_args(**kwargs["decoder"]) - args["decoder"] = decoder_args - return args - - @staticmethod - def add_finetune_args(parser, prefix=None): - if prefix is not None: - outer_parser = parser - parser = ArgumentParser(prog="") - - RNNTransducerDecoder.add_finetune_args(parser, prefix="decoder") - - if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/transducer/rnn_rnn_transducer.py b/hyperion/torch/models/transducer/rnn_rnn_transducer.py index 1c0704f5..02d0c482 100644 --- a/hyperion/torch/models/transducer/rnn_rnn_transducer.py +++ b/hyperion/torch/models/transducer/rnn_rnn_transducer.py @@ -50,7 +50,7 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - RNNEncoder.add_class_args(parser, prefix="encoder") + RNNEncoder.add_class_args(parser, prefix="encoder", skip=skip) RNNTransducer.add_class_args(parser) if prefix is not None: 
 outer_parser.add_argument("--" + prefix,
diff --git a/hyperion/torch/models/transducer/rnn_transducer.py b/hyperion/torch/models/transducer/rnn_transducer.py
index 0b886fdf..3326ef81 100644
--- a/hyperion/torch/models/transducer/rnn_transducer.py
+++ b/hyperion/torch/models/transducer/rnn_transducer.py
@@ -50,6 +50,8 @@ def __init__(
         if encoder is not None:
             assert isinstance(encoder, TorchModel)
         if isinstance(decoder, dict):
+            if encoder is not None:
+                decoder["in_feats"] = encoder.out_shape()[-1]
             decoder = RNNTransducerDecoder(**decoder)
         else:
             assert isinstance(decoder, RNNTransducerDecoder)
diff --git a/hyperion/torch/models/wav2transducer/__init__.py b/hyperion/torch/models/wav2transducer/__init__.py
index 79af6349..71e82b98 100644
--- a/hyperion/torch/models/wav2transducer/__init__.py
+++ b/hyperion/torch/models/wav2transducer/__init__.py
@@ -5,5 +5,7 @@
 """
 
 from .hf_wav2vec2_transducer import HFWav2Vec2Transducer
+from .hf_wav2vec2conformer_v1_rnn_transducer import \
+    HFWav2Vec2ConformerV1RNNTransducer
 from .hf_wav2vec2rnn_rnn_transducer import HFWav2Vec2RNNRNNTransducer
 from .hf_wav2vec2rnn_transducer import HFWav2Vec2RNNTransducer
diff --git a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py
index f68a6f7a..47dfd910 100644
--- a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py
+++ b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py
@@ -38,7 +38,6 @@ def __init__(self,
         self.hf_feats = hf_feats
         if isinstance(transducer, dict):
             transducer["decoder"]["in_feats"] = hf_feats.hidden_size
-            #transducer["joiner"]["in_feats"] = hf_feats.hidden_size
             if "class_name" in transducer:
                 del transducer["class_name"]
 
@@ -48,7 +47,6 @@ def __init__(self,
             assert isinstance(transducer, RNNTransducer)
             if transducer.encoder is None:
                 assert transducer.decoder.in_feats == hf_feats.hidden_size
-                #assert transducer.joiner.in_feats == hf_feats.hidden_size
 
         self.transducer = transducer
         self.feat_fusion_start = feat_fusion_start
diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2conformer_v1_rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2conformer_v1_rnn_transducer.py
new file mode 100644
index 00000000..09b0196e
--- /dev/null
+++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2conformer_v1_rnn_transducer.py
@@ -0,0 +1,105 @@
+"""
+ Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu)
+ Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+from typing import Dict, Optional, Union
+
+import torch
+import torch.nn as nn
+from jsonargparse import ActionParser, ArgumentParser
+
+from ...tpm import HFWav2Vec2
+from ..transducer import ConformerV1RNNTransducer
+from .hf_wav2rnn_transducer import HFWav2RNNTransducer
+
+
+class HFWav2Vec2ConformerV1RNNTransducer(HFWav2RNNTransducer):
+    """Class for Conformer based RNN-T with Wav2Vec2 features
+
+    Attributes:
+      hf_feats: HFWav2Vec configuration dictionary or object.
+                This is a wrapper over a Hugging Face Wav2Vec model.
+      transducer: Transducer configuration dictionary or object.
+      feat_fusion_start: the input to the transducer will fuse the wav2vec layers from "feat_fusion_start" to
+                         the wav2vec "num_layers".
+      feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more
+                          than one layer is used. 
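+
+      Note: when transducer is given as a config dictionary, the conformer
+      encoder input dimension is filled in automatically from the wav2vec2
+      hidden size (see __init__ below).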
+ """ + + def __init__( + self, + hf_feats: Union[Dict, HFWav2Vec2], + transducer: Union[Dict, ConformerV1RNNTransducer], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): + + if isinstance(hf_feats, dict): + if "class_name" in hf_feats: + del hf_feats["class_name"] + hf_feats = HFWav2Vec2(**hf_feats) + else: + assert isinstance(hf_feats, HFWav2Vec2) + + if isinstance(transducer, dict): + transducer["encoder"]["in_feats"] = hf_feats.hidden_size + if "class_name" in transducer: + del transducer["class_name"] + + transducer = ConformerV1RNNTransducer(**transducer) + else: + assert isinstance(transducer, ConformerV1RNNTransducer) + + super().__init__(hf_feats, transducer, feat_fusion_start, + feat_fusion_method) + + @staticmethod + def filter_args(**kwargs): + base_args = HFWav2RNNTransducer.filter_args(**kwargs) + child_args = HFWav2Vec2.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ConformerV1RNNTransducer.filter_args( + **kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_class_args(parser, prefix="hf_feats") + ConformerV1RNNTransducer.add_class_args(parser, + prefix="transducer", + skip={"in_feats"}) + HFWav2RNNTransducer.add_class_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFWav2Vec2.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ConformerV1RNNTransducer.filter_finetune_args( + **kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_finetune_args(parser, prefix="hf_feats") + ConformerV1RNNTransducer.add_finetune_args(parser, prefix="transducer") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py index d2b13fb6..a4d2b0cc 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py @@ -44,8 +44,7 @@ def __init__( assert isinstance(hf_feats, HFWav2Vec2) if isinstance(transducer, dict): - transducer["decoder"]["in_feats"] = hf_feats.hidden_size - #transducer["joiner"]["in_feats"] = hf_feats.hidden_size + transducer["encoder"]["in_feats"] = hf_feats.hidden_size if "class_name" in transducer: del transducer["class_name"] @@ -72,7 +71,9 @@ def add_class_args(parser, prefix=None): parser = ArgumentParser(prog="") HFWav2Vec2.add_class_args(parser, prefix="hf_feats") - RNNRNNTransducer.add_class_args(parser, prefix="transducer") + RNNRNNTransducer.add_class_args(parser, + prefix="transducer", + skip={"in_feats"}) HFWav2RNNTransducer.add_class_args(parser) if prefix is not None: diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index 98160a25..ed328223 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -5,14 +5,15 @@ 
import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser +from jsonargparse import ActionParser, ArgumentParser, ActionYesNo +from ...utils.misc import filter_func_args from ..layer_blocks import ConformerEncoderBlockV1 as EBlock from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler from ..layers import ActivationFactory as AF -from ..layers import NoPosEncoder from ..layers import NormLayer1dFactory as NLF -from ..layers import PosEncoder, RelPosEncoder +from ..layers import NoPosEncoder, PosEncoder, RelPosEncoder, ConvPosEncoder +from ..utils import seq_lengths_to_mask, scale_seq_lengths from .net_arch import NetArch @@ -37,7 +38,7 @@ class ConformerEncoderV1(NetArch): d_model: encoder blocks feature dimension num_heads: number of heads num_blocks: number of self attn blocks - att_type: string in ['scaled-dot-prod-att-v1', 'local-scaled-dot-prod-att-v1'] + att_type: string in ['scaled-dot-prod-att-v1', 'local-scaled-dot-prod-att-v1', 'block-scaled-dot-prod-att-v1'] att_context: maximum context range for local attention conv_repeats: number of conv blocks in each conformer block conv_kernel_sizes: kernel size for conv blocks @@ -49,11 +50,10 @@ class ConformerEncoderV1(NetArch): pos_dropout_rate: dropout rate for positional encoder att_dropout_rate: dropout rate for attention block in_layer_type: input layer block type in ['linear','conv2d-sub', 'embed', None] - pos_enc_type: type of positional encoder ['no', 'abs', 'rel'] + pos_enc_type: type of positional encoder ['no', 'abs', 'rel', 'conv'] causal_pos_enc: if True, use causal positional encodings (when rel_pos_enc=True), it assumes that query q_i only attents to key k_j when j<=i - no_pos_enc: if True, it doesn't use positional encoder. hid_act: hidden activations in ff and input blocks conv_norm_layer: norm layer constructor or str for conv block, if None it uses BatchNorm1d @@ -68,8 +68,6 @@ class ConformerEncoderV1(NetArch): padding_idx: padding idx for embed layer in_time_dim: time dimension in the input Tensor out_time_dim: dimension that we want to be time in the output tensor - rel_pos_enc: if True, use relative postional encodings, absolute encodings otherwise. 
(deprecated) - red_lnorm: (deprecated) """ def __init__( @@ -92,17 +90,17 @@ def __init__( in_layer_type="conv2d-sub", pos_enc_type="rel", causal_pos_enc=False, + pos_kernel_size=128, + pos_num_groups=16, hid_act="swish", conv_norm_layer=None, se_r=None, ff_macaron=True, - red_lnorms=False, + red_lnorms=True, concat_after=False, padding_idx=-1, - in_time_dim=-1, + in_time_dim=1, out_time_dim=1, - rel_pos_enc=True, - red_lnorm=False, ): super().__init__() @@ -115,14 +113,11 @@ def __init__( self.att_context = att_context self.conv_repeats = self._standarize_cblocks_param( - conv_repeats, num_blocks, "conv_repeats" - ) + conv_repeats, num_blocks, "conv_repeats") self.conv_kernel_sizes = self._standarize_cblocks_param( - conv_kernel_sizes, num_blocks, "conv_kernel_sizes" - ) + conv_kernel_sizes, num_blocks, "conv_kernel_sizes") self.conv_strides = self._standarize_cblocks_param( - conv_strides, num_blocks, "conv_strides" - ) + conv_strides, num_blocks, "conv_strides") self.ff_type = ff_type self.d_ff = d_ff @@ -141,6 +136,8 @@ def __init__( self.in_time_dim = in_time_dim self.out_time_dim = out_time_dim self.hid_act = hid_act + self.pos_kernel_size = pos_kernel_size + self.pos_num_groups = pos_num_groups self.conv_norm_layer = conv_norm_layer norm_groups = None @@ -174,8 +171,7 @@ def __init__( ff_macaron=ff_macaron, out_lnorm=self.red_lnorms, concat_after=concat_after, - ) - ) + )) self.blocks = nn.ModuleList(blocks) if not self.red_lnorms: @@ -210,6 +206,9 @@ def _make_in_layer(self): pos_enc = RelPosEncoder(d_model, self.pos_dropout_rate) elif self.pos_enc_type == "abs": pos_enc = PosEncoder(d_model, self.pos_dropout_rate) + elif self.pos_enc_type == "conv": + pos_enc = ConvPosEncoder(d_model, self.pos_kernel_size, + self.pos_num_groups, self.hid_act) else: raise Exception("wrong pos-enc-type={}".format(self.pos_enc_type)) @@ -224,13 +223,15 @@ def _make_in_layer(self): pos_enc, ) elif self.in_layer_type == "conv2d-sub": - self.in_layer = Conv2dSubsampler( - in_feats, d_model, hid_act, pos_enc, time_dim=self.in_time_dim - ) + self.in_layer = Conv2dSubsampler(in_feats, + d_model, + hid_act, + pos_enc, + time_dim=self.in_time_dim) elif self.in_layer_type == "embed": self.in_layer = nn.Sequential( - nn.Embedding(in_feats, d_model, padding_idx=self.padding_idx), pos_enc - ) + nn.Embedding(in_feats, d_model, padding_idx=self.padding_idx), + pos_enc) elif isinstance(self.in_layer_type, nn.Module): self.in_layer = nn.Sequential(self.in_layer_type, pos_enc) elif self.in_layer_type is None: @@ -238,7 +239,12 @@ def _make_in_layer(self): else: raise ValueError("unknown in_layer_type: " + self.in_layer_type) - def forward(self, x, x_lengths=None, x_mask=None, target_shape=None): + def forward(self, + x, + x_lengths=None, + x_mask=None, + return_mask=False, + target_shape=None): """Forward pass function Args: @@ -246,14 +252,20 @@ def forward(self, x, x_lengths=None, x_mask=None, target_shape=None): x_lengths: lengths of the input sequences. x_mask: mask to indicate valid time steps for x (batch, time). It overwrites the mask of x_lengths. 
+ return_mask: if True, it also return the output mask + target_shape: unused Returns: Tensor with output features - Tensor with mask + Tensor with output lengths + Tensor with mask if return_mask is True """ + max_in_length = x.size(self.in_time_dim) + if x_mask is None and x_lengths is not None: + x_mask = seq_lengths_to_mask(x_lengths, max_in_length, time_dim=1) if isinstance(self.in_layer, Conv2dSubsampler): - x, mask = self.in_layer(x, x_mask) + x, x_mask = self.in_layer(x, x_mask) else: if self.in_time_dim != 1: x = x.transpose(1, self.in_time_dim).contiguous() @@ -266,7 +278,7 @@ def forward(self, x, x_lengths=None, x_mask=None, target_shape=None): b_args = {} for i in range(len(self.blocks)): - x, mask = self.blocks[i](x, mask=mask, **b_args) + x, x_mask = self.blocks[i](x, mask=x_mask, **b_args) if not self.red_lnorms: x = self.norm_out(x) @@ -274,10 +286,13 @@ def forward(self, x, x_lengths=None, x_mask=None, target_shape=None): if self.out_time_dim != 1: x = x.transpose(1, self.out_time_dim) - if mask is None: - return x + if x_lengths is not None: + x_lengths = scale_seq_lengths(x_lengths, x.size(1), max_in_length) - return x, mask + if return_mask: + return x, x_lengths, x_mask + + return x, x_lengths def get_config(self): """Gets network config @@ -303,6 +318,8 @@ def get_config(self): "in_layer_type": self.in_layer_type, "pos_enc_type": self.pos_enc_type, "causal_pos_enc": self.causal_pos_enc, + "pos_kernel_size": self.pos_kernel_size, + "pos_num_groups": self.pos_num_groups, "hid_act": self.hid_act, "se_r": self.se_r, "ff_macaron": self.ff_macaron, @@ -372,41 +389,11 @@ def filter_args(**kwargs): Returns: args dictionary """ - - if "no_ff_macaron" in kwargs: - kwargs["ff_macaron"] = not kwargs["no_ff_macaron"] - - valid_args = ( - "num_blocks", - "in_feats", - "d_model", - "num_heads", - "att_type", - "att_context", - "conv_repeats", - "conv_kernel_sizes", - "conv_strides", - "ff_type", - "d_ff", - "ff_kernel_size", - "dropout_rate", - "pos_dropout_rate", - "att_dropout_rate", - "in_layer_type", - "hid_act", - "pos_enc_type", - "causal_pos_enc", - "conv_norm_layer", - "se_r", - "ff_macaron", - "red_lnorms", - "concat_after", - ) - - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args = filter_func_args(ConformerEncoderV1.__init__, kwargs) + return args @staticmethod - def add_class_args(parser, prefix=None, in_feats=False): + def add_class_args(parser, prefix=None, skip=set()): """Adds Conformer config parameters to argparser Args: @@ -417,18 +404,21 @@ def add_class_args(parser, prefix=None, in_feats=False): outer_parser = parser parser = ArgumentParser(prog="") - if in_feats: - parser.add_argument( - "--in-feats", type=int, default=80, help=("input feature dimension") - ) + if "in_feats" not in skip: + parser.add_argument("--in-feats", + type=int, + default=80, + help=("input feature dimension")) - parser.add_argument( - "--num-blocks", default=6, type=int, help=("number of tranformer blocks") - ) + parser.add_argument("--num-blocks", + default=6, + type=int, + help=("number of tranformer blocks")) - parser.add_argument( - "--d-model", default=512, type=int, help=("encoder layer sizes") - ) + parser.add_argument("--d-model", + default=512, + type=int, + help=("encoder layer sizes")) parser.add_argument( "--num-heads", @@ -440,7 +430,10 @@ def add_class_args(parser, prefix=None, in_feats=False): parser.add_argument( "--att-type", default="scaled-dot-prod-v1", - choices=["scaled-dot-prod-v1", "local-scaled-dot-prod-v1"], + choices=[ + "scaled-dot-prod-v1", 
"local-scaled-dot-prod-v1", + "block-scaled-dot-prod-v1" + ], help=("type of self-attention"), ) @@ -464,7 +457,9 @@ def add_class_args(parser, prefix=None, in_feats=False): default=[31], nargs="+", type=int, - help=("kernels sizes for the depth-wise convs of each conformer block"), + help=( + "kernels sizes for the depth-wise convs of each conformer block" + ), ) parser.add_argument( @@ -496,10 +491,9 @@ def add_class_args(parser, prefix=None, in_feats=False): help=("kernel size in convolutional feed forward block"), ) - try: - parser.add_argument("--hid-act", default="swish", help="hidden activation") - except: - pass + parser.add_argument("--hid-act", + default="swish", + help="hidden activation") parser.add_argument( "--pos-dropout-rate", @@ -507,12 +501,14 @@ def add_class_args(parser, prefix=None, in_feats=False): type=float, help="positional encoder dropout", ) - parser.add_argument( - "--att-dropout-rate", default=0, type=float, help="self-att dropout" - ) - parser.add_argument( - "--dropout-rate", default=0.1, type=float, help="feed-forward layer dropout" - ) + parser.add_argument("--att-dropout-rate", + default=0, + type=float, + help="self-att dropout") + parser.add_argument("--dropout-rate", + default=0.1, + type=float, + help="feed-forward layer dropout") parser.add_argument( "--in-layer-type", @@ -521,37 +517,45 @@ def add_class_args(parser, prefix=None, in_feats=False): help=("type of input layer"), ) - # parser.add_argument('--abs-pos-enc', default=False, action='store_true', - # help='use absolute positional encoder') parser.add_argument( "--pos-enc-type", default="rel", - choices=["no", "rel", "abs"], + choices=["no", "rel", "abs", "conv"], help=("type of positional encoder"), ) parser.add_argument( "--causal-pos-enc", default=False, - action="store_true", - help="relative positional encodings are zero when attending to the future", + action=ActionYesNo, + help= + "relative positional encodings are zero when attending to the future", + ) + parser.add_argument( + "--pos-kernel-size", + default=128, + type=int, + help="kernel size for conv positional encoder", + ) + parser.add_argument( + "--pos-num-groups", + default=16, + type=int, + help="number of conv groups for conv positional encoder", ) - try: - parser.add_argument( - "--conv-norm-layer", - default=None, - choices=[ - "batch-norm", - "group-norm", - "instance-norm", - "instance-norm-affine", - "layer-norm", - ], - help="type of normalization layer for conv block in conformer", - ) - except: - pass + parser.add_argument( + "--conv-norm-layer", + default=None, + choices=[ + "batch-norm", + "group-norm", + "instance-norm", + "instance-norm-affine", + "layer-norm", + ], + help="type of normalization layer for conv block in conformer", + ) parser.add_argument( "--se-r", @@ -561,30 +565,26 @@ def add_class_args(parser, prefix=None, in_feats=False): ) parser.add_argument( - "--no-ff-macaron", - default=False, - action="store_true", + "--ff-macaron", + default=True, + action=ActionYesNo, help="do not use macaron style ff layers ", ) parser.add_argument( "--red-lnorms", - default=False, - action="store_true", + default=True, + action=ActionYesNo, help="use redundant Lnorm at conformer blocks' outputs", ) parser.add_argument( "--concat-after", default=False, - action="store_true", + action=ActionYesNo, help="concatenate attention input and output instead of adding", ) - # parser.add_argument('--in-norm', default=False, action='store_true', - # help='batch normalization at the input') if prefix is not None: - 
outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='conformer encoder options') - - add_argparse_args = add_class_args + outer_parser.add_argument("--" + prefix, + action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/rnn_encoder.py b/hyperion/torch/narchs/rnn_encoder.py index 593405c5..0c3b623a 100644 --- a/hyperion/torch/narchs/rnn_encoder.py +++ b/hyperion/torch/narchs/rnn_encoder.py @@ -13,6 +13,7 @@ from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence +from ...utils.misc import filter_func_args from ..layer_blocks import TransformerConv2dSubsampler as Subsampler from ..layers import ActivationFactory as AF #from ..layers import NormLayer1dFactory as NLF @@ -46,7 +47,7 @@ def __init__(self, bidirectional: bool = False, dropout_rate: float = 0.0, subsample_input: bool = False, - subsampling_act: str = "relu6"): + subsampling_act: str = "relu"): super().__init__() if rnn_type != "lstm": proj_feats = 0 @@ -74,7 +75,7 @@ def __init__(self, if rnn_type == "lstm": self.rnn = nn.LSTM( - input_size=hid_feats, + input_size=lstm_in_dim, hidden_size=hid_feats, num_layers=num_layers, bias=True, @@ -85,7 +86,7 @@ def __init__(self, ) else: self.rnn = nn.GRU( - input_size=hid_feats, + input_size=lstm_in_dim, hidden_size=hid_feats, num_layers=num_layers, bias=True, @@ -113,7 +114,7 @@ def forward(self, x: torch.Tensor, batch_first=True, enforce_sorted=True) x, _ = self.rnn(x) - x = pad_packed_sequence(x, batch_first=True) + x, x_lengths = pad_packed_sequence(x, batch_first=True) if self.out_feats > 0: x = self.output(x) @@ -149,7 +150,7 @@ def change_config(self, override_dropouts, dropout_rate): @staticmethod def filter_args(**kwargs): - args = filter_func_args(RNNEncoder.__init__, **kwargs) + args = filter_func_args(RNNEncoder.__init__, kwargs) return args @staticmethod @@ -166,7 +167,7 @@ def add_class_args(parser, prefix=None, skip=set()): parser.add_argument( "--hid-feats", - default=512, + default=1024, type=int, help=("num of hidden dimensions of RNN layers"), ) @@ -182,7 +183,7 @@ def add_class_args(parser, prefix=None, skip=set()): parser.add_argument( "--proj-feats", - default=512, + default=0, type=int, help=("projection features of LSTM layers"), ) @@ -225,7 +226,7 @@ def add_class_args(parser, prefix=None, skip=set()): help="whether to subsaple input features x4", ) parser.add_argument("--subsampling-act", - default="relu6", + default="relu", help="activation for subsampler block") if "dropout_rate" not in skip: diff --git a/hyperion/torch/narchs/rnn_transducer_decoder.py b/hyperion/torch/narchs/rnn_transducer_decoder.py index 265f2c9b..e9c50197 100644 --- a/hyperion/torch/narchs/rnn_transducer_decoder.py +++ b/hyperion/torch/narchs/rnn_transducer_decoder.py @@ -30,7 +30,7 @@ class Hypothesis: log_prob: float # log prob of ys # Optional LSTM predictor state. 
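# The predictor state below is kept generic: an LSTM predictor returns an
# (h, c) pair, while GRU or convolutional predictors may carry a different
# number of state tensors, hence the variable-length tuple type that the
# following change introduces.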
- pred_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None + pred_state: Optional[Tuple[torch.Tensor, ...]] = None class RNNTransducerDecoder(NetArch): @@ -115,6 +115,7 @@ def _make_predictor(self): pred_args = filter_func_args(ConvPredictor.__init__, self.predictor_args) self.predictor = ConvPredictor(**pred_args) + self.predictor_args["out_feats"] = self.predictor.embed_dim else: raise ValueError(f"Unknown predictor type {pred_type}") @@ -263,7 +264,7 @@ def _rnnt_loss_k2_pruned(self, x: torch.Tensor, x_lengths: torch.Tensor, simple_loss_scale = 1.0 - r * (1.0 - self.simple_loss_scale) pruned_loss_scale = 0.1 + 0.9 * r self.cur_step += 1 - print(simple_loss_scale, pruned_loss_scale) + #print(simple_loss_scale, pruned_loss_scale) loss = simple_loss_scale * loss_simple + pruned_loss_scale * loss_pruned @@ -338,7 +339,7 @@ def decode_greedy(self, sos = torch.tensor([blank_id], device=device, dtype=torch.int64).reshape(1, 1) - pred_out, (h, c) = self.predictor(sos) + pred_out, state = self.predictor(sos) T = x.size(1) t = 0 hyp = [] @@ -357,7 +358,7 @@ def decode_greedy(self, if y != blank_id: hyp.append(y.item()) y = y.reshape(1, 1) - pred_out, (h, c) = self.predictor(y, (h, c)) + pred_out, state = self.predictor(y, state) sym_per_utt += 1 sym_per_frame += 1 @@ -379,7 +380,7 @@ def decode_time_sync_beam_search(self, device = x.device sos = torch.tensor([blank_id], device=device).reshape(1, 1) - pred_out, (h, c) = self.predictor(sos) + pred_out, state = self.predictor(sos) T = x.size(1) t = 0 B = [Hypothesis(ys=[blank_id], log_prob=0.0, pred_state=None)] @@ -498,7 +499,7 @@ def decode_align_length_sync_beam_search( device = x.device sos = torch.tensor([blank_id], device=device).reshape(1, 1) - pred_out, (h, c) = self.predictor(sos) + pred_out, state = self.predictor(sos) T = x.size(1) #t = 0 B = [Hypothesis(ys=[blank_id], log_prob=0.0, pred_state=None)] diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index 540697f7..65e5884d 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -11,14 +11,15 @@ import torch import torch.nn as nn -torch_model_registry = {} - class TorchModel(nn.Module): + """Base class for all Pytorch Models and NNet architectures + """ + registry = {} def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) - torch_model_registry[cls.__name__] = cls + TorchModel.registry[cls.__name__] = cls def __init__(self): super().__init__() diff --git a/hyperion/torch/utils/masking.py b/hyperion/torch/utils/masking.py index 1bb5a644..fb93b439 100644 --- a/hyperion/torch/utils/masking.py +++ b/hyperion/torch/utils/masking.py @@ -14,7 +14,12 @@ def scale_seq_lengths(lengths, max_out_length, max_in_length=None): if max_in_length is None: max_in_length = lengths.max() - return torch.div(lengths * max_out_length, max_in_length, rounding_mode="floor") + if max_in_length == max_out_length: + return lengths + + return torch.div(lengths * max_out_length, + max_in_length, + rounding_mode="floor") def seq_lengths_to_mask(lengths, max_length=None, dtype=None, time_dim=1): From 8b81ffaa8d315eb630a3fafbffa83b316212f671 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 3 May 2023 15:44:38 -0400 Subject: [PATCH 096/154] adv.v2 adapted to persephone branck --- egs/voxceleb/adv.v2/cmd.sh | 3 +- egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml | 4 - egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml | 4 - ...bank80_stmn_lresnet34_attacknet_same.v1.sh | 8 +- .../local/calibrate_voxceleb1_o_clean.sh | 4 +- 
.../run_022_attack_type_classif_allknown.sh | 33 ++-- .../adv.v2/run_023_snr_classif_allknown.sh | 36 ++-- .../run_024_threat_model_classif_allknown.sh | 35 ++-- ...un_031_attack_type_verif_and_noveltydet.sh | 33 ++-- egs/voxceleb/adv.v2/run_032_snr_verif.sh | 29 ++-- .../adv.v2/run_033_threat_model_verif.sh | 30 ++-- .../steps_backend/eval-calibration-v1.py | 69 ++++++++ .../adv.v2/steps_backend/eval-classif-perf.py | 4 +- .../steps_backend/train-calibration-v1.py | 94 ++++++++++ egs/voxceleb/v1.1/README.md | 63 ++++++- .../train_cfwseresnet34_xvec_stage1_v3.0.yaml | 71 ++++++++ .../train_cfwseresnet34_xvec_stage2_v3.0.yaml | 69 ++++++++ .../train_cwseresnet34_xvec_stage1_v3.0.yaml | 71 ++++++++ .../train_cwseresnet34_xvec_stage2_v3.0.yaml | 69 ++++++++ .../train_fwseresnet34_xvec_stage1_v3.0.yaml | 71 ++++++++ .../train_fwseresnet34_xvec_stage2_v3.0.yaml | 69 ++++++++ .../conf/train_resnet34_xvec_stage1_v3.0.yaml | 70 ++++++++ .../train_tseresnet34_xvec_stage1_v3.0.yaml | 71 ++++++++ .../train_tseresnet34_xvec_stage2_v3.0.yaml | 69 ++++++++ .../config_fbank80_stmn_cfwseresnet34.v3.0.sh | 44 +++++ .../config_fbank80_stmn_cwseresnet34.v3.0.sh | 45 +++++ .../config_fbank80_stmn_fwseresnet34.v3.0.sh | 44 +++++ .../config_fbank80_stmn_resnet34.v3.0.sh | 44 +++++ .../config_fbank80_stmn_tseresnet34.v3.0.sh | 44 +++++ .../eval_xvec_logits_from_wav.py} | 0 .../generate_adv_attacks_xvector_classif.py | 10 +- .../bin/generate_adv_attacks_xvector_verif.py | 83 ++++----- hyperion/bin/prepare_data.py | 2 + hyperion/data_prep/data_prep.py | 57 +++++- hyperion/data_prep/voxceleb2.py | 163 +++++++++++++----- .../adv_attacks/random_attack_factory.py | 7 +- hyperion/torch/layer_blocks/se_blocks.py | 10 +- hyperion/utils/__init__.py | 3 + hyperion/utils/class_info.py | 12 +- hyperion/utils/dataset.py | 159 +++++++++++++++++ hyperion/utils/info_table.py | 7 +- hyperion/utils/misc.py | 4 + 42 files changed, 1569 insertions(+), 248 deletions(-) create mode 100755 egs/voxceleb/adv.v2/steps_backend/eval-calibration-v1.py create mode 100755 egs/voxceleb/adv.v2/steps_backend/train-calibration-v1.py create mode 100644 egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_resnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cwseresnet34.v3.0.sh create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34.v3.0.sh create mode 100644 egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh rename hyperion/{bin_deprec2/torch-eval-xvec-logits-from-wav.py => bin/eval_xvec_logits_from_wav.py} (100%) create mode 100644 hyperion/utils/dataset.py diff --git a/egs/voxceleb/adv.v2/cmd.sh b/egs/voxceleb/adv.v2/cmd.sh index 56b7eeeb..8f2d9b19 100755 
--- a/egs/voxceleb/adv.v2/cmd.sh +++ b/egs/voxceleb/adv.v2/cmd.sh @@ -13,7 +13,8 @@ if [ "$(hostname -d)" == "cm.gemini" ];then #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" - export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 20G" export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" else diff --git a/egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml b/egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml index d07a2126..03a4b141 100644 --- a/egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml +++ b/egs/voxceleb/adv.v2/conf/lresnet34_atnet.yaml @@ -3,8 +3,6 @@ data: dataset: class_names: - class_id - aug_cfgs: - - conf/reverb_noise_aug.yaml return_segment_info: - class_id sampler: @@ -20,8 +18,6 @@ data: dataset: class_names: - class_id - aug_cfgs: - - conf/reverb_noise_aug.yaml return_segment_info: - class_id sampler: diff --git a/egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml b/egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml index 94e26f24..a617622c 100644 --- a/egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml +++ b/egs/voxceleb/adv.v2/conf/res2net50_atnet.yaml @@ -3,8 +3,6 @@ data: dataset: class_names: - class_id - aug_cfgs: - - conf/reverb_noise_aug.yaml return_segment_info: - class_id sampler: @@ -20,8 +18,6 @@ data: dataset: class_names: - class_id - aug_cfgs: - - conf/reverb_noise_aug.yaml return_segment_info: - class_id sampler: diff --git a/egs/voxceleb/adv.v2/global_conf/config_spknet_fbank80_stmn_lresnet34_attacknet_same.v1.sh b/egs/voxceleb/adv.v2/global_conf/config_spknet_fbank80_stmn_lresnet34_attacknet_same.v1.sh index 5ae7f68e..ed10ff0a 100644 --- a/egs/voxceleb/adv.v2/global_conf/config_spknet_fbank80_stmn_lresnet34_attacknet_same.v1.sh +++ b/egs/voxceleb/adv.v2/global_conf/config_spknet_fbank80_stmn_lresnet34_attacknet_same.v1.sh @@ -6,8 +6,8 @@ spknet_command=resnet spknet_data=voxceleb2cat_train spknet_config=conf/lresnet34_spknet.yaml -spknet_batch_size_1gpu=128 -spknet_eff_batch_size=512 # effective batch size +#spknet_batch_size_1gpu=128 +#spknet_eff_batch_size=512 # effective batch size spknet_name=lresnet34 spknet_dir=exp/xvector_nnets/$spknet_name spknet=$spknet_dir/model_ep0070.pth @@ -26,8 +26,8 @@ spkv_attacks_common_opts="--save-failed" #save failed attacks also # Attack model LResNet34 configuration sign_nnet_command=resnet sign_nnet_config=conf/lresnet34_atnet.yaml -sign_nnet_batch_size_1gpu=128 -sign_nnet_eff_batch_size=512 # effective batch size +#sign_nnet_batch_size_1gpu=128 +#sign_nnet_eff_batch_size=512 # effective batch size sign_nnet_name=lresnet34 # SNRs in -100, 100 diff --git a/egs/voxceleb/adv.v2/local/calibrate_voxceleb1_o_clean.sh b/egs/voxceleb/adv.v2/local/calibrate_voxceleb1_o_clean.sh index 736c3fb0..01c06036 100755 --- a/egs/voxceleb/adv.v2/local/calibrate_voxceleb1_o_clean.sh +++ b/egs/voxceleb/adv.v2/local/calibrate_voxceleb1_o_clean.sh @@ -30,7 +30,7 @@ train_scores=$score_dir/voxceleb1_scores train_key=data/voxceleb1_test/trials_o_clean $cmd $cal_score_dir/train_cal_tel.log \ - steps_be/train-calibration-v1.py --score-file $train_scores \ + steps_backend/train-calibration-v1.py --score-file $train_scores \ --key-file $train_key --model-file $model_file --prior $prior --lambda-reg $l2_reg 
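# train-calibration-v1.py (added later in this patch) fits a prior-weighted
# logistic regression on the target/non-target scores, i.e. it learns
# calibrated_llr = A * raw_score + b, with L2 regularization controlled by
# --lambda-reg and the effective target prior set by --prior.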
ndxs=(voxceleb1_test/trials_o_clean) @@ -43,7 +43,7 @@ do scores_out=$cal_score_dir/${scores[$i]}_scores ndx=data/${ndxs[$i]} $cmd $cal_score_dir/eval_cal_${scores[$i]}.log \ - steps_be/eval-calibration-v1.py --in-score-file $scores_in \ + steps_backend/eval-calibration-v1.py --in-score-file $scores_in \ --ndx-file $ndx --model-file $model_file --out-score-file $scores_out & done diff --git a/egs/voxceleb/adv.v2/run_022_attack_type_classif_allknown.sh b/egs/voxceleb/adv.v2/run_022_attack_type_classif_allknown.sh index 53e8e5a6..71c0c89f 100755 --- a/egs/voxceleb/adv.v2/run_022_attack_type_classif_allknown.sh +++ b/egs/voxceleb/adv.v2/run_022_attack_type_classif_allknown.sh @@ -10,9 +10,7 @@ set -e stage=1 ngpu=1 config_file=default_config.sh -resume=false interactive=false -num_workers=4 xvec_use_gpu=false xvec_chunk_length=12800 @@ -27,15 +25,9 @@ else xvec_cmd="$train_cmd" fi -batch_size=$(($sign_nnet_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $sign_nnet_eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) list_dir=data/$attack_type_split_tag args="" -if [ "$resume" == "true" ];then - args="--resume" -fi if [ "$interactive" == "true" ];then export cuda_cmd=run.pl @@ -53,18 +45,17 @@ if [ $stage -le 1 ]; then mkdir -p $sign_nnet_dir/log $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-train-xvec-from-wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --audio-path $list_dir/trainval_wav.scp \ - --time-durs-file $list_dir/trainval_utt2dur \ - --train-list $list_dir/train_utt2attack \ - --val-list $list_dir/val_utt2attack \ - --class-file $list_dir/class_file \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ + train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ + --data.train.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.train.dataset.time-durs-file $list_dir/trainval_utt2dur \ + --data.train.dataset.segments-file $list_dir/train_utt2attack \ + --data.train.dataset.class-file $list_dir/class_file \ + --data.val.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.val.dataset.time-durs-file $list_dir/trainval_utt2dur \ + --data.val.dataset.segments-file $list_dir/val_utt2attack \ + --trainer.exp-path $sign_nnet_dir $args \ --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $sign_nnet_dir $args + fi if [ $stage -le 2 ]; then @@ -82,7 +73,7 @@ fi proj_dir=$sign_dir/test/tsne_${attack_type_split_tag} if [ $stage -le 3 ];then echo "Make TSNE plots on all test attacks" - echo "Result will be left in $proj_idr" + echo "Result will be left in $proj_dir" for p in 30 100 250 do for e in 12 64 @@ -112,7 +103,7 @@ if [ $stage -le 4 ]; then fi if [ $stage -le 5 ];then - echo "Compute cofusion matrices" + echo "Compute confusion matrices" echo "Result is left in $logits_dir/test/eval_acc.log" $train_cmd $logits_dir/test/eval_acc.log \ hyp_utils/conda_env.sh steps_backend/eval-classif-perf.py \ diff --git a/egs/voxceleb/adv.v2/run_023_snr_classif_allknown.sh b/egs/voxceleb/adv.v2/run_023_snr_classif_allknown.sh index 79bf810a..a928ae29 100755 --- a/egs/voxceleb/adv.v2/run_023_snr_classif_allknown.sh +++ b/egs/voxceleb/adv.v2/run_023_snr_classif_allknown.sh @@ -10,9 +10,7 @@ set -e stage=1 ngpu=1 config_file=default_config.sh -resume=false interactive=false -num_workers=8 xvec_use_gpu=false xvec_chunk_length=12800 @@ -27,15 +25,9 @@ else xvec_cmd="$train_cmd" fi 
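# The retired torch-train-xvec-from-wav.py flags map onto nested
# jsonargparse keys of train_xvector_from_wav.py:
#   --audio-path     -> --data.{train,val}.dataset.audio-file
#   --time-durs-file -> --data.{train,val}.dataset.time-durs-file
#   --train-list     -> --data.train.dataset.segments-file
#   --val-list       -> --data.val.dataset.segments-file
#   --class-file     -> --data.train.dataset.class-file
# Batch size, number of workers and gradient accumulation now come from the
# YAML config, so the shell-level batch-size/grad-acc arithmetic is dropped
# (see the deletions below).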
-batch_size=$(($sign_nnet_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $sign_nnet_eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) list_dir=data/$snr_split_tag args="" -if [ "$resume" == "true" ];then - args="--resume" -fi if [ "$interactive" == "true" ];then export cuda_cmd=run.pl @@ -53,19 +45,16 @@ if [ $stage -le 1 ]; then mkdir -p $sign_nnet_dir/log $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-train-xvec-from-wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --audio-path $list_dir/trainval_wav.scp \ - --time-durs-file $list_dir/trainval_utt2dur \ - --train-list $list_dir/train_utt2attack \ - --val-list $list_dir/val_utt2attack \ - --class-file $list_dir/class_file \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $sign_nnet_dir $args - + train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ + --data.train.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.train.dataset.time-durs-file $list_dir/trainval_utt2dur \ + --data.train.dataset.segments-file $list_dir/train_utt2attack \ + --data.train.dataset.class-file $list_dir/class_file \ + --data.val.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.val.dataset.time-durs-file $list_dir/trainval_utt2dur \ + --data.val.dataset.segments-file $list_dir/val_utt2attack \ + --trainer.exp-path $sign_nnet_dir $args \ + --num-gpus $ngpu fi if [ $stage -le 2 ]; then @@ -83,7 +72,7 @@ fi proj_dir=$sign_dir/test/tsne if [ $stage -le 3 ];then echo "Make TSNE plots on all test attacks" - echo "Result will be left in $proj_idr" + echo "Result will be left in $proj_dir" for p in 30 100 250 do for e in 12 64 @@ -101,7 +90,6 @@ if [ $stage -le 3 ];then wait fi - if [ $stage -le 4 ]; then echo "Eval signature network logits on test attacks" mkdir -p $list_dir/test @@ -114,7 +102,7 @@ if [ $stage -le 4 ]; then fi if [ $stage -le 5 ];then - echo "Compute cofusion matrices" + echo "Compute confusion matrices" echo "Result is left in $logits_dir/test/eval_acc.log" $train_cmd $logits_dir/test/eval_acc.log \ hyp_utils/conda_env.sh steps_backend/eval-classif-perf.py \ diff --git a/egs/voxceleb/adv.v2/run_024_threat_model_classif_allknown.sh b/egs/voxceleb/adv.v2/run_024_threat_model_classif_allknown.sh index 3a4e9147..bed225a3 100755 --- a/egs/voxceleb/adv.v2/run_024_threat_model_classif_allknown.sh +++ b/egs/voxceleb/adv.v2/run_024_threat_model_classif_allknown.sh @@ -10,7 +10,6 @@ set -e stage=1 ngpu=1 config_file=default_config.sh -resume=false interactive=false num_workers=4 xvec_use_gpu=false @@ -27,16 +26,10 @@ else xvec_cmd="$train_cmd" fi -batch_size=$(($sign_nnet_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $sign_nnet_eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) list_dir=data/$threat_model_split_tag list_attack_type_dir=data/$attack_type_split_tag args="" -if [ "$resume" == "true" ];then - args="--resume" -fi if [ "$interactive" == "true" ];then export cuda_cmd=run.pl @@ -53,19 +46,17 @@ if [ $stage -le 1 ]; then echo "Train signature network on all attacks" mkdir -p $sign_nnet_dir/log $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-train-xvec-from-wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --audio-path 
$list_dir/trainval_wav.scp \ - --time-durs-file $list_dir/trainval_utt2dur \ - --train-list $list_dir/train_utt2attack \ - --val-list $list_dir/val_utt2attack \ - --class-file $list_dir/class_file \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $sign_nnet_dir $args + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ + --data.train.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.train.dataset.time-durs-file $list_dir/trainval_utt2dur \ + --data.train.dataset.segments-file $list_dir/train_utt2attack \ + --data.train.dataset.class-file $list_dir/class_file \ + --data.val.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.val.dataset.time-durs-file $list_dir/trainval_utt2dur \ + --data.val.dataset.segments-file $list_dir/val_utt2attack \ + --trainer.exp-path $sign_nnet_dir $args \ + --num-gpus $ngpu fi if [ $stage -le 2 ]; then @@ -83,7 +74,7 @@ fi proj_dir=$sign_dir/test/tsne_${attack_type_split_tag} if [ $stage -le 3 ];then echo "Make TSNE plots on all test attacks with colors indicating attack type" - echo "Result will be left in $proj_idr" + echo "Result will be left in $proj_dir" for p in 30 100 250 do for e in 12 64 @@ -137,7 +128,7 @@ if [ $stage -le 5 ]; then fi if [ $stage -le 6 ];then - echo "Compute cofusion matrices" + echo "Compute confusion matrices" echo "Result is left in $logits_dir/test/eval_acc.log" $train_cmd $logits_dir/test/eval_acc.log \ hyp_utils/conda_env.sh steps_backend/eval-classif-perf.py \ diff --git a/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh b/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh index 31cd6139..55cb8459 100755 --- a/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh +++ b/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh @@ -10,9 +10,7 @@ set -e stage=1 ngpu=1 config_file=default_config.sh -resume=false interactive=false -num_workers=4 xvec_use_gpu=false xvec_chunk_length=12800 @@ -27,18 +25,12 @@ else xvec_cmd="$train_cmd" fi -batch_size=$(($sign_nnet_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $sign_nnet_eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) #list with only the known attacks list_someknown_dir=data/$sk_attack_type_split_tag # list with all the attacks list_all_dir=data/$attack_type_split_tag args="" -if [ "$resume" == "true" ];then - args="--resume" -fi if [ "$interactive" == "true" ];then export cuda_cmd=run.pl @@ -55,19 +47,18 @@ if [ $stage -le 1 ]; then echo "Train attack signature network on known attacks only" mkdir -p $sign_nnet_dir/log $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-train-xvec-from-wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --audio-path $list_someknown_dir/trainval_wav.scp \ - --time-durs-file $list_someknown_dir/trainval_utt2dur \ - --train-list $list_someknown_dir/train_utt2attack \ - --val-list $list_someknown_dir/val_utt2attack \ - --class-file $list_someknown_dir/class_file \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $sign_nnet_dir $args + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_xvector_from_wav.py $sign_nnet_command --cfg 
$sign_nnet_config \ + --data.train.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.train.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ + --data.train.dataset.segments-file $list_someknown_dir/train_utt2attack \ + --data.train.dataset.class-file $list_someknown_dir/class_file \ + --data.val.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.val.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ + --data.val.dataset.segments-file $list_someknown_dir/val_utt2attack \ + --trainer.exp-path $sign_nnet_dir $args \ + --num-gpus $ngpu + fi if [ $stage -le 2 ]; then diff --git a/egs/voxceleb/adv.v2/run_032_snr_verif.sh b/egs/voxceleb/adv.v2/run_032_snr_verif.sh index 8e4f0d41..3886c339 100755 --- a/egs/voxceleb/adv.v2/run_032_snr_verif.sh +++ b/egs/voxceleb/adv.v2/run_032_snr_verif.sh @@ -10,9 +10,7 @@ set -e stage=1 ngpu=1 config_file=default_config.sh -resume=false interactive=false -num_workers=4 xvec_use_gpu=false xvec_chunk_length=12800 @@ -27,9 +25,6 @@ else xvec_cmd="$train_cmd" fi -batch_size=$(($sign_nnet_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $sign_nnet_eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) #list with only the known attacks list_someknown_dir=data/$sk_snr_split_tag # list with all the attacks @@ -55,19 +50,17 @@ if [ $stage -le 1 ]; then echo "Train attack signature network on known attacks only" mkdir -p $sign_nnet_dir/log $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-train-xvec-from-wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --audio-path $list_someknown_dir/trainval_wav.scp \ - --time-durs-file $list_someknown_dir/trainval_utt2dur \ - --train-list $list_someknown_dir/train_utt2attack \ - --val-list $list_someknown_dir/val_utt2attack \ - --class-file $list_someknown_dir/class_file \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $sign_nnet_dir $args + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ + --data.train.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.train.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ + --data.train.dataset.segments-file $list_someknown_dir/train_utt2attack \ + --data.train.dataset.class-file $list_someknown_dir/class_file \ + --data.val.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.val.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ + --data.val.dataset.segments-file $list_someknown_dir/val_utt2attack \ + --trainer.exp-path $sign_nnet_dir $args \ + --num-gpus $ngpu fi if [ $stage -le 2 ]; then diff --git a/egs/voxceleb/adv.v2/run_033_threat_model_verif.sh b/egs/voxceleb/adv.v2/run_033_threat_model_verif.sh index 1e87d749..392bffb5 100755 --- a/egs/voxceleb/adv.v2/run_033_threat_model_verif.sh +++ b/egs/voxceleb/adv.v2/run_033_threat_model_verif.sh @@ -10,9 +10,7 @@ set -e stage=1 ngpu=1 config_file=default_config.sh -resume=false interactive=false -num_workers=4 xvec_use_gpu=false xvec_chunk_length=12800 @@ -27,9 +25,6 @@ else xvec_cmd="$train_cmd" fi -batch_size=$(($sign_nnet_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $sign_nnet_eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) #list with only the known 
attacks list_someknown_dir=data/$sk_threat_model_split_tag # list with all the attacks @@ -56,19 +51,18 @@ if [ $stage -le 1 ]; then echo "Train attack signature network on known attacks only" mkdir -p $sign_nnet_dir/log $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-train-xvec-from-wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --audio-path $list_someknown_dir/trainval_wav.scp \ - --time-durs-file $list_someknown_dir/trainval_utt2dur \ - --train-list $list_someknown_dir/train_utt2attack \ - --val-list $list_someknown_dir/val_utt2attack \ - --class-file $list_someknown_dir/class_file \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $sign_nnet_dir $args + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ + --data.train.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.train.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ + --data.train.dataset.segments-file $list_someknown_dir/train_utt2attack \ + --data.train.dataset.class-file $list_someknown_dir/class_file \ + --data.val.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.val.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ + --data.val.dataset.segments-file $list_someknown_dir/val_utt2attack \ + --trainer.exp-path $sign_nnet_dir $args \ + --num-gpus $ngpu + fi if [ $stage -le 2 ]; then diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-calibration-v1.py b/egs/voxceleb/adv.v2/steps_backend/eval-calibration-v1.py new file mode 100755 index 00000000..fdd5516f --- /dev/null +++ b/egs/voxceleb/adv.v2/steps_backend/eval-calibration-v1.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + + Evals calibration +""" + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np + +from hyperion.hyp_defs import float_cpu, config_logger +from hyperion.utils.trial_scores import TrialScores +from hyperion.utils.trial_key import TrialKey +from hyperion.utils.trial_ndx import TrialNdx +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR + + +def eval_calibration(in_score_file, ndx_file, model_file, out_score_file): + + logging.info("load ndx: %s" % ndx_file) + try: + ndx = TrialNdx.load_txt(ndx_file) + except: + ndx = TrialKey.load_txt(ndx_file) + + logging.info("load scores: %s" % in_score_file) + scr = TrialScores.load_txt(in_score_file) + scr = scr.align_with_ndx(ndx) + + logging.info("load model: %s" % model_file) + lr = LR.load(model_file) + logging.info("apply calibration") + s_cal = lr.predict(scr.scores.ravel()) + scr.scores = np.reshape(s_cal, scr.scores.shape) + + logging.info("save scores: %s" % out_score_file) + scr.save_txt(out_score_file) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Evals linear calibration") + + parser.add_argument("--in-score-file", dest="in_score_file", required=True) + parser.add_argument("--out-score-file", dest="out_score_file", required=True) + parser.add_argument("--ndx-file", dest="ndx_file", required=True) + 
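    # The ndx file may be either a TrialNdx or a TrialKey; eval_calibration
    # above falls back to TrialKey when TrialNdx parsing fails.
    # Example invocation (hypothetical paths), matching the call in
    # local/calibrate_voxceleb1_o_clean.sh:
    #   steps_backend/eval-calibration-v1.py --in-score-file scores \
    #       --ndx-file trials --model-file cal.h5 --out-score-file scores_cal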
parser.add_argument("--model-file", dest="model_file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_calibration(**namespace_to_dict(args)) diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf.py b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf.py index c174cb3b..d3d828a5 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-classif-perf.py @@ -52,7 +52,7 @@ def eval_classif_perf(score_file, key_file, class_file, output_path=None, **kwar acc = compute_accuracy(y_true, y_pred) logging.info("Classification accuracy %.2f %%" % (acc * 100)) - labels = np.arange(len(classes), dtype=np.int) + labels = np.arange(len(classes), dtype=int) C = compute_confusion_matrix(y_true, y_pred, labels=labels, normalize=False) logging.info("Unnormalized Confusion Matrix:") print_confusion_matrix(C, labels_true=classes) @@ -69,8 +69,6 @@ def eval_classif_perf(score_file, key_file, class_file, output_path=None, **kwar parser.add_argument("--score-file", required=True) parser.add_argument("--key-file", required=True) parser.add_argument("--class-file", required=True) - - # parser.add_argument('--output-path', dest='output_path', required=True) parser.add_argument( "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int ) diff --git a/egs/voxceleb/adv.v2/steps_backend/train-calibration-v1.py b/egs/voxceleb/adv.v2/steps_backend/train-calibration-v1.py new file mode 100755 index 00000000..489ceed9 --- /dev/null +++ b/egs/voxceleb/adv.v2/steps_backend/train-calibration-v1.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + + Trains calibration for SRE18 tel condition +""" + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging + +import numpy as np + +from hyperion.hyp_defs import float_cpu, config_logger +from hyperion.utils.trial_scores import TrialScores +from hyperion.utils.trial_key import TrialKey +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR + + +def train_calibration(score_file, key_file, model_file, prior, lambda_reg, verbose): + + logging.info("load key: %s" % key_file) + key = TrialKey.load_txt(key_file) + logging.info("load scores: %s" % score_file) + scr = TrialScores.load_txt(score_file) + tar, non = scr.get_tar_non(key) + ntar = len(tar) + nnon = len(non) + + min_dcf, p_miss, p_fa = compute_min_dcf(tar, non, prior) + n_miss = p_miss * ntar + n_fa = p_fa * nnon + logging.info( + "min_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f" + % (min_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa) + ) + + logging.info("train calibration") + x = np.concatenate((tar, non)) + y = np.concatenate( + (np.ones((ntar,), dtype="int32"), np.zeros((nnon,), dtype="int32")) + ) + lr = LR( + prior=prior, + lambda_reg=lambda_reg, + bias_scaling=1, + solver="liblinear", + verbose=verbose, + ) + lr.fit(x, y) + print(lr.A) + print(lr.b) + logging.info("save calibration at %s" % model_file) + lr.save(model_file) + + logging.info("calibrate scores") + tar_cal = lr.predict(tar) + non_cal = lr.predict(non) + act_dcf, p_miss, p_fa = 
compute_act_dcf(tar_cal, non_cal, prior) + n_miss = p_miss * ntar + n_fa = p_fa * nnon + logging.info( + "act_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f" + % (act_dcf, p_miss * 100, p_fa * 100, n_miss, n_fa) + ) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Trains llr calibration") + + parser.add_argument("--score-file", dest="score_file", required=True) + parser.add_argument("--key-file", dest="key_file", required=True) + parser.add_argument("--model-file", dest="model_file", required=True) + parser.add_argument("--prior", dest="prior", type=float, default=0.01) + parser.add_argument("--lambda-reg", dest="lambda_reg", type=float, default=1e-5) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + logging.debug(args) + + train_calibration(**namespace_to_dict(args)) diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index b8a17dc6..23e0a26f 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -95,10 +95,26 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.68 | 0.052 | 0.088 | | | | | Cosine + AS-Norm | 0.63 | 0.049 | 0.083 | | | | | Cosine + QMF | 0.57 | 0.037 | 0.071 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.56 | 0.40 | 0.065 | +| config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.77 | 0.048 | 0.071 | +| | | | Cosine + AS-Norm | 0.70 | 0.039 | 0.048 | +| | | | Cosine + QMF | 0.62 | 0.034 | 0.042 | +| config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.76 | 0.048 | 0.071 | +| | | | Cosine + AS-Norm | 0.70 | 0.041 | 0.061 | +| | | | Cosine + QMF | 0.62 | 0.037 | 0.056 | +| config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.77 | 0.48 | 0.077 | +| | | | Cosine + AS-Norm | 0.68 | 0.040 | 0.062| +| | | | Cosine + QMF | 0.62 | 0.036 | 0.063 | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.56 | 0.040 | 0.065 | | | | | Cosine + AS-Norm | 0.52 | 0.33 | 0.045 | | | | | Cosine + QMF | 0.45 | 0.027 | 0.043 | + ### VoxCeleb 1 Entire-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -109,6 +125,21 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.85 | 0.055 | 0.100 | | | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.087 | | | | | Cosine + QMF | 0.76 | 0.047 | 0.083 | +| config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.86 | 0.054 | 0.098 | +| | | | Cosine + AS-Norm | 0.81 | 0.049 | 0.087 | +| | | | Cosine + QMF | 0.77 | 0.046 | 0.082 | +| config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.89 | 0.058 | 0.098 | +| | | | Cosine + AS-Norm | 0.84 | 0.053 | 0.087| +| | | | Cosine + QMF | 0.80 | 0.050 | 0.081 | +| config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.83 | 0.053 | 0.098 | +| | | | Cosine + AS-Norm | 0.78 | 0.047| 0.085 | +| | | | Cosine + QMF | 0.74 | 0.045 | 0.081 | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.71 | 0.044 | 0.076| | | | | Cosine + AS-Norm | 0.66 | 0.040 | 0.069 | | | | | Cosine + QMF | 0.63 | 0.037 | 0.067 | @@ -123,6 +154,21 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.66 | 0.103 | 0.168 | | | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.151 | | | | | Cosine + QMF | 1.44 | 0.087 | 0.145 | +| config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.62 | 0.098 | 0.164 | +| | | | Cosine + AS-Norm | 1.45 | 0.085 | 0.142 | +| | | | Cosine + QMF | 1.36 | 0.082 | 0.137 | +| config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.70 | 0.1 | 0.165 | +| | | | Cosine + AS-Norm | 1.50 | 0.086 | 0.138 | +| | | | Cosine + QMF | 1.44 | 0.085 | 0.139 | +| config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.59 | 0.096 | 0.165 | +| | | | Cosine + AS-Norm | 1.41 | 0.083 | 0.143 | +| | | | Cosine + QMF | 1.34 | 0.079 | 0.136 | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.30 | 0.076 | 0.125 | | | | | Cosine + AS-Norm | 1.15 | 0.066 | 0.109 | | | | | Cosine + QMF | 1.11 | 0.065 | 0.103 | @@ -137,6 +183,21 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.33 | 0.156 | 0.260 | | | | | Cosine + AS-Norm | 2.19 | 0.144 | 0.263 | | | | | Cosine + QMF | 2.06 | 0.137 | 0.251 | +| config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.19 | 0.142 | 0.242 | +| | | | Cosine + AS-Norm | 2.00 | 0.133 | 0.254 | +| | | | Cosine + QMF | 1.86 | 0.126 | 0.229 | +| config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.34 | 0.145 | 0.246 | +| | | | Cosine + AS-Norm | 2.10 | 0.135 | 0.248 | +| | | | Cosine + QMF | 2.01 | 0.127 | 0.218 | +| config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.25 | 0.136 | 0.239 | +| | | | Cosine + AS-Norm | 1.99 | 0.127 | 0.232 | +| | | | Cosine + QMF | 1.87 | 0.119 | 0.216 | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | +| | | | Cosine + AS-Norm | | | | +| | | | Cosine + QMF | | | | | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.92 | 0.124 | 0.208 | | | | | Cosine + AS-Norm | 1.71 | 0.109 | 0.212 | | | | | Cosine + QMF | 1.62 | 0.103 | 0.192 | diff --git a/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..727f40a3 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,71 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: cfwseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 32 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml new 
file mode 100644 index 00000000..04665cac --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..e2fb4c40 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,71 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: seresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 32 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 25 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..04665cac --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: 
class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..db559c14 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,71 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: fwseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 4 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..04665cac --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - 
conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/conf/train_resnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_resnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..bff4a00b --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_resnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,70 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: resnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..1d864080 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,71 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: tseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 
0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 128 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256
diff --git a/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..04665cac --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2
diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh new file mode 100644 index 00000000..32c91da2 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh @@ -0,0 +1,44 @@ +# CFwSE-ResNet34 x-vector + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_cfwseresnet34.v3.0 + +nnet_s1_base_cfg=conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=false #true +do_qmf=false #true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + 
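+# note: with do_plda/do_snorm/do_qmf set to false above, scoring presumably falls back to plain cosine similarity; the PLDA settings above only take effect if do_plda is re-enabled.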
diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cwseresnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cwseresnet34.v3.0.sh new file mode 100644 index 00000000..dbbf6fa7 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cwseresnet34.v3.0.sh @@ -0,0 +1,45 @@ +# CwSE-ResNet34 x-vector + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_cwseresnet34.v3.0 + +nnet_s1_base_cfg=conf/train_cwseresnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0025.pth + + +nnet_s2_base_cfg=conf/train_cwseresnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + 
diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh new file mode 100644 index 00000000..62b02c28 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh @@ -0,0 +1,44 @@ +# FwSE-ResNet34 x-vector + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_fwseresnet34.v3.0 + +nnet_s1_base_cfg=conf/train_fwseresnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_fwseresnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + 
diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34.v3.0.sh new file mode 100644 index 00000000..c49936e0 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34.v3.0.sh @@ -0,0 +1,44 @@ +# ResNet34 x-vector + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_resnet34.v3.0 + +nnet_s1_base_cfg=conf/train_resnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_resnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + 
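+# note: two-stage recipe: stage 1 trains the x-vector from scratch (epochs: 35, hence model_ep0035.pth); stage 2 fine-tunes it with a larger margin and SWA, so the final checkpoint is the SWA-averaged swa_model_ep0016.pth.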
diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh new file mode 100644 index 00000000..42af2d52 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh @@ -0,0 +1,44 @@ +# TSE-ResNet34 x-vector + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_tseresnet34.v3.0 + +nnet_s1_base_cfg=conf/train_tseresnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_tseresnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=false #true +do_qmf=false #true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + 
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py b/hyperion/bin/eval_xvec_logits_from_wav.py similarity index 100% rename from hyperion/bin_deprec2/torch-eval-xvec-logits-from-wav.py rename to hyperion/bin/eval_xvec_logits_from_wav.py
diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py index 88b0b1d9..27a7e573 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_classif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py @@ -12,8 +12,12 @@ import numpy as np import pandas as pd import yaml -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -160,7 +164,7 @@ def generate_attacks( logging.info("opening audio read stream: %s" % (wav_file)) audio_args = AR.filter_args(**kwargs) - audio_reader = AR(wav_file) + audio_reader = AR(wav_file, **audio_args) wav_scale = audio_reader.wav_scale logging.info("opening audio write stream: %s" % (output_wav_dir))
diff --git a/hyperion/bin/generate_adv_attacks_xvector_verif.py b/hyperion/bin/generate_adv_attacks_xvector_verif.py index a4df5091..882a36a6 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_verif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_verif.py @@ -12,8 +12,12 @@ import numpy as np import pandas as pd import yaml -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -102,20 +106,6 @@ def init_model(model_path, embed_layer, cal_file, threshold, **kwargs): xvector_model.freeze() logging.info("xvector-model={}".format(xvector_model)) - # feat_args = AFF.filter_args(prefix='feats', **kwargs) - # logging.info('initializing feature extractor args={}'.format(feat_args)) - # feat_extractor = AFF.create(**feat_args) - - # 
mvn_args = MVN.filter_args(prefix='mvn', **kwargs) - # mvn = None - # if mvn_args['norm_mean'] or mvn_args['norm_var']: - # logging.info('initializing short-time mvn args={}'.format(mvn_args)) - # mvn = MVN(**mvn_args) - - # logging.info('loading model {}'.format(model_path)) - # xvector_model = TML.load(model_path) - # xvector_model.freeze() - calibrator = None if cal_file is not None: logging.info("loading calibration params {}".format(cal_file)) @@ -200,16 +190,17 @@ def generate_attacks( key, x_e = read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts) x_e = torch.as_tensor(x_e, dtype=torch.get_default_dtype()) - logging.info("opening audio read stream: %s" % (test_wav_file)) + logging.info("opening audio read stream: %s", test_wav_file) audio_args = AR.filter_args(**kwargs) - audio_reader = AR(test_wav_file) + audio_reader = AR(test_wav_file, **audio_args) wav_scale = audio_reader.wav_scale + kwargs["wav_scale"] = wav_scale - logging.info("opening audio write stream: %s" % (output_wav_dir)) + logging.info("opening audio write stream: %s", output_wav_dir) audio_writer = AW(output_wav_dir, audio_format="flac") if vad_spec is not None: - logging.info("opening VAD stream: %s" % (vad_spec)) + logging.info("opening VAD stream: %s", vad_spec) v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") attack_factory = init_attack_factory(**kwargs) @@ -217,7 +208,7 @@ def generate_attacks( for j in range(key.num_tests): t1 = time.time() - logging.info("scoring test utt %s" % (key.seg_set[j])) + logging.info("scoring test utt %s", key.seg_set[j]) s, fs = audio_reader.read([key.seg_set[j]]) s = s[0] fs = fs[0] @@ -235,13 +226,11 @@ def generate_attacks( ) model.vad_t = vad logging.info( - "utt %s detected %d/%d (%.2f %%) speech frames" - % ( - key.seg_set[j], - speech_frames, - tot_frames, - speech_frames / tot_frames * 100, - ) + "utt %s detected %d/%d (%.2f %%) speech frames", + key.seg_set[j], + speech_frames, + tot_frames, + speech_frames / tot_frames * 100, ) t2 = time.time() @@ -253,23 +242,23 @@ def generate_attacks( if key.tar[i, j] or key.non[i, j]: t3 = time.time() if skip_attack(key.tar[i, j], p_tar_attack, p_non_attack): - logging.info("skipping attack for tar trial %s" % (trial_id)) + logging.info("skipping attack for tar trial %s", trial_id) continue - model.x_e = x_e[i].to(device) + model.x_e = x_e[i : i + 1].to(device) with torch.no_grad(): score_benign = model(s) if key.tar[i, j] and score_benign < 0: logging.info( - "target trial %s failed benign classification, skipping..." - % (trial_id) + "target trial %s failed benign classification, skipping...", + trial_id, ) continue elif key.non[i, j] and score_benign > 0: logging.info( - "non-target trial %s failed benign classification, skipping..." - % (trial_id) + "non-target trial %s failed benign classification, skipping...", + trial_id, ) continue @@ -293,19 +282,19 @@ def generate_attacks( success = False if not save_failed: logging.info( - "attack on target trial %s failed, skipping..." % (trial_id) + "attack on target trial %s failed, skipping...", trial_id ) continue elif key.non[i, j] and score_adv < 0: success = False if not save_failed: logging.info( - "attack on non-target trial %s failed benign classification, skipping..." 
- % (trial_id) + "attack on non-target trial %s failed, skipping...", + trial_id, ) continue if success: - logging.info("attack on trial %s successful" % (trial_id)) + logging.info("attack on trial %s successful", trial_id) stats_ij = compute_stats_adv_attack(s, s_adv) stats_ij = [float(stat.detach().cpu().numpy()[0]) for stat in stats_ij] @@ -344,18 +333,16 @@ ( "utt %s total-time=%.3f read-time=%.3f trial-time=%.3f n_trials=%d " "rt-factor=%.4f" - ) - % ( - key.seg_set[j], - t7 - t1, - t2 - t1, - trial_time, - num_trials, - num_trials * len(s) / fs / (t7 - t1), - ) + ), + key.seg_set[j], + t7 - t1, + t2 - t1, + trial_time, + num_trials, + num_trials * len(s) / fs / (t7 - t1), ) - logging.info("saving attack info to %s" % (attack_info_file)) + logging.info("saving attack info to %s", attack_info_file) Path(attack_info_file).parent.mkdir(parents=True, exist_ok=True) with open(attack_info_file, "w") as f:
diff --git a/hyperion/bin/prepare_data.py b/hyperion/bin/prepare_data.py index b7370b9b..df212a94 100755 --- a/hyperion/bin/prepare_data.py +++ b/hyperion/bin/prepare_data.py @@ -14,6 +14,7 @@ ) from hyperion.data_prep import DataPrep +from hyperion.hyp_defs import config_logger def make_parser(data_prep_class): @@ -34,6 +35,7 @@ def make_parser(data_prep_class): subcommands.add_subcommand(k, parser_k) args = parser.parse_args()
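+    # configure logging before the prep runs (verbose level 1, presumably INFO in hyperion's convention); otherwise the logging.info progress messages in the DataPrep classes would not be shown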
+    config_logger(1) data_prep_class = DataPrep.registry[args.subcommand] args = namespace_to_dict(args)[args.subcommand]
diff --git a/hyperion/data_prep/data_prep.py b/hyperion/data_prep/data_prep.py index 966adeef..bb91e3a5 100644 --- a/hyperion/data_prep/data_prep.py +++ b/hyperion/data_prep/data_prep.py @@ -4,6 +4,9 @@ """ from jsonargparse import ActionYesNo from pathlib import Path +from concurrent.futures import ThreadPoolExecutor +from tqdm import tqdm +from ..utils import PathLike class DataPrep: @@ -14,15 +17,24 @@ class DataPrep: output_dir: output data directory use_kaldi_ids: puts speaker-id in front of segment id like kaldi target_sample_freq: target sampling frequency to convert the audios to. + num_threads: number of parallel threads """ registry = {} - def __init__(self, corpus_dir, output_dir, use_kaldi_ids, target_sample_freq): + def __init__( + self, + corpus_dir: PathLike, + output_dir: PathLike, + use_kaldi_ids: bool, + target_sample_freq: int, + num_threads: int = 10, + ): self.corpus_dir = Path(corpus_dir) self.output_dir = Path(output_dir) self.use_kaldi_ids = use_kaldi_ids self.target_sample_freq = target_sample_freq + self.num_threads = num_threads self.output_dir.mkdir(exist_ok=True, parents=True) @@ -34,6 +46,42 @@ def __init_subclass__(cls, **kwargs): def dataset_name(): raise NotImplementedError() + @staticmethod + def _get_recording_duration(scp, i, n): + from ..io import SequentialAudioReader as AR + + durations = [] + fss = [] + with AR(scp, part_idx=i, num_parts=n) as reader: + for data in reader: + key, x, fs = data + duration = x.shape[0] / fs + fss.append(fs) + durations.append(duration) + + return fss, durations + + def get_recording_duration(self, recording_set): + + from ..utils import SCPList + import itertools + + scp = SCPList(recording_set["id"].values, recording_set["storage_path"].values) + futures = [] + with ThreadPoolExecutor(max_workers=self.num_threads) as pool: + for i in range(self.num_threads): + future = pool.submit( + DataPrep._get_recording_duration, scp, i, self.num_threads + ) + futures.append(future) + + res = [f.result() for f in tqdm(futures)] + fss = list(itertools.chain(*[r[0] for r in res])) + durations = list(itertools.chain(*[r[1] for r in res])) + + recording_set["duration"] = durations + recording_set["sample_freq"] = fss + @staticmethod def add_class_args(parser): parser.add_argument( @@ -54,3 +102,10 @@ def add_class_args(parser): type=int, help="""target sampling frequency to convert the audios to""", ) + + parser.add_argument( + "--num-threads", + default=10, + type=int, + help="""number of parallel threads""", + )
diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py index 25692349..5c04f86c 100644 --- a/hyperion/data_prep/voxceleb2.py +++ b/hyperion/data_prep/voxceleb2.py @@ -5,12 +5,15 @@ from jsonargparse import ActionYesNo from pathlib import Path import re +import logging +from tqdm import tqdm +from concurrent.futures import ThreadPoolExecutor import pandas as pd import numpy as np -from ..utils.misc import urlretrieve_progress -from ..utils import RecordingSet, SegmentSet, ClassInfo +from ..utils.misc import urlretrieve_progress, PathLike +from ..utils import RecordingSet, SegmentSet, ClassInfo, Dataset from .data_prep import DataPrep @@ -28,14 +31,20 @@ class VoxCeleb2DataPrep(DataPrep): def __init__( self, - corpus_dir, - subset, - cat_videos, - output_dir, - use_kaldi_ids, - target_sample_freq, + corpus_dir: PathLike, + subset: str, + cat_videos: bool, + output_dir: PathLike, + use_kaldi_ids: bool, + target_sample_freq: int, + num_threads: int = 10, ): - super().__init__(corpus_dir, output_dir, use_kaldi_ids, target_sample_freq) + if cat_videos: + use_kaldi_ids = True + super().__init__( + corpus_dir, output_dir, use_kaldi_ids, target_sample_freq, num_threads + ) + + self.subset = subset self.cat_videos = cat_videos @@ -69,8 +78,9 @@ def _get_metadata(self): file_path, _ = urlretrieve_progress(url, file_path, desc=file_name) df_meta = pd.read_csv(file_path, sep="\t") - print(df_meta.head()) - df_meta.set_index("VoxCeleb2 ID") + df_meta.rename(columns=str.strip, inplace=True) + df_meta = df_meta.applymap(lambda x: str.strip(x) if isinstance(x, str) else x) + df_meta.set_index("VoxCeleb2 
ID", inplace=True) return df_meta def _get_langs_est(self): @@ -84,49 +94,83 @@ def _get_langs_est(self): df_lang = pd.read_csv(file_path, sep=",") - def get_video(x): - x = re.sub("/.*.wav$", "", x) - x = re.sub("^.*/", "", x) - return x + if self.cat_videos: + + def get_video(x): + x = re.sub("/[^/]*.wav$", "", x) + return re.sub("/", "-", x) - df_lang["video"] = df_lang["filename"].apply(get_video) - df_lang["filename"].drop(["filename"], axis=1, inplace=True) + elif self.use_kaldi_ids: + + def get_video(x): + x = re.sub(".wav$", "", x) + return re.sub("/", "-", x) + + else: + + def get_video(x): + x = re.sub(".wav$", "", x) + x = re.sub("^[^/]*/", "", x) + return re.sub("/", "-", x) + + df_lang["id"] = df_lang["filename"].apply(get_video) + df_lang.drop(["filename"], axis=1, inplace=True) df_lang.drop_duplicates(inplace=True) - df_lang.set_index("video") + df_lang.set_index("id", inplace=True) + df_lang["lang"] = df_lang["lang"].apply(str.lower) return df_lang + @staticmethod + def make_cat_list(lists_cat_dir, rec_id, rec_files, video_idx, i): + list_file = lists_cat_dir / f"{rec_id}.txt" + with open(list_file, "w") as fw: + rec_idx = (video_idx == i).nonzero()[0] + recs_i = [f"file {rec_files[j]}" for j in rec_idx] + recs_i.sort() + recs_i = "\n".join(recs_i) + fw.write(f"{recs_i}\n") + + file_path = ( + f"ffmpeg -v 8 -f concat -safe 0 -i {list_file} -f wav -acodec pcm_s16le -|" + ) + return file_path + def prepare(self): + logging.info("getting audio meta-data") df_meta = self._get_metadata() + logging.info("getting language estimations") df_lang = self._get_langs_est() rec_dir = self.corpus_dir / self.subset + logging.info("searching audio files in %s", str(rec_dir)) rec_files = list(rec_dir.glob("**/*.m4a")) speakers = [f.parents[1].name for f in rec_files] video_ids = [f.parent.name for f in rec_files] - if self.concat_videos: + if self.cat_videos: lists_cat_dir = self.output_dir / "lists_cat" lists_cat_dir.mkdir(exist_ok=True, parents=True) uniq_video_ids, uniq_video_idx, video_idx = np.unique( video_ids, return_index=True, return_inverse=True ) rec_ids = uniq_video_ids - speakers = speakers[uniq_video_idx] - if self.use_kaldi_ids: - rec_ids = [f"{s}-{v}" for s, v in zip(speakers, uniq_video_ids)] - else: - rec_ids = uniq_video_ids + speakers = [speakers[i] for i in uniq_video_idx] + rec_ids = [f"{s}-{v}" for s, v in zip(speakers, uniq_video_ids)] file_paths = [] - for i, video_id in enumerate(uniq_video_ids): - list_file = lists_cat_dir / f"{video_id}.txt" - with open(list_file, "w") as fw: - rec_mask = video_idx == i - recs_i = rec_files[rec_mask] - for rec in recs_i: - fw.write(f"{rec}\n") - - file_path = f"ffmpeg -v 8 -f concat -safe 0 -i {list_file} -f wav -acodec pcm_s16le -|" - file_paths.append(file_path) - + futures = [] + logging.info("making video cat lists") + with ThreadPoolExecutor(max_workers=self.num_threads) as pool: + for i, rec_id in enumerate(rec_ids): + future = pool.submit( + VoxCeleb2DataPrep.make_cat_list, + lists_cat_dir, + rec_id, + rec_files, + video_idx, + i, + ) + futures.append(future) + + file_paths = [f.result() for f in tqdm(futures)] video_ids = uniq_video_ids else: @@ -139,21 +183,48 @@ def prepare(self): rec_ids = [f"{v}-{f}" for v, f in zip(video_ids, file_names)] file_paths = [] - for rec_file in rec_files: + logging.info("making pipe commands") + for rec_file in tqdm(rec_files): file_path = f"ffmpeg -v 8 -i {rec_file} -f wav -acodec pcm_s16le - |" file_paths.append(file_path) - recs = pd.DataFrame({"id": rec_ids, "file_path": file_paths}) + 
logging.info("making RecordingSet") + recs = pd.DataFrame({"id": rec_ids, "storage_path": file_paths}) recs = RecordingSet(recs) + recs.sort() + + logging.info("getting recording durations") + self.get_recording_duration(recs) + if self.target_sample_freq: + recs["target_sample_freq"] = self.target_sample_freq + + logging.info("making SegmentsSet") segments = pd.DataFrame( { "id": rec_ids, "video_ids": video_ids, "speaker": speakers, "gender": df_meta.loc[speakers, "Gender"], + "language_est": [ + df_lang.loc[r, "lang"] if r in df_lang.index else "N/A" + for r in rec_ids + ], + "language_est_conf": [ + df_lang.loc[r, "confidence"] if r in df_lang.index else "N/A" + for r in rec_ids + ], + # "duration": recs.loc[rec_ids, "duration"], } ) + print( + recs.loc[rec_ids, "duration"], + len(segments), + len(recs.loc[rec_ids, "duration"]), + ) segments = SegmentSet(segments) + segments.sort() + + logging.info("making speaker info file") uniq_speakers = np.unique(speakers) speakers = pd.DataFrame( { @@ -164,6 +235,18 @@ def prepare(self): ) speakers = ClassInfo(speakers) - print(recs) - print(segments) - print(speakers) + logging.info("making language info file") + languages = np.unique(df_lang["lang"]) + languages = ClassInfo(pd.DataFrame({"id": languages})) + + logging.info("making dataset") + dataset = Dataset( + segments, + {"speaker": speakers, "languages": languages}, + {"recordings": recs}, + ) + logging.info("saving dataset at %s", self.output_dir) + dataset.save(self.output_dir) + logging.info( + "datasets containts %d segments, %d speakers", len(segments), len(speakers) + ) diff --git a/hyperion/torch/adv_attacks/random_attack_factory.py b/hyperion/torch/adv_attacks/random_attack_factory.py index 0c83bc56..a91c99ac 100644 --- a/hyperion/torch/adv_attacks/random_attack_factory.py +++ b/hyperion/torch/adv_attacks/random_attack_factory.py @@ -128,7 +128,7 @@ def _sample_attack_args(self): ) attack_args["max_iter"] = self._randint(self.min_iter, self.max_iter) attack_args["abort_early"] = self.abort_early - attack_args["c"] = self._uniform(self.min_c, self.max_c) + attack_args["initial_c"] = self._uniform(self.min_c, self.max_c) attack_args["reduce_c"] = self.reduce_c attack_args["c_incr_factor"] = self.c_incr_factor attack_args["tau_decr_factor"] = self.tau_decr_factor @@ -220,10 +220,9 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--norms", - type=float, - default=[float("inf")], + default=["inf"], nargs="+", - choices=[float("inf"), 1, 2], + choices=["inf", "1", "2"], help=("Attack perturbation norms"), ) diff --git a/hyperion/torch/layer_blocks/se_blocks.py b/hyperion/torch/layer_blocks/se_blocks.py index b14c2b60..fd98db2e 100644 --- a/hyperion/torch/layer_blocks/se_blocks.py +++ b/hyperion/torch/layer_blocks/se_blocks.py @@ -43,7 +43,7 @@ def _standardize_mask(self, mask): return mask - def compute_scale_logit(self, x, x_mask=None): + def compute_scale_logits(self, x, x_mask=None): """comptue the scale before the sigmoid Args: @@ -74,8 +74,8 @@ def forward(self, x, x_mask=None): Returns: Tensor with shape = (batch, channels, heigh, width). 
""" - scale_logit = self.compute_scale_logit(x, x_mask) - scale = self.sigmoid(scale_logit) + scale_logits = self.compute_scale_logits(x, x_mask) + scale = self.sigmoid(scale_logits) y = scale * x return y @@ -201,6 +201,10 @@ def __init__( ): super().__init__() self.cw_se = SEBlock2d(num_channels, r, activation) + # the bottlenet features will have at least dimension 4 + if num_feats // r < 4: + r = num_feats // 4 + self.fw_se = SEBlock2d(num_feats, r, activation) def forward(self, x, x_mask=None): diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index 67f492f9..ecde6139 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -3,12 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .misc import PathLike +from .dataset import Dataset from .class_info import ClassInfo from .feature_set import FeatureSet from .kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix from .recording_set import RecordingSet from .rttm import RTTM from .scp_list import SCPList + # from .ext_segment_list import ExtSegmentList from .segment_list import SegmentList from .segment_set import SegmentSet diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py index 9e158d87..70ee82c8 100644 --- a/hyperion/utils/class_info.py +++ b/hyperion/utils/class_info.py @@ -22,6 +22,7 @@ def __init__(self, df): self.df["weights"] /= self.df["weights"].sum() def add_class_idx(self): + self.sort() self.df["class_idx"] = [i for i in range(len(self.df))] def set_uniform_weights(self): @@ -38,18 +39,21 @@ def exp_weights(self, x): weights = self.df["weights"] ** x self.set_weights(weights) - def set_zero_weight(self, id): - self.df.loc[id, "weights"] = 0 + def set_zero_weight(self, ids): + self.df.loc[ids, "weights"] = 0 self.df["weights"] /= self.df["weights"].sum() @property - def weights(self, id): - return self.df.loc[id, "weights"] + def weights(self, ids): + return self.df.loc[ids, "weights"] @property def num_classes(self): return self.df["class_idx"].values.max() + 1 + def sort_by_idx(self, ascending=True): + self.sort("class_idx", ascending) + @classmethod def load(cls, file_path, sep=None): """Loads utt2info list from text file. 
+ +        self.fw_se = SEBlock2d(num_feats, r, activation) def forward(self, x, x_mask=None):
diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index 67f492f9..ecde6139 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -3,12 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .misc import PathLike +from .dataset import Dataset from .class_info import ClassInfo from .feature_set import FeatureSet from .kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix from .recording_set import RecordingSet from .rttm import RTTM from .scp_list import SCPList + # from .ext_segment_list import ExtSegmentList from .segment_list import SegmentList from .segment_set import SegmentSet
diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py index 9e158d87..70ee82c8 100644 --- a/hyperion/utils/class_info.py +++ b/hyperion/utils/class_info.py @@ -22,6 +22,7 @@ def __init__(self, df): self.df["weights"] /= self.df["weights"].sum() def add_class_idx(self): + self.sort() self.df["class_idx"] = [i for i in range(len(self.df))] def set_uniform_weights(self): @@ -38,18 +39,21 @@ def exp_weights(self, x): weights = self.df["weights"] ** x self.set_weights(weights) - def set_zero_weight(self, id): - self.df.loc[id, "weights"] = 0 + def set_zero_weight(self, ids): + self.df.loc[ids, "weights"] = 0 self.df["weights"] /= self.df["weights"].sum() @property - def weights(self, id): - return self.df.loc[id, "weights"] + def weights(self, ids): + return self.df.loc[ids, "weights"] @property def num_classes(self): return self.df["class_idx"].values.max() + 1 + def sort_by_idx(self, ascending=True): + self.sort("class_idx", ascending) + @classmethod def load(cls, file_path, sep=None): """Loads utt2info list from text file. 
diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py new file mode 100644 index 00000000..546dd715 --- /dev/null +++ b/hyperion/utils/dataset.py @@ -0,0 +1,159 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from typing import Dict, Optional +from pathlib import Path +import yaml + +from .segment_set import SegmentSet +from .recording_set import RecordingSet +from .feature_set import FeatureSet +from .class_info import ClassInfo +from .misc import PathLike + + +class Dataset: +    """ Class that contains all objects +    (segments, recordings, features, class_infos) that +    make up a dataset +    """ + +    def __init__( +        self, +        segments: SegmentSet, +        classes: Optional[Dict[str, ClassInfo]] = None, +        recordings: Optional[Dict[str, RecordingSet]] = None, +        features: Optional[Dict[str, FeatureSet]] = None, +    ): +        self._segments = segments +        self._classes = classes +        self._recordings = recordings +        self._features = features + +    @property +    def segments(self): +        return self._segments + +    @property +    def recordings(self): +        return self._recordings + +    @property +    def features(self): +        return self._features + +    @property +    def classes(self): +        return self._classes + +    @staticmethod +    def resolve_dataset_path(dataset_path): +        dataset_path = Path(dataset_path) +        ext = dataset_path.suffix +        if ext in [".yaml", ".yml"]: +            dataset_file = dataset_path +            dataset_dir = dataset_path.parent +        else: +            dataset_file = dataset_path / "dataset.yaml" +            dataset_dir = dataset_path + +        return dataset_dir, dataset_file + +    @staticmethod +    def resolve_file_path(dataset_dir, file_path): +        file_path = Path(file_path) +        if file_path.is_file(): +            return file_path + +        return dataset_dir / file_path + +    def save(self, dataset_path: PathLike): +        """Saves all the dataset objects. + +        Args: +          dataset_path: str/Path indicating directory +            to save the dataset or .yaml file to save +            the dataset info. + +        """ +        dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path) +        dataset = {} +        if self.segments is not None: +            file_name = "segments.csv" +            dataset["segments"] = file_name +            file_path = dataset_dir / file_name +            self.segments.save(file_path) + +        if self.recordings is not None: +            file_names = {} +            for k, v in self.recordings.items(): +                file_name = k + ".csv" +                file_names[k] = file_name +                file_path = dataset_dir / file_name +                v.save(file_path) + +            dataset["recordings"] = file_names + +        if self.features is not None: +            file_names = {} +            for k, v in self.features.items(): +                file_name = k + ".csv" +                file_names[k] = file_name +                file_path = dataset_dir / file_name +                v.save(file_path) + +            dataset["features"] = file_names + +        if self.classes is not None: +            file_names = {} +            for k, v in self.classes.items(): +                file_name = k + ".csv" +                file_names[k] = file_name +                file_path = dataset_dir / file_name +                v.save(file_path) + +            dataset["classes"] = file_names + +        with open(dataset_file, "w") as f: +            yaml.dump(dataset, f)
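+        # the resulting dataset.yaml just maps table names to csv files, e.g. segments: segments.csv, recordings: {recordings: recordings.csv}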
+ + """ + dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path) + with open(dataset_file, "w") as f: + dataset = yaml.safe_load(f) + + assert "segments" in dataset + segments = SegmentSet.load( + Dataset.resolve_file_path(dataset_dir, dataset["segments"]) + ) + classes = None + recordings = None + features = None + if "classes" in dataset: + classes = {} + for k, v in dataset["classes"]: + classes[k] = ClassInfo.load(Dataset.resolve_file_path(dataset_dir, v)) + + if "recordings" in dataset: + recordings = {} + for k, v in dataset["recordings"]: + recordings[k] = RecordingSet.load( + Dataset.resolve_file_path(dataset_dir, v) + ) + + if "features" in dataset: + features = {} + for k, v in dataset["features"]: + features[k] = FeatureSet.load(Dataset.resolve_file_path(dataset_dir, v)) + + return cls(segments, classes, recordings, features) diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index f2262217..fdf854c1 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -14,7 +14,7 @@ from .list_utils import split_list, split_list_group_by_key -class InfoTable(object): +class InfoTable: """This is a base class to store information about recordings, segments, features, etc. @@ -131,7 +131,10 @@ def load(cls, file_path, sep=None): def sort(self, column="id", ascending=True): """Sorts the table by column""" - self.df.sort_values(by=column, inplace=True, ascending=ascending) + if column == "id": + self.df.sort_index(inplace=True, ascending=ascending) + else: + self.df.sort_values(by=column, inplace=True, ascending=ascending) def split(self, idx, num_parts, group_by=None): """Splits SCPList into num_parts and return part idx. diff --git a/hyperion/utils/misc.py b/hyperion/utils/misc.py index 369962fd..4ab3ce0a 100644 --- a/hyperion/utils/misc.py +++ b/hyperion/utils/misc.py @@ -4,10 +4,14 @@ Miscellaneous functions """ +from typing import TypeVar from inspect import signature +from pathlib import Path import numpy as np +PathLike = TypeVar("PathLike", str, Path, None) + def generate_data(g): while 1: From e44eb755e1da333c401f5f003cfa110e470a0a40 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 3 May 2023 16:44:00 -0400 Subject: [PATCH 097/154] isort everything again --- hyperion/bin/adv_finetune_xvector_from_wav.py | 10 +++------- hyperion/bin/apply_mvn_select_frames.py | 5 +++-- hyperion/bin/audio_to_duration.py | 5 +++-- hyperion/bin/compute_energy_vad.py | 5 +++-- hyperion/bin/compute_mfcc_feats.py | 5 +++-- hyperion/bin/copy_feats.py | 1 + hyperion/bin/decode_wav2transducer.py | 5 +++-- hyperion/bin/decode_wav2vec2rnn_transducer.py | 5 +++-- .../eval_xvec_cosine_scoring_from_adv_test_wav.py | 5 +++-- ...ec_cosine_scoring_from_adv_test_wav_wavegan.py | 5 +++-- .../eval_xvec_cosine_scoring_from_art_test_wav.py | 9 +++++---- .../bin/eval_xvec_cosine_scoring_from_test_wav.py | 5 +++-- ...c_cosine_scoring_from_transfer_adv_test_wav.py | 5 +++-- ...c_cosine_scoring_from_transfer_art_test_wav.py | 9 +++++---- hyperion/bin/eval_xvec_logits_from_wav.py | 5 +++-- hyperion/bin/extract_wav2vec2xvectors.py | 7 ++++--- hyperion/bin/extract_xvectors_from_feats.py | 5 +++-- hyperion/bin/extract_xvectors_from_wav.py | 5 +++-- .../bin/extract_xvectors_slidwin_from_feats.py | 7 ++++--- hyperion/bin/extract_xvectors_slidwin_from_wav.py | 7 ++++--- hyperion/bin/finetune_wav2vec2transducer.py | 5 +++-- hyperion/bin/finetune_wav2vec2xvector.py | 5 +++-- hyperion/bin/finetune_xvector_dfr_from_feats.py | 5 +++-- 
hyperion/bin/finetune_xvector_dfr_from_wav.py | 5 +++-- hyperion/bin/finetune_xvector_from_feats.py | 5 +++-- hyperion/bin/finetune_xvector_from_wav.py | 5 +++-- .../bin/generate_adv_attacks_xvector_classif.py | 4 ++-- .../bin/generate_adv_attacks_xvector_verif.py | 4 ++-- hyperion/bin/make_babble_noise_audio_files.py | 7 ++++--- hyperion/bin/pack_wav_rirs.py | 5 +++-- hyperion/bin/plot_embedding_tsne.py | 5 +++-- hyperion/bin/plot_embedding_tsne_per_class.py | 5 +++-- hyperion/bin/prepare_data.py | 8 ++------ hyperion/bin/preprocess_audio_files.py | 7 ++++--- hyperion/bin/train_wav2rnn_transducer.py | 5 +++-- hyperion/bin/train_wav2vec2rnn_transducer.py | 5 +++-- hyperion/bin/train_wav2vec2transducer.py | 5 +++-- hyperion/bin/train_wav2vec2xvector.py | 5 +++-- hyperion/bin/train_xvector_from_feats.py | 5 +++-- hyperion/bin/train_xvector_from_wav.py | 2 +- hyperion/bin_deprec/ark2hyp.py | 1 + hyperion/bin_deprec/arkvad2nist.py | 1 + hyperion/bin_deprec/compute-gmm-post.py | 3 ++- hyperion/bin_deprec/eval-2class-performance.py | 1 + hyperion/bin_deprec/eval-elbo-ubm.py | 1 + hyperion/bin_deprec/eval-q-scoring-homo-gbe.py | 1 + hyperion/bin_deprec/eval-score-norm.py | 1 + hyperion/bin_deprec/h5vad2nist.py | 1 + hyperion/bin_deprec/init-ubm.py | 3 ++- hyperion/bin_deprec/scores2lre_format.py | 1 + .../torch-train-conformer-enc-v1-vq-dvae.py | 1 + .../torch-train-conformer-enc-v1-vq-vae.py | 1 + hyperion/bin_deprec/torch-train-dc1d-dvae.py | 1 + hyperion/bin_deprec/torch-train-dc1d-vae.py | 1 + hyperion/bin_deprec/torch-train-dc2d-dvae.py | 1 + hyperion/bin_deprec/torch-train-dc2d-vae.py | 1 + hyperion/bin_deprec/torch-train-resnet1d-dvae.py | 1 + hyperion/bin_deprec/torch-train-resnet1d-vae.py | 1 + .../bin_deprec/torch-train-resnet1d-vq-dvae.py | 1 + .../bin_deprec/torch-train-resnet1d-vq-vae.py | 1 + hyperion/bin_deprec/torch-train-resnet2d-dvae.py | 1 + hyperion/bin_deprec/torch-train-resnet2d-vae.py | 5 +++-- .../bin_deprec/torch-train-resnet2d-vq-dvae.py | 1 + .../bin_deprec/torch-train-resnet2d-vq-vae.py | 1 + .../torch-train-transformer-enc-v1-dvae.py | 1 + .../torch-train-transformer-enc-v1-vae.py | 1 + .../torch-train-transformer-enc-v1-vq-dvae.py | 1 + .../torch-train-transformer-enc-v1-vq-vae.py | 1 + hyperion/bin_deprec/torch-train-xvector.py | 1 + hyperion/bin_deprec/train-q-scoring-homo-gbe.py | 1 + hyperion/bin_deprec/vectors2scores.py | 1 + hyperion/bin_deprec2/apply-mvn-select-frames.py | 5 +++-- hyperion/bin_deprec2/compute-mfcc-feats.py | 5 +++-- hyperion/bin_deprec2/copy-feats.py | 1 + hyperion/bin_deprec2/eval-cos-1vs1.py | 1 + hyperion/bin_deprec2/eval-linear-gbe-up.py | 1 + hyperion/bin_deprec2/eval-linear-gbe.py | 1 + hyperion/bin_deprec2/eval-linear-svmc.py | 1 + hyperion/bin_deprec2/eval-logistic-regression.py | 1 + hyperion/bin_deprec2/eval-plda-1vs1.py | 1 + hyperion/bin_deprec2/eval-plda-nvs1.py | 1 + hyperion/bin_deprec2/merge-h5-files.py | 1 + hyperion/bin_deprec2/pack-audio-files.py | 3 ++- hyperion/bin_deprec2/plot-vector-hist.py | 1 + hyperion/bin_deprec2/rttm-to-bin-vad.py | 1 + hyperion/bin_deprec2/segments-to-bin-vad.py | 5 +++-- .../torch-adv-finetune-xvec-from-wav.py | 5 +++-- hyperion/bin_deprec2/torch-adv-finetune-xvec.py | 5 +++-- hyperion/bin_deprec2/torch-compute-mfcc-feats.py | 5 +++-- hyperion/bin_deprec2/torch-eval-vae.py | 1 + ...ec-cosine-scoring-from-adv-test-wav-wavegan.py | 5 +++-- ...-eval-xvec-cosine-scoring-from-adv-test-wav.py | 5 +++-- ...-eval-xvec-cosine-scoring-from-art-test-wav.py | 7 ++++--- 
...orch-eval-xvec-cosine-scoring-from-test-wav.py | 5 +++-- ...c-cosine-scoring-from-transfer-adv-test-wav.py | 5 +++-- ...c-cosine-scoring-from-transfer-art-test-wav.py | 7 ++++--- .../torch-extract-xvectors-from-wav-with-rttm.py | 5 +++-- .../torch-extract-xvectors-slidwin-from-wav.py | 7 ++++--- .../bin_deprec2/torch-extract-xvectors-slidwin.py | 5 +++-- .../torch-extract-xvectors-vae-preproc.py | 5 +++-- hyperion/bin_deprec2/torch-extract-xvectors.py | 5 +++-- hyperion/bin_deprec2/torch-train-dc1d-ae.py | 1 + hyperion/bin_deprec2/torch-train-dvae.py | 5 +++-- .../torch-train-efficientnet-xvec-from-wav.py | 5 +++-- .../bin_deprec2/torch-train-efficientnet-xvec.py | 5 +++-- .../torch-train-resnet-xvec-from-wav.py | 5 +++-- hyperion/bin_deprec2/torch-train-resnet-xvec.py | 5 +++-- .../torch-train-resnet1d-xvec-from-wav.py | 5 +++-- .../torch-train-spinenet-xvec-from-wav.py | 5 +++-- .../bin_deprec2/torch-train-tdnn-xvec-from-wav.py | 5 +++-- hyperion/bin_deprec2/torch-train-tdnn-xvec.py | 5 +++-- .../torch-train-transformer-xvec-v1-from-wav.py | 5 +++-- .../torch-train-transformer-xvec-v1.py | 5 +++-- hyperion/bin_deprec2/torch-train-vae.py | 5 +++-- hyperion/bin_deprec2/torch-train-vq-dvae.py | 5 +++-- hyperion/bin_deprec2/torch-train-vq-vae.py | 5 +++-- hyperion/bin_deprec2/train-cw-up.py | 1 + hyperion/bin_deprec2/train-cw.py | 1 + hyperion/bin_deprec2/train-gaussianizer.py | 1 + hyperion/bin_deprec2/train-lda.py | 1 + hyperion/bin_deprec2/train-linear-gbe-up.py | 1 + hyperion/bin_deprec2/train-linear-gbe.py | 1 + hyperion/bin_deprec2/train-linear-svmc.py | 1 + hyperion/bin_deprec2/train-logistic-regression.py | 1 + hyperion/bin_deprec2/train-mvn.py | 1 + hyperion/bin_deprec2/train-nda.py | 1 + hyperion/bin_deprec2/train-pca.py | 1 + hyperion/bin_deprec2/train-plda.py | 1 + hyperion/data_prep/data_prep.py | 9 ++++++--- hyperion/data_prep/voxceleb2.py | 14 +++++++------- hyperion/torch/adv_attacks/art_attack_factory.py | 2 +- .../torch/adv_attacks/random_attack_factory.py | 3 ++- hyperion/torch/adv_defenses/wave_gan_white.py | 4 +++- hyperion/torch/data/audio_dataset.py | 5 +++-- hyperion/torch/data/bucketing_seg_sampler.py | 1 + .../torch/data/class_weighted_embed_sampler.py | 3 ++- .../data/class_weighted_seg_chunk_sampler.py | 3 ++- hyperion/torch/data/embed_dataset.py | 3 ++- hyperion/torch/data/embed_sampler.py | 3 ++- hyperion/torch/data/feat_seq_dataset.py | 3 ++- hyperion/torch/data/hyp_sampler.py | 3 ++- hyperion/torch/data/paired_feat_seq_dataset.py | 1 + hyperion/torch/data/seg_chunk_sampler.py | 3 ++- hyperion/torch/data/seg_sampler.py | 3 ++- hyperion/torch/data/weighted_embed_sampler.py | 1 + hyperion/torch/data/weighted_seq_sampler.py | 3 ++- hyperion/torch/layer_blocks/__init__.py | 3 ++- hyperion/torch/layer_blocks/etdnn_blocks.py | 1 + hyperion/torch/layer_blocks/resetdnn_blocks.py | 1 + hyperion/torch/layer_blocks/transducer_joiner.py | 3 ++- .../torch/layer_blocks/transducer_predictor.py | 3 ++- hyperion/torch/layers/__init__.py | 3 ++- hyperion/torch/layers/activation_factory.py | 2 +- hyperion/torch/layers/global_pool.py | 1 + hyperion/torch/layers/mvn.py | 3 ++- hyperion/torch/layers/pool_factory.py | 3 ++- hyperion/torch/layers/spec_augment.py | 3 ++- hyperion/torch/loggers/logger.py | 1 + hyperion/torch/loggers/logger_list.py | 1 + hyperion/torch/lr_schedulers/factory.py | 3 ++- hyperion/torch/models/__init__.py | 6 +++--- hyperion/torch/models/transducer/conformer.py | 3 ++- hyperion/torch/models/transducer/decoder.py | 3 ++- 
hyperion/torch/models/transducer/joiner.py | 3 ++- hyperion/torch/models/transducer/transducer.py | 5 +++-- hyperion/torch/models/tvector/tvector.py | 3 ++- .../wav2transducer/hf_wav2rnn_transducer.py | 3 ++- .../models/wav2transducer/hf_wav2transducer.py | 3 ++- .../wav2transducer/hf_wav2vec2_transducer.py | 3 ++- .../hf_wav2vec2conformer_v1_rnn_transducer.py | 3 ++- .../hf_wav2vec2rnn_rnn_transducer.py | 3 ++- .../wav2transducer/hf_wav2vec2rnn_transducer.py | 3 ++- .../models/wav2transducer/wav2rnn_transducer.py | 3 ++- .../wav2xvectors/hf_hubert2resnet1d_xvector.py | 3 ++- .../wav2xvectors/hf_wav2vec2resnet1d_xvector.py | 3 ++- .../torch/models/wav2xvectors/hf_wav2xvector.py | 3 ++- .../wav2xvectors/hf_wavlm2resnet1d_xvector.py | 3 ++- .../models/wav2xvectors/wav2resnet1d_xvector.py | 3 ++- .../models/wav2xvectors/wav2resnet_xvector.py | 3 ++- hyperion/torch/models/wav2xvectors/wav2xvector.py | 3 ++- .../models/xvectors/efficient_net_xvector.py | 3 ++- .../torch/models/xvectors/resnet1d_xvector.py | 3 ++- hyperion/torch/models/xvectors/resnet_xvector.py | 3 ++- .../torch/models/xvectors/spinenet_xvector.py | 3 ++- hyperion/torch/models/xvectors/tdnn_xvector.py | 3 ++- .../models/xvectors/transformer_xvector_v1.py | 3 ++- hyperion/torch/models/xvectors/xvector.py | 4 ++-- hyperion/torch/narchs/audio_feats_mvn.py | 3 ++- hyperion/torch/narchs/classif_head.py | 3 ++- hyperion/torch/narchs/conformer_encoder_v1.py | 8 +++++--- hyperion/torch/narchs/dc1d_decoder.py | 3 ++- hyperion/torch/narchs/dc1d_encoder.py | 3 ++- hyperion/torch/narchs/dc2d_decoder.py | 3 ++- hyperion/torch/narchs/dc2d_encoder.py | 3 ++- hyperion/torch/narchs/efficient_net.py | 3 ++- hyperion/torch/narchs/etdnn.py | 1 + hyperion/torch/narchs/net_arch.py | 1 + hyperion/torch/narchs/resetdnn.py | 1 + hyperion/torch/narchs/resnet.py | 15 +++++---------- hyperion/torch/narchs/resnet1d_decoder.py | 3 ++- hyperion/torch/narchs/resnet1d_encoder.py | 3 ++- hyperion/torch/narchs/resnet2d_decoder.py | 3 ++- hyperion/torch/narchs/resnet2d_encoder.py | 3 ++- hyperion/torch/narchs/rnn_encoder.py | 3 ++- hyperion/torch/narchs/rnn_transducer_decoder.py | 8 +++++--- hyperion/torch/narchs/spinenet.py | 1 + hyperion/torch/narchs/tdnn.py | 1 + hyperion/torch/narchs/transformer_encoder_v1.py | 3 ++- hyperion/torch/optim/factory.py | 3 ++- hyperion/torch/tpm/hf/hf_hubert.py | 5 +++-- hyperion/torch/tpm/hf/hf_wav2vec2.py | 5 +++-- hyperion/torch/tpm/hf/hf_wav2vec_base.py | 5 +++-- hyperion/torch/tpm/hf/hf_wavlm.py | 5 +++-- hyperion/torch/trainers/ae_trainer.py | 3 ++- hyperion/torch/trainers/dvae_trainer.py | 3 ++- hyperion/torch/trainers/torch_trainer.py | 5 +++-- hyperion/torch/trainers/transducer_trainer.py | 5 +++-- hyperion/torch/trainers/vae_trainer.py | 3 ++- hyperion/torch/trainers/vq_dvae_trainer.py | 3 ++- hyperion/torch/trainers/vq_vae_trainer.py | 3 ++- hyperion/torch/trainers/xvector_adv_trainer.py | 3 ++- .../trainers/xvector_adv_trainer_from_wav.py | 3 ++- .../trainers/xvector_trainer_deep_feat_reg.py | 3 ++- hyperion/torch/utils/ddp.py | 7 ++++--- hyperion/torch/utils/metric_acc.py | 1 + hyperion/utils/__init__.py | 5 ++--- hyperion/utils/dataset.py | 9 +++++---- hyperion/utils/lexicon.py | 1 + hyperion/utils/misc.py | 2 +- hyperion/utils/text.py | 1 + requirements.txt | 2 ++ 231 files changed, 500 insertions(+), 290 deletions(-) diff --git a/hyperion/bin/adv_finetune_xvector_from_wav.py b/hyperion/bin/adv_finetune_xvector_from_wav.py index f387c7ac..7be882e0 100755 --- a/hyperion/bin/adv_finetune_xvector_from_wav.py +++ 
b/hyperion/bin/adv_finetune_xvector_from_wav.py @@ -11,12 +11,8 @@ from pathlib import Path import numpy as np -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) import torch import torch.nn as nn @@ -24,8 +20,8 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.adv_attacks import AttackFactory from hyperion.torch.data import AudioDataset as AD -from hyperion.torch.data import SegSamplerFactory from hyperion.torch.data import ClassWeightedSeqSampler as Sampler +from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.models import EfficientNetXVector as EXVec from hyperion.torch.models import ResNet1dXVector as R1dXVec diff --git a/hyperion/bin/apply_mvn_select_frames.py b/hyperion/bin/apply_mvn_select_frames.py index 53a01d6d..a2456dc9 100755 --- a/hyperion/bin/apply_mvn_select_frames.py +++ b/hyperion/bin/apply_mvn_select_frames.py @@ -10,6 +10,9 @@ import time import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import RandomAccessDataReaderFactory as RDRF @@ -18,8 +21,6 @@ from hyperion.np.feats import MeanVarianceNorm as MVN from hyperion.utils import Utt2Info from hyperion.utils.kaldi_matrix import compression_methods -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def process_feats( diff --git a/hyperion/bin/audio_to_duration.py b/hyperion/bin/audio_to_duration.py index ac8852a4..38e8dff2 100755 --- a/hyperion/bin/audio_to_duration.py +++ b/hyperion/bin/audio_to_duration.py @@ -9,11 +9,12 @@ import time import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + from hyperion.hyp_defs import config_logger from hyperion.io import SequentialAudioReader as AR from hyperion.utils import SegmentSet -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def audio_to_duration(audio_file, output_file, **kwargs): diff --git a/hyperion/bin/compute_energy_vad.py b/hyperion/bin/compute_energy_vad.py index e4d47ef0..15d74f3a 100755 --- a/hyperion/bin/compute_energy_vad.py +++ b/hyperion/bin/compute_energy_vad.py @@ -9,12 +9,13 @@ import time import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.np.feats import EnergyVAD -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def compute_vad(input_path, output_path, write_num_frames, **kwargs): diff --git a/hyperion/bin/compute_mfcc_feats.py b/hyperion/bin/compute_mfcc_feats.py index c8193e5c..a83f95d1 100755 --- a/hyperion/bin/compute_mfcc_feats.py +++ b/hyperion/bin/compute_mfcc_feats.py @@ -9,14 +9,15 @@ import time import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import SequentialDataReaderFactory as DRF from 
hyperion.io import compression_methods
 from hyperion.np.feats import MFCC
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def compute_mfcc_feats(
diff --git a/hyperion/bin/copy_feats.py b/hyperion/bin/copy_feats.py
index 4549caec..0385cc55 100755
--- a/hyperion/bin/copy_feats.py
+++ b/hyperion/bin/copy_feats.py
@@ -12,6 +12,7 @@ import time
 
 import numpy as np
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import CopyFeats as CF
diff --git a/hyperion/bin/decode_wav2transducer.py b/hyperion/bin/decode_wav2transducer.py
index 420f8a9f..81fa8803 100755
--- a/hyperion/bin/decode_wav2transducer.py
+++ b/hyperion/bin/decode_wav2transducer.py
@@ -13,6 +13,9 @@ import numpy as np
 import pandas as pd
 import sentencepiece as spm
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -25,8 +28,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/decode_wav2vec2rnn_transducer.py b/hyperion/bin/decode_wav2vec2rnn_transducer.py
index 4fdc3140..8ef8d414 100755
--- a/hyperion/bin/decode_wav2vec2rnn_transducer.py
+++ b/hyperion/bin/decode_wav2vec2rnn_transducer.py
@@ -13,6 +13,9 @@ import numpy as np
 import pandas as pd
 import sentencepiece as spm
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -26,8 +29,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py
index 19ba6546..bb01162f 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py
@@ -10,6 +10,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -26,8 +29,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py
index 2f7506c7..c483ce39 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py
@@ -12,6 +12,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -29,8 +32,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 torch.backends.cudnn.enabled = False
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py
index 4a654212..fba182c4 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py
@@ -11,10 +11,13 @@ import numpy as np
 import pandas as pd
-import torch
-import torch.nn as nn
 from art.classifiers import PyTorchClassifier
 from art.estimators.classification import PyTorchClassifier
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
+import torch
+import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -30,8 +33,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
index 7ab46d11..3cfde93e 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -24,8 +27,6 @@ from hyperion.torch.utils.misc import l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
index b2f6736d..44bdf59d 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
@@ -10,6 +10,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -26,8 +29,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
index 0973d3ff..676575fd 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
@@ -11,10 +11,13 @@ import numpy as np
 import pandas as pd
-import torch
-import torch.nn as nn
 from art.classifiers import PyTorchClassifier
 from art.estimators.classification import PyTorchClassifier
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
+import torch
+import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -30,8 +33,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin/eval_xvec_logits_from_wav.py b/hyperion/bin/eval_xvec_logits_from_wav.py
index b95b2a7c..da6389fb 100755
--- a/hyperion/bin/eval_xvec_logits_from_wav.py
+++ b/hyperion/bin/eval_xvec_logits_from_wav.py
@@ -11,6 +11,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -21,8 +24,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py
index de0a8637..37d6a2a6 100755
--- a/hyperion/bin/extract_wav2vec2xvectors.py
+++ b/hyperion/bin/extract_wav2vec2xvectors.py
@@ -11,8 +11,11 @@ import numpy as np
 import pandas as pd
-import torch
 import torchaudio.transforms as tat
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
+import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
@@ -21,8 +24,6 @@ from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 resamplers = {}
diff --git a/hyperion/bin/extract_xvectors_from_feats.py b/hyperion/bin/extract_xvectors_from_feats.py
index 13ad4277..926e0bcc 100755
--- a/hyperion/bin/extract_xvectors_from_feats.py
+++ b/hyperion/bin/extract_xvectors_from_feats.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -19,8 +22,6 @@ from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py
index 4f48bbdc..addabbcf 100755
--- a/hyperion/bin/extract_xvectors_from_wav.py
+++ b/hyperion/bin/extract_xvectors_from_wav.py
@@ -11,6 +11,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -21,8 +24,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/extract_xvectors_slidwin_from_feats.py b/hyperion/bin/extract_xvectors_slidwin_from_feats.py
index fb6583e2..e3d2fcbb 100755
--- a/hyperion/bin/extract_xvectors_slidwin_from_feats.py
+++ b/hyperion/bin/extract_xvectors_slidwin_from_feats.py
@@ -10,8 +10,11 @@ import time
 
 import numpy as np
-import torch
 import yaml
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
+import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialDataReaderFactory as DRF
@@ -20,8 +23,6 @@ from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py
index 9f1728eb..2b1bba3b 100755
--- a/hyperion/bin/extract_xvectors_slidwin_from_wav.py
+++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py
@@ -11,8 +11,11 @@ import numpy as np
 import pandas as pd
-import torch
 import yaml
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
+import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
@@ -22,8 +25,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin/finetune_wav2vec2transducer.py b/hyperion/bin/finetune_wav2vec2transducer.py
index 6f17f800..df267e72 100755
--- a/hyperion/bin/finetune_wav2vec2transducer.py
+++ b/hyperion/bin/finetune_wav2vec2transducer.py
@@ -12,6 +12,9 @@ import k2
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -22,8 +25,6 @@ from hyperion.torch.models import HFWav2Vec2Transducer
 from hyperion.torch.trainers import TransducerTrainer as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 from torch.nn.utils.rnn import pad_sequence
 
 model_dict = {
diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py
index d9d9c281..b3edd9b5 100755
--- a/hyperion/bin/finetune_wav2vec2xvector.py
+++ b/hyperion/bin/finetune_wav2vec2xvector.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -23,8 +26,6 @@ HFWavLM2ResNet1dXVector)
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 model_dict = {
     "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector,
diff --git a/hyperion/bin/finetune_xvector_dfr_from_feats.py b/hyperion/bin/finetune_xvector_dfr_from_feats.py
index 17cafb85..2ac01025 100755
--- a/hyperion/bin/finetune_xvector_dfr_from_feats.py
+++ b/hyperion/bin/finetune_xvector_dfr_from_feats.py
@@ -12,6 +12,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -22,8 +25,6 @@ from hyperion.torch.models import XVector as XVec
 from hyperion.torch.trainers import XVectorTrainerDeepFeatReg as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin/finetune_xvector_dfr_from_wav.py b/hyperion/bin/finetune_xvector_dfr_from_wav.py
index f7832a47..ff97d3ca 100755
--- a/hyperion/bin/finetune_xvector_dfr_from_wav.py
+++ b/hyperion/bin/finetune_xvector_dfr_from_wav.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -21,8 +24,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerDeepFeatRegFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin/finetune_xvector_from_feats.py b/hyperion/bin/finetune_xvector_from_feats.py
index ac9c2d0b..7a1fb5a9 100755
--- a/hyperion/bin/finetune_xvector_from_feats.py
+++ b/hyperion/bin/finetune_xvector_from_feats.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch import TorchModelLoader as TML
@@ -20,8 +23,6 @@ from hyperion.torch.models import XVector as XVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py
index 2e120815..227892ea 100755
--- a/hyperion/bin/finetune_xvector_from_wav.py
+++ b/hyperion/bin/finetune_xvector_from_wav.py
@@ -10,6 +10,9 @@ import time
 from pathlib import Path
 
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch import TorchModelLoader as TML
@@ -25,8 +28,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 xvec_dict = {
     "resnet": RXVec,
diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py
index 092a5029..a058893d 100755
--- a/hyperion/bin/generate_adv_attacks_xvector_classif.py
+++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py
@@ -11,12 +11,12 @@ import numpy as np
 import pandas as pd
-import torch
-import torch.nn as nn
 import yaml
 from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
                           namespace_to_dict)
 
+import torch
+import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
diff --git a/hyperion/bin/generate_adv_attacks_xvector_verif.py b/hyperion/bin/generate_adv_attacks_xvector_verif.py
index 5fae0bbb..83375cb6 100755
--- a/hyperion/bin/generate_adv_attacks_xvector_verif.py
+++ b/hyperion/bin/generate_adv_attacks_xvector_verif.py
@@ -11,12 +11,12 @@ import numpy as np
 import pandas as pd
-import torch
-import torch.nn as nn
 import yaml
 from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
                           namespace_to_dict)
 
+import torch
+import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
diff --git a/hyperion/bin/make_babble_noise_audio_files.py b/hyperion/bin/make_babble_noise_audio_files.py
index 4a356037..972ff01f 100755
--- a/hyperion/bin/make_babble_noise_audio_files.py
+++ b/hyperion/bin/make_babble_noise_audio_files.py
@@ -10,14 +10,15 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+from scipy import ndimage, signal
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import AudioWriter as Writer
 from hyperion.io import RandomAccessAudioReader as AR
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-from scipy import ndimage, signal
 
 
 def make_noise(xs):
diff --git a/hyperion/bin/pack_wav_rirs.py b/hyperion/bin/pack_wav_rirs.py
index c5ddd25c..dccf58da 100755
--- a/hyperion/bin/pack_wav_rirs.py
+++ b/hyperion/bin/pack_wav_rirs.py
@@ -10,11 +10,12 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def pack_wav_rirs(input_path, output_spec, **kwargs):
diff --git a/hyperion/bin/plot_embedding_tsne.py b/hyperion/bin/plot_embedding_tsne.py
index e2157e3e..e011dfe8 100755
--- a/hyperion/bin/plot_embedding_tsne.py
+++ b/hyperion/bin/plot_embedding_tsne.py
@@ -13,12 +13,13 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo,
+                          ArgumentParser, namespace_to_dict)
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.np.transforms import PCA, LNorm, SklTSNE
 from hyperion.utils import SegmentSet
-from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo,
-                          ArgumentParser, namespace_to_dict)
 
 matplotlib.use("Agg")
 colors = ["b", "g", "r", "c", "m", "y", "k"]
diff --git a/hyperion/bin/plot_embedding_tsne_per_class.py b/hyperion/bin/plot_embedding_tsne_per_class.py
index 6af0202c..6f35f074 100755
--- a/hyperion/bin/plot_embedding_tsne_per_class.py
+++ b/hyperion/bin/plot_embedding_tsne_per_class.py
@@ -13,14 +13,15 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo,
+                          ArgumentParser, namespace_to_dict)
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import RandomAccessDataReaderFactory as DRF
 from hyperion.np.clustering import AHC
 from hyperion.np.transforms import PCA, LNorm, SklTSNE
 from hyperion.utils import SegmentSet
 from hyperion.utils.math import cosine_scoring
-from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo,
-                          ArgumentParser, namespace_to_dict)
 
 matplotlib.use("Agg")
 colors = ["b", "g", "r", "c", "m", "y", "k"]
diff --git a/hyperion/bin/prepare_data.py b/hyperion/bin/prepare_data.py
index df212a94..4105f482 100755
--- a/hyperion/bin/prepare_data.py
+++ b/hyperion/bin/prepare_data.py
@@ -6,12 +6,8 @@ import logging
 from pathlib import Path
 
-from jsonargparse import (
-    ActionConfigFile,
-    ActionParser,
-    ArgumentParser,
-    namespace_to_dict,
-)
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
 
 from hyperion.data_prep import DataPrep
 from hyperion.hyp_defs import config_logger
diff --git a/hyperion/bin/preprocess_audio_files.py b/hyperion/bin/preprocess_audio_files.py
index e8adfd16..2f4e5cbc 100755
--- a/hyperion/bin/preprocess_audio_files.py
+++ b/hyperion/bin/preprocess_audio_files.py
@@ -10,14 +10,15 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+from scipy import ndimage, signal
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import AudioWriter as Writer
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
-from scipy import ndimage, signal
 
 
 def process_vad(vad, length, fs, dilation, erosion):
diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py
index 8930b299..26fcf72c 100755
--- a/hyperion/bin/train_wav2rnn_transducer.py
+++ b/hyperion/bin/train_wav2rnn_transducer.py
@@ -12,6 +12,9 @@ import k2
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -20,8 +23,6 @@ from hyperion.torch.models import Wav2RNNRNNTransducer
 from hyperion.torch.trainers import TransducerTrainer as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 from torch.nn.utils.rnn import pad_sequence
 
 model_dict = {
diff --git a/hyperion/bin/train_wav2vec2rnn_transducer.py b/hyperion/bin/train_wav2vec2rnn_transducer.py
index 7018c406..5daffb6d 100755
--- a/hyperion/bin/train_wav2vec2rnn_transducer.py
+++ b/hyperion/bin/train_wav2vec2rnn_transducer.py
@@ -12,6 +12,9 @@ import k2
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -22,8 +25,6 @@ HFWav2Vec2RNNTransducer)
 from hyperion.torch.trainers import TransducerTrainer as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 from torch.nn.utils.rnn import pad_sequence
 
 model_dict = {
diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py
index 55f3b996..ce53be86 100755
--- a/hyperion/bin/train_wav2vec2transducer.py
+++ b/hyperion/bin/train_wav2vec2transducer.py
@@ -12,6 +12,9 @@ import k2
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -21,8 +24,6 @@ from hyperion.torch.models import HFWav2Vec2Transducer
 from hyperion.torch.trainers import TransducerTrainer as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 from torch.nn.utils.rnn import pad_sequence
 
 model_dict = {
diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py
index 8e1653b1..5e7ecafa 100755
--- a/hyperion/bin/train_wav2vec2xvector.py
+++ b/hyperion/bin/train_wav2vec2xvector.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -22,8 +25,6 @@ HFWavLM2ResNet1dXVector)
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 model_dict = {
     "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector,
diff --git a/hyperion/bin/train_xvector_from_feats.py b/hyperion/bin/train_xvector_from_feats.py
index 71bba080..7f4ab0fa 100755
--- a/hyperion/bin/train_xvector_from_feats.py
+++ b/hyperion/bin/train_xvector_from_feats.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -25,8 +28,6 @@ from hyperion.torch.models import TransformerXVectorV1 as TFXVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 xvec_dict = {
     "resnet": RXVec,
diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py
index a979b56b..5c999dd1 100755
--- a/hyperion/bin/train_xvector_from_wav.py
+++ b/hyperion/bin/train_xvector_from_wav.py
@@ -10,10 +10,10 @@ import time
 from pathlib import Path
 
-import torch
 from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
                           namespace_to_dict)
 
+import torch
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch.data import AudioDataset as AD
 from hyperion.torch.data import SegSamplerFactory
diff --git a/hyperion/bin_deprec/ark2hyp.py b/hyperion/bin_deprec/ark2hyp.py
index a25c561b..abcb4457 100755
--- a/hyperion/bin_deprec/ark2hyp.py
+++ b/hyperion/bin_deprec/ark2hyp.py
@@ -13,6 +13,7 @@ import time
 
 import numpy as np
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import HypDataWriter, KaldiDataReader
diff --git a/hyperion/bin_deprec/arkvad2nist.py b/hyperion/bin_deprec/arkvad2nist.py
index 15a04f67..559371be 100755
--- a/hyperion/bin_deprec/arkvad2nist.py
+++ b/hyperion/bin_deprec/arkvad2nist.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.io import KaldiDataReader
diff --git a/hyperion/bin_deprec/compute-gmm-post.py b/hyperion/bin_deprec/compute-gmm-post.py
index 45d17623..58675336 100755
--- a/hyperion/bin_deprec/compute-gmm-post.py
+++ b/hyperion/bin_deprec/compute-gmm-post.py
@@ -14,12 +14,13 @@ import time
 
 import numpy as np
+from keras import backend as K
+
 from hyperion.helpers import SequenceReader as SR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter
 from hyperion.pdfs import DiagGMM
 from hyperion.transforms import TransformList
-from keras import backend as K
 
 
 def to_sparse(r, num_comp):
diff --git a/hyperion/bin_deprec/eval-2class-performance.py b/hyperion/bin_deprec/eval-2class-performance.py
index d149deb2..eff16830 100755
--- a/hyperion/bin_deprec/eval-2class-performance.py
+++ b/hyperion/bin_deprec/eval-2class-performance.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.hyp_defs import config_logger
 from hyperion.metrics import compute_eer
 from hyperion.utils.trial_key import TrialKey
diff --git a/hyperion/bin_deprec/eval-elbo-ubm.py b/hyperion/bin_deprec/eval-elbo-ubm.py
index 5f2eab28..bf4839db 100755
--- a/hyperion/bin_deprec/eval-elbo-ubm.py
+++ b/hyperion/bin_deprec/eval-elbo-ubm.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.helpers import SequenceReader as SR
 from hyperion.hyp_defs import config_logger, float_cpu
 from hyperion.pdfs import DiagGMM
diff --git a/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py b/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py
index 7817b570..4548e49b 100755
--- a/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py
+++ b/hyperion/bin_deprec/eval-q-scoring-homo-gbe.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.classifiers import QScoringHomoGBE as GBE
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger
diff --git a/hyperion/bin_deprec/eval-score-norm.py b/hyperion/bin_deprec/eval-score-norm.py
index 4f66a8e4..4b620518 100755
--- a/hyperion/bin_deprec/eval-score-norm.py
+++ b/hyperion/bin_deprec/eval-score-norm.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.hyp_defs import config_logger
 from hyperion.score_norm import *
 from hyperion.utils.trial_ndx import TrialNdx
diff --git a/hyperion/bin_deprec/h5vad2nist.py b/hyperion/bin_deprec/h5vad2nist.py
index 21d61d3a..fb45c22b 100755
--- a/hyperion/bin_deprec/h5vad2nist.py
+++ b/hyperion/bin_deprec/h5vad2nist.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import HypDataReader
diff --git a/hyperion/bin_deprec/init-ubm.py b/hyperion/bin_deprec/init-ubm.py
index 77aed464..204ca855 100755
--- a/hyperion/bin_deprec/init-ubm.py
+++ b/hyperion/bin_deprec/init-ubm.py
@@ -15,11 +15,12 @@ import time
 
 import numpy as np
+from keras import backend as K
+
 from hyperion.helpers import SequenceReader as SR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.pdfs import DiagGMM
 from hyperion.utils.multithreading import threadsafe_generator
-from keras import backend as K
 
 
 @threadsafe_generator
diff --git a/hyperion/bin_deprec/scores2lre_format.py b/hyperion/bin_deprec/scores2lre_format.py
index fcba8804..717c1535 100755
--- a/hyperion/bin_deprec/scores2lre_format.py
+++ b/hyperion/bin_deprec/scores2lre_format.py
@@ -12,6 +12,7 @@ import time
 
 import numpy as np
+
 from hyperion.hyp_defs import config_logger
 from hyperion.utils.trial_ndx import TrialNdx
 from hyperion.utils.trial_scores import TrialScores
diff --git a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py
index 5c1b19fc..608a5271 100755
--- a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-dvae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py
index 326175ab..a4cc54e6 100755
--- a/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-conformer-enc-v1-vq-vae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-dc1d-dvae.py b/hyperion/bin_deprec/torch-train-dc1d-dvae.py
index 7a4f9634..1b88beba 100755
--- a/hyperion/bin_deprec/torch-train-dc1d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-dc1d-dvae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-dc1d-vae.py b/hyperion/bin_deprec/torch-train-dc1d-vae.py
index 1de4560a..dd5d2e72 100755
--- a/hyperion/bin_deprec/torch-train-dc1d-vae.py
+++ b/hyperion/bin_deprec/torch-train-dc1d-vae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-dc2d-dvae.py b/hyperion/bin_deprec/torch-train-dc2d-dvae.py
index 5bbc53bf..3f7cb17d 100755
--- a/hyperion/bin_deprec/torch-train-dc2d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-dc2d-dvae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-dc2d-vae.py b/hyperion/bin_deprec/torch-train-dc2d-vae.py
index b073c4c0..5b97f55c 100755
--- a/hyperion/bin_deprec/torch-train-dc2d-vae.py
+++ b/hyperion/bin_deprec/torch-train-dc2d-vae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-dvae.py b/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
index c10c6fe7..ca6f6996 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-dvae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vae.py b/hyperion/bin_deprec/torch-train-resnet1d-vae.py
index cf460a0a..a6218567 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py b/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
index a1b13d95..89448754 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vq-dvae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py b/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
index a773d9aa..4a84bbff 100755
--- a/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet1d-vq-vae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-dvae.py b/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
index a3857701..3f6cd6ba 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-dvae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vae.py b/hyperion/bin_deprec/torch-train-resnet2d-vae.py
index 695472cb..4e853230 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vae.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -21,8 +24,6 @@ from hyperion.torch.narchs import ResNet2dEncoder as Encoder
 from hyperion.torch.trainers import VAETrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py b/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
index fdcc0c47..5e0add50 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vq-dvae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py b/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
index 17d4c474..6398d959 100755
--- a/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-resnet2d-vq-vae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
index ff8ef4dc..0137e101 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-dvae.py
@@ -11,6 +11,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
index 92dad725..71021825 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
index 18888706..a6908c4f 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-dvae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
index 566ea106..b3b07682 100755
--- a/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
+++ b/hyperion/bin_deprec/torch-train-transformer-enc-v1-vq-vae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec/torch-train-xvector.py b/hyperion/bin_deprec/torch-train-xvector.py
index 88147d37..4c69eb25 100755
--- a/hyperion/bin_deprec/torch-train-xvector.py
+++ b/hyperion/bin_deprec/torch-train-xvector.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.torch.data import ClassWeightedSeqSampler as Sampler
diff --git a/hyperion/bin_deprec/train-q-scoring-homo-gbe.py b/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
index 48967f58..8a348728 100755
--- a/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
+++ b/hyperion/bin_deprec/train-q-scoring-homo-gbe.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.classifiers import QScoringHomoGBE as GBE
 from hyperion.helpers import VectorClassReader as VCR
 from hyperion.hyp_defs import config_logger
diff --git a/hyperion/bin_deprec/vectors2scores.py b/hyperion/bin_deprec/vectors2scores.py
index 2ff635c2..ab4be8ac 100755
--- a/hyperion/bin_deprec/vectors2scores.py
+++ b/hyperion/bin_deprec/vectors2scores.py
@@ -11,6 +11,7 @@ import time
 
 import numpy as np
+
 from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.utils.trial_scores import TrialScores
diff --git a/hyperion/bin_deprec2/apply-mvn-select-frames.py b/hyperion/bin_deprec2/apply-mvn-select-frames.py
index 53a01d6d..a2456dc9 100755
--- a/hyperion/bin_deprec2/apply-mvn-select-frames.py
+++ b/hyperion/bin_deprec2/apply-mvn-select-frames.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import RandomAccessDataReaderFactory as RDRF
@@ -18,8 +21,6 @@ from hyperion.np.feats import MeanVarianceNorm as MVN
 from hyperion.utils import Utt2Info
 from hyperion.utils.kaldi_matrix import compression_methods
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def process_feats(
diff --git a/hyperion/bin_deprec2/compute-mfcc-feats.py b/hyperion/bin_deprec2/compute-mfcc-feats.py
index c8193e5c..a83f95d1 100755
--- a/hyperion/bin_deprec2/compute-mfcc-feats.py
+++ b/hyperion/bin_deprec2/compute-mfcc-feats.py
@@ -9,14 +9,15 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.io import compression_methods
 from hyperion.np.feats import MFCC
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def compute_mfcc_feats(
diff --git a/hyperion/bin_deprec2/copy-feats.py b/hyperion/bin_deprec2/copy-feats.py
index 4549caec..0385cc55 100755
--- a/hyperion/bin_deprec2/copy-feats.py
+++ b/hyperion/bin_deprec2/copy-feats.py
@@ -12,6 +12,7 @@ import time
 
 import numpy as np
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import CopyFeats as CF
diff --git a/hyperion/bin_deprec2/eval-cos-1vs1.py b/hyperion/bin_deprec2/eval-cos-1vs1.py
index f60fdd4b..de508333 100755
--- a/hyperion/bin_deprec2/eval-cos-1vs1.py
+++ b/hyperion/bin_deprec2/eval-cos-1vs1.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.helpers import TrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.np.transforms import LNorm, TransformList
diff --git a/hyperion/bin_deprec2/eval-linear-gbe-up.py b/hyperion/bin_deprec2/eval-linear-gbe-up.py
index ba646498..d82bf967 100755
--- a/hyperion/bin_deprec2/eval-linear-gbe-up.py
+++ b/hyperion/bin_deprec2/eval-linear-gbe-up.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter as HDW
diff --git a/hyperion/bin_deprec2/eval-linear-gbe.py b/hyperion/bin_deprec2/eval-linear-gbe.py
index 9828944d..cf788392 100755
--- a/hyperion/bin_deprec2/eval-linear-gbe.py
+++ b/hyperion/bin_deprec2/eval-linear-gbe.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter as HDW
diff --git a/hyperion/bin_deprec2/eval-linear-svmc.py b/hyperion/bin_deprec2/eval-linear-svmc.py
index 3b8b644b..ba4c5e81 100755
--- a/hyperion/bin_deprec2/eval-linear-svmc.py
+++ b/hyperion/bin_deprec2/eval-linear-svmc.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter as HDW
diff --git a/hyperion/bin_deprec2/eval-logistic-regression.py b/hyperion/bin_deprec2/eval-logistic-regression.py
index 56507a9a..992ca7b8 100755
--- a/hyperion/bin_deprec2/eval-logistic-regression.py
+++ b/hyperion/bin_deprec2/eval-logistic-regression.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.helpers import ClassifTrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import HypDataWriter as HDW
diff --git a/hyperion/bin_deprec2/eval-plda-1vs1.py b/hyperion/bin_deprec2/eval-plda-1vs1.py
index 1a966f57..5a810cf7 100755
--- a/hyperion/bin_deprec2/eval-plda-1vs1.py
+++ b/hyperion/bin_deprec2/eval-plda-1vs1.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.helpers import PLDAFactory as F
 from hyperion.helpers import TrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
diff --git a/hyperion/bin_deprec2/eval-plda-nvs1.py b/hyperion/bin_deprec2/eval-plda-nvs1.py
index 5ead954a..5c5d200c 100755
--- a/hyperion/bin_deprec2/eval-plda-nvs1.py
+++ b/hyperion/bin_deprec2/eval-plda-nvs1.py
@@ -14,6 +14,7 @@ import time
 
 import numpy as np
+
 from hyperion.helpers import PLDAFactory as F
 from hyperion.helpers import TrialDataReader as TDR
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
diff --git a/hyperion/bin_deprec2/merge-h5-files.py b/hyperion/bin_deprec2/merge-h5-files.py
index 51207343..aeda3bab 100755
--- a/hyperion/bin_deprec2/merge-h5-files.py
+++ b/hyperion/bin_deprec2/merge-h5-files.py
@@ -12,6 +12,7 @@ import time
 
 import numpy as np
+
 from hyperion.io import H5Merger
diff --git a/hyperion/bin_deprec2/pack-audio-files.py b/hyperion/bin_deprec2/pack-audio-files.py
index a843825a..5d544df4 100755
--- a/hyperion/bin_deprec2/pack-audio-files.py
+++ b/hyperion/bin_deprec2/pack-audio-files.py
@@ -11,12 +11,13 @@ import time
 
 import numpy as np
+from scipy import ndimage, signal
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import PackedAudioWriter as Writer
 from hyperion.io import SequentialAudioReader as AR
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.io import WSpecifier as WS
-from scipy import ndimage, signal
 
 
 def process_vad(vad, length, fs, dilation, erosion):
diff --git a/hyperion/bin_deprec2/plot-vector-hist.py b/hyperion/bin_deprec2/plot-vector-hist.py
index a4d842c0..75236726 100755
--- a/hyperion/bin_deprec2/plot-vector-hist.py
+++ b/hyperion/bin_deprec2/plot-vector-hist.py
@@ -15,6 +15,7 @@ matplotlib.use("Agg")
 import matplotlib.pyplot as plt
+
 from hyperion.helpers import VectorReader as VR
 from hyperion.hyp_defs import config_logger
 from hyperion.np.transforms import TransformList
diff --git a/hyperion/bin_deprec2/rttm-to-bin-vad.py b/hyperion/bin_deprec2/rttm-to-bin-vad.py
index 610a0019..19e98d8f 100755
--- a/hyperion/bin_deprec2/rttm-to-bin-vad.py
+++ b/hyperion/bin_deprec2/rttm-to-bin-vad.py
@@ -11,6 +11,7 @@ import numpy as np
 import pandas as pd
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.utils import RTTM, SegmentList
diff --git a/hyperion/bin_deprec2/segments-to-bin-vad.py b/hyperion/bin_deprec2/segments-to-bin-vad.py
index 56e6bf9f..24021a4b 100755
--- a/hyperion/bin_deprec2/segments-to-bin-vad.py
+++ b/hyperion/bin_deprec2/segments-to-bin-vad.py
@@ -10,11 +10,12 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.utils import SegmentList
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def segments_to_bin_vad(
diff --git a/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py b/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
index 9dde434d..ad33515c 100755
--- a/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-adv-finetune-xvec-from-wav.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -23,8 +26,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorAdvTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-adv-finetune-xvec.py b/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
index 88d21cdb..850233e2 100755
--- a/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
+++ b/hyperion/bin_deprec2/torch-adv-finetune-xvec.py
@@ -12,6 +12,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, set_float_cpu
 from hyperion.torch import TorchModelLoader as TML
@@ -22,8 +25,6 @@ from hyperion.torch.models import XVector as XVec
 from hyperion.torch.trainers import XVectorAdvTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec2/torch-compute-mfcc-feats.py b/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
index 17565a3c..07f71bfb 100755
--- a/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
+++ b/hyperion/bin_deprec2/torch-compute-mfcc-feats.py
@@ -8,6 +8,9 @@ import sys
 import time
 
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
@@ -15,8 +18,6 @@ from hyperion.io import SequentialDataReaderFactory as DRF
 from hyperion.io import compression_methods
 from hyperion.torch.layers import AudioFeatsFactory as AFF
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def compute_mfcc_feats(
diff --git a/hyperion/bin_deprec2/torch-eval-vae.py b/hyperion/bin_deprec2/torch-eval-vae.py
index bf99dddd..d676b0f1 100755
--- a/hyperion/bin_deprec2/torch-eval-vae.py
+++ b/hyperion/bin_deprec2/torch-eval-vae.py
@@ -16,6 +16,7 @@ matplotlib.use("Agg")
 # matplotlib.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
 import matplotlib.pyplot as plt
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
index 1c00ed2a..aaa91214 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav-wavegan.py
@@ -12,6 +12,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -29,8 +32,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 torch.backends.cudnn.enabled = False
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
index 27d36d6f..437127b2 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-adv-test-wav.py
@@ -10,6 +10,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -26,8 +29,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
index f9b77f11..8d4add76 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-art-test-wav.py
@@ -11,9 +11,12 @@ import numpy as np
 import pandas as pd
+from art.classifiers import PyTorchClassifier
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
-from art.classifiers import PyTorchClassifier
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -29,8 +32,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
index 9f6801ef..0e9493c0 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-test-wav.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -24,8 +27,6 @@ from hyperion.torch.utils.misc import l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
index 6fdca983..e0754498 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-adv-test-wav.py
@@ -10,6 +10,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
@@ -26,8 +29,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
index 7ef4815c..0f9f375d 100755
--- a/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
+++ b/hyperion/bin_deprec2/torch-eval-xvec-cosine-scoring-from-transfer-art-test-wav.py
@@ -11,9 +11,12 @@ import numpy as np
 import pandas as pd
+from art.classifiers import PyTorchClassifier
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
-from art.classifiers import PyTorchClassifier
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import AudioWriter as AW
 from hyperion.io import RandomAccessAudioReader as AR
@@ -29,8 +32,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm
 from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info
 from hyperion.utils.list_utils import ismember
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 class MyModel(nn.Module):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py b/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py
index a9785a61..fc494448 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors-from-wav-with-rttm.py
@@ -11,6 +11,9 @@ import numpy as np
 import pandas as pd
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -21,8 +24,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import RTTM, Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py
index 7453e0ba..c85fe4c9 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin-from-wav.py
@@ -11,8 +11,11 @@ import numpy as np
 import pandas as pd
-import torch
 import yaml
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
+import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
 from hyperion.io import SequentialAudioReader as AR
@@ -22,8 +25,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py
index 3153b312..6da57e16 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors-slidwin.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -19,8 +22,6 @@ from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py b/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py
index 347c80f8..6edf60ed 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors-vae-preproc.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -19,8 +22,6 @@ from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-extract-xvectors.py b/hyperion/bin_deprec2/torch-extract-xvectors.py
index 83d21692..76d941e0 100755
--- a/hyperion/bin_deprec2/torch-extract-xvectors.py
+++ b/hyperion/bin_deprec2/torch-extract-xvectors.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu
 from hyperion.io import DataWriterFactory as DWF
@@ -19,8 +22,6 @@ from hyperion.torch import TorchModelLoader as TML
 from hyperion.torch.utils import open_device
 from hyperion.utils import Utt2Info
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_device(use_gpu):
diff --git a/hyperion/bin_deprec2/torch-train-dc1d-ae.py b/hyperion/bin_deprec2/torch-train-dc1d-ae.py
index cdba46b3..50ac7d42 100755
--- a/hyperion/bin_deprec2/torch-train-dc1d-ae.py
+++ b/hyperion/bin_deprec2/torch-train-dc1d-ae.py
@@ -10,6 +10,7 @@ import time
 
 import numpy as np
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/bin_deprec2/torch-train-dvae.py b/hyperion/bin_deprec2/torch-train-dvae.py
index 6c21bbcf..808bfbba 100755
--- a/hyperion/bin_deprec2/torch-train-dvae.py
+++ b/hyperion/bin_deprec2/torch-train-dvae.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -24,8 +27,6 @@ TransformerEncoderV1)
 from hyperion.torch.trainers import DVAETrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 enc_dict = {
     "dc1d": DC1dEncoder,
diff --git a/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py
index 9db86225..f256f735 100755
--- a/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-efficientnet-xvec-from-wav.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -20,8 +23,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py b/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py
index 124e9cb3..622ac62e 100755
--- a/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py
+++ b/hyperion/bin_deprec2/torch-train-efficientnet-xvec.py
@@ -12,6 +12,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -21,8 +24,6 @@ from hyperion.torch.models import EfficientNetXVector as XVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py
index 6b9455df..3d135b18 100755
--- a/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-resnet-xvec-from-wav.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -25,8 +28,6 @@
 # from hyperion.torch.lr_schedulers import LRSchedulerFactory as LRSF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 # import torch.multiprocessing as mp
diff --git a/hyperion/bin_deprec2/torch-train-resnet-xvec.py b/hyperion/bin_deprec2/torch-train-resnet-xvec.py
index f035032a..f976cc6e 100755
--- a/hyperion/bin_deprec2/torch-train-resnet-xvec.py
+++ b/hyperion/bin_deprec2/torch-train-resnet-xvec.py
@@ -12,6 +12,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -21,8 +24,6 @@ from hyperion.torch.models import ResNetXVector as XVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py
index 616e2cd3..3ee6bf18 100755
--- a/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-resnet1d-xvec-from-wav.py
@@ -11,6 +11,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -21,8 +24,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py
index f579a807..0857ce5c 100755
--- a/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-spinenet-xvec-from-wav.py
@@ -12,6 +12,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -22,8 +25,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py b/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py
index 486b1d92..7bbbff03 100755
--- a/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-tdnn-xvec-from-wav.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -20,8 +23,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(
diff --git a/hyperion/bin_deprec2/torch-train-tdnn-xvec.py b/hyperion/bin_deprec2/torch-train-tdnn-xvec.py
index be429344..5614f1b9 100755
--- a/hyperion/bin_deprec2/torch-train-tdnn-xvec.py
+++ b/hyperion/bin_deprec2/torch-train-tdnn-xvec.py
@@ -12,6 +12,9 @@ from pathlib import Path
 
 import numpy as np
+from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
+                          namespace_to_dict)
+
 import torch
 import torch.nn as nn
 from hyperion.hyp_defs import config_logger, set_float_cpu
@@ -21,8 +24,6 @@ from hyperion.torch.models import TDNNXVector as XVec
 from hyperion.torch.trainers import XVectorTrainer as Trainer
 from hyperion.torch.utils import ddp, open_device
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
 
 
 def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs):
diff --git a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py
index 3e91da90..6b361583 100755
--- a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py
+++ b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1-from-wav.py
@@ -10,6 +10,9 @@ import time
 
 import numpy as np
+from jsonargparse import (ActionConfigFile,
ActionParser, ArgumentParser, + namespace_to_dict) + import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -20,8 +23,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.utils import ddp, open_device -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_data( diff --git a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py index d08a58a3..62164f15 100755 --- a/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py +++ b/hyperion/bin_deprec2/torch-train-transformer-xvec-v1.py @@ -12,6 +12,9 @@ from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -21,8 +24,6 @@ from hyperion.torch.models import TransformerXVectorV1 as XVec from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp, open_device -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin_deprec2/torch-train-vae.py b/hyperion/bin_deprec2/torch-train-vae.py index 6f545795..4c41d49c 100755 --- a/hyperion/bin_deprec2/torch-train-vae.py +++ b/hyperion/bin_deprec2/torch-train-vae.py @@ -11,6 +11,9 @@ from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -24,8 +27,6 @@ TransformerEncoderV1) from hyperion.torch.trainers import VAETrainer as Trainer from hyperion.torch.utils import ddp, open_device -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/torch-train-vq-dvae.py b/hyperion/bin_deprec2/torch-train-vq-dvae.py index 449c3b49..5de1bbd4 100755 --- a/hyperion/bin_deprec2/torch-train-vq-dvae.py +++ b/hyperion/bin_deprec2/torch-train-vq-dvae.py @@ -11,6 +11,9 @@ from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -24,8 +27,6 @@ TransformerEncoderV1) from hyperion.torch.trainers import VQDVAETrainer as Trainer from hyperion.torch.utils import ddp, open_device -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/torch-train-vq-vae.py b/hyperion/bin_deprec2/torch-train-vq-vae.py index 17dea6aa..2a95f853 100755 --- a/hyperion/bin_deprec2/torch-train-vq-vae.py +++ b/hyperion/bin_deprec2/torch-train-vq-vae.py @@ -11,6 +11,9 @@ from pathlib import Path import numpy as np +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) + import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -24,8 +27,6 @@ TransformerEncoderV1) from hyperion.torch.trainers import VQVAETrainer as Trainer from hyperion.torch.utils import ddp, open_device -from jsonargparse import 
(ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) enc_dict = { "dc1d": DC1dEncoder, diff --git a/hyperion/bin_deprec2/train-cw-up.py b/hyperion/bin_deprec2/train-cw-up.py index bab22ce7..c1c372ad 100755 --- a/hyperion/bin_deprec2/train-cw-up.py +++ b/hyperion/bin_deprec2/train-cw-up.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal diff --git a/hyperion/bin_deprec2/train-cw.py b/hyperion/bin_deprec2/train-cw.py index e8c693c1..cabca7c2 100755 --- a/hyperion/bin_deprec2/train-cw.py +++ b/hyperion/bin_deprec2/train-cw.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal diff --git a/hyperion/bin_deprec2/train-gaussianizer.py b/hyperion/bin_deprec2/train-gaussianizer.py index 4718d3df..aeb51e46 100755 --- a/hyperion/bin_deprec2/train-gaussianizer.py +++ b/hyperion/bin_deprec2/train-gaussianizer.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal diff --git a/hyperion/bin_deprec2/train-lda.py b/hyperion/bin_deprec2/train-lda.py index 9dfe394f..1887a72f 100755 --- a/hyperion/bin_deprec2/train-lda.py +++ b/hyperion/bin_deprec2/train-lda.py @@ -13,6 +13,7 @@ import time import numpy as np + from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.transforms import LDA, SbSw, TransformList diff --git a/hyperion/bin_deprec2/train-linear-gbe-up.py b/hyperion/bin_deprec2/train-linear-gbe-up.py index 9435d0ad..9986b6bc 100755 --- a/hyperion/bin_deprec2/train-linear-gbe-up.py +++ b/hyperion/bin_deprec2/train-linear-gbe-up.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LinearGBEUP as GBE diff --git a/hyperion/bin_deprec2/train-linear-gbe.py b/hyperion/bin_deprec2/train-linear-gbe.py index 75fe0b67..e9455cb8 100755 --- a/hyperion/bin_deprec2/train-linear-gbe.py +++ b/hyperion/bin_deprec2/train-linear-gbe.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LinearGBE as GBE diff --git a/hyperion/bin_deprec2/train-linear-svmc.py b/hyperion/bin_deprec2/train-linear-svmc.py index f48a573e..90ff8768 100755 --- a/hyperion/bin_deprec2/train-linear-svmc.py +++ b/hyperion/bin_deprec2/train-linear-svmc.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LinearSVMC as SVM diff --git a/hyperion/bin_deprec2/train-logistic-regression.py b/hyperion/bin_deprec2/train-logistic-regression.py index f7036879..1aa128a3 100755 --- a/hyperion/bin_deprec2/train-logistic-regression.py +++ b/hyperion/bin_deprec2/train-logistic-regression.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.classifiers import LogisticRegression as LR diff --git a/hyperion/bin_deprec2/train-mvn.py b/hyperion/bin_deprec2/train-mvn.py index 
ff03175b..2d10b116 100755 --- a/hyperion/bin_deprec2/train-mvn.py +++ b/hyperion/bin_deprec2/train-mvn.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.pdfs.core import Normal diff --git a/hyperion/bin_deprec2/train-nda.py b/hyperion/bin_deprec2/train-nda.py index ec73db2a..946a8baa 100755 --- a/hyperion/bin_deprec2/train-nda.py +++ b/hyperion/bin_deprec2/train-nda.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger from hyperion.np.transforms import NDA, NSbSw, TransformList diff --git a/hyperion/bin_deprec2/train-pca.py b/hyperion/bin_deprec2/train-pca.py index 9d9ae7a9..25dcb366 100755 --- a/hyperion/bin_deprec2/train-pca.py +++ b/hyperion/bin_deprec2/train-pca.py @@ -13,6 +13,7 @@ import time import numpy as np + from hyperion.helpers import VectorReader as VR from hyperion.hyp_defs import config_logger from hyperion.np.transforms import PCA, TransformList diff --git a/hyperion/bin_deprec2/train-plda.py b/hyperion/bin_deprec2/train-plda.py index f8d24366..520f4cd7 100755 --- a/hyperion/bin_deprec2/train-plda.py +++ b/hyperion/bin_deprec2/train-plda.py @@ -14,6 +14,7 @@ import time import numpy as np + from hyperion.helpers import PLDAFactory as F from hyperion.helpers import VectorClassReader as VCR from hyperion.hyp_defs import config_logger diff --git a/hyperion/data_prep/data_prep.py b/hyperion/data_prep/data_prep.py index bb91e3a5..d9f6b238 100644 --- a/hyperion/data_prep/data_prep.py +++ b/hyperion/data_prep/data_prep.py @@ -2,10 +2,12 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionYesNo -from pathlib import Path from concurrent.futures import ThreadPoolExecutor +from pathlib import Path + +from jsonargparse import ActionYesNo from tqdm import tqdm + from ..utils import PathLike @@ -63,9 +65,10 @@ def _get_recording_duration(scp, i, n): def get_recording_duration(self, recording_set): - from ..utils import SCPList import itertools + from ..utils import SCPList + scp = SCPList(recording_set["id"].values, recording_set["storage_path"].values) futures = [] with ThreadPoolExecutor(max_workers=self.num_threads) as pool: diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py index 5c04f86c..d8b9dd99 100644 --- a/hyperion/data_prep/voxceleb2.py +++ b/hyperion/data_prep/voxceleb2.py @@ -2,18 +2,18 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionYesNo -from pathlib import Path -import re import logging -from tqdm import tqdm +import re from concurrent.futures import ThreadPoolExecutor +from pathlib import Path -import pandas as pd import numpy as np +import pandas as pd +from jsonargparse import ActionYesNo +from tqdm import tqdm -from ..utils.misc import urlretrieve_progress, PathLike -from ..utils import RecordingSet, SegmentSet, ClassInfo, Dataset +from ..utils import ClassInfo, Dataset, RecordingSet, SegmentSet +from ..utils.misc import PathLike, urlretrieve_progress from .data_prep import DataPrep diff --git a/hyperion/torch/adv_attacks/art_attack_factory.py b/hyperion/torch/adv_attacks/art_attack_factory.py index 801ba948..678470f5 100644 --- a/hyperion/torch/adv_attacks/art_attack_factory.py +++ 
b/hyperion/torch/adv_attacks/art_attack_factory.py @@ -4,7 +4,7 @@ """ import numpy as np -from jsonargparse import ActionParser, ArgumentParser, ActionYesNo +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser try: from art.attacks import evasion as attacks diff --git a/hyperion/torch/adv_attacks/random_attack_factory.py b/hyperion/torch/adv_attacks/random_attack_factory.py index 133e5e2b..a91c99ac 100644 --- a/hyperion/torch/adv_attacks/random_attack_factory.py +++ b/hyperion/torch/adv_attacks/random_attack_factory.py @@ -5,9 +5,10 @@ import math -import torch from jsonargparse import ActionParser, ArgumentParser +import torch + from .attack_factory import AttackFactory as AF diff --git a/hyperion/torch/adv_defenses/wave_gan_white.py b/hyperion/torch/adv_defenses/wave_gan_white.py index af51dc00..5d045f08 100644 --- a/hyperion/torch/adv_defenses/wave_gan_white.py +++ b/hyperion/torch/adv_defenses/wave_gan_white.py @@ -8,9 +8,10 @@ import librosa import numpy as np -import torch import yaml +import torch + try: # import parallel_wavegan.models from parallel_wavegan.layers import PQMF @@ -20,6 +21,7 @@ pass from sklearn.preprocessing import StandardScaler + from torch import nn diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index cc9a3a5e..1e42a1c3 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -11,10 +11,11 @@ import pandas as pd #import k2 import sentencepiece as spm -import torch -import torch.distributed as dist import torchaudio.transforms as tat from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +import torch +import torch.distributed as dist from torch.utils.data import Dataset from ...io import RandomAccessAudioReader as AR diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 78bf89b6..c890627e 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -7,6 +7,7 @@ import math import numpy as np + import torch import torch.distributed as dist diff --git a/hyperion/torch/data/class_weighted_embed_sampler.py b/hyperion/torch/data/class_weighted_embed_sampler.py index aed9105d..edf1c00d 100644 --- a/hyperion/torch/data/class_weighted_embed_sampler.py +++ b/hyperion/torch/data/class_weighted_embed_sampler.py @@ -9,9 +9,10 @@ import numpy as np import pandas as pd -import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +import torch + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index b551f342..7fbfbd71 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -9,9 +9,10 @@ import numpy as np import pandas as pd -import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +import torch + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/embed_dataset.py b/hyperion/torch/data/embed_dataset.py index 31fd00fd..519f498d 100644 --- a/hyperion/torch/data/embed_dataset.py +++ b/hyperion/torch/data/embed_dataset.py @@ -10,9 +10,10 @@ import numpy as np import pandas as pd +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.utils.data import Dataset from ...io import 
RandomAccessDataReaderFactory as RF diff --git a/hyperion/torch/data/embed_sampler.py b/hyperion/torch/data/embed_sampler.py index 8836fe2a..65adcba6 100644 --- a/hyperion/torch/data/embed_sampler.py +++ b/hyperion/torch/data/embed_sampler.py @@ -7,9 +7,10 @@ import math import numpy as np -import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +import torch + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/feat_seq_dataset.py b/hyperion/torch/data/feat_seq_dataset.py index 68dea5c3..bb487dda 100644 --- a/hyperion/torch/data/feat_seq_dataset.py +++ b/hyperion/torch/data/feat_seq_dataset.py @@ -12,9 +12,10 @@ import numpy as np import pandas as pd +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.utils.data import Dataset from ...io import RandomAccessDataReaderFactory as RF diff --git a/hyperion/torch/data/hyp_sampler.py b/hyperion/torch/data/hyp_sampler.py index c5097723..d1bcb0a8 100644 --- a/hyperion/torch/data/hyp_sampler.py +++ b/hyperion/torch/data/hyp_sampler.py @@ -2,9 +2,10 @@ import math import numpy as np +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse import ActionParser, ArgumentParser from torch.utils.data import Sampler diff --git a/hyperion/torch/data/paired_feat_seq_dataset.py b/hyperion/torch/data/paired_feat_seq_dataset.py index fc17593e..eff2ed58 100644 --- a/hyperion/torch/data/paired_feat_seq_dataset.py +++ b/hyperion/torch/data/paired_feat_seq_dataset.py @@ -6,6 +6,7 @@ import logging import numpy as np + import torch from ...utils.utt2info import Utt2Info diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py index 76054cd8..2933dcc6 100644 --- a/hyperion/torch/data/seg_chunk_sampler.py +++ b/hyperion/torch/data/seg_chunk_sampler.py @@ -8,9 +8,10 @@ import numpy as np import pandas as pd +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse import ActionParser, ArgumentParser from ...utils.segment_set import SegmentSet from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 74726f63..39d1eed2 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -7,9 +7,10 @@ import math import numpy as np -import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +import torch + from .hyp_sampler import HypSampler diff --git a/hyperion/torch/data/weighted_embed_sampler.py b/hyperion/torch/data/weighted_embed_sampler.py index 22da93f9..5870512a 100644 --- a/hyperion/torch/data/weighted_embed_sampler.py +++ b/hyperion/torch/data/weighted_embed_sampler.py @@ -7,6 +7,7 @@ import math import numpy as np + import torch from torch.utils.data import Sampler diff --git a/hyperion/torch/data/weighted_seq_sampler.py b/hyperion/torch/data/weighted_seq_sampler.py index 345c2429..b6f0b670 100644 --- a/hyperion/torch/data/weighted_seq_sampler.py +++ b/hyperion/torch/data/weighted_seq_sampler.py @@ -7,9 +7,10 @@ import math import numpy as np +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.distributed as dist -from jsonargparse import ActionParser, ArgumentParser from torch.utils.data import Sampler diff --git a/hyperion/torch/layer_blocks/__init__.py 
b/hyperion/torch/layer_blocks/__init__.py index 0487ae4f..22cc629d 100644 --- a/hyperion/torch/layer_blocks/__init__.py +++ b/hyperion/torch/layer_blocks/__init__.py @@ -31,7 +31,8 @@ from .spine_blocks import BlockSpec, SpineConv, SpineEndpoints, SpineResample from .tdnn_blocks import TDNNBlock from .transducer_joiner import TransducerJoiner -from .transducer_predictor import TransducerRNNPredictor, TransducerConvPredictor +from .transducer_predictor import (TransducerConvPredictor, + TransducerRNNPredictor) from .transformer_conv2d_subsampler import TransformerConv2dSubsampler from .transformer_encoder_v1 import TransformerEncoderBlockV1 from .transformer_feedforward import (Conv1dLinear, Conv1dx2, diff --git a/hyperion/torch/layer_blocks/etdnn_blocks.py b/hyperion/torch/layer_blocks/etdnn_blocks.py index 10fd09b3..b6afdd29 100644 --- a/hyperion/torch/layer_blocks/etdnn_blocks.py +++ b/hyperion/torch/layer_blocks/etdnn_blocks.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear diff --git a/hyperion/torch/layer_blocks/resetdnn_blocks.py b/hyperion/torch/layer_blocks/resetdnn_blocks.py index 1af632fb..dfea3720 100644 --- a/hyperion/torch/layer_blocks/resetdnn_blocks.py +++ b/hyperion/torch/layer_blocks/resetdnn_blocks.py @@ -5,6 +5,7 @@ # import numpy as np + import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear diff --git a/hyperion/torch/layer_blocks/transducer_joiner.py b/hyperion/torch/layer_blocks/transducer_joiner.py index 738c0cda..d2a7310d 100644 --- a/hyperion/torch/layer_blocks/transducer_joiner.py +++ b/hyperion/torch/layer_blocks/transducer_joiner.py @@ -5,9 +5,10 @@ import logging from typing import Optional, Tuple +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser class TransducerJoiner(nn.Module): diff --git a/hyperion/torch/layer_blocks/transducer_predictor.py b/hyperion/torch/layer_blocks/transducer_predictor.py index 6f43343a..256753c5 100644 --- a/hyperion/torch/layer_blocks/transducer_predictor.py +++ b/hyperion/torch/layer_blocks/transducer_predictor.py @@ -5,9 +5,10 @@ import logging from typing import Optional, Tuple +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ...utils.misc import filter_func_args from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/layers/__init__.py b/hyperion/torch/layers/__init__.py index b2aa1692..6b508b0e 100644 --- a/hyperion/torch/layers/__init__.py +++ b/hyperion/torch/layers/__init__.py @@ -17,6 +17,7 @@ from .mvn import MeanVarianceNorm from .norm_layer_factory import NormLayer1dFactory, NormLayer2dFactory from .pool_factory import GlobalPool1dFactory -from .pos_encoder import NoPosEncoder, PosEncoder, RelPosEncoder, ConvPosEncoder +from .pos_encoder import (ConvPosEncoder, NoPosEncoder, PosEncoder, + RelPosEncoder) from .spec_augment import AxisMasker, SpecAugment, SpecWarper from .subpixel_convs import ICNR1d, ICNR2d, SubPixelConv1d, SubPixelConv2d diff --git a/hyperion/torch/layers/activation_factory.py b/hyperion/torch/layers/activation_factory.py index 9d972f95..f2467962 100644 --- a/hyperion/torch/layers/activation_factory.py +++ b/hyperion/torch/layers/activation_factory.py @@ -6,7 +6,7 @@ import torch.nn as nn -from .swish import Swish, DoubleSwish, Swish6, DoubleSwish6 +from .swish 
import DoubleSwish, DoubleSwish6, Swish, Swish6 act_dict = { "elu": nn.ELU, diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index 85ba92f6..5e38494f 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -6,6 +6,7 @@ import math import numpy as np + import torch import torch.nn as nn import torch.nn.functional as nnf diff --git a/hyperion/torch/layers/mvn.py b/hyperion/torch/layers/mvn.py index b90a65c8..4b4c5927 100644 --- a/hyperion/torch/layers/mvn.py +++ b/hyperion/torch/layers/mvn.py @@ -2,9 +2,10 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser class MeanVarianceNorm(nn.Module): diff --git a/hyperion/torch/layers/pool_factory.py b/hyperion/torch/layers/pool_factory.py index c0e573af..84d0cbf1 100644 --- a/hyperion/torch/layers/pool_factory.py +++ b/hyperion/torch/layers/pool_factory.py @@ -2,9 +2,10 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser +import torch.nn as nn + from .global_pool import * diff --git a/hyperion/torch/layers/spec_augment.py b/hyperion/torch/layers/spec_augment.py index 761a4e31..f4e03842 100644 --- a/hyperion/torch/layers/spec_augment.py +++ b/hyperion/torch/layers/spec_augment.py @@ -4,10 +4,11 @@ """ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn import torch.nn.functional as nnf -from jsonargparse import ActionParser, ArgumentParser count = 0 diff --git a/hyperion/torch/loggers/logger.py b/hyperion/torch/loggers/logger.py index 46c1130d..7e9c91f2 100644 --- a/hyperion/torch/loggers/logger.py +++ b/hyperion/torch/loggers/logger.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch.distributed as dist diff --git a/hyperion/torch/loggers/logger_list.py b/hyperion/torch/loggers/logger_list.py index 20ae58ec..0291a01f 100644 --- a/hyperion/torch/loggers/logger_list.py +++ b/hyperion/torch/loggers/logger_list.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch.distributed as dist from .tensorboard_logger import TensorBoardLogger as TBL diff --git a/hyperion/torch/lr_schedulers/factory.py b/hyperion/torch/lr_schedulers/factory.py index ab518ad4..cf003ca7 100644 --- a/hyperion/torch/lr_schedulers/factory.py +++ b/hyperion/torch/lr_schedulers/factory.py @@ -2,9 +2,10 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import torch from jsonargparse import ActionParser, ArgumentParser +import torch + from .cos_lr import AdamCosineLR, CosineLR from .exp_lr import ExponentialLR from .invpow_lr import InvPowLR diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index 0ab63adf..06838ddd 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -4,11 +4,11 @@ """ +from .transducer import RNNRNNTransducer, RNNTransducer from .vae.vae import VAE from .vae.vq_vae import VQVAE -from .transducer import RNNTransducer, RNNRNNTransducer -from .wav2transducer import (HFWav2Vec2RNNRNNTransducer, - HFWav2Vec2ConformerV1RNNTransducer, +from .wav2transducer import (HFWav2Vec2ConformerV1RNNTransducer, + 
HFWav2Vec2RNNRNNTransducer, HFWav2Vec2RNNTransducer, HFWav2Vec2Transducer) from .wav2xvectors import (HFHubert2ResNet1dXVector, HFWav2Vec2ResNet1dXVector, HFWavLM2ResNet1dXVector) diff --git a/hyperion/torch/models/transducer/conformer.py b/hyperion/torch/models/transducer/conformer.py index f62621af..511cc178 100644 --- a/hyperion/torch/models/transducer/conformer.py +++ b/hyperion/torch/models/transducer/conformer.py @@ -20,9 +20,10 @@ import warnings from typing import List, Optional, Tuple +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch from hyperion.utils.text import make_pad_mask, subsequent_chunk_mask -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch import Tensor, nn from .transformer import Transformer diff --git a/hyperion/torch/models/transducer/decoder.py b/hyperion/torch/models/transducer/decoder.py index 7f3698d7..484f6f38 100644 --- a/hyperion/torch/models/transducer/decoder.py +++ b/hyperion/torch/models/transducer/decoder.py @@ -17,9 +17,10 @@ import logging from typing import Optional, Tuple +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser # TODO(fangjun): Support switching between LSTM and GRU diff --git a/hyperion/torch/models/transducer/joiner.py b/hyperion/torch/models/transducer/joiner.py index 983f064a..a7c2e35b 100644 --- a/hyperion/torch/models/transducer/joiner.py +++ b/hyperion/torch/models/transducer/joiner.py @@ -14,9 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser class Joiner(nn.Module): diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer.py index c9ba365e..938149ec 100644 --- a/hyperion/torch/models/transducer/transducer.py +++ b/hyperion/torch/models/transducer/transducer.py @@ -26,10 +26,11 @@ import logging -import torch -import torch.nn as nn import torchaudio import torchaudio.functional + +import torch +import torch.nn as nn from hyperion.utils.text import add_sos from ...torch_model import TorchModel diff --git a/hyperion/torch/models/tvector/tvector.py b/hyperion/torch/models/tvector/tvector.py index a4e4d148..a46fc324 100644 --- a/hyperion/torch/models/tvector/tvector.py +++ b/hyperion/torch/models/tvector/tvector.py @@ -5,9 +5,10 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...narchs import ClassifHead, ConformerEncoderV1, TorchNALoader from ..layer_blocks import TDNNBlock diff --git a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py index 47dfd910..1d16675c 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py @@ -7,9 +7,10 @@ from dataclasses import dataclass from typing import Dict, List, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...torch_model import TorchModel from ...utils import remove_silence diff --git a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py 
b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py index 4cebfd66..4f1c500d 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2transducer.py @@ -5,9 +5,10 @@ import contextlib import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...torch_model import TorchModel from ...utils import remove_silence diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py index bd58e2cd..123c9de8 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2_transducer.py @@ -5,9 +5,10 @@ import logging from typing import Dict, Optional, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWav2Vec2 from ..transducer import Transducer diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2conformer_v1_rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2conformer_v1_rnn_transducer.py index 09b0196e..3b18de3a 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2conformer_v1_rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2conformer_v1_rnn_transducer.py @@ -5,9 +5,10 @@ import logging from typing import Dict, Optional, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWav2Vec2 from ..transducer import ConformerV1RNNTransducer diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py index a4d2b0cc..d9eeaebe 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_rnn_transducer.py @@ -5,9 +5,10 @@ import logging from typing import Dict, Optional, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWav2Vec2 from ..transducer import RNNRNNTransducer diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py index f4e02a23..fe82f734 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py @@ -5,9 +5,10 @@ import logging from typing import Dict, Optional, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWav2Vec2 from ..transducer import RNNTransducer diff --git a/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py index 458e7cae..4b2f235b 100644 --- a/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py @@ -5,9 +5,10 @@ import logging from typing import Dict, Optional, Tuple, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...narchs import 
AudioFeatsMVN from ...torch_model import TorchModel diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py index fb528809..b75ac53f 100644 --- a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py @@ -5,9 +5,10 @@ import logging from typing import Dict, Optional, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFHubert from ..xvectors import ResNet1dXVector diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py index 739213b4..8a17379c 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py @@ -5,9 +5,10 @@ import logging from typing import Dict, Optional, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWav2Vec2 from ..xvectors import ResNet1dXVector diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index 6ff8f8b4..5599fa1e 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -5,9 +5,10 @@ import contextlib import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...torch_model import TorchModel from ...utils import remove_silence diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py index 87e9a6f8..56a19130 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py @@ -5,9 +5,10 @@ import logging from typing import Dict, Optional, Union +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWavLM from ..xvectors import ResNet1dXVector diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py index b545bfaf..0d9f1bc4 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py @@ -5,9 +5,10 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ..xvectors import ResNet1dXVector from .wav2xvector import Wav2XVector diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py index 51e045da..1f7283a0 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py @@ -5,9 +5,10 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ..xvectors import ResNetXVector from .wav2xvector import Wav2XVector diff --git 
a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index 838f3342..4c21f478 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -4,9 +4,10 @@ """ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...narchs import AudioFeatsMVN from ...torch_model import TorchModel diff --git a/hyperion/torch/models/xvectors/efficient_net_xvector.py b/hyperion/torch/models/xvectors/efficient_net_xvector.py index dc864f1c..132bb51d 100644 --- a/hyperion/torch/models/xvectors/efficient_net_xvector.py +++ b/hyperion/torch/models/xvectors/efficient_net_xvector.py @@ -5,9 +5,10 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...narchs import EfficientNet as EN from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py index bd22f1ae..20865880 100644 --- a/hyperion/torch/models/xvectors/resnet1d_xvector.py +++ b/hyperion/torch/models/xvectors/resnet1d_xvector.py @@ -5,9 +5,10 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...narchs import ResNet1dEncoder as Encoder from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/resnet_xvector.py b/hyperion/torch/models/xvectors/resnet_xvector.py index 21c4ce81..0e9eba22 100644 --- a/hyperion/torch/models/xvectors/resnet_xvector.py +++ b/hyperion/torch/models/xvectors/resnet_xvector.py @@ -5,9 +5,10 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...narchs import ResNetFactory as RNF from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/spinenet_xvector.py b/hyperion/torch/models/xvectors/spinenet_xvector.py index e2dbbb2c..0b27a840 100644 --- a/hyperion/torch/models/xvectors/spinenet_xvector.py +++ b/hyperion/torch/models/xvectors/spinenet_xvector.py @@ -5,9 +5,10 @@ """ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...narchs import SpineNetFactory as SNF from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/tdnn_xvector.py b/hyperion/torch/models/xvectors/tdnn_xvector.py index af8731d5..38262cc3 100644 --- a/hyperion/torch/models/xvectors/tdnn_xvector.py +++ b/hyperion/torch/models/xvectors/tdnn_xvector.py @@ -5,9 +5,10 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...narchs import TDNNFactory as TF from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/transformer_xvector_v1.py b/hyperion/torch/models/xvectors/transformer_xvector_v1.py index 913c9550..25e9c894 100644 --- a/hyperion/torch/models/xvectors/transformer_xvector_v1.py +++ b/hyperion/torch/models/xvectors/transformer_xvector_v1.py @@ -5,9 +5,10 @@ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from 
...narchs import TransformerEncoderV1 as TE from .xvector import XVector diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index fb5fab46..8556104a 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -6,9 +6,10 @@ from enum import Enum from typing import Optional +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ....utils.misc import filter_func_args from ...layer_blocks import TDNNBlock @@ -17,7 +18,6 @@ from ...torch_model import TorchModel from ...utils import eval_nnet_by_chunks, scale_seq_lengths - # class XVectorTrainMode(Enum): # full = 0 # frozen = 1 diff --git a/hyperion/torch/narchs/audio_feats_mvn.py b/hyperion/torch/narchs/audio_feats_mvn.py index 8a877d5e..160ee61b 100644 --- a/hyperion/torch/narchs/audio_feats_mvn.py +++ b/hyperion/torch/narchs/audio_feats_mvn.py @@ -2,9 +2,10 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ..layers import AudioFeatsFactory as AFF from ..layers import MeanVarianceNorm as MVN diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index b5008f28..9f9b280b 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -4,9 +4,10 @@ """ +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.nn import Linear from ...utils.misc import filter_func_args diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index ed328223..97cb6d5b 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -3,17 +3,19 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser, ActionYesNo from ...utils.misc import filter_func_args from ..layer_blocks import ConformerEncoderBlockV1 as EBlock from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler from ..layers import ActivationFactory as AF +from ..layers import ConvPosEncoder, NoPosEncoder from ..layers import NormLayer1dFactory as NLF -from ..layers import NoPosEncoder, PosEncoder, RelPosEncoder, ConvPosEncoder -from ..utils import seq_lengths_to_mask, scale_seq_lengths +from ..layers import PosEncoder, RelPosEncoder +from ..utils import scale_seq_lengths, seq_lengths_to_mask from .net_arch import NetArch diff --git a/hyperion/torch/narchs/dc1d_decoder.py b/hyperion/torch/narchs/dc1d_decoder.py index 57d9adec..f5ab74d5 100644 --- a/hyperion/torch/narchs/dc1d_decoder.py +++ b/hyperion/torch/narchs/dc1d_decoder.py @@ -5,9 +5,10 @@ import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import DC1dDecBlock from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/narchs/dc1d_encoder.py 
b/hyperion/torch/narchs/dc1d_encoder.py index aaf1bb2d..0c331a5e 100644 --- a/hyperion/torch/narchs/dc1d_encoder.py +++ b/hyperion/torch/narchs/dc1d_encoder.py @@ -4,9 +4,10 @@ """ import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks.dc1d_blocks import DC1dEncBlock from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/narchs/dc2d_decoder.py b/hyperion/torch/narchs/dc2d_decoder.py index 87a18bfe..4106cbfd 100644 --- a/hyperion/torch/narchs/dc2d_decoder.py +++ b/hyperion/torch/narchs/dc2d_decoder.py @@ -5,9 +5,10 @@ import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import DC2dDecBlock from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/narchs/dc2d_encoder.py b/hyperion/torch/narchs/dc2d_encoder.py index 70eeac3c..ce7b9677 100644 --- a/hyperion/torch/narchs/dc2d_encoder.py +++ b/hyperion/torch/narchs/dc2d_encoder.py @@ -5,9 +5,10 @@ import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import DC2dEncBlock from ..layers import ActivationFactory as AF diff --git a/hyperion/torch/narchs/efficient_net.py b/hyperion/torch/narchs/efficient_net.py index 1eddc3ff..b9efdcef 100644 --- a/hyperion/torch/narchs/efficient_net.py +++ b/hyperion/torch/narchs/efficient_net.py @@ -5,9 +5,10 @@ import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.nn import Dropout, Linear from ..layer_blocks import MBConvBlock, MBConvInOutBlock diff --git a/hyperion/torch/narchs/etdnn.py b/hyperion/torch/narchs/etdnn.py index d2b2d298..a73439b7 100644 --- a/hyperion/torch/narchs/etdnn.py +++ b/hyperion/torch/narchs/etdnn.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch import torch.nn as nn from torch.nn import Conv1d, Linear diff --git a/hyperion/torch/narchs/net_arch.py b/hyperion/torch/narchs/net_arch.py index 4b39804c..9a3fc65f 100644 --- a/hyperion/torch/narchs/net_arch.py +++ b/hyperion/torch/narchs/net_arch.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch.nn as nn from ..torch_model import TorchModel diff --git a/hyperion/torch/narchs/resetdnn.py b/hyperion/torch/narchs/resetdnn.py index c4dc7784..eb964fa5 100644 --- a/hyperion/torch/narchs/resetdnn.py +++ b/hyperion/torch/narchs/resetdnn.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear diff --git a/hyperion/torch/narchs/resnet.py b/hyperion/torch/narchs/resnet.py index bccd0646..858cf4ea 100644 --- a/hyperion/torch/narchs/resnet.py +++ b/hyperion/torch/narchs/resnet.py @@ -5,20 +5,15 @@ import logging import numpy as np + import torch import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear -from ..layer_blocks import ( - Res2NetBasicBlock, - Res2NetBNBlock, - ResNetBasicBlock, - ResNetBNBlock, - ResNetEndpointBlock, - ResNetInputBlock, - SEResNetBasicBlock, - SEResNetBNBlock, -) +from ..layer_blocks import (Res2NetBasicBlock, Res2NetBNBlock, + ResNetBasicBlock, ResNetBNBlock, + ResNetEndpointBlock, ResNetInputBlock, + 
SEResNetBasicBlock, SEResNetBNBlock) from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF from ..utils import scale_seq_lengths, seq_lengths_to_mask diff --git a/hyperion/torch/narchs/resnet1d_decoder.py b/hyperion/torch/narchs/resnet1d_decoder.py index d65bab00..0c577174 100644 --- a/hyperion/torch/narchs/resnet1d_decoder.py +++ b/hyperion/torch/narchs/resnet1d_decoder.py @@ -4,9 +4,10 @@ """ import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import (DC1dDecBlock, ResNet1dBasicDecBlock, ResNet1dBNDecBlock, SEResNet1dBasicDecBlock, diff --git a/hyperion/torch/narchs/resnet1d_encoder.py b/hyperion/torch/narchs/resnet1d_encoder.py index ab184467..5bdad186 100644 --- a/hyperion/torch/narchs/resnet1d_encoder.py +++ b/hyperion/torch/narchs/resnet1d_encoder.py @@ -7,9 +7,10 @@ import math import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import (DC1dEncBlock, Res2Net1dBasicBlock, Res2Net1dBNBlock, ResNet1dBasicBlock, diff --git a/hyperion/torch/narchs/resnet2d_decoder.py b/hyperion/torch/narchs/resnet2d_decoder.py index 50369c8d..426b37f5 100644 --- a/hyperion/torch/narchs/resnet2d_decoder.py +++ b/hyperion/torch/narchs/resnet2d_decoder.py @@ -5,9 +5,10 @@ import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import (DC2dDecBlock, ResNet2dBasicDecBlock, ResNet2dBNDecBlock, SEResNet2dBasicDecBlock, diff --git a/hyperion/torch/narchs/resnet2d_encoder.py b/hyperion/torch/narchs/resnet2d_encoder.py index 8a76e348..84e6599e 100644 --- a/hyperion/torch/narchs/resnet2d_encoder.py +++ b/hyperion/torch/narchs/resnet2d_encoder.py @@ -6,9 +6,10 @@ import logging import math +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layer_blocks import (DC2dEncBlock, Res2Net2dBasicBlock, Res2Net2dBNBlock, ResNet2dBasicBlock, diff --git a/hyperion/torch/narchs/rnn_encoder.py b/hyperion/torch/narchs/rnn_encoder.py index 0c3b623a..7df33274 100644 --- a/hyperion/torch/narchs/rnn_encoder.py +++ b/hyperion/torch/narchs/rnn_encoder.py @@ -8,9 +8,10 @@ from typing import Dict, Optional, Tuple, Union import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence from ...utils.misc import filter_func_args diff --git a/hyperion/torch/narchs/rnn_transducer_decoder.py b/hyperion/torch/narchs/rnn_transducer_decoder.py index e9c50197..8db6c23a 100644 --- a/hyperion/torch/narchs/rnn_transducer_decoder.py +++ b/hyperion/torch/narchs/rnn_transducer_decoder.py @@ -6,12 +6,13 @@ from dataclasses import dataclass from typing import Dict, List, Optional, Tuple -import torch -import torch.nn as nn import torchaudio import torchaudio.functional from jsonargparse import ActionParser, ArgumentParser +import torch +import torch.nn as nn + try: import k2 except ModuleNotFoundError: @@ -19,8 +20,9 @@ from ...utils.misc 
import filter_func_args from ...utils.text import add_sos +from ..layer_blocks import TransducerConvPredictor as ConvPredictor from ..layer_blocks import TransducerJoiner as Joiner -from ..layer_blocks import TransducerRNNPredictor as RNNPredictor, TransducerConvPredictor as ConvPredictor +from ..layer_blocks import TransducerRNNPredictor as RNNPredictor from .net_arch import NetArch diff --git a/hyperion/torch/narchs/spinenet.py b/hyperion/torch/narchs/spinenet.py index da47ffe5..117c0733 100644 --- a/hyperion/torch/narchs/spinenet.py +++ b/hyperion/torch/narchs/spinenet.py @@ -6,6 +6,7 @@ import logging import numpy as np + import torch import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear diff --git a/hyperion/torch/narchs/tdnn.py b/hyperion/torch/narchs/tdnn.py index 8ac9be79..55e47e6a 100644 --- a/hyperion/torch/narchs/tdnn.py +++ b/hyperion/torch/narchs/tdnn.py @@ -4,6 +4,7 @@ """ import numpy as np + import torch import torch.nn as nn from torch.nn import Linear diff --git a/hyperion/torch/narchs/transformer_encoder_v1.py b/hyperion/torch/narchs/transformer_encoder_v1.py index fd3de235..4468185e 100644 --- a/hyperion/torch/narchs/transformer_encoder_v1.py +++ b/hyperion/torch/narchs/transformer_encoder_v1.py @@ -3,9 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler from ..layer_blocks import TransformerEncoderBlockV1 as EBlock diff --git a/hyperion/torch/optim/factory.py b/hyperion/torch/optim/factory.py index aa1acdc8..95117b05 100644 --- a/hyperion/torch/optim/factory.py +++ b/hyperion/torch/optim/factory.py @@ -4,9 +4,10 @@ """ import logging +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.optim as optim -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_args from .radam import RAdam diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py index 659f9dde..b2198924 100644 --- a/hyperion/torch/tpm/hf/hf_hubert.py +++ b/hyperion/torch/tpm/hf/hf_hubert.py @@ -6,11 +6,12 @@ import os from typing import Callable, List, Optional, Tuple, Union -import torch -import torch.nn as nn from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from transformers import HubertConfig, HubertModel +import torch +import torch.nn as nn + from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index 76d80aa4..e1f21153 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -6,11 +6,12 @@ import os from typing import Callable, List, Optional, Tuple, Union -import torch -import torch.nn as nn from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from transformers import Wav2Vec2Config, Wav2Vec2Model +import torch +import torch.nn as nn + from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index 5dd6a539..b0a815c7 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -8,11 +8,12 @@ from turtle import right from typing import List, Optional, Tuple, Union -import 
torch -import torch.nn as nn from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Processor +import torch +import torch.nn as nn + from ...torch_model import TorchModel from ...utils import scale_seq_lengths, seq_lengths_to_mask from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py index eec88dec..0d5c5ad3 100644 --- a/hyperion/torch/tpm/hf/hf_wavlm.py +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -6,11 +6,12 @@ import os from typing import Callable, List, Optional, Tuple, Union -import torch -import torch.nn as nn from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from transformers import WavLMConfig, WavLMModel +import torch +import torch.nn as nn + from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 69e97cc6..9f5fafe6 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index 0523ad44..e2d2d1f6 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 00a218f9..a6f20a8e 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -11,12 +11,13 @@ from enum import Enum from pathlib import Path +from fairscale.optim.grad_scaler import ShardedGradScaler +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.distributed as dist import torch.nn as nn -from fairscale.optim.grad_scaler import ShardedGradScaler -from jsonargparse import ActionParser, ArgumentParser from torch.optim.swa_utils import SWALR, AveragedModel from ...utils.misc import filter_func_args diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 3f0b3f1f..3a9cc288 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -6,10 +6,11 @@ import os from collections import OrderedDict as ODict -import torch -import torch.nn as nn import torchaudio from jsonargparse import ActionParser, ArgumentParser + +import torch +import torch.nn as nn from torch.distributed.elastic.multiprocessing.errors import record from ...utils.misc import filter_func_args diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index ba401cb7..f4877dc6 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py 
@@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index 03800e0d..fc9d98f1 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index 40b6b10d..35946e96 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -7,10 +7,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index af915d6b..303427de 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -7,10 +7,11 @@ import time from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 1e1b1778..2a012dde 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -7,10 +7,11 @@ import time from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 4e791347..9d04af42 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -6,10 +6,11 @@ import os from collections import OrderedDict as ODict +from jsonargparse import ActionParser, ArgumentParser + import torch import torch.cuda.amp as amp import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset diff --git a/hyperion/torch/utils/ddp.py b/hyperion/torch/utils/ddp.py index ad9c825c..1aefb3d4 100644 --- a/hyperion/torch/utils/ddp.py +++ b/hyperion/torch/utils/ddp.py @@ -6,13 +6,14 @@ import logging import os -import torch -import 
torch.distributed as dist -import torch.nn as nn from fairscale.nn.data_parallel import \ FullyShardedDataParallel as FullyShardedDDP from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP +import torch +import torch.distributed as dist +import torch.nn as nn + from .devices import open_device diff --git a/hyperion/torch/utils/metric_acc.py b/hyperion/torch/utils/metric_acc.py index 7b423a3e..a82c174a 100644 --- a/hyperion/torch/utils/metric_acc.py +++ b/hyperion/torch/utils/metric_acc.py @@ -6,6 +6,7 @@ from collections import OrderedDict as ODict import numpy as np + import torch import torch.distributed as dist diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index ffffc0b4..db035987 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -3,16 +3,15 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .misc import PathLike -from .dataset import Dataset from .class_info import ClassInfo +from .dataset import Dataset from .feature_set import FeatureSet from .hyp_dataclass import HypDataClass from .kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix +from .misc import PathLike from .recording_set import RecordingSet from .rttm import RTTM from .scp_list import SCPList - # from .ext_segment_list import ExtSegmentList from .segment_list import SegmentList from .segment_set import SegmentSet diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py index 546dd715..efb7c114 100644 --- a/hyperion/utils/dataset.py +++ b/hyperion/utils/dataset.py @@ -3,15 +3,16 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from typing import Dict, Optional from pathlib import Path +from typing import Dict, Optional + import yaml -from .segment_set import SegmentSet -from .recording_set import RecordingSet -from .feature_set import FeatureSet from .class_info import ClassInfo +from .feature_set import FeatureSet from .misc import PathLike +from .recording_set import RecordingSet +from .segment_set import SegmentSet class Dataset: diff --git a/hyperion/utils/lexicon.py b/hyperion/utils/lexicon.py index 80bd7c1e..6128c0ff 100644 --- a/hyperion/utils/lexicon.py +++ b/hyperion/utils/lexicon.py @@ -22,6 +22,7 @@ from typing import List, Tuple import k2 + import torch diff --git a/hyperion/utils/misc.py b/hyperion/utils/misc.py index 4ab3ce0a..6fb7d24b 100644 --- a/hyperion/utils/misc.py +++ b/hyperion/utils/misc.py @@ -4,9 +4,9 @@ Miscellaneous functions """ -from typing import TypeVar from inspect import signature from pathlib import Path +from typing import TypeVar import numpy as np diff --git a/hyperion/utils/text.py b/hyperion/utils/text.py index be70f638..2846fdbf 100644 --- a/hyperion/utils/text.py +++ b/hyperion/utils/text.py @@ -12,6 +12,7 @@ import numpy as np import pandas as pd + import torch diff --git a/requirements.txt b/requirements.txt index 6f1c8bc1..c3410829 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,9 @@ yapf jsonargparse>=3.5.0 wandb>=0.10.30 librosa>=0.8.1 +isort black twine wheel transformers>=4.16.2 +sentencepiece>=0.1.97 From 35391de52990806d4802a7e034abe0dc84d675ff Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 4 May 2023 09:55:06 -0400 Subject: [PATCH 098/154] new vox2 dataprep --- hyperion/data_prep/data_prep.py | 8 +- hyperion/data_prep/voxceleb2.py | 16 +- .../torch/narchs/rnn_transducer_decoder.py | 407 +++++++++--------- 3 files changed, 224 insertions(+), 207 deletions(-) diff --git a/hyperion/data_prep/data_prep.py b/hyperion/data_prep/data_prep.py 
b/hyperion/data_prep/data_prep.py index d9f6b238..fb6fc6c5 100644 --- a/hyperion/data_prep/data_prep.py +++ b/hyperion/data_prep/data_prep.py @@ -2,6 +2,7 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging from concurrent.futures import ThreadPoolExecutor from pathlib import Path @@ -66,21 +67,22 @@ def _get_recording_duration(scp, i, n): def get_recording_duration(self, recording_set): import itertools - from ..utils import SCPList scp = SCPList(recording_set["id"].values, recording_set["storage_path"].values) futures = [] + logging.info("submitting threads...") with ThreadPoolExecutor(max_workers=self.num_threads) as pool: - for i in range(self.num_threads): + for i in tqdm(range(self.num_threads)): future = pool.submit( DataPrep._get_recording_duration, scp, i, self.num_threads ) futures.append(future) + logging.info("waiting for threads...") res = [f.result() for f in tqdm(futures)] fss = list(itertools.chain(*[r[0] for r in res])) - durations = list(itertools.chain(*[r[0] for r in res])) + durations = list(itertools.chain(*[r[1] for r in res])) recording_set["duration"] = durations recording_set["sample_freq"] = fss diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py index d8b9dd99..a1a9f0c3 100644 --- a/hyperion/data_prep/voxceleb2.py +++ b/hyperion/data_prep/voxceleb2.py @@ -158,8 +158,9 @@ def prepare(self): file_paths = [] futures = [] logging.info("making video cat lists") + logging.info("submitting threads...") with ThreadPoolExecutor(max_workers=self.num_threads) as pool: - for i, rec_id in enumerate(rec_ids): + for i, rec_id in tqdm(enumerate(rec_ids)): future = pool.submit( VoxCeleb2DataPrep.make_cat_list, lists_cat_dir, @@ -170,6 +171,7 @@ def prepare(self): ) futures.append(future) + logging.info("waiting for threads...") file_paths = [f.result() for f in tqdm(futures)] video_ids = uniq_video_ids @@ -213,14 +215,14 @@ def prepare(self): df_lang.loc[r, "confidence"] if r in df_lang.index else "N/A" for r in rec_ids ], - # "duration": recs.loc[rec_ids, "duration"], + "duration": recs.loc[rec_ids, "duration"].values, } ) - print( - recs.loc[rec_ids, "duration"], - len(segments), - len(recs.loc[rec_ids, "duration"]), - ) + # print( + # recs.loc[rec_ids, "duration"], + # len(segments), + # len(recs.loc[rec_ids, "duration"]), + # ) segments = SegmentSet(segments) segments.sort() diff --git a/hyperion/torch/narchs/rnn_transducer_decoder.py b/hyperion/torch/narchs/rnn_transducer_decoder.py index 8db6c23a..763ec67c 100644 --- a/hyperion/torch/narchs/rnn_transducer_decoder.py +++ b/hyperion/torch/narchs/rnn_transducer_decoder.py @@ -16,7 +16,7 @@ try: import k2 except ModuleNotFoundError: - from ...utils import dummy_k2 as k2 + from ..utils import dummy_k2 as k2 from ...utils.misc import filter_func_args from ...utils.text import add_sos @@ -99,10 +99,8 @@ def __init__( if self.rnnt_loss == "k2_pruned": self.simple_am_proj = nn.Linear(in_feats, vocab_size) - self.simple_lm_proj = nn.Linear(self.predictor.out_feats, - vocab_size) - self.register_buffer("cur_step", torch.as_tensor(0, - dtype=torch.int)) + self.simple_lm_proj = nn.Linear(self.predictor.out_feats, vocab_size) + self.register_buffer("cur_step", torch.as_tensor(0, dtype=torch.int)) def _make_predictor(self): pred_type = self.predictor_args["pred_type"] @@ -110,12 +108,10 @@ def _make_predictor(self): self.predictor_args["vocab_size"] = self.vocab_size self.predictor_args["blank_id"] = self.blank_id if pred_type == "rnn": - pred_args = 
filter_func_args(RNNPredictor.__init__, - self.predictor_args) + pred_args = filter_func_args(RNNPredictor.__init__, self.predictor_args) self.predictor = RNNPredictor(**pred_args) elif pred_type == "conv": - pred_args = filter_func_args(ConvPredictor.__init__, - self.predictor_args) + pred_args = filter_func_args(ConvPredictor.__init__, self.predictor_args) self.predictor = ConvPredictor(**pred_args) self.predictor_args["out_feats"] = self.predictor.embed_dim else: @@ -127,8 +123,7 @@ def _make_joiner(self): if joiner_type == "basic": pred_feats = self.predictor_args["out_feats"] hid_feats = self.joiner_args["hid_feats"] - self.joiner = Joiner(self.in_feats, pred_feats, hid_feats, - self.vocab_size) + self.joiner = Joiner(self.in_feats, pred_feats, hid_feats, self.vocab_size) else: raise ValueError(f"Unknown joiner type {joiner_type}") @@ -152,9 +147,14 @@ def get_config(self): base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) - def _rnnt_loss_torchaudio(self, x: torch.Tensor, x_lengths: torch.Tensor, - y: torch.Tensor, y_lengths: torch.Tensor, - pred_out: torch.Tensor): + def _rnnt_loss_torchaudio( + self, + x: torch.Tensor, + x_lengths: torch.Tensor, + y: torch.Tensor, + y_lengths: torch.Tensor, + pred_out: torch.Tensor, + ): logits = self.joiner(x, pred_out) # rnnt_loss requires 0 padded targets # Note: y does not start with SOS @@ -170,14 +170,17 @@ def _rnnt_loss_torchaudio(self, x: torch.Tensor, x_lengths: torch.Tensor, ) return loss - def _rnnt_loss_k2(self, x: torch.Tensor, x_lengths: torch.Tensor, - y: torch.Tensor, y_lengths: torch.Tensor, - pred_out: torch.Tensor): + def _rnnt_loss_k2( + self, + x: torch.Tensor, + x_lengths: torch.Tensor, + y: torch.Tensor, + y_lengths: torch.Tensor, + pred_out: torch.Tensor, + ): y_padded = y.pad(mode="constant", padding_value=0) y_padded = y_padded.to(torch.int64) - boundary = torch.zeros((x.size(0), 4), - dtype=torch.int64, - device=x.device) + boundary = torch.zeros((x.size(0), 4), dtype=torch.int64, device=x.device) boundary[:, 2] = y_lengths boundary[:, 3] = x_lengths @@ -195,15 +198,18 @@ def _rnnt_loss_k2(self, x: torch.Tensor, x_lengths: torch.Tensor, ) return loss - def _rnnt_loss_k2_pruned(self, x: torch.Tensor, x_lengths: torch.Tensor, - y: torch.Tensor, y_lengths: torch.Tensor, - pred_out: torch.Tensor): + def _rnnt_loss_k2_pruned( + self, + x: torch.Tensor, + x_lengths: torch.Tensor, + y: torch.Tensor, + y_lengths: torch.Tensor, + pred_out: torch.Tensor, + ): y_padded = y.pad(mode="constant", padding_value=0) y_padded = y_padded.to(torch.int64) - boundary = torch.zeros((x.size(0), 4), - dtype=torch.int64, - device=x.device) + boundary = torch.zeros((x.size(0), 4), dtype=torch.int64, device=x.device) boundary[:, 2] = y_lengths boundary[:, 3] = x_lengths @@ -266,7 +272,7 @@ def _rnnt_loss_k2_pruned(self, x: torch.Tensor, x_lengths: torch.Tensor, simple_loss_scale = 1.0 - r * (1.0 - self.simple_loss_scale) pruned_loss_scale = 0.1 + 0.9 * r self.cur_step += 1 - #print(simple_loss_scale, pruned_loss_scale) + # print(simple_loss_scale, pruned_loss_scale) loss = simple_loss_scale * loss_simple + pruned_loss_scale * loss_pruned @@ -288,44 +294,48 @@ def forward( loss_simple = loss_pruned = None if self.rnnt_loss == "k2_pruned": loss, loss_simple, loss_pruned = self._rnnt_loss_k2_pruned( - x, x_lengths, y, y_lengths, pred_out) + x, x_lengths, y, y_lengths, pred_out + ) elif self.rnnt_loss == "k2": loss = self._rnnt_loss_k2(x, x_lengths, y, y_lengths, pred_out) elif self.rnnt_loss == 
"torchaudio": loss_simple = loss_pruned = None - loss = self._rnnt_loss_torchaudio(x, x_lengths, y, y_lengths, - pred_out) + loss = self._rnnt_loss_torchaudio(x, x_lengths, y, y_lengths, pred_out) return loss, loss_simple, loss_pruned - def decode(self, - x: torch.Tensor, - x_lengths: torch.Tensor = None, - method="time_sync_beam_search", - beam_width: int = 5, - max_sym_per_frame: int = 3, - max_sym_per_utt: int = 1000) -> List[int]: + def decode( + self, + x: torch.Tensor, + x_lengths: torch.Tensor = None, + method="time_sync_beam_search", + beam_width: int = 5, + max_sym_per_frame: int = 3, + max_sym_per_utt: int = 1000, + ) -> List[int]: if method == "time_sync_beam_search": - return self.decode_time_sync_beam_search(x, - x_lengths, - beam_width=beam_width) + return self.decode_time_sync_beam_search( + x, x_lengths, beam_width=beam_width + ) elif method == "align_length_sync_beam_search": return self.decode_align_length_sync_beam_search( + x, x_lengths, beam_width=beam_width, max_sym_per_utt=max_sym_per_utt + ) + elif method == "greedy": + return self.decode_greedy( x, x_lengths, - beam_width=beam_width, - max_sym_per_utt=max_sym_per_utt) - elif method == "greedy": - return self.decode_greedy(x, - x_lengths, - max_sym_per_frame=max_sym_per_frame, - max_sym_per_utt=max_sym_per_utt) - - def decode_greedy(self, - x: torch.Tensor, - x_lengths: torch.Tensor = None, - max_sym_per_frame: int = 3, - max_sym_per_utt: int = 1000) -> List[int]: + max_sym_per_frame=max_sym_per_frame, + max_sym_per_utt=max_sym_per_utt, + ) + + def decode_greedy( + self, + x: torch.Tensor, + x_lengths: torch.Tensor = None, + max_sym_per_frame: int = 3, + max_sym_per_utt: int = 1000, + ) -> List[int]: """ Args: x: encoder embeddings with shape = (N, T, C) @@ -339,8 +349,7 @@ def decode_greedy(self, blank_id = self.blank_id device = x.device - sos = torch.tensor([blank_id], device=device, - dtype=torch.int64).reshape(1, 1) + sos = torch.tensor([blank_id], device=device, dtype=torch.int64).reshape(1, 1) pred_out, state = self.predictor(sos) T = x.size(1) t = 0 @@ -350,7 +359,7 @@ def decode_greedy(self, sym_per_utt = 0 while t < T and sym_per_utt < max_sym_per_utt: - x_t = x[:, t:t + 1, :] + x_t = x[:, t : t + 1, :] logits = self.joiner(x_t, pred_out) # (1, 1, 1, vocab_size) # logits is @@ -371,10 +380,9 @@ def decode_greedy(self, return hyp - def decode_time_sync_beam_search(self, - x: torch.Tensor, - x_lengths: torch.Tensor = None, - beam_width: int = 5) -> List[int]: + def decode_time_sync_beam_search( + self, x: torch.Tensor, x_lengths: torch.Tensor = None, beam_width: int = 5 + ) -> List[int]: assert x.ndim == 3 assert x.size(0) == 1, x.size(0) @@ -389,11 +397,10 @@ def decode_time_sync_beam_search(self, max_u = 20000 # terminate after this number of steps u = 0 - cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor, - torch.Tensor]]] = {} + cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]] = {} while t < T and u < max_u: - x_t = x[:, t:t + 1, :] + x_t = x[:, t : t + 1, :] A = B B = [] @@ -406,13 +413,9 @@ def decode_time_sync_beam_search(self, cached_key = "_".join(map(str, y_star.ys)) if cached_key not in cache: - pred_in = torch.tensor([y_star.ys[-1]], - device=device).reshape(1, 1) + pred_in = torch.tensor([y_star.ys[-1]], device=device).reshape(1, 1) - pred_out, pred_state = self.predictor( - pred_in, - y_star.pred_state, - ) + pred_out, pred_state = self.predictor(pred_in, y_star.pred_state,) cache[cached_key] = (pred_out, pred_state) else: pred_out, pred_state = cache[cached_key] 
@@ -443,7 +446,7 @@ def decode_time_sync_beam_search(self, topk_log_prob = log_prob.topk(beam_width, dim=-1) # Second, choose other labels - #for i, v in enumerate(log_prob.tolist()): + # for i, v in enumerate(log_prob.tolist()): for v, i in zip(*topk_log_prob): v = v.item() i = i.item() @@ -452,9 +455,7 @@ def decode_time_sync_beam_search(self, new_ys = y_star.ys + [i] new_log_prob = y_star.log_prob + v new_hyp = Hypothesis( - ys=new_ys, - log_prob=new_log_prob, - pred_state=pred_state, + ys=new_ys, log_prob=new_log_prob, pred_state=pred_state, ) A.append(new_hyp) @@ -462,12 +463,9 @@ def decode_time_sync_beam_search(self, # check whether B contains more than "beam" elements more probable # than the most probable in A A_most_probable = max(A, key=lambda hyp: hyp.log_prob) - #print("tuAB1", t, u, len(A), A_most_probable.log_prob, len(B)) + # print("tuAB1", t, u, len(A), A_most_probable.log_prob, len(B)) B = sorted( - [ - hyp - for hyp in B if hyp.log_prob > A_most_probable.log_prob - ], + [hyp for hyp in B if hyp.log_prob > A_most_probable.log_prob], key=lambda hyp: hyp.log_prob, reverse=True, ) @@ -483,17 +481,17 @@ def decode_time_sync_beam_search(self, break t += 1 - best_hyp = max(B, - key=lambda hyp: hyp.log_prob / max(1, len(hyp.ys[1:]))) + best_hyp = max(B, key=lambda hyp: hyp.log_prob / max(1, len(hyp.ys[1:]))) ys = best_hyp.ys[1:] # [1:] to remove the blank return ys def decode_align_length_sync_beam_search( - self, - x: torch.Tensor, - x_lengths: torch.Tensor, - beam_width: int = 5, - max_sym_per_utt: int = 1000) -> List[int]: + self, + x: torch.Tensor, + x_lengths: torch.Tensor, + beam_width: int = 5, + max_sym_per_utt: int = 1000, + ) -> List[int]: assert x.ndim == 3 assert x.size(0) == 1, x.size(0) @@ -503,39 +501,34 @@ def decode_align_length_sync_beam_search( sos = torch.tensor([blank_id], device=device).reshape(1, 1) pred_out, state = self.predictor(sos) T = x.size(1) - #t = 0 + # t = 0 B = [Hypothesis(ys=[blank_id], log_prob=0.0, pred_state=None)] - #max_u = 20000 # terminate after this number of steps - #u = 0 + # max_u = 20000 # terminate after this number of steps + # u = 0 - cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor, - torch.Tensor]]] = {} + cache: Dict[str, Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]] = {} F = [] - #for t < T and u < max_u: + # for t < T and u < max_u: for i in range(T + max_sym_per_utt): A = [] for y_star in B: - #while u < max_u: + # while u < max_u: u = len(y_star.ys) - 1 t = i - u if t >= T: continue - #y_star = max(A, key=lambda hyp: hyp.log_prob) - #A.remove(y_star) - x_t = x[:, t:t + 1, :] + # y_star = max(A, key=lambda hyp: hyp.log_prob) + # A.remove(y_star) + x_t = x[:, t : t + 1, :] # Note: y_star.ys is unhashable, i.e., cannot be used # as a key into a dict cached_key = "_".join(map(str, y_star.ys)) if cached_key not in cache: - pred_in = torch.tensor([y_star.ys[-1]], - device=device).reshape(1, 1) + pred_in = torch.tensor([y_star.ys[-1]], device=device).reshape(1, 1) - pred_out, pred_state = self.predictor( - pred_in, - y_star.pred_state, - ) + pred_out, pred_state = self.predictor(pred_in, y_star.pred_state,) cache[cached_key] = (pred_out, pred_state) else: pred_out, pred_state = cache[cached_key] @@ -563,7 +556,7 @@ def decode_align_length_sync_beam_search( topk_log_prob = log_prob.topk(beam_width, dim=-1) # Second, choose other labels - #for i, v in enumerate(log_prob.tolist()): + # for i, v in enumerate(log_prob.tolist()): for v, i in zip(*topk_log_prob): v = v.item() i = i.item() @@ -572,20 +565,16 @@ def 
decode_align_length_sync_beam_search( new_ys = y_star.ys + [i] new_log_prob = y_star.log_prob + v new_hyp = Hypothesis( - ys=new_ys, - log_prob=new_log_prob, - pred_state=pred_state, + ys=new_ys, log_prob=new_log_prob, pred_state=pred_state, ) A.append(new_hyp) # check whether B contains more than "beam_width" elements more probable # than the most probable in A - #A_most_probable = max(A, key=lambda hyp: hyp.log_prob) - #print("tuAB1", t, u, len(A), A_most_probable.log_prob, len(B)) + # A_most_probable = max(A, key=lambda hyp: hyp.log_prob) + # print("tuAB1", t, u, len(A), A_most_probable.log_prob, len(B)) B0 = sorted( - [hyp for hyp in A], - key=lambda hyp: hyp.log_prob, - reverse=True, + [hyp for hyp in A], key=lambda hyp: hyp.log_prob, reverse=True, ) B = [] B_ys = set() @@ -605,8 +594,7 @@ def decode_align_length_sync_beam_search( B = B[:beam_width] break - best_hyp = max(F, - key=lambda hyp: hyp.log_prob / max(1, len(hyp.ys[1:]))) + best_hyp = max(F, key=lambda hyp: hyp.log_prob / max(1, len(hyp.ys[1:]))) ys = best_hyp.ys[1:] # [1:] to remove the blank return ys @@ -617,8 +605,9 @@ def change_config( rnn_dropout_rate: float = 0.0, ): logging.info("changing decoder config") - self.predictor.change_config(override_dropouts, embed_dropout_rate, - rnn_dropout_rate) + self.predictor.change_config( + override_dropouts, embed_dropout_rate, rnn_dropout_rate + ) @staticmethod def filter_args(**kwargs): @@ -638,49 +627,58 @@ def add_pred_args(parser): "--pred-type", default="rnn", choices=["rnn", "conv"], - help= - """type of predictor between RNN and Convolutional [rnn, conv]""") - pred_parser.add_argument("--embed-dim", - default=1024, - type=int, - help=("token embedding dimension")) + help="""type of predictor between RNN and Convolutional [rnn, conv]""", + ) + pred_parser.add_argument( + "--embed-dim", default=1024, type=int, help=("token embedding dimension") + ) pred_parser.add_argument( "--embed-dropout-rate", default=0.0, type=float, - help=("dropout prob for predictor input embeddings")) - pred_parser.add_argument("--rnn-dropout-rate", - default=0.0, - type=float, - help="""dropout prob for decoder RNN """) + help=("dropout prob for predictor input embeddings"), + ) + pred_parser.add_argument( + "--rnn-dropout-rate", + default=0.0, + type=float, + help="""dropout prob for decoder RNN """, + ) pred_parser.add_argument( "--rnn-type", default="lstm", choices=["lstm", "gru"], - help= - """type of recurrent network for thep predictor in [lstm, gru]""") - - pred_parser.add_argument("--num-layers", - default=2, - type=int, - help="""number of layers of the predictor """) - - pred_parser.add_argument("--hid-feats", - default=512, - type=int, - help="""hidden features of the predictor""") - pred_parser.add_argument("--out-feats", - default=512, - type=int, - help="""output features of the predictor""") - pred_parser.add_argument("--context-size", - default=2, - type=int, - help="""context length of the convolutional - predictor, 1->bigram, 2-> trigram,...""") - - parser.add_argument("--predictor", - action=ActionParser(parser=pred_parser)) + help="""type of recurrent network for thep predictor in [lstm, gru]""", + ) + + pred_parser.add_argument( + "--num-layers", + default=2, + type=int, + help="""number of layers of the predictor """, + ) + + pred_parser.add_argument( + "--hid-feats", + default=512, + type=int, + help="""hidden features of the predictor""", + ) + pred_parser.add_argument( + "--out-feats", + default=512, + type=int, + help="""output features of the predictor""", + ) + 
pred_parser.add_argument( + "--context-size", + default=2, + type=int, + help="""context length of the convolutional + predictor, 1->bigram, 2-> trigram,...""", + ) + + parser.add_argument("--predictor", action=ActionParser(parser=pred_parser)) @staticmethod def add_joiner_args(parser): @@ -690,39 +688,43 @@ def add_joiner_args(parser): "--joiner-type", default="basic", choices=["basic"], - help= - """type of joiner network, there is only basic joiner for now""") - pred_parser.add_argument("--hid-feats", - default=512, - type=int, - help="""hidden features of the joiner""") - parser.add_argument("--joiner", - action=ActionParser(parser=pred_parser)) + help="""type of joiner network, there is only basic joiner for now""", + ) + pred_parser.add_argument( + "--hid-feats", + default=512, + type=int, + help="""hidden features of the joiner""", + ) + parser.add_argument("--joiner", action=ActionParser(parser=pred_parser)) @staticmethod - def add_class_args(parser, - prefix=None, - skip=set(["in_feats", "blank_id", "vocab_size"])): + def add_class_args( + parser, prefix=None, skip=set(["in_feats", "blank_id", "vocab_size"]) + ): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") if "in_feats" not in skip: - parser.add_argument("--in-feats", - type=int, - required=True, - help=("input feature dimension")) + parser.add_argument( + "--in-feats", type=int, required=True, help=("input feature dimension") + ) if "blank_id" not in skip: - parser.add_argument("--blank-id", - type=int, - default=0, - help=("blank id from tokenizer model")) + parser.add_argument( + "--blank-id", + type=int, + default=0, + help=("blank id from tokenizer model"), + ) if "vocab_size" not in skip: - parser.add_argument("--vocab-size", - type=int, - required=True, - help=("output prediction dimension")) + parser.add_argument( + "--vocab-size", + type=int, + required=True, + help=("output prediction dimension"), + ) RNNTransducerDecoder.add_pred_args(parser) RNNTransducerDecoder.add_joiner_args(parser) @@ -730,56 +732,62 @@ def add_class_args(parser, "--rnnt-loss", default="k2_pruned", choices=["torchaudio", "k2", "k2_pruned"], - help="""type of rnn-t loss between torchaudio, k2 or k2_pruned.""") + help="""type of rnn-t loss between torchaudio, k2 or k2_pruned.""", + ) parser.add_argument( "--rnnt-type", default="regular", choices=["regular", "modified", "constrained"], - help= - """type of rnn-t loss between regular, modified or constrained.""") + help="""type of rnn-t loss between regular, modified or constrained.""", + ) parser.add_argument( "--delay-penalty", default=0.0, type=float, - help= - """penalize symbol delay, which is used to make symbol emit earlier - for streaming models.""") + help="""penalize symbol delay, which is used to make symbol emit earlier + for streaming models.""", + ) parser.add_argument( "--reduction", default="sum", choices=["sum", "mean"], - help="""type of reduction for rnn-t loss between sum or mean""") + help="""type of reduction for rnn-t loss between sum or mean""", + ) parser.add_argument( "--prune-range", default=5, type=int, help="""how many symbols to keep for each frame in k2 rnn-t - pruned loss.""") + pruned loss.""", + ) parser.add_argument( "--lm-scale", default=0.25, type=float, - help="""language model scale in rnn-t smoothed loss""") + help="""language model scale in rnn-t smoothed loss""", + ) parser.add_argument( "--am-scale", default=0.0, type=float, - help="""acoustic model scale in rnn-t smoothed loss""") + help="""acoustic model scale in rnn-t 
smoothed loss""", + ) parser.add_argument( "--simple-loss-scale", default=0.5, type=float, - help="""weight of rnn-t simple loss when using k2 pruned loss""") + help="""weight of rnn-t simple loss when using k2 pruned loss""", + ) parser.add_argument( "--pruned-warmup-steps", default=2000, type=int, help="""number of steps to warm up the k2 rnn-t pruned loss - from 0.1 to 1""") + from 0.1 to 1""", + ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) @staticmethod def add_finetune_args(parser, prefix=None, skip=set()): @@ -794,16 +802,21 @@ def add_finetune_args(parser, prefix=None, skip=set()): action=ActionYesNo, help=( "whether to use the dropout probabilities passed in the " - "arguments instead of the defaults in the pretrained model.")) - parser.add_argument("--embed-dropout-rate", - default=0.0, - type=float, - help=("dropout prob for decoder input embeddings")) - parser.add_argument("--rnn-dropout-rate", - default=0.0, - type=float, - help=("dropout prob for decoder RNN ")) + "arguments instead of the defaults in the pretrained model." + ), + ) + parser.add_argument( + "--embed-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder input embeddings"), + ) + parser.add_argument( + "--rnn-dropout-rate", + default=0.0, + type=float, + help=("dropout prob for decoder RNN "), + ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) From cf861bc7b30f9c318ed20308588c71856a545933 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 8 May 2023 14:49:09 -0400 Subject: [PATCH 099/154] fix new vox2 dataprep durations, scp -> RecordingSet --- egs/librispeech/v1/run_011_train_asr.sh | 12 +- egs/librispeech/v1/run_011_train_asr_old.sh | 12 +- .../adv.v1.1/run_005_train_victim_xvector.sh | 4 +- .../run_007_train_transfer_xvector.sh | 4 +- .../run_008_adv_finetune_victim_xvector.sh | 4 +- .../adv.v2/run_011_train_victim_xvector.sh | 4 +- .../run_022_attack_type_classif_allknown.sh | 4 +- .../adv.v2/run_023_snr_classif_allknown.sh | 4 +- .../run_024_threat_model_classif_allknown.sh | 4 +- ...un_031_attack_type_verif_and_noveltydet.sh | 4 +- egs/voxceleb/adv.v2/run_032_snr_verif.sh | 4 +- .../adv.v2/run_033_threat_model_verif.sh | 4 +- .../config_fbank80_stmn_cfwseresnet34.v3.0.sh | 4 +- .../config_fbank80_stmn_fwseresnet34.v3.0.sh | 4 +- .../config_fbank80_stmn_resnet34.v3.0.sh | 2 +- egs/voxceleb/v1.1/run_011_train_xvector.sh | 8 +- egs/voxceleb/v2/run_011_train_xvector.sh | 12 +- hyperion/bin/apply_mvn_select_frames.py | 36 +- hyperion/bin/compute_energy_vad.py | 21 +- hyperion/bin/compute_mfcc_feats.py | 20 +- hyperion/bin/decode_wav2transducer.py | 110 ++-- ...l_xvec_cosine_scoring_from_adv_test_wav.py | 21 +- ...osine_scoring_from_adv_test_wav_wavegan.py | 22 +- ...l_xvec_cosine_scoring_from_art_test_wav.py | 26 +- .../eval_xvec_cosine_scoring_from_test_wav.py | 15 +- ...sine_scoring_from_transfer_adv_test_wav.py | 3 +- ...sine_scoring_from_transfer_art_test_wav.py | 20 +- hyperion/bin/eval_xvec_logits_from_wav.py | 19 +- hyperion/bin/extract_wav2vec2xvectors.py | 17 +- hyperion/bin/extract_xvectors_from_wav.py | 16 +- .../extract_xvectors_slidwin_from_feats.py | 15 +- .../bin/extract_xvectors_slidwin_from_wav.py | 18 +- .../generate_adv_attacks_xvector_classif.py | 15 +- 
.../bin/generate_adv_attacks_xvector_verif.py | 10 +- hyperion/bin/pack_wav_rirs.py | 10 +- hyperion/data_prep/data_prep.py | 8 +- hyperion/io/ark_data_reader.py | 179 ++++--- hyperion/io/ark_data_writer.py | 42 +- hyperion/io/audio_reader.py | 409 ++++++++------- hyperion/io/audio_writer.py | 84 +-- hyperion/io/bin_vad_reader.py | 3 +- hyperion/io/data_reader.py | 62 ++- hyperion/io/data_rw_factory.py | 51 +- hyperion/io/data_writer.py | 51 +- hyperion/io/h5_data_reader.py | 204 +++++--- hyperion/io/h5_data_writer.py | 32 +- hyperion/io/old_audio_reader.py | 477 ++++++++++++++++++ hyperion/io/vad_rw_factory.py | 10 +- hyperion/torch/data/audio_dataset.py | 160 +++--- hyperion/utils/feature_set.py | 16 +- hyperion/utils/info_table.py | 27 +- hyperion/utils/segment_set.py | 27 + hyperion/utils/utt2info.py | 2 +- 53 files changed, 1525 insertions(+), 827 deletions(-) create mode 100644 hyperion/io/old_audio_reader.py diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v1/run_011_train_asr.sh index 99b0065e..81ebbeae 100755 --- a/egs/librispeech/v1/run_011_train_asr.sh +++ b/egs/librispeech/v1/run_011_train_asr.sh @@ -49,11 +49,11 @@ if [ $stage -le 1 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu --max-split-size-mb 512 \ train_wav2vec2rnn_transducer.py $nnet_type \ --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ - --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.recordings-file $train_dir/wav.scp \ --data.train.dataset.segments-file $train_dir/utt2spk \ --data.train.dataset.bpe-model $bpe_model \ --data.train.dataset.text-file $train_dir/text \ - --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.recordings-file $val_dir/wav.scp \ --data.val.dataset.segments-file $val_dir/utt2spk \ --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s1_dir $args \ @@ -75,11 +75,11 @@ if [ $stage -le 2 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ finetune_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ - --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.recordings-file $train_dir/wav.scp \ --data.train.dataset.segments-file $train_dir/utt2spk \ --data.train.dataset.bpe-model $bpe_model \ --data.train.dataset.text-file $train_dir/text \ - --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.recordings-file $val_dir/wav.scp \ --data.val.dataset.segments-file $val_dir/utt2spk \ --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s2_dir $args \ @@ -103,11 +103,11 @@ if [ $stage -le 3 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ finetune_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ - --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.recordings-file $train_dir/wav.scp \ --data.train.dataset.segments-file $train_dir/utt2spk \ --data.train.dataset.bpe-model $bpe_model \ --data.train.dataset.text-file $train_dir/text \ - --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.recordings-file $val_dir/wav.scp \ --data.val.dataset.segments-file $val_dir/utt2spk \ --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s3_dir $args \ diff --git a/egs/librispeech/v1/run_011_train_asr_old.sh b/egs/librispeech/v1/run_011_train_asr_old.sh index 3d0e6eb1..3c9f4f5b 100755 --- a/egs/librispeech/v1/run_011_train_asr_old.sh +++ b/egs/librispeech/v1/run_011_train_asr_old.sh @@ 
-49,11 +49,11 @@ if [ $stage -le 1 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu --max-split-size-mb 512 \ train_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ - --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.recordings-file $train_dir/wav.scp \ --data.train.dataset.segments-file $train_dir/utt2spk \ --data.train.dataset.bpe-model $bpe_model \ --data.train.dataset.text-file $train_dir/text \ - --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.recordings-file $val_dir/wav.scp \ --data.val.dataset.segments-file $val_dir/utt2spk \ --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s1_dir $args \ @@ -75,11 +75,11 @@ if [ $stage -le 2 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ finetune_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ - --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.recordings-file $train_dir/wav.scp \ --data.train.dataset.segments-file $train_dir/utt2spk \ --data.train.dataset.bpe-model $bpe_model \ --data.train.dataset.text-file $train_dir/text \ - --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.recordings-file $val_dir/wav.scp \ --data.val.dataset.segments-file $val_dir/utt2spk \ --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s2_dir $args \ @@ -103,11 +103,11 @@ if [ $stage -le 3 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ finetune_wav2vec2transducer.py $nnet_type \ --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ - --data.train.dataset.audio-file $train_dir/wav.scp \ + --data.train.dataset.recordings-file $train_dir/wav.scp \ --data.train.dataset.segments-file $train_dir/utt2spk \ --data.train.dataset.bpe-model $bpe_model \ --data.train.dataset.text-file $train_dir/text \ - --data.val.dataset.audio-file $val_dir/wav.scp \ + --data.val.dataset.recordings-file $val_dir/wav.scp \ --data.val.dataset.segments-file $val_dir/utt2spk \ --data.val.dataset.text-file $val_dir/text \ --trainer.exp-path $nnet_s3_dir $args \ diff --git a/egs/voxceleb/adv.v1.1/run_005_train_victim_xvector.sh b/egs/voxceleb/adv.v1.1/run_005_train_victim_xvector.sh index 37a91211..aa779902 100755 --- a/egs/voxceleb/adv.v1.1/run_005_train_victim_xvector.sh +++ b/egs/voxceleb/adv.v1.1/run_005_train_victim_xvector.sh @@ -44,11 +44,11 @@ if [ $stage -le 1 ]; then --gpu $ngpu $nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $nnet_type --cfg $nnet_cfg $nnet_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_dir \ diff --git a/egs/voxceleb/adv.v1.1/run_007_train_transfer_xvector.sh b/egs/voxceleb/adv.v1.1/run_007_train_transfer_xvector.sh index 70bab280..420ac59d 100755 --- a/egs/voxceleb/adv.v1.1/run_007_train_transfer_xvector.sh +++ b/egs/voxceleb/adv.v1.1/run_007_train_transfer_xvector.sh @@ -54,11 +54,11 @@ if [ $stage -le 1 ]; then --gpu 
$ngpu $nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $nnet_type --cfg $nnet_cfg $nnet_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_dir \ diff --git a/egs/voxceleb/adv.v1.1/run_008_adv_finetune_victim_xvector.sh b/egs/voxceleb/adv.v1.1/run_008_adv_finetune_victim_xvector.sh index 12f1e5fd..4f2c137b 100755 --- a/egs/voxceleb/adv.v1.1/run_008_adv_finetune_victim_xvector.sh +++ b/egs/voxceleb/adv.v1.1/run_008_adv_finetune_victim_xvector.sh @@ -53,11 +53,11 @@ if [ $stage -le 1 ]; then --gpu $ngpu $nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ adv_finetune_xvector_from_wav.py $nnet_type --cfg $nnet_cfg $nnet_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_dir \ diff --git a/egs/voxceleb/adv.v2/run_011_train_victim_xvector.sh b/egs/voxceleb/adv.v2/run_011_train_victim_xvector.sh index 971b88a3..a1acb1f6 100755 --- a/egs/voxceleb/adv.v2/run_011_train_victim_xvector.sh +++ b/egs/voxceleb/adv.v2/run_011_train_victim_xvector.sh @@ -40,11 +40,11 @@ if [ $stage -le 1 ]; then --gpu $ngpu $nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $nnet_type --cfg $nnet_cfg \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_dir \ diff --git a/egs/voxceleb/adv.v2/run_022_attack_type_classif_allknown.sh b/egs/voxceleb/adv.v2/run_022_attack_type_classif_allknown.sh index 71c0c89f..b453260f 100755 --- a/egs/voxceleb/adv.v2/run_022_attack_type_classif_allknown.sh +++ b/egs/voxceleb/adv.v2/run_022_attack_type_classif_allknown.sh @@ -46,11 +46,11 @@ if [ $stage -le 1 ]; then $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --data.train.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.train.dataset.recordings-file $list_dir/trainval_wav.scp \ 
--data.train.dataset.time-durs-file $list_dir/trainval_utt2dur \ --data.train.dataset.segments-file $list_dir/train_utt2attack \ --data.train.dataset.class-file $list_dir/class_file \ - --data.val.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.val.dataset.recordings-file $list_dir/trainval_wav.scp \ --data.val.dataset.time-durs-file $list_dir/trainval_utt2dur \ --data.val.dataset.segments-file $list_dir/val_utt2attack \ --trainer.exp-path $sign_nnet_dir $args \ diff --git a/egs/voxceleb/adv.v2/run_023_snr_classif_allknown.sh b/egs/voxceleb/adv.v2/run_023_snr_classif_allknown.sh index a928ae29..de811505 100755 --- a/egs/voxceleb/adv.v2/run_023_snr_classif_allknown.sh +++ b/egs/voxceleb/adv.v2/run_023_snr_classif_allknown.sh @@ -46,11 +46,11 @@ if [ $stage -le 1 ]; then $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --data.train.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.train.dataset.recordings-file $list_dir/trainval_wav.scp \ --data.train.dataset.time-durs-file $list_dir/trainval_utt2dur \ --data.train.dataset.segments-file $list_dir/train_utt2attack \ --data.train.dataset.class-file $list_dir/class_file \ - --data.val.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.val.dataset.recordings-file $list_dir/trainval_wav.scp \ --data.val.dataset.time-durs-file $list_dir/trainval_utt2dur \ --data.val.dataset.segments-file $list_dir/val_utt2attack \ --trainer.exp-path $sign_nnet_dir $args \ diff --git a/egs/voxceleb/adv.v2/run_024_threat_model_classif_allknown.sh b/egs/voxceleb/adv.v2/run_024_threat_model_classif_allknown.sh index bed225a3..aa17a1ae 100755 --- a/egs/voxceleb/adv.v2/run_024_threat_model_classif_allknown.sh +++ b/egs/voxceleb/adv.v2/run_024_threat_model_classif_allknown.sh @@ -48,11 +48,11 @@ if [ $stage -le 1 ]; then $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --data.train.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.train.dataset.recordings-file $list_dir/trainval_wav.scp \ --data.train.dataset.time-durs-file $list_dir/trainval_utt2dur \ --data.train.dataset.segments-file $list_dir/train_utt2attack \ --data.train.dataset.class-file $list_dir/class_file \ - --data.val.dataset.audio-file $list_dir/trainval_wav.scp \ + --data.val.dataset.recordings-file $list_dir/trainval_wav.scp \ --data.val.dataset.time-durs-file $list_dir/trainval_utt2dur \ --data.val.dataset.segments-file $list_dir/val_utt2attack \ --trainer.exp-path $sign_nnet_dir $args \ diff --git a/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh b/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh index 55cb8459..4ce703ba 100755 --- a/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh +++ b/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh @@ -49,11 +49,11 @@ if [ $stage -le 1 ]; then $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --data.train.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.train.dataset.recordings-file $list_someknown_dir/trainval_wav.scp \ --data.train.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ --data.train.dataset.segments-file 
$list_someknown_dir/train_utt2attack \ --data.train.dataset.class-file $list_someknown_dir/class_file \ - --data.val.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.val.dataset.recordings-file $list_someknown_dir/trainval_wav.scp \ --data.val.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ --data.val.dataset.segments-file $list_someknown_dir/val_utt2attack \ --trainer.exp-path $sign_nnet_dir $args \ diff --git a/egs/voxceleb/adv.v2/run_032_snr_verif.sh b/egs/voxceleb/adv.v2/run_032_snr_verif.sh index 3886c339..12d42c99 100755 --- a/egs/voxceleb/adv.v2/run_032_snr_verif.sh +++ b/egs/voxceleb/adv.v2/run_032_snr_verif.sh @@ -52,11 +52,11 @@ if [ $stage -le 1 ]; then $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --data.train.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.train.dataset.recordings-file $list_someknown_dir/trainval_wav.scp \ --data.train.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ --data.train.dataset.segments-file $list_someknown_dir/train_utt2attack \ --data.train.dataset.class-file $list_someknown_dir/class_file \ - --data.val.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.val.dataset.recordings-file $list_someknown_dir/trainval_wav.scp \ --data.val.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ --data.val.dataset.segments-file $list_someknown_dir/val_utt2attack \ --trainer.exp-path $sign_nnet_dir $args \ diff --git a/egs/voxceleb/adv.v2/run_033_threat_model_verif.sh b/egs/voxceleb/adv.v2/run_033_threat_model_verif.sh index 392bffb5..cbfaaa81 100755 --- a/egs/voxceleb/adv.v2/run_033_threat_model_verif.sh +++ b/egs/voxceleb/adv.v2/run_033_threat_model_verif.sh @@ -53,11 +53,11 @@ if [ $stage -le 1 ]; then $cuda_cmd --gpu $ngpu $sign_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $sign_nnet_command --cfg $sign_nnet_config \ - --data.train.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.train.dataset.recordings-file $list_someknown_dir/trainval_wav.scp \ --data.train.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ --data.train.dataset.segments-file $list_someknown_dir/train_utt2attack \ --data.train.dataset.class-file $list_someknown_dir/class_file \ - --data.val.dataset.audio-file $list_someknown_dir/trainval_wav.scp \ + --data.val.dataset.recordings-file $list_someknown_dir/trainval_wav.scp \ --data.val.dataset.time-durs-file $list_someknown_dir/trainval_utt2dur \ --data.val.dataset.segments-file $list_someknown_dir/val_utt2attack \ --trainer.exp-path $sign_nnet_dir $args \ diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh index 32c91da2..fdb3147f 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh @@ -26,8 +26,8 @@ nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth # back-end do_plda=false -do_snorm=false #true -do_qmf=false #true +do_snorm=true +do_qmf=true do_voxsrc22=true plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh index 62b02c28..7aa61f00 100644 --- 
a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh @@ -26,8 +26,8 @@ nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth # back-end do_plda=false -do_snorm=true -do_qmf=true +do_snorm=false #true +do_qmf=false #true do_voxsrc22=true plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34.v3.0.sh index c49936e0..b194d1bd 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34.v3.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_resnet34.v3.0.sh @@ -16,7 +16,7 @@ nnet_name=${feat_type}_resnet34.v3.0 nnet_s1_base_cfg=conf/train_resnet34_xvec_stage1_v3.0.yaml nnet_s1_name=$nnet_name.s1 -nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name.kk2 nnet_s1=$nnet_s1_dir/model_ep0035.pth nnet_s2_base_cfg=conf/train_resnet34_xvec_stage2_v3.0.yaml diff --git a/egs/voxceleb/v1.1/run_011_train_xvector.sh b/egs/voxceleb/v1.1/run_011_train_xvector.sh index a051c136..c8ab552e 100755 --- a/egs/voxceleb/v1.1/run_011_train_xvector.sh +++ b/egs/voxceleb/v1.1/run_011_train_xvector.sh @@ -44,11 +44,11 @@ if [ $stage -le 1 ]; then --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_xvector_from_wav.py $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_s1_dir \ @@ -67,11 +67,11 @@ if [ $stage -le 2 ]; then --gpu $ngpu $nnet_s2_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --in-model-file $nnet_s1 \ diff --git a/egs/voxceleb/v2/run_011_train_xvector.sh b/egs/voxceleb/v2/run_011_train_xvector.sh index 0eddb1a6..bc3b5420 100755 --- a/egs/voxceleb/v2/run_011_train_xvector.sh +++ b/egs/voxceleb/v2/run_011_train_xvector.sh @@ -47,11 +47,11 @@ if [ $stage -le 1 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ train_wav2vec2xvector.py $nnet_type \ --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files 
$list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --trainer.exp-path $nnet_s1_dir $args \ @@ -71,11 +71,11 @@ if [ $stage -le 2 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ finetune_wav2vec2xvector.py $nnet_type \ --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --in-model-file $nnet_s1 \ @@ -96,11 +96,11 @@ if [ $stage -le 3 ]; then hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ finetune_wav2vec2xvector.py $nnet_type \ --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ - --data.train.dataset.audio-file $list_dir/wav.scp \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ --data.train.dataset.time-durs-file $list_dir/utt2dur \ --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ - --data.val.dataset.audio-file $list_dir/wav.scp \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.time-durs-file $list_dir/utt2dur \ --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ --in-model-file $nnet_s2 \ diff --git a/hyperion/bin/apply_mvn_select_frames.py b/hyperion/bin/apply_mvn_select_frames.py index a2456dc9..f5a3ce15 100755 --- a/hyperion/bin/apply_mvn_select_frames.py +++ b/hyperion/bin/apply_mvn_select_frames.py @@ -10,8 +10,12 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF @@ -28,7 +32,6 @@ def process_feats( output_spec, vad_spec, write_num_frames_spec, - scp_sep, path_prefix, vad_path_prefix, part_idx, @@ -51,25 +54,16 @@ def process_feats( logging.info("opening output stream: %s" % (output_spec)) with DWF.create( - output_spec, - compress=compress, - compression_method=compression_method, - scp_sep=scp_sep, + output_spec, compress=compress, compression_method=compression_method, ) as writer: logging.info("opening input stream: %s" % (output_spec)) with DRF.create( - input_spec, - path_prefix=path_prefix, - scp_sep=scp_sep, - part_idx=part_idx, - num_parts=num_parts, + input_spec, path_prefix=path_prefix, part_idx=part_idx, num_parts=num_parts, ) as reader: if vad_spec is not None: logging.info("opening VAD stream: %s" % (vad_spec)) - v_reader = RDRF.create( - vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep - ) + v_reader = RDRF.create(vad_spec, path_prefix=vad_path_prefix,) while not reader.eof(): key, data = reader.read(1) @@ -112,28 +106,20 @@ def process_feats( parser.add_argument( "--write-num-frames", dest="write_num_frames_spec", default=None ) - parser.add_argument( - "--scp-sep", dest="scp_sep", default=" 
", help=("scp file field separator") - ) parser.add_argument( "--path-prefix", dest="path_prefix", default=None, help=("scp file_path prefix") ) parser.add_argument( - "--vad-path-prefix", - dest="vad_path_prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) parser.add_argument( "--part-idx", - dest="part_idx", type=int, default=1, help=("splits the list of files in num-parts and process part_idx"), ) parser.add_argument( "--num-parts", - dest="num_parts", type=int, default=1, help=("splits the list of files in num-parts and process part_idx"), @@ -141,14 +127,12 @@ def process_feats( parser.add_argument( "--compress", - dest="compress", default=False, action="store_true", help="Lossy compress the features", ) parser.add_argument( "--compression-method", - dest="compression_method", default="auto", choices=compression_methods, help=( diff --git a/hyperion/bin/compute_energy_vad.py b/hyperion/bin/compute_energy_vad.py index 15d74f3a..058f982a 100755 --- a/hyperion/bin/compute_energy_vad.py +++ b/hyperion/bin/compute_energy_vad.py @@ -9,8 +9,12 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF @@ -26,14 +30,14 @@ def compute_vad(input_path, output_path, write_num_frames, **kwargs): input_args = AR.filter_args(**kwargs) reader = AR(input_path, **input_args) - writer = DWF.create(output_path, scp_sep=" ") + writer = DWF.create(output_path) if write_num_frames is not None: f_num_frames = open(write_num_frames, "w") for data in reader: key, x, fs = data - logging.info("Extracting VAD for %s" % (key)) + logging.info("Extracting VAD for %s", key) t1 = time.time() y = vad.compute(x) dt = (time.time() - t1) * 1000 @@ -41,8 +45,13 @@ def compute_vad(input_path, output_path, write_num_frames, **kwargs): num_speech_frames = np.sum(y) prob_speech = num_speech_frames / y.shape[0] * 100 logging.info( - "Extracted VAD for %s detected %d/%d (%f %%) speech frames, elapsed-time=%.2f ms. real-time-factor=%.2f" - % (key, num_speech_frames, y.shape[0], prob_speech, dt, rtf) + "Extracted VAD for %s detected %d/%d (%f %%) speech frames, elapsed-time=%.2f ms. 
real-time-factor=%.2f", + key, + num_speech_frames, + y.shape[0], + prob_speech, + dt, + rtf, ) writer.write([key], [y]) if write_num_frames is not None: diff --git a/hyperion/bin/compute_mfcc_feats.py b/hyperion/bin/compute_mfcc_feats.py index a83f95d1..ca6e26f7 100755 --- a/hyperion/bin/compute_mfcc_feats.py +++ b/hyperion/bin/compute_mfcc_feats.py @@ -9,8 +9,12 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF @@ -35,10 +39,7 @@ def compute_mfcc_feats( reader = DRF.create(input_path, **input_args) writer = DWF.create( - output_path, - scp_sep=" ", - compress=compress, - compression_method=compression_method, + output_path, compress=compress, compression_method=compression_method, ) if write_num_frames is not None: @@ -55,8 +56,11 @@ def compute_mfcc_feats( dt = (time.time() - t1) * 1000 rtf = dt / (mfcc.frame_shift * y.shape[0]) logging.info( - "Extracted MFCC for %s num-frames=%d elapsed-time=%.2f ms. real-time-factor=%.2f" - % (key, y.shape[0], dt, rtf) + "Extracted MFCC for %s num-frames=%d elapsed-time=%.2f ms. real-time-factor=%.2f", + key, + y.shape[0], + dt, + rtf, ) writer.write([key], [y]) diff --git a/hyperion/bin/decode_wav2transducer.py b/hyperion/bin/decode_wav2transducer.py index 81fa8803..c7de38f1 100755 --- a/hyperion/bin/decode_wav2transducer.py +++ b/hyperion/bin/decode_wav2transducer.py @@ -13,8 +13,12 @@ import numpy as np import pandas as pd import sentencepiece as spm -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -23,8 +27,7 @@ from hyperion.io import SequentialAudioReader as AR from hyperion.np.augment import SpeechAugment from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.models.wav2transducer.beam_search import (beam_search, - greedy_search) +from hyperion.torch.models.wav2transducer.beam_search import beam_search, greedy_search from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info @@ -48,10 +51,11 @@ def load_model(model_path, device): def decode_one_batch( - model: nn.Module, - sp: spm.SentencePieceProcessor, - x: torch.Tensor, - decoding_method="beam_search") -> Dict[str, List[List[str]]]: + model: nn.Module, + sp: spm.SentencePieceProcessor, + x: torch.Tensor, + decoding_method="beam_search", +) -> Dict[str, List[List[str]]]: """Decode one batch and return the result in a dict. The dict has the following format: - key: It indicates the setting used for decoding. For example, @@ -77,7 +81,7 @@ def decode_one_batch( the returned dict. 
""" device = model.device - feature = x #batch["inputs"] + feature = x # batch["inputs"] assert x.shape[0] == 1 assert feature.ndim == 2 @@ -87,7 +91,8 @@ def decode_one_batch( feature_lens = torch.Tensor([x.shape[1]]).int() encoder_out, hid_feats, encoder_out_lens = model.forward_feats( - x=feature, x_lengths=feature_lens) + x=feature, x_lengths=feature_lens + ) hyps = [] batch_size = encoder_out.size(0) @@ -114,8 +119,9 @@ def decode_one_batch( return hyps[0] -def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, - use_gpu, **kwargs): +def decode_transducer( + input_spec, output_spec, model_path, bpe_model, use_gpu, **kwargs +): device = init_device(use_gpu) model = load_model(model_path, device) @@ -129,10 +135,10 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, ar_args = AR.filter_args(**kwargs) logging.info("opening output: %s" % (output_spec)) - # with DWF.create(output_spec, scp_sep=scp_sep) as writer: with open(output_spec, "w") as writer: - logging.info("opening input stream: {} with args={}".format( - input_spec, ar_args)) + logging.info( + "opening input stream: {} with args={}".format(input_spec, ar_args) + ) with AR(input_spec, **ar_args) as reader: while not reader.eof(): t1 = time.time() @@ -147,65 +153,69 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, logging.info("processing utt %s" % (key0)) for aug_id in range(num_augs): t3 = time.time() - key, x = key0, x0 #augment(key0, x0, augmenter, aug_df, aug_id) + key, x = key0, x0 # augment(key0, x0, augmenter, aug_df, aug_id) t4 = time.time() with torch.no_grad(): x = torch.tensor( - x[None, :], - dtype=torch.get_default_dtype()).to(device) + x[None, :], dtype=torch.get_default_dtype() + ).to(device) t5 = time.time() tot_frames = x.shape[1] logging.info( - "utt %s detected %d/%d (%.2f %%) speech frames" % ( + "utt %s detected %d/%d (%.2f %%) speech frames" + % ( key, x.shape[1], tot_frames, x.shape[1] / tot_frames * 100, - )) + ) + ) t6 = time.time() if x.shape[1] == 0: - y = np.zeros((model.embed_dim, ), - dtype=float_cpu()) + y = np.zeros((model.embed_dim,), dtype=float_cpu()) else: y = decode_one_batch(model=model, sp=sp, x=x) t7 = time.time() - writer.write(key + ' ' + ' '.join(y) + "\n") + writer.write(key + " " + " ".join(y) + "\n") t8 = time.time() read_time = t2 - t1 tot_time = read_time + t8 - t3 logging.info( - ("utt %s total-time=%.3f read-time=%.3f " - "aug-time=%.3f feat-time=%.3f " - "vad-time=%.3f embed-time=%.3f write-time=%.3f " - "rt-factor=%.2f") % ( - key, - tot_time, - read_time, - t4 - t3, - t5 - t4, - t6 - t5, - t7 - t6, - t8 - t7, - x0.shape[0] / fs[0] / tot_time, - )) + ( + "utt %s total-time=%.3f read-time=%.3f " + "aug-time=%.3f feat-time=%.3f " + "vad-time=%.3f embed-time=%.3f write-time=%.3f " + "rt-factor=%.2f" + ) + % ( + key, + tot_time, + read_time, + t4 - t3, + t5 - t4, + t6 - t5, + t7 - t6, + t8 - t7, + x0.shape[0] / fs[0] / tot_time, + ) + ) if __name__ == "__main__": parser = ArgumentParser( - description=("Extracts x-vectors from waveform computing " - "acoustic features on the fly")) + description=( + "Extracts x-vectors from waveform computing " "acoustic features on the fly" + ) + ) parser.add_argument("--cfg", action=ActionConfigFile) parser.add_argument("--input", dest="input_spec", required=True) - parser.add_argument("--scp-sep", - default=" ", - help=("scp file field separator")) AR.add_class_args(parser) @@ -216,16 +226,12 @@ def decode_transducer(input_spec, output_spec, scp_sep, 
model_path, bpe_model, parser.add_argument("--bpe-model", required=True) parser.add_argument("--output", dest="output_spec", required=True) - parser.add_argument("--use-gpu", - default=False, - action="store_true", - help="extract xvectors in gpu") - parser.add_argument("-v", - "--verbose", - dest="verbose", - default=1, - choices=[0, 1, 2, 3], - type=int) + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) args = parser.parse_args() config_logger(args.verbose) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py index bb01162f..10ea491c 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py @@ -10,8 +10,12 @@ import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -188,7 +192,7 @@ def eval_cosine_scoring( attack = AttackFactory.create(model, **attack_args) if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) scores = np.zeros((key.num_models, key.num_tests), dtype="float32") attack_stats = pd.DataFrame( @@ -327,9 +331,9 @@ def eval_cosine_scoring( ) parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--v-file", dest="v_file", required=True) - parser.add_argument("--key-file", dest="key_file", default=None) - parser.add_argument("--enroll-file", dest="enroll_file", required=True) + parser.add_argument("--v-file", required=True) + parser.add_argument("--key-file", default=None) + parser.add_argument("--enroll-file", required=True) parser.add_argument("--test-wav-file", required=True) AR.add_class_args(parser) @@ -337,10 +341,7 @@ def eval_cosine_scoring( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", - dest="vad_path_prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py index c483ce39..a6f535b3 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py @@ -7,13 +7,18 @@ import os import sys import time + # [Added Sonal May21] from pathlib import Path import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -243,7 +248,7 @@ def eval_cosine_scoring_wavegan( attack = AttackFactory.create(model, **attack_args) if vad_spec is not None: logging.info("opening VAD stream: %s" % (vad_spec)) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) 
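The scp_sep removal above repeats across all of the eval and extract binaries in this patch: the reader/writer factories now always assume whitespace-separated scp files. A minimal sketch of the resulting call sites, assuming hypothetical specifier strings and the VRF/DWF aliases these scripts import:

    from hyperion.io import DataWriterFactory as DWF
    from hyperion.io import VADReaderFactory as VRF

    # before this patch: VRF.create(vad_spec, path_prefix=prefix, scp_sep=" ")
    v_reader = VRF.create("scp:data/vad.scp", path_prefix="data")
    # before this patch: DWF.create(output_spec, scp_sep=" ", compress=False)
    writer = DWF.create("ark,scp:feats.ark,feats.scp", compress=False)
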
scores = np.zeros((key.num_models, key.num_tests), dtype="float32") attack_stats = pd.DataFrame( @@ -384,9 +389,9 @@ def eval_cosine_scoring_wavegan( ) parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--v-file", dest="v_file", required=True) - parser.add_argument("--key-file", dest="key_file", default=None) - parser.add_argument("--enroll-file", dest="enroll_file", required=True) + parser.add_argument("--v-file", required=True) + parser.add_argument("--key-file", default=None) + parser.add_argument("--enroll-file", required=True) parser.add_argument("--test-wav-file", required=True) AR.add_class_args(parser) @@ -394,10 +399,7 @@ def eval_cosine_scoring_wavegan( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", - dest="vad_path_prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py index fba182c4..5ba42477 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py @@ -13,8 +13,12 @@ import pandas as pd from art.classifiers import PyTorchClassifier from art.estimators.classification import PyTorchClassifier -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -25,8 +29,9 @@ from hyperion.io import VADReaderFactory as VRF from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.adv_attacks.art_attack_factory import \ - ARTAttackFactory as AttackFactory +from hyperion.torch.adv_attacks.art_attack_factory import ( + ARTAttackFactory as AttackFactory, +) from hyperion.torch.layers import LinBinCalibrator as Calibrator from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device @@ -195,7 +200,7 @@ def eval_cosine_scoring( if vad_spec is not None: logging.info("opening VAD stream: %s" % (vad_spec)) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) scores = np.zeros((key.num_models, key.num_tests), dtype="float32") attack_stats = pd.DataFrame( @@ -354,9 +359,9 @@ def eval_cosine_scoring( ) parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--v-file", dest="v_file", required=True) - parser.add_argument("--key-file", dest="key_file", default=None) - parser.add_argument("--enroll-file", dest="enroll_file", required=True) + parser.add_argument("--v-file", required=True) + parser.add_argument("--key-file", default=None) + parser.add_argument("--enroll-file", required=True) parser.add_argument("--test-wav-file", required=True) AR.add_class_args(parser) @@ -364,10 +369,7 @@ def eval_cosine_scoring( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", - dest="vad_path_prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) diff --git 
a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
index 3cfde93e..c3732bd3 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py
@@ -10,8 +10,12 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ArgumentParser,
+    namespace_to_dict,
+)
 
 import torch
 import torch.nn as nn
@@ -122,7 +126,7 @@ def eval_cosine_scoring(
 
     if vad_spec is not None:
         logging.info("opening VAD stream: %s" % (vad_spec))
-        v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ")
+        v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix)
 
     scores = np.zeros((ndx.num_models, ndx.num_tests), dtype="float32")
     with torch.no_grad():
@@ -217,10 +221,7 @@ def eval_cosine_scoring(
     parser.add_argument("--vad", dest="vad_spec", default=None)
     parser.add_argument(
-        "--vad-path-prefix",
-        dest="vad_path_prefix",
-        default=None,
-        help=("scp file_path prefix for vad"),
+        "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"),
     )
 
     parser.add_argument("--model-path", required=True)
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
index 44bdf59d..c00cf286 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py
@@ -205,7 +205,7 @@ def eval_cosine_scoring(
 
     if vad_spec is not None:
         logging.info("opening VAD stream: %s", vad_spec)
-        v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ")
+        v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix)
 
     scores = np.zeros((key.num_models, key.num_tests), dtype="float32")
     attack_stats = pd.DataFrame(
@@ -361,7 +361,6 @@ def eval_cosine_scoring(
     parser.add_argument("--vad", dest="vad_spec", default=None)
     parser.add_argument(
         "--vad-path-prefix",
-        dest="vad_path_prefix",
         default=None,
         help=("scp file_path prefix for vad"),
     )
diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
index 676575fd..4f2b82ab 100755
--- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
+++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py
@@ -13,8 +13,12 @@
 import pandas as pd
 from art.classifiers import PyTorchClassifier
 from art.estimators.classification import PyTorchClassifier
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ArgumentParser,
+    namespace_to_dict,
+)
 
 import torch
 import torch.nn as nn
@@ -25,8 +29,9 @@
 from hyperion.io import VADReaderFactory as VRF
 from hyperion.np.classifiers import BinaryLogisticRegression as LR
 from hyperion.torch import TorchModelLoader as TML
-from hyperion.torch.adv_attacks.art_attack_factory import \
-    ARTAttackFactory as AttackFactory
+from hyperion.torch.adv_attacks.art_attack_factory import (
+    ARTAttackFactory as AttackFactory,
+)
 from hyperion.torch.layers import LinBinCalibrator as Calibrator
 from hyperion.torch.narchs import AudioFeatsMVN as AF
 from hyperion.torch.utils import open_device
@@ -213,7 +218,7 @@ def eval_cosine_scoring(
 
     if vad_spec is not None:
         logging.info("opening VAD stream: %s" %
(vad_spec)) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) scores = np.zeros((key.num_models, key.num_tests), dtype="float32") attack_stats = pd.DataFrame( @@ -386,10 +391,7 @@ def eval_cosine_scoring( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", - dest="vad_path_prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) diff --git a/hyperion/bin/eval_xvec_logits_from_wav.py b/hyperion/bin/eval_xvec_logits_from_wav.py index da6389fb..2f5cf3da 100755 --- a/hyperion/bin/eval_xvec_logits_from_wav.py +++ b/hyperion/bin/eval_xvec_logits_from_wav.py @@ -11,8 +11,12 @@ import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -93,7 +97,6 @@ def eval_xvec( output_spec, vad_spec, write_num_frames_spec, - scp_sep, vad_path_prefix, model_path, chunk_length, @@ -125,8 +128,8 @@ def eval_xvec( num_augs = 1 ar_args = AR.filter_args(**kwargs) - logging.info("opening output stream: %s" % (output_spec)) - with DWF.create(output_spec, scp_sep=scp_sep) as writer: + logging.info("opening output stream: %s", output_spec) + with DWF.create(output_spec) as writer: logging.info( "opening input stream: {} with args={}".format(input_spec, ar_args) @@ -135,9 +138,7 @@ def eval_xvec( if vad_spec is not None: logging.info("opening VAD stream: %s" % (vad_spec)) - v_reader = VRF.create( - vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep - ) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) while not reader.eof(): t1 = time.time() @@ -243,7 +244,7 @@ def eval_xvec( parser.add_argument( "--write-num-frames", dest="write_num_frames_spec", default=None ) - parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) + parser.add_argument( "--vad-path-prefix", default=None, help=("scp file_path prefix for vad") ) diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py index 37d6a2a6..c4c4676f 100755 --- a/hyperion/bin/extract_wav2vec2xvectors.py +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -12,8 +12,12 @@ import numpy as np import pandas as pd import torchaudio.transforms as tat -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -44,6 +48,7 @@ def get_resampler(source_fs, target_fs): resamplers[source_fs] = resampler_f return resampler_f + resamplers = {} @@ -122,7 +127,6 @@ def extract_xvectors( output_spec, vad_spec, write_speech_dur, - scp_sep, vad_path_prefix, model_path, hf_chunk_length, @@ -157,16 +161,14 @@ def extract_xvectors( ar_args = AR.filter_args(**kwargs) ar_args["wav_scale"] = 1.0 logging.info("opening output stream: %s", output_spec) - with DWF.create(output_spec, scp_sep=scp_sep) as writer: + with DWF.create(output_spec) as writer: logging.info(f"opening input stream: {input_spec} with args={ar_args}") with AR(input_spec, **ar_args) as reader: 
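All of these binaries share the same streaming skeleton around SequentialAudioReader. A minimal sketch of that loop, assuming a hypothetical input list and that read(1) returns parallel key/waveform/sample-rate lists, as the loops in these scripts suggest:

    from hyperion.io import SequentialAudioReader as AR

    with AR("data/wav.scp") as reader:  # hypothetical recordings list
        while not reader.eof():
            keys, xs, fs = reader.read(1)  # one recording per call
            if not keys:
                break
            key, x = keys[0], xs[0]
            # VAD selection / feature extraction / embedding extraction go here
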
if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create( - vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep - ) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) while not reader.eof(): t1 = time.time() @@ -283,7 +285,6 @@ def extract_xvectors( parser.add_argument("--input", dest="input_spec", required=True) parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument("--write-speech-dur", default=None) - parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) parser.add_argument( "--vad-path-prefix", default=None, help=("scp file_path prefix for vad") ) diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py index addabbcf..1da1ac05 100755 --- a/hyperion/bin/extract_xvectors_from_wav.py +++ b/hyperion/bin/extract_xvectors_from_wav.py @@ -11,8 +11,12 @@ import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -95,7 +99,6 @@ def extract_xvectors( output_spec, vad_spec, write_num_frames_spec, - scp_sep, vad_path_prefix, model_path, chunk_length, @@ -129,7 +132,7 @@ def extract_xvectors( ar_args = AR.filter_args(**kwargs) logging.info("opening output stream: %s", output_spec) - with DWF.create(output_spec, scp_sep=scp_sep) as writer: + with DWF.create(output_spec) as writer: logging.info( "opening input stream: {} with args={}".format(input_spec, ar_args) @@ -138,9 +141,7 @@ def extract_xvectors( if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create( - vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep - ) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) while not reader.eof(): t1 = time.time() @@ -249,7 +250,6 @@ def extract_xvectors( parser.add_argument( "--write-num-frames", dest="write_num_frames_spec", default=None ) - parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) parser.add_argument( "--vad-path-prefix", default=None, help=("scp file_path prefix for vad") ) diff --git a/hyperion/bin/extract_xvectors_slidwin_from_feats.py b/hyperion/bin/extract_xvectors_slidwin_from_feats.py index e3d2fcbb..eaf0a5cc 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_feats.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_feats.py @@ -11,8 +11,12 @@ import numpy as np import yaml -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -78,7 +82,7 @@ def extract_xvectors( model = load_model(model_path, device) if write_timestamps_spec is not None: - time_writer = DWF.create(write_timestamps_spec, scp_sep=" ") + time_writer = DWF.create(write_timestamps_spec) dr_args = DRF.filter_args(**kwargs) logging.info("opening output stream: %s" % (output_spec)) @@ -205,10 +209,7 @@ def extract_xvectors( ) parser.add_argument("--slidwin-params-path", default=None) parser.add_argument( - "--vad-path-prefix", - dest="vad_path_prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) 
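The argument-parsing boilerplate being reformatted here is identical across the binaries. A condensed sketch of the shared jsonargparse pattern, with hypothetical option names standing in for each script's specifics:

    from jsonargparse import ActionConfigFile, ArgumentParser, namespace_to_dict

    from hyperion.hyp_defs import config_logger

    parser = ArgumentParser(description="hyperion-style binary")
    parser.add_argument("--cfg", action=ActionConfigFile)  # optional YAML config
    parser.add_argument("--input", dest="input_spec", required=True)
    parser.add_argument(
        "--vad-path-prefix", default=None, help="scp file_path prefix for vad"
    )
    parser.add_argument("-v", "--verbose", default=1, choices=[0, 1, 2, 3], type=int)
    args = parser.parse_args()
    config_logger(args.verbose)
    kwargs = namespace_to_dict(args)  # forwarded into the worker function
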
MVN.add_class_args(parser, prefix="mvn") diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py index 2b1bba3b..a31bd614 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_wav.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py @@ -12,8 +12,12 @@ import numpy as np import pandas as pd import yaml -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -83,7 +87,6 @@ def extract_xvectors( vad_spec, write_timestamps_spec, slidwin_params_path, - scp_sep, vad_path_prefix, model_path, chunk_length, @@ -109,7 +112,7 @@ def extract_xvectors( feat_snip_edges = feat_args["snip_edges"] if write_timestamps_spec is not None: - time_writer = DWF.create(write_timestamps_spec, scp_sep=scp_sep) + time_writer = DWF.create(write_timestamps_spec) if aug_cfg is not None: augmenter = SpeechAugment.create(aug_cfg, rng=rng) @@ -121,7 +124,7 @@ def extract_xvectors( ar_args = AR.filter_args(**kwargs) logging.info("opening output stream: %s", output_spec) - with DWF.create(output_spec, scp_sep=scp_sep) as writer: + with DWF.create(output_spec) as writer: logging.info( "opening input stream: {} with args={}".format(input_spec, ar_args) @@ -130,9 +133,7 @@ def extract_xvectors( if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create( - vad_spec, path_prefix=vad_path_prefix, scp_sep=scp_sep - ) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) while not reader.eof(): t1 = time.time() @@ -275,7 +276,6 @@ def extract_xvectors( ) parser.add_argument("--slidwin-params-path", default=None) - parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) parser.add_argument( "--vad-path-prefix", default=None, help=("scp file_path prefix for vad") ) diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py index a058893d..8c6f38a6 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_classif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py @@ -12,8 +12,12 @@ import numpy as np import pandas as pd import yaml -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch import torch.nn as nn @@ -168,7 +172,7 @@ def generate_attacks( if vad_spec is not None: logging.info("opening VAD stream: %s" % (vad_spec)) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ") + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) keys, class_names, class_ids = read_utt_list( list_file, class2int_file, part_idx, num_parts @@ -329,10 +333,7 @@ def generate_attacks( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", - dest="vad_path_prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) diff --git a/hyperion/bin/generate_adv_attacks_xvector_verif.py b/hyperion/bin/generate_adv_attacks_xvector_verif.py index 83375cb6..fbd3a5fb 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_verif.py +++ 
b/hyperion/bin/generate_adv_attacks_xvector_verif.py
@@ -12,8 +12,12 @@
 import numpy as np
 import pandas as pd
 import yaml
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ArgumentParser,
+    namespace_to_dict,
+)
 
 import torch
 import torch.nn as nn
@@ -197,7 +201,7 @@ def generate_attacks(
 
     if vad_spec is not None:
         logging.info("opening VAD stream: %s", vad_spec)
-        v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix, scp_sep=" ")
+        v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix)
 
     attack_factory = init_attack_factory(**kwargs)
     attacks_info = {}
diff --git a/hyperion/bin/pack_wav_rirs.py b/hyperion/bin/pack_wav_rirs.py
index dccf58da..4aafa075 100755
--- a/hyperion/bin/pack_wav_rirs.py
+++ b/hyperion/bin/pack_wav_rirs.py
@@ -10,8 +10,12 @@
 import time
 
 import numpy as np
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ArgumentParser,
+    namespace_to_dict,
+)
 
 from hyperion.hyp_defs import config_logger
 from hyperion.io import DataWriterFactory as DWF
@@ -20,7 +24,7 @@
 
 
 def pack_wav_rirs(input_path, output_spec, **kwargs):
 
-    writer = DWF.create(output_spec, scp_sep=" ", compress=False)
+    writer = DWF.create(output_spec, compress=False)
     t1 = time.time()
     with AR(input_path, wav_scale=1) as reader:
         for data in reader:
diff --git a/hyperion/data_prep/data_prep.py b/hyperion/data_prep/data_prep.py
index fb6fc6c5..19420761 100644
--- a/hyperion/data_prep/data_prep.py
+++ b/hyperion/data_prep/data_prep.py
@@ -50,12 +50,12 @@ def dataset_name():
         raise NotImplementedError()
 
     @staticmethod
-    def _get_recording_duration(scp, i, n):
+    def _get_recording_duration(recordings, i, n):
         from ..io import SequentialAudioReader as AR
 
         durations = []
         fss = []
-        with AR(scp, part_idx=i, num_parts=n) as reader:
+        with AR(recordings, part_idx=i + 1, num_parts=n) as reader:
             for data in reader:
                 key, x, fs = data
                 duration = x.shape[0] / fs
@@ -69,13 +69,13 @@ def get_recording_duration(self, recording_set):
         import itertools
 
         from ..utils import SCPList
 
-        scp = SCPList(recording_set["id"].values, recording_set["storage_path"].values)
+        # scp = SCPList(recording_set["id"].values, recording_set["storage_path"].values)
         futures = []
         logging.info("submitting threads...")
         with ThreadPoolExecutor(max_workers=self.num_threads) as pool:
             for i in tqdm(range(self.num_threads)):
                 future = pool.submit(
-                    DataPrep._get_recording_duration, scp, i, self.num_threads
+                    DataPrep._get_recording_duration, recording_set, i, self.num_threads
                 )
                 futures.append(future)
diff --git a/hyperion/io/ark_data_reader.py b/hyperion/io/ark_data_reader.py
index 3919ddfa..6cf22d5f 100644
--- a/hyperion/io/ark_data_reader.py
+++ b/hyperion/io/ark_data_reader.py
@@ -4,15 +4,15 @@
 """
 
 import multiprocessing as threading
-import sys
+from typing import Union, Optional, List, Callable, Tuple
 
 import numpy as np
 
 from ..hyp_defs import float_cpu
-from ..utils.kaldi_io_funcs import (init_kaldi_input_stream, is_token, peek,
-                                    read_token)
+from ..utils.kaldi_io_funcs import init_kaldi_input_stream, is_token, peek, read_token
 from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix
-from ..utils.scp_list import SCPList
+
+from ..utils import FeatureSet, PathLike
 from .data_reader import RandomAccessDataReader, SequentialDataReader
 
 
@@ -27,10 +27,9 @@ class SequentialArkDataReader(SequentialDataReader):
         part_idx: It splits the
input into num_parts and writes only part part_idx, where part_idx=1,...,num_parts. num_parts: Number of parts to split the input data. - split_by_key: If True, all the elements with the same key go to the same part. """ - def __init__(self, file_path, **kwargs): + def __init__(self, file_path: PathLike, **kwargs): super().__init__(file_path, **kwargs) self.f = None self.lock = threading.Lock() @@ -42,7 +41,7 @@ def close(self): self.f.close() self.f = None - def _seek(self, offset): + def _seek(self, offset: int): """Moves the pointer of the input file. Args: @@ -52,7 +51,7 @@ def _seek(self, offset): delta = offset - cur_pos self.f.seek(delta, 1) - def _open_archive(self, file_path, offset=0): + def _open_archive(self, file_path: PathLike, offset: int = 0): """Opens the current file if it is not open and moves the file pointer to a given position. Closes previous open Ark files. @@ -69,7 +68,7 @@ def _open_archive(self, file_path, offset=0): if offset > 0: self._seek(offset) - def read_num_rows(self, num_records=0, assert_same_dim=True): + def read_num_rows(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the number of rows in the feature matrices of the dataset. Args: @@ -86,7 +85,7 @@ def read_num_rows(self, num_records=0, assert_same_dim=True): num_rows = np.array([s[0] if len(s) == 2 else 1 for s in shapes], dtype=int) return keys, num_rows - def read_dims(self, num_records=0, assert_same_dim=True): + def read_dims(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the number of columns in the feature matrices of the dataset. Args: @@ -120,10 +119,8 @@ class SequentialArkFileDataReader(SequentialArkDataReader): split_by_key: If True, all the elements with the same key go to the same part. """ - def __init__(self, file_path, **kwargs): - super(SequentialArkFileDataReader, self).__init__( - file_path, permissive=False, **kwargs - ) + def __init__(self, file_path: PathLike, **kwargs): + super().__init__(file_path, permissive=False, **kwargs) self._open_archive(self.file_path) self._eof = False self._keys = None @@ -151,7 +148,7 @@ def keys(self): return self._keys - def read_shapes(self, num_records=0, assert_same_dim=True): + def read_shapes(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the shapes in the feature matrices of the dataset. Args: @@ -188,7 +185,13 @@ def read_shapes(self, num_records=0, assert_same_dim=True): return keys, shapes - def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0): + def read( + self, + num_records: int = 0, + squeeze: bool = False, + row_offset: int = 0, + num_rows: int = 0, + ): """Reads next num_records feature matrices/vectors. Args: @@ -206,12 +209,8 @@ def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0): key: List of recording names. data: List of feature matrices/vectors or 3D/2D numpy array. """ - row_offset_is_list = isinstance(row_offset, list) or isinstance( - row_offset, np.ndarray - ) - num_rows_is_list = isinstance(num_rows, list) or isinstance( - num_rows, np.ndarray - ) + row_offset_is_list = isinstance(row_offset, (list, np.ndarray)) + num_rows_is_list = isinstance(num_rows, (list, np.ndarray)) keys = [] data = [] count = 0 @@ -264,28 +263,25 @@ class SequentialArkScriptDataReader(SequentialArkDataReader): part_idx: It splits the input into num_parts and writes only part part_idx, where part_idx=1,...,num_parts. num_parts: Number of parts to split the input data. - split_by_key: If True, all the elements with the same key go to the same part. 
""" - def __init__(self, file_path, path_prefix=None, scp_sep=" ", **kwargs): - super(SequentialArkScriptDataReader, self).__init__( - file_path, permissive=False, **kwargs - ) - self.scp = SCPList.load(self.file_path, sep=scp_sep) + def __init__( + self, file_path: PathLike, path_prefix: Optional[PathLike] = None, **kwargs + ): + super().__init__(file_path, permissive=False, **kwargs) + self.feature_set = FeatureSet.load(self.file_path, sep=scp_sep) if self.num_parts > 1: - self.scp = self.scp.split( - self.part_idx, self.num_parts, group_by_key=self.split_by_key - ) + self.feature_set = self.feature_set.split(self.part_idx, self.num_parts) if path_prefix is not None: - self.scp.add_prefix_to_filepath(path_prefix) + self.feature_set.add_prefix_to_storage_path(path_prefix) self.cur_item = 0 @property def keys(self): - return self.scp.key + return self.feature_set["id"] def reset(self): """Closes all the open Ark files and puts the read pointer pointing @@ -295,9 +291,9 @@ def reset(self): def eof(self): """Returns True when all the elements in the scp have been read.""" - return self.cur_item == len(self.scp) + return self.cur_item == len(self.feature_set) - def read_shapes(self, num_records=0, assert_same_dim=True): + def read_shapes(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the shapes in the feature matrices of the dataset. Args: @@ -318,15 +314,18 @@ def read_shapes(self, num_records=0, assert_same_dim=True): for i in range(num_records): if self.eof(): break - key, file_path, offset, range_spec = self.scp[self.cur_item] - - row_offset_i, num_rows_i = self._combine_ranges(range_spec, 0, 0) + feature_spec = self.feature_set.iloc[self.cur_item] + key = feature_spec["id"] + offset = feature_spec["storage_byte"] + file_path = feature_spec["storage_path"] self._open_archive(file_path, offset) binary = init_kaldi_input_stream(self.f) shape_i = KaldiMatrix.read_shape(self.f, binary, sequential_mode=True) - - shape_i = self._apply_range_to_shape(shape_i, row_offset_i, num_rows_i) + if "start" in feature_spec and "num_frames" in feature_spec: + range_spec = [feature_spec["start"], feature_spec["num_frames"]] + row_offset_i, num_rows_i = self._combine_ranges(range_spec, 0, 0) + shape_i = self._apply_range_to_shape(shape_i, row_offset_i, num_rows_i) keys.append(key) shapes.append(shape_i) @@ -338,7 +337,13 @@ def read_shapes(self, num_records=0, assert_same_dim=True): return keys, shapes - def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0): + def read( + self, + num_records: int = 0, + squeeze: bool = False, + row_offset: int = 0, + num_rows: int = 0, + ): """Reads next num_records feature matrices/vectors. 
Args:
@@ -359,12 +364,8 @@ def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0):
 
         if num_records == 0:
-            num_records = len(self.scp) - self.cur_item
+            num_records = len(self.feature_set) - self.cur_item
 
-        row_offset_is_list = isinstance(row_offset, list) or isinstance(
-            row_offset, np.ndarray
-        )
-        num_rows_is_list = isinstance(num_rows, list) or isinstance(
-            num_rows, np.ndarray
-        )
+        row_offset_is_list = isinstance(row_offset, (list, np.ndarray))
+        num_rows_is_list = isinstance(num_rows, (list, np.ndarray))
 
         keys = []
         data = []
@@ -373,7 +374,14 @@
             if self.eof():
                 break
 
-            key, file_path, offset, range_spec = self.scp[self.cur_item]
+            feature_spec = self.feature_set.iloc[self.cur_item]
+            key = feature_spec["id"]
+            offset = feature_spec["storage_byte"]
+            file_path = feature_spec["storage_path"]
+            if "start" in feature_spec and "num_frames" in feature_spec:
+                range_spec = [feature_spec["start"], feature_spec["num_frames"]]
+            else:
+                range_spec = None
 
             row_offset_i = row_offset[i] if row_offset_is_list else row_offset
             num_rows_i = num_rows[i] if num_rows_is_list else num_rows
@@ -417,21 +425,24 @@ class RandomAccessArkDataReader(RandomAccessDataReader):
             features after reading them from disk.
         permissive: If True, if the data that we want to read is not in the file
             it returns an empty matrix, if False it raises an exception.
-        scp_sep: Separator for scp files (default ' ').
     """
 
     def __init__(
-        self, file_path, path_prefix=None, transform=None, permissive=False, scp_sep=" "
+        self,
+        file_path: PathLike,
+        path_prefix: Optional[PathLike] = None,
+        transform: Optional[Callable[[np.array], np.array]] = None,
+        permissive: bool = False,
     ):
-        super(RandomAccessArkDataReader, self).__init__(
-            file_path, transform, permissive
-        )
+        super().__init__(file_path, transform, permissive)
 
-        self.scp = SCPList.load(self.file_path, sep=scp_sep)
+        self.feature_set = FeatureSet.load(self.file_path)
         if path_prefix is not None:
-            self.scp.add_prefix_to_filepath(path_prefix)
+            self.feature_set.add_prefix_to_storage_path(path_prefix)
 
-        archives, archive_idx = np.unique(self.scp.file_path, return_inverse=True)
+        archives, archive_idx = np.unique(
+            self.feature_set["storage_path"], return_inverse=True
+        )
         self.archives = archives
         self.archive_idx = archive_idx
         self.f = [None] * len(self.archives)
@@ -448,7 +459,7 @@ def close(self):
                 f.close()
         self.f = [None] * len(self.f)
 
-    def _open_archive(self, key_idx, offset=0):
+    def _open_archive(self, key_idx: int, offset: int = 0):
         """Opens the Ark file corresponding to a given feature/matrix
         if it is not already open and moves the file pointer to the
         point where we can read that feature matrix.
@@ -473,7 +484,9 @@ def _open_archive(self, key_idx, offset=0):
 
         return f, self.locks[archive_idx]
 
-    def read_num_rows(self, keys, assert_same_dim=True):
+    def read_num_rows(
+        self, keys: Union[str, List[str], np.array], assert_same_dim: bool = True
+    ):
         """Reads the number of rows in the feature matrices of the dataset.
 
         Args:
@@ -489,7 +502,9 @@ def read_num_rows(self, keys, assert_same_dim=True):
         num_rows = np.array([s[0] if len(s) == 2 else 1 for s in shapes], dtype=np.int)
         return num_rows
 
-    def read_dims(self, keys, assert_same_dim=True):
+    def read_dims(
+        self, keys: Union[str, List[str], np.array], assert_same_dim: bool = True
+    ):
         """Reads the number of columns in the feature matrices of the dataset.
Args: @@ -507,7 +522,9 @@ def read_dims(self, keys, assert_same_dim=True): assert np.all(dims == dims[0]) return dims - def read_shapes(self, keys, assert_same_dim=True): + def read_shapes( + self, keys: Union[str, List[str], np.array], assert_same_dim: bool = True + ): """Reads the shapes in the feature matrices of the dataset. Args: @@ -525,25 +542,26 @@ def read_shapes(self, keys, assert_same_dim=True): shapes = [] for key in keys: - if not (key in self.scp): + if not (key in self.feature_set.index): if self.permissive: shapes.append((0,)) continue else: raise Exception("Key %s not found" % key) - index = self.scp.get_index(key) - _, file_path, offset, range_spec = self.scp[index] - - row_offset_i, num_rows_i = self._combine_ranges(range_spec, 0, 0) - + index = self.feature_set.get_loc(key) + feature_spec = self.feature_set.loc[key] + offset = feature_spec["storage_byte"] f, lock = self._open_archive(index) with lock: f.seek(offset, 0) binary = init_kaldi_input_stream(f) shape_i = KaldiMatrix.read_shape(f, binary, sequential_mode=False) - shape_i = self._apply_range_to_shape(shape_i, row_offset_i, num_rows_i) + if "start" in feature_spec and "num_frames" in feature_spec: + range_spec = [feature_spec["start"], feature_spec["num_frames"]] + row_offset_i, num_rows_i = self._combine_ranges(range_spec, 0, 0) + shape_i = self._apply_range_to_shape(shape_i, row_offset_i, num_rows_i) shapes.append(shape_i) @@ -553,7 +571,13 @@ def read_shapes(self, keys, assert_same_dim=True): return shapes - def read(self, keys, squeeze=False, row_offset=0, num_rows=0): + def read( + self, + keys: Union[str, List[str], np.array], + squeeze: bool = False, + row_offset: int = 0, + num_rows: int = 0, + ): """Reads the feature matrices/vectors for the recordings in keys. Args: @@ -574,12 +598,8 @@ def read(self, keys, squeeze=False, row_offset=0, num_rows=0): if isinstance(keys, str): keys = [keys] - row_offset_is_list = isinstance(row_offset, list) or isinstance( - row_offset, np.ndarray - ) - num_rows_is_list = isinstance(num_rows, list) or isinstance( - num_rows, np.ndarray - ) + row_offset_is_list = isinstance(row_offset, (list, np.ndarray)) + num_rows_is_list = isinstance(num_rows, (list, np.ndarray)) if row_offset_is_list: assert len(row_offset) == len(keys) if num_rows_is_list: @@ -588,15 +608,20 @@ def read(self, keys, squeeze=False, row_offset=0, num_rows=0): data = [] for i, key in enumerate(keys): - if not (key in self.scp): + if not (key in self.feature_set.index): if self.permissive: data.append(np.array([], dtype=float_cpu())) continue else: raise Exception("Key %s not found" % key) - index = self.scp.get_index(key) - _, file_path, offset, range_spec = self.scp[index] + index = self.feature_set.get_loc(key) + feature_spec = self.feature_set.loc[key] + offset = feature_spec["storage_byte"] + if "start" in feature_spec and "num_frames" in feature_spec: + range_spec = [feature_spec["start"], feature_spec["num_frames"]] + else: + range_spec = None row_offset_i = row_offset[i] if row_offset_is_list else row_offset num_rows_i = num_rows[i] if num_rows_is_list else num_rows diff --git a/hyperion/io/ark_data_writer.py b/hyperion/io/ark_data_writer.py index 58f5c0a1..6adf78b2 100644 --- a/hyperion/io/ark_data_writer.py +++ b/hyperion/io/ark_data_writer.py @@ -3,15 +3,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +from typing import Union, Optional, List import numpy as np from ..hyp_defs import float_save -from ..utils.kaldi_io_funcs import (init_kaldi_output_stream, is_token, 
- write_token) +from ..utils.kaldi_io_funcs import init_kaldi_output_stream, is_token, write_token from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix -from ..utils.scp_list import SCPList +from ..utils import PathLike from .data_writer import DataWriter @@ -28,11 +27,17 @@ class ArkDataWriter(DataWriter): {auto (default), speech_feat, 2byte-auto, 2byte-signed-integer, 1byte-auto, 1byte-unsigned-integer, 1byte-0-1}. - scp_sep: Separator for scp files (default ' '). + """ - def __init__(self, archive_path, script_path=None, binary=True, **kwargs): - super(ArkDataWriter, self).__init__(archive_path, script_path, **kwargs) + def __init__( + self, + archive_path: PathLike, + script_path: Optional[PathLike] = None, + binary: bool = True, + **kwargs, + ): + super().__init__(archive_path, script_path, **kwargs) self.binary = binary if binary: @@ -40,10 +45,9 @@ def __init__(self, archive_path, script_path=None, binary=True, **kwargs): else: self.f = open(archive_path, "w") - if script_path is not None: - self.f_script = open(script_path, "w") - else: - self.f_script = None + if script_path is not None and not self.script_is_scp: + row = self.script_sep.join(["id", "storage_path", "storage_byte"]) + self.f_script.write(f"{row}\n") def __exit__(self, exc_type, exc_value, traceback): """Function required when exiting from contructions of type @@ -67,7 +71,7 @@ def flush(self): if self.f_script is not None: self.f_script.flush() - def _convert_data(self, data): + def _convert_data(self, data: np.array): """Converts the feature matrix from numpy array to KaldiMatrix or KaldiCompressedMatrix. """ @@ -89,7 +93,11 @@ def _convert_data(self, data): raise ValueError("Data is not ndarray or KaldiMatrix") - def write(self, keys, data): + def write( + self, + keys: Union[str, List[str], np.array], + data: Union[np.array, List[np.array]], + ): """Writes data to file. Args: @@ -114,9 +122,11 @@ def write(self, keys, data): data_i.write(self.f, self.binary) if self.f_script is not None: - self.f_script.write( - "%s%s%s:%d\n" % (key_i, self.scp_sep, self.archive_path, pos) - ) + if self.script_is_scp: + self.f_script.write(f"{key_i} {self.archive_path}:{pos}\n") + else: + row = self.script_sep.join([key_i, self.archive_path, str(pos)]) + self.f_script.write(f"{row}\n") if self._flush: self.flush() diff --git a/hyperion/io/audio_reader.py b/hyperion/io/audio_reader.py index 69cfa65b..1052ce8c 100644 --- a/hyperion/io/audio_reader.py +++ b/hyperion/io/audio_reader.py @@ -10,11 +10,13 @@ import subprocess import numpy as np +import pandas as pd import soundfile as sf from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from typing import Union, Optional, List from ..hyp_defs import float_cpu -from ..utils import SCPList, SegmentList +from ..utils import RecordingSet, SegmentSet, PathLike valid_ext = [ ".wav", @@ -34,7 +36,7 @@ ".sds", ".sf", ".voc", - "w64", + ".w64", ".wve", ".xi", ] @@ -44,38 +46,36 @@ class AudioReader(object): """Class to read audio files from wav, flac or pipe Attributes: - file_path: scp file with formant file_key wavspecifier (audio_file/pipe) or SCPList object. 
- segments_path: segments file with format: segment_id file_id tbeg tend + recordings: RecordingSet or file path to RecordingSet + segments: SegmentSet or file path to SegmentSet wav_scale: multiplies signal by scale factor """ - def __init__(self, file_path, segments_path=None, wav_scale=2**15 - 1): - self.file_path = file_path - if isinstance(file_path, SCPList): - self.scp = file_path - else: - self.scp = SCPList.load(file_path, sep=" ", is_wav=True) - - self.segments_path = segments_path - if segments_path is None: - self.segments = None - self.with_segments = False - else: + def __init__( + self, + recordings: Union[RecordingSet, PathLike], + segments: Union[SegmentSet, PathLike, None] = None, + wav_scale: float = 2 ** 15 - 1, + ): + if not isinstance(recordings, RecordingSet): + recordings = RecordingSet.load(recordings) + + self.recordings = recordings + + self.with_segments = False + if segments is not None: self.with_segments = True - if isinstance(file_path, SegmentList): - self.segments = segments_path - else: - self.segments = SegmentList.load(segments_path, - sep=" ", - index_by_file=False) + if not isinstance(segments, SegmentSet): + segments = SegmentSet.load(segments) + self.segments = segments self.wav_scale = wav_scale @property def keys(self): if self.with_segments: - return np.asarray(self.segments["segment_id"]) - return self.scp.key + return self.segments["id"].values + return self.recordings["id"].values def __enter__(self): """Function required when entering contructions of type @@ -94,10 +94,12 @@ def __exit__(self, exc_type, exc_value, traceback): pass @staticmethod - def read_wavspecifier(wavspecifier, - scale=2**15, - time_offset=0, - time_dur=0): + def read_wavspecifier( + wavspecifier: PathLike, + scale: float = 2 ** 15, + time_offset: float = 0.0, + time_dur: float = 0.0, + ): """Reads an audiospecifier (audio_file/pipe) It reads from pipe or from all the files that can be read by `libsndfile ` @@ -113,59 +115,123 @@ def read_wavspecifier(wavspecifier, wavspecifier = wavspecifier.strip() if wavspecifier[-1] == "|": wavspecifier = wavspecifier[:-1] - x, fs = AudioReader.read_pipe(wavspecifier, scale) - if time_offset == 0 and time_dur == 0: - return x, fs - - start_sample = int(math.floor(time_offset * fs)) - num_samples = int(math.floor(time_dur * fs)) - if num_samples == 0: - return x[start_sample:], fs - - end_sample = start_sample + num_samples - assert end_sample <= len(x) - return x[start_sample:end_sample], fs + return AudioReader.read_pipe(wavspecifier, scale, time_offset, time_dur) ext = os.path.splitext(wavspecifier)[1] if ext in valid_ext: - if time_offset == 0 and time_dur == 0: - x, fs = sf.read(wavspecifier, dtype=float_cpu()) - x *= scale - return x, fs - - with sf.SoundFile(wavspecifier, "r") as f: - fs = f.samplerate - start_sample = int(math.floor(time_offset * fs)) - num_samples = int(math.floor(time_dur * fs)) - f.seek(start_sample) - if num_samples > 0: - x = scale * f.read(num_samples, dtype=float_cpu()) - else: - x = scale * f.read(dtype=float_cpu()) - return x, fs + return AudioReader.read_file(wavspecifier, scale, time_offset, time_dur) raise Exception("Unknown format for %s" % (wavspecifier)) @staticmethod - def read_pipe(wavspecifier, scale=2**15): + def read_pipe( + wavspecifier: PathLike, + scale: float = 2 ** 15, + time_offset: float = 0, + time_dur: float = 0, + ): """Reads wave file from a pipe Args: wavspecifier: Shell command with pipe output scale: Multiplies signal by scale factor """ - # proc = 
subprocess.Popen(wavspecifier, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - proc = subprocess.Popen(wavspecifier, - shell=True, - stdout=subprocess.PIPE) + if wavspecifier[-1] == "|": + wavspecifier = wavspecifier[:-1] + + proc = subprocess.Popen(wavspecifier, shell=True, stdout=subprocess.PIPE) pipe = proc.communicate()[0] if proc.returncode != 0: - raise Exception("Wave read pipe command %s returned code %d" % - (wavspecifier, proc.returncode)) + raise Exception( + "Wave read pipe command %s returned code %d" + % (wavspecifier, proc.returncode) + ) x, fs = sf.read(io.BytesIO(pipe), dtype=float_cpu()) x *= scale - return x, fs + if time_offset == 0 and time_dur == 0: + return x, fs + + start_sample = int(math.floor(time_offset * fs)) + num_samples = int(math.floor(time_dur * fs)) + if num_samples == 0: + return x[start_sample:], fs + + end_sample = start_sample + num_samples + assert end_sample <= len(x) + return x[start_sample:end_sample], fs + + @staticmethod + def read_file_sf( + wavspecifier: PathLike, + scale: float = 2 ** 15, + time_offset: float = 0, + time_dur: float = 0, + ): + if time_offset == 0 and time_dur == 0: + x, fs = sf.read(wavspecifier, dtype=float_cpu()) + x *= scale + return x, fs + + with sf.SoundFile(wavspecifier, "r") as f: + fs = f.samplerate + start_sample = int(math.floor(time_offset * fs)) + num_samples = int(math.floor(time_dur * fs)) + f.seek(start_sample) + if num_samples > 0: + x = scale * f.read(num_samples, dtype=float_cpu()) + else: + x = scale * f.read(dtype=float_cpu()) + + return x, fs + + @staticmethod + def read_file( + wavspecifier: PathLike, + scale: float = 2 ** 15, + time_offset: float = 0, + time_dur: float = 0, + ): + try: + return AudioReader.read_file_sf(wavspecifier, scale, time_offset, time_dur) + except: + # some files produce error in the fseek after reading the data, + # this seems an issue from pysoundfile or soundfile lib itself + # we try to read from + # time-offset to the end of the file, and remove the extra frames later, + # this solves the problem in most cases + logging.info( + ( + "error-1 reading keys=%s offset=%f duration=%f" + "retrying reading until end-of-file ..." + ), + wavspecifier, + time_offset, + time_dur, + ) + try: + x, fs = AudioReader.read_file_sf(wavspecifier, scale, time_offset) + num_samples = int(math.floor(time_dur * fs)) + x = x[:num_samples] + return x, fs + except: + logging.info( + ( + "error-2 reading keys=%s offset=%f duration=%f" + "retrying reading full file ..." 
+ ), + wavspecifier, + time_offset, + time_dur, + ) + + x, fs = AudioReader.read_file_sf(wavspecifier, scale) + start_sample = int(math.floor(time_offset * fs)) + num_samples = int(math.floor(time_dur * fs)) + x = x[start_sample : start_sample + num_samples] + return x, fs - def _read_segment(self, segment, time_offset=0, time_dur=0): + def _read_segment( + self, segment: pd.Series, time_offset: float = 0, time_dur: float = 0 + ): """Reads a wave segment Args: @@ -173,28 +239,11 @@ def _read_segment(self, segment, time_offset=0, time_dur=0): Returns: Wave, sampling frequency """ - file_id = segment["file_id"] - t_beg = segment["tbeg"] + time_offset - t_end = segment["tend"] - if time_dur > 0: - t_end_new = t_beg + time_dur - assert t_end_new <= t_end - t_end = t_end_new - - file_path, _, _ = self.scp[file_id] - x_i, fs_i = self.read_wavspecifier(file_path, self.wav_scale) - num_samples_i = len(x_i) - s_beg = int(t_beg * fs_i) - if s_beg >= num_samples_i: - raise Exception( - "segment %s tbeg=%.2f (num_sample=%d) longer that wav file %s (num_samples=%d)" - % (file_id, t_beg, s_beg, file_id, num_samples_i)) - - s_end = int(t_end * fs_i) - if s_end > num_samples_i or t_end < 0: - s_end = num_samples_i - - x_i = x_i[s_beg:s_end] + recording_id = segment["recording_id"] + t_start = segment["start"] + time_offset + t_dur = segment["duration"] + storage_path = self.recordings.loc[recording_id, "storage_path"] + x_i, fs_i = self.read_wavspecifier(storage_path, self.wav_scale, t_start, t_dur) return x_i, fs_i def read(self): @@ -202,27 +251,23 @@ def read(self): class SequentialAudioReader(AudioReader): - def __init__( self, - file_path, - segments_path=None, - wav_scale=2**15 - 1, - part_idx=1, - num_parts=1, + recordings: Union[RecordingSet, PathLike], + segments: Union[SegmentSet, PathLike, None] = None, + wav_scale: float = 2 ** 15 - 1, + part_idx: int = 1, + num_parts: int = 1, ): - super().__init__(file_path, segments_path, wav_scale=wav_scale) + super().__init__(recordings, segments, wav_scale=wav_scale) self.cur_item = 0 self.part_idx = part_idx self.num_parts = num_parts if self.num_parts > 1: if self.with_segments: - self.segments = self.segments.split(self.part_idx, - self.num_parts) + self.segments = self.segments.split(self.part_idx, self.num_parts) else: - self.scp = self.scp.split(self.part_idx, - self.num_parts, - group_by_key=False) + self.recordings = self.recordings.split(self.part_idx, self.num_parts) def __iter__(self): """Needed to build an iterator, e.g.: @@ -262,9 +307,9 @@ def eof(self): """ if self.with_segments: return self.cur_item == len(self.segments) - return self.cur_item == len(self.scp) + return self.cur_item == len(self.recordings) - def read(self, num_records=0, time_offset=0, time_durs=0): + def read(self, num_records: int = 0, time_offset: float = 0, time_durs: float = 0): """Reads next num_records audio files Args: @@ -281,7 +326,7 @@ def read(self, num_records=0, time_offset=0, time_durs=0): if self.with_segments: num_records = len(self.segments) - self.cur_item else: - num_records = len(self.scp) - self.cur_item + num_records = len(self.recordings) - self.cur_item offset_is_list = isinstance(time_offset, (list, np.ndarray)) dur_is_list = isinstance(time_durs, (list, np.ndarray)) @@ -297,13 +342,14 @@ def read(self, num_records=0, time_offset=0, time_durs=0): dur_i = time_durs[i] if dur_is_list else time_durs if self.with_segments: - segment = self.segments[self.cur_item] - key = segment["segment_id"] + segment = self.segments.iloc[self.cur_item] + key = 
segment["id"] x_i, fs_i = self._read_segment(segment, offset_i, dur_i) else: - key, file_path, _, _ = self.scp[self.cur_item] - x_i, fs_i = self.read_wavspecifier(file_path, self.wav_scale, - offset_i, dur_i) + key, file_path = self.recordings.iloc[self.cur_item] + x_i, fs_i = self.read_wavspecifier( + file_path, self.wav_scale, offset_i, dur_i + ) keys.append(key) data.append(x_i) @@ -318,14 +364,14 @@ def filter_args(**kwargs): return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod - def add_class_args(parser, prefix=None): + def add_class_args(parser, prefix: Optional[str] = None): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") parser.add_argument( "--wav-scale", - default=2**15 - 1, + default=2 ** 15 - 1, type=float, help=("multiplicative factor for waveform"), ) @@ -334,38 +380,50 @@ def add_class_args(parser, prefix=None): "--part-idx", type=int, default=1, - help=("splits the list of files into num-parts and " - "processes part-idx"), + help=( + "splits the list of files into num-parts and " "processes part-idx" + ), ) parser.add_argument( "--num-parts", type=int, default=1, - help=("splits the list of files into num-parts and " - "processes part-idx"), + help=( + "splits the list of files into num-parts and " "processes part-idx" + ), ) except: pass if prefix is not None: outer_parser.add_argument( - "--" + prefix, - action=ActionParser(parser=parser), + "--" + prefix, action=ActionParser(parser=parser), ) add_argparse_args = add_class_args class RandomAccessAudioReader(AudioReader): + def __init__( + self, + recordings: Union[RecordingSet, PathLike], + segments: Union[SegmentSet, PathLike, None] = None, + wav_scale: float = 2 ** 15 - 1, + ): + super().__init__(recordings, segments, wav_scale) - def __init__(self, file_path, segments_path=None, wav_scale=2**15 - 1): - super().__init__(file_path, segments_path, wav_scale) - - def _read(self, keys, time_offset=0, time_durs=0): + def read( + self, + keys: Union[str, List, np.array], + time_offset: float = 0, + time_durs: float = 0, + ): """Reads the waveforms for the recordings in keys. Args: keys: List of recording/segment_ids names. + time_offset: float or float list with time-offsets + time_durs: float or float list with durations Returns: data: List of waveforms @@ -384,93 +442,92 @@ def _read(self, keys, time_offset=0, time_durs=0): dur_i = time_durs[i] if dur_is_list else time_durs if self.with_segments: - if not (key in self.segments): + if not (key in self.segments.index): raise Exception("Key %s not found" % key) - segment = self.segments[key] + segment = self.segments.loc[key] x_i, fs_i = self._read_segment(segment, offset_i, dur_i) else: - if not (key in self.scp): + if not (key in self.recordings.index): raise Exception("Key %s not found" % key) - file_path, _, _ = self.scp[key] - x_i, fs_i = self.read_wavspecifier(file_path, self.wav_scale, - offset_i, dur_i) + file_path = self.recordings.loc[key, "storage_path"] + x_i, fs_i = self.read_wavspecifier( + file_path, self.wav_scale, offset_i, dur_i + ) data.append(x_i) fs.append(fs_i) return data, fs - def read(self, keys, time_offset=0, time_durs=0): - """Reads the waveforms for the recordings in keys. - - Args: - keys: List of recording/segment_ids names. - - Returns: - data: List of waveforms - fs: List of sampling freq. 
- """ - try: - x, fs = self._read(keys, - time_offset=time_offset, - time_durs=time_durs) - except: - if isinstance(keys, str): - keys = [keys] - - if not isinstance(time_offset, (list, np.ndarray)): - time_offset = [time_offset] * len(keys) - if not isinstance(time_durs, (list, np.ndarray)): - time_durs = [time_durs] * len(keys) - - try: - # some files produce error in the fseek after reading the data, - # this seems an issue from pysoundfile or soundfile lib itself - # we try to read from - # time-offset to the end of the file, and remove the extra frames later, - # this solves the problem in most cases - logging.info(("error-1 reading at keys={} offset={} " - "retrying reading until end-of-file ...").format( - keys, time_offset)) - x, fs = self._read(keys, time_offset=time_offset) - for i in range(len(x)): - end_sample = int(time_durs[i] * fs[i]) - x[i] = x[i][:end_sample] - except: - # try to read the full file - logging.info(("error-2 reading at key={}, " - "retrying reading full file ...").format(keys)) - x, fs = self._read(keys) - for i in range(len(x)): - start_sample = int(time_offset[i] * fs[i]) - end_sample = start_sample + int(time_durs[i] * fs[i]) - x[i] = x[i][start_sample:end_sample] - - return x, fs + # def read(self, keys, time_offset=0, time_durs=0): + # """Reads the waveforms for the recordings in keys. + + # Args: + # keys: List of recording/segment_ids names. + + # Returns: + # data: List of waveforms + # fs: List of sampling freq. + # """ + # try: + # x, fs = self._read(keys, time_offset=time_offset, time_durs=time_durs) + # except: + # if isinstance(keys, str): + # keys = [keys] + + # if not isinstance(time_offset, (list, np.ndarray)): + # time_offset = [time_offset] * len(keys) + # if not isinstance(time_durs, (list, np.ndarray)): + # time_durs = [time_durs] * len(keys) + + # try: + # logging.info( + # ( + # "error-1 reading at keys={} offset={} " + # "retrying reading until end-of-file ..." + # ).format(keys, time_offset) + # ) + # x, fs = self._read(keys, time_offset=time_offset) + # for i in range(len(x)): + # end_sample = int(time_durs[i] * fs[i]) + # x[i] = x[i][:end_sample] + # except: + # # try to read the full file + # logging.info( + # ( + # "error-2 reading at key={}, " "retrying reading full file ..." 
+ # ).format(keys) + # ) + # x, fs = self._read(keys) + # for i in range(len(x)): + # start_sample = int(time_offset[i] * fs[i]) + # end_sample = start_sample + int(time_durs[i] * fs[i]) + # x[i] = x[i][start_sample:end_sample] + + # return x, fs @staticmethod def filter_args(**kwargs): - valid_args = ("wav_scale", ) + valid_args = ("wav_scale",) return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod - def add_class_args(parser, prefix=None): + def add_class_args(parser, prefix: Optional[str] = None): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") parser.add_argument( "--wav-scale", - default=2**15 - 1, + default=2 ** 15 - 1, type=float, help=("multiplicative factor for waveform"), ) if prefix is not None: outer_parser.add_argument( - "--" + prefix, - action=ActionParser(parser=parser), + "--" + prefix, action=ActionParser(parser=parser), ) add_argparse_args = add_class_args diff --git a/hyperion/io/audio_writer.py b/hyperion/io/audio_writer.py index f98a3251..e416c209 100644 --- a/hyperion/io/audio_writer.py +++ b/hyperion/io/audio_writer.py @@ -8,12 +8,16 @@ import numpy as np import soundfile as sf +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from typing import Union, Optional, List +from pathlib import Path from ..hyp_defs import float_cpu from ..utils.kaldi_io_funcs import is_token -from ..utils.scp_list import SCPList +from ..utils import PathLike from .audio_reader import valid_ext + subtype_to_npdtype = { "PCM_32": "int32", "ALAW": "int16", @@ -37,25 +41,23 @@ class AudioWriter(object): Attributes: output_path: output data file path. - script_path: optional output scp file. + script_path: optional output kaldi .scp or pandas .csv file. audio_format: audio file format audio_subtype: subtype of audio in [PCM_16, PCM_32, FLOAT, DOUBLE, ...], if None, it uses soundfile defaults (recommended) - scp_sep: Separator for scp files (default ' '). """ def __init__( self, - output_path, - script_path=None, - audio_format="wav", - audio_subtype=None, - scp_sep=" ", + output_path: PathLike, + script_path: Optional[PathLike] = None, + audio_format: str = "wav", + audio_subtype: Optional[str] = None, ): - self.output_path = output_path - self.script_path = script_path + self.output_path = Path(output_path) + self.script_path = Path(script_path) if script_path is not None else None self.audio_format = audio_format - self.scp_sep = scp_sep + self.output_path.mkdir(exist_ok=True, parents=True) assert "." 
+ self.audio_format in valid_ext

         if audio_subtype is None:
@@ -64,16 +66,23 @@ def __init__(
             self.subtype = audio_subtype
             assert sf.check_format(self.audio_format, self.subtype)

-        if not os.path.exists(output_path):
-            try:
-                os.makedirs(output_path)
-            except FileExistsError:
-                pass
-
+        self.script_is_scp = False
+        self.script_sep = None
+        self.f_script = None
         if script_path is not None:
-            self.f_script = open(script_path, "w")
-        else:
-            self.f_script = None
+            self.script_path.parent.mkdir(exist_ok=True, parents=True)
+            script_ext = self.script_path.suffix
+            self.script_is_scp = script_ext == ".scp"
+
+            if self.script_is_scp:
+                self.f_script = open(self.script_path, "w")
+            else:
+                self.script_sep = "," if script_ext == ".csv" else "\t"
+                self.f_script = open(self.script_path, "w", encoding="utf-8")
+                row = self.script_sep.join(
+                    ["id", "storage_path", "duration", "sample_freq"]
+                )
+                self.f_script.write(f"{row}\n")

     def __enter__(self):
         """Function required when entering contructions of type
@@ -96,7 +105,12 @@ def close(self):
         if self.f_script is not None:
             self.f_script.close()

-    def write(self, keys, data, fs):
+    def write(
+        self,
+        keys: Union[str, List[str], np.array],
+        data: Union[np.array, List[np.array]],
+        fs: Union[int, float, List[int], List[float], np.array],
+    ):
         """Writes waveform to audio file.

         Args:
@@ -120,14 +134,21 @@ def write(self, keys, data, fs):
                 file_basename,
                 self.audio_format,
             )
-            fs_i = fs[i] if fs_is_list else fs
+            fs_i = int(fs[i]) if fs_is_list else fs
             data_i = data[i].astype(dtype, copy=False)
             sf.write(output_file, data_i, fs_i, subtype=self.subtype)
             output_files.append(output_file)
             if self.f_script is not None:
-                self.f_script.write("%s%s%s\n" % (key_i, self.scp_sep, output_file))
+                if self.script_is_scp:
+                    self.f_script.write(f"{key_i} {output_file}\n")
+                else:
+                    duration_i = data_i.shape[-1] / fs_i
+                    row = self.script_sep.join(
+                        [key_i, output_file, str(duration_i), str(fs_i)]
+                    )
+                    self.f_script.write(f"{row}\n")
                 self.f_script.flush()

         return output_files
@@ -146,29 +167,30 @@ def filter_args(**kwargs):

     @staticmethod
     def add_class_args(parser, prefix=None):
-        if prefix is None:
-            p1 = "--"
-        else:
-            p1 = "--" + prefix + "." 
+ if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") # parser.add_argument(p1+'output-wav-scale', default=1, type=float, # help=('scale to divide the waveform before writing')) parser.add_argument( - p1 + "output-audio-format", + "--output-audio-format", default="flac", choices=["flac", "ogg", "wav"], help=("ouput audio format"), ) parser.add_argument( - p1 + "output-audio-subtype", + "--output-audio-subtype", default=None, choices=["pcm_16", "pcm_24", "float", "double", "vorbis"], help=("coding format for audio file"), ) - # parser.add_argument(p1+'output-fs', default=16000, type=int, - # help=('output sample frequency')) + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, action=ActionParser(parser=parser), + ) add_argparse_args = add_class_args diff --git a/hyperion/io/bin_vad_reader.py b/hyperion/io/bin_vad_reader.py index e4e64777..82e2a0c5 100644 --- a/hyperion/io/bin_vad_reader.py +++ b/hyperion/io/bin_vad_reader.py @@ -18,13 +18,12 @@ def __init__( self, rspecifier, path_prefix=None, - scp_sep=" ", frame_length=25, frame_shift=10, snip_edges=False, ): - r = DRF.create(rspecifier, path_prefix, scp_sep=scp_sep) + r = DRF.create(rspecifier, path_prefix) super().__init__(r.file_path, r.permissive) self.r = r self.frame_shift = frame_shift diff --git a/hyperion/io/data_reader.py b/hyperion/io/data_reader.py index bbefa62d..73c120b5 100644 --- a/hyperion/io/data_reader.py +++ b/hyperion/io/data_reader.py @@ -6,18 +6,24 @@ import logging import multiprocessing from abc import ABCMeta, abstractmethod +from typing import Union, Optional, List, Callable, Tuple import numpy as np from ..hyp_defs import float_cpu from ..np.transforms import TransformList -from ..utils.scp_list import SCPList +from ..utils import PathLike class DataReader(object): __metaclass__ = ABCMeta - def __init__(self, file_path, transform=None, permissive=False): + def __init__( + self, + file_path: PathLike, + transform: Optional[Callable[[np.array], np.array]] = None, + permissive: bool = False, + ): """Abstract base class to read Ark or hdf5 feature files. Attributes: @@ -57,7 +63,7 @@ def close(self): pass @staticmethod - def _squeeze(data, permissive=False): + def _squeeze(data: np.array, permissive: bool = False): """Converts list of matrices to 3D numpy array or list of vectors to 2D numpy array. @@ -121,7 +127,7 @@ def _combine_ranges(read_range, row_offset, num_rows): return row_offset, num_rows @staticmethod - def _apply_range_to_shape(shape, row_offset, num_rows): + def _apply_range_to_shape(shape: Tuple[int, int], row_offset: int, num_rows: int): """Modifies shape given the user defined row_offset and num_rows to read. If we are reading a matrix of shape (100,4) and row_offset=10, num_rows=20, it returns (20,4). @@ -158,25 +164,22 @@ class SequentialDataReader(DataReader): part_idx: It splits the input into num_parts and writes only part part_idx, where part_idx=1,...,num_parts. num_parts: Number of parts to split the input data. - split_by_key: If True, all the elements with the same key go to the same part. 
""" __metaclass__ = ABCMeta def __init__( self, - file_path, - transform=None, - permissive=False, - part_idx=1, - num_parts=1, - split_by_key=False, + file_path: PathLike, + transform: Optional[Callable[[np.array], np.array]] = None, + permissive: bool = False, + part_idx: int = 1, + num_parts: int = 1, ): super().__init__(file_path, transform, permissive) self.lock = multiprocessing.Lock() self.part_idx = part_idx self.num_parts = num_parts - self.split_by_key = split_by_key def __iter__(self): """Needed to build an iterator, e.g.: @@ -218,7 +221,7 @@ def eof(self): return False @abstractmethod - def read_num_rows(self, num_records=0, assert_same_dim=True): + def read_num_rows(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the number of rows in the feature matrices of the dataset. Args: @@ -234,7 +237,7 @@ def read_num_rows(self, num_records=0, assert_same_dim=True): pass @abstractmethod - def read_dims(self, num_records=0, assert_same_dim=True): + def read_dims(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the number of columns in the feature matrices of the dataset. Args: @@ -250,7 +253,7 @@ def read_dims(self, num_records=0, assert_same_dim=True): pass @abstractmethod - def read_shapes(self, num_records=0, assert_same_dim=True): + def read_shapes(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the shapes in the feature matrices of the dataset. Args: @@ -266,7 +269,13 @@ def read_shapes(self, num_records=0, assert_same_dim=True): pass @abstractmethod - def read(self, num_records=0, squeeze=False, offset=0, num_rows=0): + def read( + self, + num_records: int = 0, + squeeze: bool = False, + offset: int = 0, + num_rows: int = 0, + ): """Reads next num_records feature matrices/vectors. Args: @@ -290,7 +299,12 @@ def read(self, num_records=0, squeeze=False, offset=0, num_rows=0): class RandomAccessDataReader(DataReader): __metaclass__ = ABCMeta - def __init__(self, file_path, transform=None, permissive=False): + def __init__( + self, + file_path: PathLike, + transform: Optional[Callable[[np.array], np.array]] = None, + permissive: bool = False, + ): """Abstract base class to read Ark or hdf5 feature files in random order. @@ -305,7 +319,7 @@ def __init__(self, file_path, transform=None, permissive=False): super().__init__(file_path, transform, permissive) @abstractmethod - def read_num_rows(self, keys=None, assert_same_dim=True): + def read_num_rows(self, keys: Union[str, List[str]], assert_same_dim: bool = True): """Reads the number of rows in the feature matrices of the dataset. Args: @@ -320,7 +334,7 @@ def read_num_rows(self, keys=None, assert_same_dim=True): pass @abstractmethod - def read_dims(self, keys=None, assert_same_dim=True): + def read_dims(self, keys: Union[str, List[str]], assert_same_dim: bool = True): """Reads the number of columns in the feature matrices of the dataset. Args: @@ -335,7 +349,7 @@ def read_dims(self, keys=None, assert_same_dim=True): pass @abstractmethod - def read_shapes(self, keys=None, assert_same_dim=True): + def read_shapes(self, keys: Union[str, List[str]], assert_same_dim: bool = True): """Reads the shapes in the feature matrices of the dataset. 
Args: @@ -350,7 +364,13 @@ def read_shapes(self, keys=None, assert_same_dim=True): pass @abstractmethod - def read(self, keys, squeeze=False, offset=0, num_rows=0): + def read( + self, + keys: Union[str, List[str]], + squeeze: bool = False, + offset: int = 0, + num_rows: int = 0, + ): """Reads the feature matrices/vectors for the recordings in keys. Args: diff --git a/hyperion/io/data_rw_factory.py b/hyperion/io/data_rw_factory.py index 7868baae..b56e8c27 100644 --- a/hyperion/io/data_rw_factory.py +++ b/hyperion/io/data_rw_factory.py @@ -4,10 +4,13 @@ """ import logging +from typing import Union, Optional, List, Callable, Tuple from jsonargparse import ActionParser, ArgumentParser +import numpy as np from ..utils.kaldi_matrix import compression_methods +from ..utils import PathLike from .ark_data_reader import RandomAccessArkDataReader as RADR from .ark_data_reader import SequentialArkFileDataReader as SAFDR from .ark_data_reader import SequentialArkScriptDataReader as SASDR @@ -17,8 +20,7 @@ from .h5_data_reader import SequentialH5FileDataReader as SH5FDR from .h5_data_reader import SequentialH5ScriptDataReader as SH5SDR from .h5_data_writer import H5DataWriter as H5DW -from .rw_specifiers import (ArchiveType, RSpecifier, RSpecType, WSpecifier, - WSpecType) +from .rw_specifiers import ArchiveType, RSpecifier, RSpecType, WSpecifier, WSpecType class DataWriterFactory(object): @@ -27,7 +29,9 @@ class DataWriterFactory(object): """ @staticmethod - def create(wspecifier, compress=False, compression_method="auto", scp_sep=" "): + def create( + wspecifier: PathLike, compress: bool = False, compression_method: str = "auto" + ): if isinstance(wspecifier, str): wspecifier = WSpecifier.create(wspecifier) @@ -43,7 +47,6 @@ def create(wspecifier, compress=False, compression_method="auto", scp_sep=" "): flush=wspecifier.flush, compress=compress, compression_method=compression_method, - scp_sep=scp_sep, ) else: return ADW( @@ -53,21 +56,19 @@ def create(wspecifier, compress=False, compression_method="auto", scp_sep=" "): flush=wspecifier.flush, compress=compress, compression_method=compression_method, - scp_sep=scp_sep, ) @staticmethod def filter_args(**kwargs): - valid_args = ("scp_sep", "compress", "compression_method") + valid_args = ("compress", "compression_method") return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod - def add_class_args(parser, prefix=None): + def add_class_args(parser, prefix: Optional[PathLike] = None): if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") - parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) parser.add_argument("--compress", default=False, action="store_true") parser.add_argument( "--compression-method", default="auto", choices=compression_methods @@ -80,7 +81,7 @@ def add_class_args(parser, prefix=None): class SequentialDataReaderFactory(object): @staticmethod - def create(rspecifier, path_prefix=None, scp_sep=" ", **kwargs): + def create(rspecifier: PathLike, path_prefix: Optional[PathLike] = None, **kwargs): if isinstance(rspecifier, str): rspecifier = RSpecifier.create(rspecifier) @@ -92,27 +93,21 @@ def create(rspecifier, path_prefix=None, scp_sep=" ", **kwargs): return SAFDR(rspecifier.archive, **kwargs) else: if rspecifier.archive_type == ArchiveType.H5: - return SH5SDR(rspecifier.script, path_prefix, scp_sep=scp_sep, **kwargs) + return SH5SDR(rspecifier.script, path_prefix, **kwargs) else: - return SASDR(rspecifier.script, path_prefix, scp_sep=scp_sep, **kwargs) + return 
SASDR(rspecifier.script, path_prefix, **kwargs)

     @staticmethod
     def filter_args(**kwargs):
-        valid_args = ("scp_sep", "path_prefix", "part_idx", "num_parts")
+        valid_args = ("path_prefix", "part_idx", "num_parts")
         return dict((k, kwargs[k]) for k in valid_args if k in kwargs)

     @staticmethod
-    def add_class_args(parser, prefix=None):
+    def add_class_args(parser, prefix: Optional[PathLike] = None):
         if prefix is not None:
             outer_parser = parser
             parser = ArgumentParser(prog="")

-        try:
-            parser.add_argument(
-                "--scp-sep", default=" ", help=("scp file field separator")
-            )
-        except:
-            pass
         parser.add_argument(
             "--path-prefix", default=None, help=("scp file_path prefix")
         )
@@ -139,7 +134,11 @@ def add_class_args(parser, prefix=None):

 class RandomAccessDataReaderFactory(object):
     @staticmethod
-    def create(rspecifier, path_prefix=None, transform=None, scp_sep=" "):
+    def create(
+        rspecifier: PathLike,
+        path_prefix: Optional[PathLike] = None,
+        transform: Optional[Callable[[np.array], np.array]] = None,
+    ):
         if isinstance(rspecifier, str):
             rspecifier = RSpecifier.create(rspecifier)
         logging.debug(rspecifier.__dict__)
@@ -162,7 +161,6 @@ def create(rspecifier, path_prefix=None, transform=None, scp_sep=" "):
                     path_prefix,
                     transform=transform,
                     permissive=rspecifier.permissive,
-                    scp_sep=scp_sep,
                 )
             else:
                 return RADR(
@@ -170,26 +168,19 @@ def create(rspecifier, path_prefix=None, transform=None, scp_sep=" "):
                     path_prefix,
                     transform=transform,
                     permissive=rspecifier.permissive,
-                    scp_sep=scp_sep,
                 )

     @staticmethod
     def filter_args(**kwargs):
-        valid_args = ("scp_sep", "path_prefix")
+        valid_args = ("path_prefix",)
         return dict((k, kwargs[k]) for k in valid_args if k in kwargs)

     @staticmethod
-    def add_class_args(parser, prefix=None):
+    def add_class_args(parser, prefix: Optional[PathLike] = None):
         if prefix is not None:
             outer_parser = parser
             parser = ArgumentParser(prog="")

-        try:
-            parser.add_argument(
-                "--scp-sep", default=" ", help=("scp file field separator")
-            )
-        except:
-            pass
         parser.add_argument(
             "--path-prefix", default=None, help=("scp file_path prefix")
         )
diff --git a/hyperion/io/data_writer.py b/hyperion/io/data_writer.py
index cf2bb4f9..8adbf87a 100644
--- a/hyperion/io/data_writer.py
+++ b/hyperion/io/data_writer.py
@@ -5,9 +5,13 @@

 import os
 from abc import ABCMeta, abstractmethod
+from typing import Union, Optional, List
+from pathlib import Path

+import numpy as np

+from ..utils import PathLike

-class DataWriter(object):
+class DataWriter:
     """Abstract base class to write Ark or hdf5 feature files.

     Attributes:
@@ -19,35 +23,42 @@ class DataWriter(object):
       {auto (default), speech_feat, 2byte-auto, 2byte-signed-integer,
       1byte-auto, 1byte-unsigned-integer, 1byte-0-1}.
-      scp_sep: Separator for scp files (default ' ').
     """

     __metaclass__ = ABCMeta

     def __init__(
         self,
-        archive_path,
-        script_path=None,
-        flush=False,
-        compress=False,
-        compression_method="auto",
-        scp_sep=" ",
+        archive_path: PathLike,
+        script_path: Optional[PathLike] = None,
+        flush: bool = False,
+        compress: bool = False,
+        compression_method: str = "auto",
     ):
-        self.archive_path = archive_path
-        self.script_path = script_path
+        self.archive_path = Path(archive_path)
+        self.script_path = Path(script_path) if script_path is not None else None
         self._flush = flush
         self.compress = compress
         self.compression_method = compression_method
-        self.scp_sep = scp_sep

-        archive_dir = os.path.dirname(archive_path)
-        if not os.path.exists(archive_dir):
-            os.makedirs(archive_dir)
+        archive_dir = self.archive_path.parent
+        archive_dir.mkdir(exist_ok=True, parents=True)

+        self.script_is_scp = False
+        self.script_sep = None
+        self.f_script = None
         if script_path is not None:
-            script_dir = os.path.dirname(script_path)
-            if not os.path.exists(script_dir):
-                os.makedirs(script_dir)
+            self.script_path.parent.mkdir(exist_ok=True, parents=True)
+            script_ext = self.script_path.suffix
+            self.script_is_scp = script_ext == ".scp"
+
+            if self.script_is_scp:
+                self.f_script = open(self.script_path, "w")
+            else:
+                self.script_sep = "," if script_ext == ".csv" else "\t"
+                self.f_script = open(self.script_path, "w", encoding="utf-8")
+                row = self.script_sep.join(["id", "storage_path"])
+                self.f_script.write(f"{row}\n")

     def __enter__(self):
         """Function required when entering contructions of type
@@ -77,7 +88,11 @@ def flush(self):
         pass

     @abstractmethod
-    def write(self, key, data):
+    def write(
+        self,
+        keys: Union[str, List[str], np.array],
+        data: Union[np.array, List[np.array]],
+    ):
         """Writes data to file.

         Args:
diff --git a/hyperion/io/h5_data_reader.py b/hyperion/io/h5_data_reader.py
index dfefbec3..d509504d 100644
--- a/hyperion/io/h5_data_reader.py
+++ b/hyperion/io/h5_data_reader.py
@@ -6,8 +6,8 @@
 """

 import multiprocessing
-import sys
 import time
+from typing import Union, Optional, List, Callable, Tuple

 import h5py
 import numpy as np
@@ -16,11 +16,18 @@
 from ..utils.kaldi_io_funcs import is_token
 from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix
 from ..utils.list_utils import split_list, split_list_group_by_key
-from ..utils.scp_list import SCPList
+
+# from ..utils.scp_list import SCPList
+from ..utils import FeatureSet, PathLike
 from .data_reader import RandomAccessDataReader, SequentialDataReader


-def _read_h5_data(dset, row_offset=0, num_rows=0, transform=None):
+def _read_h5_data(
+    dset,
+    row_offset: int = 0,
+    num_rows: int = 0,
+    transform: Optional[Callable[[np.array], np.array]] = None,
+):
     """Auxiliary function to read the feature matrix from hdf5 dataset.
        It decompresses the data if it was compressed.
@@ -74,7 +81,7 @@ class SequentialH5DataReader(SequentialDataReader):
       split_by_key: If True, all the elements with the same key go to the same part.
     """

-    def __init__(self, file_path, **kwargs):
+    def __init__(self, file_path: PathLike, **kwargs):
         super().__init__(file_path, **kwargs)
         self.f = None
         self.cur_file = None
@@ -86,7 +93,7 @@ def close(self):
             self.f.close()
             self.f = None

-    def _open_archive(self, file_path):
+    def _open_archive(self, file_path: PathLike):
        """Opens the hdf5 file where the next matrix/vector is if it is not open.
           If there was another hdf5 file open, it closes it.
@@ -96,7 +103,7 @@ def _open_archive(self, file_path): self.cur_file = file_path self.f = h5py.File(file_path, "r") - def read_num_rows(self, num_records=0, assert_same_dim=True): + def read_num_rows(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the number of rows in the feature matrices of the dataset. Args: @@ -113,7 +120,7 @@ def read_num_rows(self, num_records=0, assert_same_dim=True): num_rows = np.array([s[0] if len(s) == 2 else 1 for s in shapes], dtype=int) return keys, num_rows - def read_dims(self, num_records=0, assert_same_dim=True): + def read_dims(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the number of columns in the feature matrices of the dataset. Args: @@ -147,7 +154,7 @@ class SequentialH5FileDataReader(SequentialH5DataReader): split_by_key: If True, all the elements with the same key go to the same part. """ - def __init__(self, file_path, **kwargs): + def __init__(self, file_path: PathLike, **kwargs): super().__init__(file_path, permissive=False, **kwargs) self._open_archive(self.file_path) self._keys = list(self.f.keys()) @@ -172,7 +179,7 @@ def eof(self): """Returns True when it reaches the end of the ark file.""" return self.cur_item == len(self._keys) - def read_shapes(self, num_records=0, assert_same_dim=True): + def read_shapes(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the shapes in the feature matrices of the dataset. Args: @@ -204,7 +211,13 @@ def read_shapes(self, num_records=0, assert_same_dim=True): return keys, shapes - def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0): + def read( + self, + num_records: int = 0, + squeeze: bool = False, + row_offset: int = 0, + num_rows: int = 0, + ): """Reads next num_records feature matrices/vectors. Args: @@ -225,12 +238,8 @@ def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0): if num_records == 0: num_records = len(self._keys) - self.cur_item - row_offset_is_list = isinstance(row_offset, list) or isinstance( - row_offset, np.ndarray - ) - num_rows_is_list = isinstance(num_rows, list) or isinstance( - num_rows, np.ndarray - ) + row_offset_is_list = isinstance(row_offset, (list, np.ndarray)) + num_rows_is_list = isinstance(num_rows, (list, np.ndarray)) keys = [] data = [] with self.lock: @@ -268,7 +277,6 @@ class SequentialH5ScriptDataReader(SequentialH5DataReader): the scp file. This is useful when data is read from a different directory of that it was created. - scp_sep: Separator for scp files (default ' '). transform: TransformList object, applies a transformation to the features after reading them from disk. part_idx: It splits the input into num_parts and writes only @@ -277,20 +285,20 @@ class SequentialH5ScriptDataReader(SequentialH5DataReader): split_by_key: If True, all the elements with the same key go to the same part. 
""" - def __init__(self, file_path, path_prefix=None, scp_sep=" ", **kwargs): + def __init__( + self, file_path: PathLike, path_prefix: Optional[PathLike] = None, **kwargs + ): super().__init__(file_path, permissive=False, **kwargs) - self.scp = SCPList.load(self.file_path, sep=scp_sep) + self.feature_set = FeatureSet.load(self.file_path) if self.num_parts > 1: - self.scp = self.scp.split( - self.part_idx, self.num_parts, group_by_key=self.split_by_key - ) + self.feature_set = self.feature_set.split(self.part_idx, self.num_parts) if path_prefix is not None: - self.scp.add_prefix_to_filepath(path_prefix) + self.feature_set.add_prefix_to_storage_path(path_prefix) @property def keys(self): - return self.scp.key + return self.feature_set["id"] def reset(self): """Closes all the open hdf5 files and puts the read pointer pointing @@ -300,9 +308,9 @@ def reset(self): def eof(self): """Returns True when all the elements in the scp have been read.""" - return self.cur_item == len(self.scp) + return self.cur_item == len(self.feature_set) - def read_shapes(self, num_records=0, assert_same_dim=True): + def read_shapes(self, num_records: int = 0, assert_same_dim: bool = True): """Reads the shapes in the feature matrices of the dataset. Args: @@ -316,7 +324,7 @@ def read_shapes(self, num_records=0, assert_same_dim=True): List of tuples with num_records shapes. """ if num_records == 0: - num_records = len(self.scp) - self.cur_item + num_records = len(self.feature_set) - self.cur_item keys = [] shapes = [] @@ -324,14 +332,15 @@ def read_shapes(self, num_records=0, assert_same_dim=True): if self.eof(): break - key, file_path, offset, range_spec = self.scp[self.cur_item] - - row_offset_i, num_rows_i = self._combine_ranges(range_spec, 0, 0) - - self._open_archive(file_path) + feature_spec = self.feature_set.iloc[self.cur_item] + key = feature_spec["id"] + self._open_archive(feature_spec["storage_path"]) shape_i = self.f[key].shape - shape_i = self._apply_range_to_shape(shape_i, row_offset_i, num_rows_i) + if "start" in feature_spec and "num_frames" in feature_spec: + range_spec = [feature_spec["start"], feature_spec["num_frames"]] + row_offset_i, num_rows_i = self._combine_ranges(range_spec, 0, 0) + shape_i = self._apply_range_to_shape(shape_i, row_offset_i, num_rows_i) keys.append(key) shapes.append(shape_i) @@ -343,7 +352,13 @@ def read_shapes(self, num_records=0, assert_same_dim=True): return keys, shapes - def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0): + def read( + self, + num_records: int = 0, + squeeze: bool = False, + row_offset: int = 0, + num_rows: int = 0, + ): """Reads next num_records feature matrices/vectors. Args: @@ -362,14 +377,10 @@ def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0): data: List of feature matrices/vectors or 3D/2D numpy array. 
""" if num_records == 0: - num_records = len(self.scp) - self.cur_item + num_records = len(self.feature_set) - self.cur_item - row_offset_is_list = isinstance(row_offset, list) or isinstance( - row_offset, np.ndarray - ) - num_rows_is_list = isinstance(num_rows, list) or isinstance( - num_rows, np.ndarray - ) + row_offset_is_list = isinstance(row_offset, (list, np.ndarray)) + num_rows_is_list = isinstance(num_rows, (list, np.ndarray)) keys = [] data = [] @@ -378,7 +389,13 @@ def read(self, num_records=0, squeeze=False, row_offset=0, num_rows=0): if self.eof(): break - key, file_path, offset, range_spec = self.scp[self.cur_item] + feature_spec = self.feature_set.iloc[self.cur_item] + key = feature_spec["id"] + file_path = feature_spec["storage_path"] + if "start" in feature_spec and "num_frames" in feature_spec: + range_spec = [feature_spec["start"], feature_spec["num_frames"]] + else: + range_spec = None row_offset_i = row_offset[i] if row_offset_is_list else row_offset num_rows_i = num_rows[i] if num_rows_is_list else num_rows @@ -413,11 +430,18 @@ class RandomAccessH5DataReader(RandomAccessDataReader): it returns an empty matrix, if False it raises an exception. """ - def __init__(self, file_path, transform=None, permissive=False): + def __init__( + self, + file_path: PathLike, + transform: Optional[Callable[[np.array], np.array]] = None, + permissive: bool = False, + ): super().__init__(file_path, transform, permissive) self.f = None - def read_num_rows(self, keys, assert_same_dim=True): + def read_num_rows( + self, keys: Union[str, List[str], np.array], assert_same_dim: bool = True + ): """Reads the number of rows in the feature matrices of the dataset. Args: @@ -433,7 +457,9 @@ def read_num_rows(self, keys, assert_same_dim=True): num_rows = np.array([s[0] if len(s) == 2 else 1 for s in shapes], dtype=int) return num_rows - def read_dims(self, keys, assert_same_dim=True): + def read_dims( + self, keys: Union[str, List[str], np.array], assert_same_dim: bool = True + ): """Reads the number of columns in the feature matrices of the dataset. Args: @@ -463,7 +489,7 @@ class RandomAccessH5FileDataReader(RandomAccessH5DataReader): it returns an empty matrix, if False it raises an exception. """ - def __init__(self, file_path, **kwargs): + def __init__(self, file_path: PathLike, **kwargs): super().__init__(file_path, **kwargs) self.lock = multiprocessing.Lock() self._open_archive(file_path) @@ -474,7 +500,7 @@ def close(self): self.f.close() self.f = None - def _open_archive(self, file_path): + def _open_archive(self, file_path: PathLike): """Open the hdf5 file it it is not open.""" if self.f is None: self.close() @@ -484,7 +510,9 @@ def _open_archive(self, file_path): def keys(self): return list(self.f.keys()) - def read_shapes(self, keys, assert_same_dim=True): + def read_shapes( + self, keys: Union[str, List[str], np.array], assert_same_dim: bool = True + ): """Reads the shapes in the feature matrices of the dataset. Args: @@ -518,7 +546,13 @@ def read_shapes(self, keys, assert_same_dim=True): return shapes - def read(self, keys, squeeze=False, row_offset=0, num_rows=0): + def read( + self, + keys: Union[str, List[str], np.array], + squeeze: bool = False, + row_offset: int = 0, + num_rows: int = 0, + ): """Reads the feature matrices/vectors for the recordings in keys. 
Args: @@ -539,12 +573,8 @@ def read(self, keys, squeeze=False, row_offset=0, num_rows=0): if isinstance(keys, str): keys = [keys] - row_offset_is_list = isinstance(row_offset, list) or isinstance( - row_offset, np.ndarray - ) - num_rows_is_list = isinstance(num_rows, list) or isinstance( - num_rows, np.ndarray - ) + row_offset_is_list = isinstance(row_offset, (list, np.ndarray)) + num_rows_is_list = isinstance(num_rows, (list, np.ndarray)) if row_offset_is_list: assert len(row_offset) == len(keys) if num_rows_is_list: @@ -589,17 +619,20 @@ class RandomAccessH5ScriptDataReader(RandomAccessH5DataReader): features after reading them from disk. permissive: If True, if the data that we want to read is not in the file it returns an empty matrix, if False it raises an exception. - scp_sep: Separator for scp files (default ' '). """ - def __init__(self, file_path, path_prefix=None, scp_sep=" ", **kwargs): + def __init__( + self, file_path: PathLike, path_prefix: Optional[PathLike] = None, **kwargs + ): super().__init__(file_path, **kwargs) - self.scp = SCPList.load(self.file_path, sep=scp_sep) + self.feature_set = FeatureSet.load(self.file_path) if path_prefix is not None: - self.scp.add_prefix_to_filepath(path_prefix) + self.feature_set.add_prefix_to_storage_path(path_prefix) - archives, archive_idx = np.unique(self.scp.file_path, return_inverse=True) + archives, archive_idx = np.unique( + self.feature_set["storage_path"], return_inverse=True + ) self.archives = archives self.archive_idx = archive_idx self.f = [None] * len(self.archives) @@ -614,9 +647,9 @@ def close(self): @property def keys(self): - return self.scp.key + return self.feature_set["id"] - def _open_archive(self, key_idx): + def _open_archive(self, key_idx: int): """Opens the hdf5 file correspoding to a given feature/matrix if it is not already open. @@ -633,7 +666,9 @@ def _open_archive(self, key_idx): return self.f[archive_idx], self.locks[archive_idx] - def read_shapes(self, keys, assert_same_dim=True): + def read_shapes( + self, keys: Union[str, List[str], np.array], assert_same_dim: bool = True + ): """Reads the shapes in the feature matrices of the dataset. 
Args: @@ -651,18 +686,15 @@ def read_shapes(self, keys, assert_same_dim=True): shapes = [] for key in keys: - if not (key in self.scp): + if not (key in self.feature_set.index): if self.permissive: shapes.append((0,)) continue else: raise Exception("Key %s not found" % key) - index = self.scp.get_index(key) - _, file_path, offset, range_spec = self.scp[index] - - row_offset_i, num_rows_i = self._combine_ranges(range_spec, 0, 0) - + index = self.feature_set.get_loc(key) + feature_spec = self.feature_set.loc[key] f, lock = self._open_archive(index) if not (key in f): if self.permissive: @@ -673,8 +705,12 @@ def read_shapes(self, keys, assert_same_dim=True): with lock: shape_i = f[key].shape - shape_i = self._apply_range_to_shape(shape_i, row_offset_i, num_rows_i) - # print('%s %d %.2f' % (key,time.time()-t1, len(shapes)/len(keys)*100.)) + + if "start" in feature_spec and "num_frames" in feature_spec: + range_spec = [feature_spec["start"], feature_spec["num_frames"]] + row_offset_i, num_rows_i = self._combine_ranges(range_spec, 0, 0) + shape_i = self._apply_range_to_shape(shape_i, row_offset_i, num_rows_i) + shapes.append(shape_i) if assert_same_dim: @@ -683,7 +719,13 @@ def read_shapes(self, keys, assert_same_dim=True): return shapes - def read(self, keys, squeeze=False, row_offset=0, num_rows=0): + def read( + self, + keys: Union[str, List[str], np.array], + squeeze: bool = False, + row_offset: int = 0, + num_rows: int = 0, + ): """Reads the feature matrices/vectors for the recordings in keys. Args: @@ -704,12 +746,8 @@ def read(self, keys, squeeze=False, row_offset=0, num_rows=0): if isinstance(keys, str): keys = [keys] - row_offset_is_list = isinstance(row_offset, list) or isinstance( - row_offset, np.ndarray - ) - num_rows_is_list = isinstance(num_rows, list) or isinstance( - num_rows, np.ndarray - ) + row_offset_is_list = isinstance(row_offset, (list, np.ndarray)) + num_rows_is_list = isinstance(num_rows, (list, np.ndarray)) if row_offset_is_list: assert len(row_offset) == len(keys) if num_rows_is_list: @@ -718,15 +756,19 @@ def read(self, keys, squeeze=False, row_offset=0, num_rows=0): data = [] for i, key in enumerate(keys): - if not (key in self.scp): + if not (key in self.feature_set.index): if self.permissive: data.append(np.array([], dtype=float_cpu())) continue else: raise Exception("Key %s not found" % key) - index = self.scp.get_index(key) - _, file_path, offset, range_spec = self.scp[index] + index = self.feature_set.get_loc(key) + feature_spec = self.feature_set.loc[key] + if "start" in feature_spec and "num_frames" in feature_spec: + range_spec = [feature_spec["start"], feature_spec["num_frames"]] + else: + range_spec = None row_offset_i = row_offset[i] if row_offset_is_list else row_offset num_rows_i = num_rows[i] if num_rows_is_list else num_rows diff --git a/hyperion/io/h5_data_writer.py b/hyperion/io/h5_data_writer.py index fed91d1e..c34aa0ca 100644 --- a/hyperion/io/h5_data_writer.py +++ b/hyperion/io/h5_data_writer.py @@ -3,7 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import sys +from typing import Union, Optional, List import h5py import numpy as np @@ -11,7 +11,7 @@ from ..hyp_defs import float_save from ..utils.kaldi_io_funcs import is_token from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix -from ..utils.scp_list import SCPList +from ..utils import PathLike from .data_writer import DataWriter @@ -27,18 +27,18 @@ class H5DataWriter(DataWriter): {auto (default), speech_feat, 2byte-auto, 2byte-signed-integer, 1byte-auto, 
1byte-unsigned-integer, 1byte-0-1}. - scp_sep: Separator for scp files (default ' '). """ - def __init__(self, archive_path, script_path=None, **kwargs): + def __init__( + self, archive_path: PathLike, script_path: Optional[PathLike] = None, **kwargs + ): super().__init__(archive_path, script_path, **kwargs) self.f = h5py.File(archive_path, "w") - if script_path is None: - self.f_script = None - else: - self.f_script = open(script_path, "w") + if script_path is not None and not self.script_is_scp: + row = self.script_sep.join(["id", "storage_path"]) + self.f_script.write(f"{row}\n") def __exit__(self, exc_type, exc_value, traceback): """Function required when exiting from contructions of type @@ -64,7 +64,7 @@ def flush(self): if self.f_script is not None: self.f_script.flush() - def _convert_data(self, data): + def _convert_data(self, data: np.array): """Converts data to the format for saving. Compresses the data it needed. Args: @@ -85,7 +85,11 @@ def _convert_data(self, data): else: raise ValueError("Data is not ndarray") - def write(self, keys, data): + def write( + self, + keys: Union[str, List[str], np.array], + data: Union[np.array, List[np.array]], + ): """Writes data to file. Args: @@ -108,9 +112,11 @@ def write(self, keys, data): dset.attrs[k] = v if self.f_script is not None: - self.f_script.write( - "%s%s%s\n" % (key_i, self.scp_sep, self.archive_path) - ) + if self.script_is_scp: + self.f_script.write(f"{key_i} {self.archive_path}\n") + else: + row = self.script_sep.join([key_i, self.archive_path]) + self.f_script.write(f"{row}\n") if self._flush: self.flush() diff --git a/hyperion/io/old_audio_reader.py b/hyperion/io/old_audio_reader.py new file mode 100644 index 00000000..341f04a4 --- /dev/null +++ b/hyperion/io/old_audio_reader.py @@ -0,0 +1,477 @@ +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import io +import logging +import math +import os +import subprocess + +import numpy as np +import soundfile as sf +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from ..hyp_defs import float_cpu +from ..utils import SCPList, SegmentList + +valid_ext = [ + ".wav", + ".flac", + ".ogg", + ".au", + ".avr", + ".caf", + ".htk", + ".iff", + ".mat", + ".mpc", + ".oga", + ".pvf", + ".rf64", + ".sd2", + ".sds", + ".sf", + ".voc", + "w64", + ".wve", + ".xi", +] + + +class AudioReader(object): + """Class to read audio files from wav, flac or pipe + + Attributes: + file_path: scp file with formant file_key wavspecifier (audio_file/pipe) or SCPList object. 
+ segments_path: segments file with format: segment_id file_id tbeg tend + wav_scale: multiplies signal by scale factor + """ + + def __init__(self, file_path, segments_path=None, wav_scale=2 ** 15 - 1): + self.file_path = file_path + if isinstance(file_path, SCPList): + self.scp = file_path + else: + self.scp = SCPList.load(file_path, sep=" ", is_wav=True) + + self.segments_path = segments_path + if segments_path is None: + self.segments = None + self.with_segments = False + else: + self.with_segments = True + if isinstance(file_path, SegmentList): + self.segments = segments_path + else: + self.segments = SegmentList.load( + segments_path, sep=" ", index_by_file=False + ) + + self.wav_scale = wav_scale + + @property + def keys(self): + if self.with_segments: + return np.asarray(self.segments["segment_id"]) + return self.scp.key + + def __enter__(self): + """Function required when entering contructions of type + + with AudioReader('file.h5') as f: + keys, data = f.read() + """ + return self + + def __exit__(self, exc_type, exc_value, traceback): + """Function required when exiting from contructions of type + + with AudioReader('file.h5') as f: + keys, data = f.read() + """ + pass + + @staticmethod + def read_wavspecifier(wavspecifier, scale=2 ** 15, time_offset=0, time_dur=0): + """Reads an audiospecifier (audio_file/pipe) + It reads from pipe or from all the files that can be read + by `libsndfile ` + + Args: + wavspecifier: A pipe, wav, flac, ogg file etc. + scale: Multiplies signal by scale factor + time_offset: float indicating the start time to read in the utterance. + time_durs: floats indicating the number of seconds to read from the utterance, + if 0 it reads untils the end + + """ + wavspecifier = wavspecifier.strip() + if wavspecifier[-1] == "|": + wavspecifier = wavspecifier[:-1] + x, fs = AudioReader.read_pipe(wavspecifier, scale) + if time_offset == 0 and time_dur == 0: + return x, fs + + start_sample = int(math.floor(time_offset * fs)) + num_samples = int(math.floor(time_dur * fs)) + if num_samples == 0: + return x[start_sample:], fs + + end_sample = start_sample + num_samples + assert end_sample <= len(x) + return x[start_sample:end_sample], fs + + ext = os.path.splitext(wavspecifier)[1] + if ext in valid_ext: + if time_offset == 0 and time_dur == 0: + x, fs = sf.read(wavspecifier, dtype=float_cpu()) + x *= scale + return x, fs + + with sf.SoundFile(wavspecifier, "r") as f: + fs = f.samplerate + start_sample = int(math.floor(time_offset * fs)) + num_samples = int(math.floor(time_dur * fs)) + f.seek(start_sample) + if num_samples > 0: + x = scale * f.read(num_samples, dtype=float_cpu()) + else: + x = scale * f.read(dtype=float_cpu()) + return x, fs + + raise Exception("Unknown format for %s" % (wavspecifier)) + + @staticmethod + def read_pipe(wavspecifier, scale=2 ** 15): + """Reads wave file from a pipe + Args: + wavspecifier: Shell command with pipe output + scale: Multiplies signal by scale factor + """ + # proc = subprocess.Popen(wavspecifier, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + proc = subprocess.Popen(wavspecifier, shell=True, stdout=subprocess.PIPE) + pipe = proc.communicate()[0] + if proc.returncode != 0: + raise Exception( + "Wave read pipe command %s returned code %d" + % (wavspecifier, proc.returncode) + ) + x, fs = sf.read(io.BytesIO(pipe), dtype=float_cpu()) + x *= scale + return x, fs + + def _read_segment(self, segment, time_offset=0, time_dur=0): + """Reads a wave segment + + Args: + segment: pandas DataFrame (segment_id , file_id, 
tbeg, tend) + Returns: + Wave, sampling frequency + """ + file_id = segment["file_id"] + t_beg = segment["tbeg"] + time_offset + t_end = segment["tend"] + if time_dur > 0: + t_end_new = t_beg + time_dur + assert t_end_new <= t_end + t_end = t_end_new + + file_path, _, _ = self.scp[file_id] + x_i, fs_i = self.read_wavspecifier(file_path, self.wav_scale) + num_samples_i = len(x_i) + s_beg = int(t_beg * fs_i) + if s_beg >= num_samples_i: + raise Exception( + "segment %s tbeg=%.2f (num_sample=%d) longer that wav file %s (num_samples=%d)" + % (file_id, t_beg, s_beg, file_id, num_samples_i) + ) + + s_end = int(t_end * fs_i) + if s_end > num_samples_i or t_end < 0: + s_end = num_samples_i + + x_i = x_i[s_beg:s_end] + return x_i, fs_i + + def read(self): + pass + + +class SequentialAudioReader(AudioReader): + def __init__( + self, + file_path, + segments_path=None, + wav_scale=2 ** 15 - 1, + part_idx=1, + num_parts=1, + ): + super().__init__(file_path, segments_path, wav_scale=wav_scale) + self.cur_item = 0 + self.part_idx = part_idx + self.num_parts = num_parts + if self.num_parts > 1: + if self.with_segments: + self.segments = self.segments.split(self.part_idx, self.num_parts) + else: + self.scp = self.scp.split( + self.part_idx, self.num_parts, group_by_key=False + ) + + def __iter__(self): + """Needed to build an iterator, e.g.: + r = SequentialAudioReader(...) + for key, s, fs in r: + print(key) + process(s) + """ + return self + + def __next__(self): + """Needed to build an iterator, e.g.: + r = SequentialAudioReader(...) + for key , s, fs in r: + process(s) + """ + key, x, fs = self.read(1) + if len(key) == 0: + raise StopIteration + return key[0], x[0], fs[0] + + def next(self): + """__next__ for Python 2""" + return self.__next__() + + def reset(self): + """Returns the file pointer to the begining of the dataset, + then we can start reading the features again. + """ + self.cur_item = 0 + + def eof(self): + """End of file. + + Returns: + True, when we have read all the recordings in the dataset. + """ + if self.with_segments: + return self.cur_item == len(self.segments) + return self.cur_item == len(self.scp) + + def read(self, num_records=0, time_offset=0, time_durs=0): + """Reads next num_records audio files + + Args: + num_records: Number of audio files to read. + time_offset: List of floats indicating the start time to read in the utterance. + time_durs: List of floats indicating the number of seconds to read from each utterance + + Returns: + key: List of recording names. 
+ data: List of waveforms + fs: list of sample freqs + """ + if num_records == 0: + if self.with_segments: + num_records = len(self.segments) - self.cur_item + else: + num_records = len(self.scp) - self.cur_item + + offset_is_list = isinstance(time_offset, (list, np.ndarray)) + dur_is_list = isinstance(time_durs, (list, np.ndarray)) + + keys = [] + data = [] + fs = [] + for i in range(num_records): + if self.eof(): + break + + offset_i = time_offset[i] if offset_is_list else time_offset + dur_i = time_durs[i] if dur_is_list else time_durs + + if self.with_segments: + segment = self.segments[self.cur_item] + key = segment["segment_id"] + x_i, fs_i = self._read_segment(segment, offset_i, dur_i) + else: + key, file_path, _, _ = self.scp[self.cur_item] + x_i, fs_i = self.read_wavspecifier( + file_path, self.wav_scale, offset_i, dur_i + ) + + keys.append(key) + data.append(x_i) + fs.append(fs_i) + self.cur_item += 1 + + return keys, data, fs + + @staticmethod + def filter_args(**kwargs): + valid_args = ("part_idx", "num_parts", "wav_scale") + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--wav-scale", + default=2 ** 15 - 1, + type=float, + help=("multiplicative factor for waveform"), + ) + try: + parser.add_argument( + "--part-idx", + type=int, + default=1, + help=( + "splits the list of files into num-parts and " "processes part-idx" + ), + ) + parser.add_argument( + "--num-parts", + type=int, + default=1, + help=( + "splits the list of files into num-parts and " "processes part-idx" + ), + ) + except: + pass + + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, action=ActionParser(parser=parser), + ) + + add_argparse_args = add_class_args + + +class RandomAccessAudioReader(AudioReader): + def __init__(self, file_path, segments_path=None, wav_scale=2 ** 15 - 1): + super().__init__(file_path, segments_path, wav_scale) + + def _read(self, keys, time_offset=0, time_durs=0): + """Reads the waveforms for the recordings in keys. + + Args: + keys: List of recording/segment_ids names. + + Returns: + data: List of waveforms + """ + if isinstance(keys, str): + keys = [keys] + + offset_is_list = isinstance(time_offset, (list, np.ndarray)) + dur_is_list = isinstance(time_durs, (list, np.ndarray)) + + data = [] + fs = [] + for i, key in enumerate(keys): + + offset_i = time_offset[i] if offset_is_list else time_offset + dur_i = time_durs[i] if dur_is_list else time_durs + + if self.with_segments: + if not (key in self.segments): + raise Exception("Key %s not found" % key) + + segment = self.segments[key] + x_i, fs_i = self._read_segment(segment, offset_i, dur_i) + else: + if not (key in self.scp): + raise Exception("Key %s not found" % key) + + file_path, _, _ = self.scp[key] + x_i, fs_i = self.read_wavspecifier( + file_path, self.wav_scale, offset_i, dur_i + ) + + data.append(x_i) + fs.append(fs_i) + + return data, fs + + def read(self, keys, time_offset=0, time_durs=0): + """Reads the waveforms for the recordings in keys. + + Args: + keys: List of recording/segment_ids names. + + Returns: + data: List of waveforms + fs: List of sampling freq. 
+ """ + try: + x, fs = self._read(keys, time_offset=time_offset, time_durs=time_durs) + except: + if isinstance(keys, str): + keys = [keys] + + if not isinstance(time_offset, (list, np.ndarray)): + time_offset = [time_offset] * len(keys) + if not isinstance(time_durs, (list, np.ndarray)): + time_durs = [time_durs] * len(keys) + + try: + # some files produce error in the fseek after reading the data, + # this seems an issue from pysoundfile or soundfile lib itself + # we try to read from + # time-offset to the end of the file, and remove the extra frames later, + # this solves the problem in most cases + logging.info( + ( + "error-1 reading at keys={} offset={} " + "retrying reading until end-of-file ..." + ).format(keys, time_offset) + ) + x, fs = self._read(keys, time_offset=time_offset) + for i in range(len(x)): + end_sample = int(time_durs[i] * fs[i]) + x[i] = x[i][:end_sample] + except: + # try to read the full file + logging.info( + ( + "error-2 reading at key={}, " "retrying reading full file ..." + ).format(keys) + ) + x, fs = self._read(keys) + for i in range(len(x)): + start_sample = int(time_offset[i] * fs[i]) + end_sample = start_sample + int(time_durs[i] * fs[i]) + x[i] = x[i][start_sample:end_sample] + + return x, fs + + @staticmethod + def filter_args(**kwargs): + valid_args = ("wav_scale",) + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--wav-scale", + default=2 ** 15 - 1, + type=float, + help=("multiplicative factor for waveform"), + ) + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, action=ActionParser(parser=parser), + ) + + add_argparse_args = add_class_args diff --git a/hyperion/io/vad_rw_factory.py b/hyperion/io/vad_rw_factory.py index 32032d1d..fff1ab4a 100644 --- a/hyperion/io/vad_rw_factory.py +++ b/hyperion/io/vad_rw_factory.py @@ -6,8 +6,7 @@ import logging from .bin_vad_reader import BinVADReader as BVR -from .rw_specifiers import (ArchiveType, RSpecifier, RSpecType, WSpecifier, - WSpecType) +from .rw_specifiers import ArchiveType, RSpecifier, RSpecType, WSpecifier, WSpecType from .segment_vad_reader import SegmentVADReader as SVR @@ -16,7 +15,6 @@ class VADReaderFactory(object): def create( rspecifier, path_prefix=None, - scp_sep=" ", frame_length=25, frame_shift=10, snip_edges=False, @@ -33,7 +31,6 @@ def create( return BVR( rspecifier, path_prefix, - scp_sep, frame_length=frame_length, frame_shift=frame_shift, snip_edges=snip_edges, @@ -48,7 +45,6 @@ def create( return BVR( rspecifier, path_prefix, - scp_sep, frame_length=frame_length, frame_shift=frame_shift, snip_edges=snip_edges, @@ -57,7 +53,6 @@ def create( @staticmethod def filter_args(**kwargs): valid_args = ( - "scp_sep", "path_prefix", "frame_shift", "frame_length", @@ -72,9 +67,6 @@ def add_class_args(parser, prefix=None): else: p1 = "--" + prefix + "." 
-        parser.add_argument(
-            p1 + "scp-sep", default=" ", help=("scp file field separator")
-        )
         parser.add_argument(
             p1 + "path-prefix", default=None, help=("scp file_path prefix")
         )
diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py
index 1e42a1c3..fa675fdb 100644
--- a/hyperion/torch/data/audio_dataset.py
+++ b/hyperion/torch/data/audio_dataset.py
@@ -9,7 +9,8 @@

 import numpy as np
 import pandas as pd
-#import k2
+
+# import k2
 import sentencepiece as spm
 import torchaudio.transforms as tat
 from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
@@ -25,16 +26,11 @@
 from ...utils.text import read_text
 from ..torch_defs import floatstr_torch

-#from torch.nn.utils.rnn import pad_sequence
-
-
-
 class AudioDataset(Dataset):
-
     def __init__(
         self,
-        audio_file,
+        recordings_file,
         segments_file,
         class_names=None,
         class_files=None,
@@ -46,7 +42,7 @@ def __init__(
         return_segment_info=None,
         return_orig=False,
         target_sample_freq=None,
-        wav_scale=2**15 - 1,
+        wav_scale=2 ** 15 - 1,
         is_val=False,
     ):

@@ -61,12 +57,6 @@ def __init__(
         self.rank = rank
         self.world_size = world_size
         self.epoch = 0
-
-        if rank == 0:
-            logging.info("opening audio reader %s", audio_file)
-
-        self.r = AR(audio_file, wav_scale=wav_scale)
-
         if rank == 0:
             logging.info("loading segments file %s", segments_file)

@@ -74,17 +64,17 @@ def __init__(
         if rank == 0:
             logging.info("dataset contains %d seqs", len(self.seg_set))

+        if rank == 0:
+            logging.info("opening audio reader %s", recordings_file)
+
+        audio_seg_set = self.seg_set if self.seg_set.has_time_marks else None
+        self.r = AR(recordings_file, segments=audio_seg_set, wav_scale=wav_scale)
+
         self.is_val = is_val
         if time_durs_file is not None:
-            if rank == 0:
-                logging.info("loading durations file %s", time_durs_file)
+            self._load_legacy_durations(time_durs_file)

-            time_durs = SegmentSet.load(time_durs_file)
-            self.seg_set["duration"] = time_durs.loc[
-                self.seg_set["id"]].class_id.values.astype(np.float,
-                                                           copy=False)
-        else:
-            assert "duration" in self.seg_set
+        assert "duration" in self.seg_set

         logging.info("loading class-info files")
         self._load_class_infos(class_names, class_files, is_val)
@@ -96,8 +86,9 @@ def __init__(
         if text_file is not None:
             logging.info("loading text files")
             self._load_text_infos(text_file, is_val)
-        self.return_segment_info = ([] if return_segment_info is None else
-                                    return_segment_info)
+        self.return_segment_info = (
+            [] if return_segment_info is None else return_segment_info
+        )
         self.return_orig = return_orig

         self.num_augs = num_augs
@@ -106,9 +97,18 @@ def __init__(
         self.target_sample_freq = target_sample_freq
         self.resamplers = {}

+    def _load_legacy_durations(self, time_durs_file):
+        if self.rank == 0:
+            logging.info("loading durations file %s", time_durs_file)
+
+        time_durs = SegmentSet.load(time_durs_file)
+        self.seg_set["duration"] = time_durs.loc[
+            self.seg_set["id"]
+        ].class_id.values.astype(float, copy=False)
+
     def _load_bpe_model(self, bpe_model, is_val):
         if self.rank == 0:
-            logging.info("loading bpe file %s" % bpe_model)
+            logging.info("loading bpe file %s", bpe_model)
         self.sp = spm.SentencePieceProcessor()
         self.sp.load(bpe_model)
         blank_id = self.sp.piece_to_id("<blk>")
@@ -118,7 +118,7 @@ def _load_text_infos(self, text_file, is_val):
         if text_file is None:
             return
         if self.rank == 0:
-            logging.info("loading text file %s" % text_file)
+            logging.info("loading text file %s", text_file)

         text = read_text(text_file)
         self.seg_set["text"] = text.loc[self.seg_set["id"]].text
@@ -131,8 +131,9 @@ def 
_load_class_infos(self, class_names, class_files, is_val): assert len(class_names) == len(class_files) for name, file in zip(class_names, class_files): - assert (name in self.seg_set - ), f"class_name {name} not present in the segment set" + assert ( + name in self.seg_set + ), f"class_name {name} not present in the segment set" if self.rank == 0: logging.info("loading class-info file %s" % file) table = ClassInfo.load(file) @@ -143,8 +144,9 @@ def _load_class_infos(self, class_names, class_files, is_val): segment_class_ids = self.seg_set[name].unique() for c_id in class_ids: if c_id not in segment_class_ids: - logging.warning("%s class: %s not present in dataset", - name, c_id) + logging.warning( + "%s class: %s not present in dataset", name, c_id + ) def _create_augmenters(self, aug_cfgs): self.augmenters = [] @@ -154,12 +156,11 @@ def _create_augmenters(self, aug_cfgs): for aug_cfg in aug_cfgs: logging.info(f"loading augmentation={aug_cfg}") - augmenter = SpeechAugment.create(aug_cfg, - random_seed=112358 + - 1000 * self.rank) + augmenter = SpeechAugment.create( + aug_cfg, random_seed=112358 + 1000 * self.rank + ) self.augmenters.append(augmenter) - self.reverb_context = max(augmenter.max_reverb_context, - self.reverb_context) + self.reverb_context = max(augmenter.max_reverb_context, self.reverb_context) def set_epoch(self, epoch): self.epoch = epoch @@ -201,12 +202,13 @@ def _parse_segment_item(self, segment): assert duration <= self.seg_set.loc[seg_id].duration, ( f"{seg_id} with start={start} duration " f"({self.seg_set.loc[seg_id].duration}) < " - f"chunk duration ({duration})") + f"chunk duration ({duration})" + ) else: seg_id, start, duration = segment, 0, 0 - if "start" in self.seg_set: - start += self.seg_set.loc[seg_id].start + # if "start" in self.seg_set: + # start += self.seg_set.loc[seg_id].start return seg_id, start, duration @@ -217,14 +219,23 @@ def _read_audio(self, seg_id, start, duration): start -= reverb_context read_duration = duration + reverb_context + # read audio + x, fs = self.r.read([seg_id], time_offset=start, time_durs=read_duration) + return x[0].astype(floatstr_torch(), copy=False), fs[0] + + def _read_audio0(self, seg_id, start, duration): + # how much extra audio we need to load to + # calculate the reverb of the first part of the audio + reverb_context = min(self.reverb_context, start) + start -= reverb_context + read_duration = duration + reverb_context + # read audio recording_id = self.seg_set.recording_ids(seg_id) - x, fs = self.r.read([recording_id], - time_offset=start, - time_durs=read_duration) + x, fs = self.r.read([recording_id], time_offset=start, time_durs=read_duration) return x[0].astype(floatstr_torch(), copy=False), fs[0] - def _apply_augs(self, x, num_samples, reverb_context_samples): + def _apply_augs(self, x, reverb_context_samples): x_augs = {} # for each type of augmentation for i, augmenter in enumerate(self.augmenters): @@ -233,7 +244,7 @@ def _apply_augs(self, x, num_samples, reverb_context_samples): # augment x x_aug, aug_info = augmenter(x) # remove the extra left context used to compute the reverberation. 
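The reverb-context bookkeeping around this hunk (read `reverb_context` extra seconds on the left so the RIR tail is realistic at the chunk start, then slice the extra samples off after augmentation) is easy to get wrong, so here is a toy numeric walk-through of the same arithmetic; the sample rate, durations, and zero signal are invented stand-ins:

```python
# Toy walk-through of the reverb-context logic (invented numbers).
import numpy as np

fs = 16000
start, duration = 3.0, 4.0       # requested chunk, in seconds
max_reverb_context = 0.5         # left context the RIR augmenter needs

reverb_context = min(max_reverb_context, start)  # cannot read before t=0
read_duration = duration + reverb_context        # 4.5 s actually read
x = np.zeros(int(read_duration * fs))            # stands in for the decoded audio

# after augmentation, drop the extra left context, as in _apply_augs
reverb_context_samples = len(x) - int(duration * fs)
x_aug = x[reverb_context_samples:len(x)]
assert len(x_aug) == int(duration * fs)          # exactly the requested 4 s chunk
```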
- x_aug = x_aug[reverb_context_samples:len(x)] + x_aug = x_aug[reverb_context_samples : len(x)] x_aug = x_aug.astype(floatstr_torch(), copy=False) x_augs[f"x_aug_{i}_{j}"] = x_aug @@ -300,7 +311,7 @@ def __getitem__(self, segment): else: num_samples = int(duration * fs) reverb_context_samples = len(x) - num_samples - x_augs = self._apply_augs(x, num_samples, reverb_context_samples) + x_augs = self._apply_augs(x, reverb_context_samples) data.update(x_augs) # add original non augmented audio @@ -311,15 +322,6 @@ def __getitem__(self, segment): else: data["x"] = x - # try: - # import soundfile as sf - - # for i, z in enumerate(r): - # sf.write(f"file_{seg_id}.wav", z, fs, "PCM_16") - # except: - # print("soundfile failed", flush=True) - - # adds the segment labels seg_info = self._get_segment_info(seg_id) data.update(seg_info) return data @@ -329,7 +331,7 @@ def filter_args(**kwargs): ar_args = AR.filter_args(**kwargs) valid_args = ( - "audio_file", + "recordings_file", "segments_file", "aug_cfgs", "num_augs", @@ -352,48 +354,43 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - if "audio_file" not in skip: + if "recordings_file" not in skip: parser.add_argument( - "--audio-file", + "--recordings-file", required=True, - help=("audio manifest file"), + help=("recordings manifest file (kaldi .scp or pandas .csv)"), ) if "segments_file" not in skip: parser.add_argument( "--segments-file", required=True, - help=("segments manifest file"), + help=("segments manifest file (kaldi .scp or pandas .csv)"), ) parser.add_argument( "--class-names", default=None, nargs="+", - help= - ("list with the names of the types of classes in the datasets, e.g., speaker, language" - ), + help=( + "list with the names of the types of classes in the datasets, e.g., speaker, language" + ), ) parser.add_argument( - "--class-files", - default=None, - nargs="+", - help=("list of class info files"), + "--class-files", default=None, nargs="+", help=("list of class info files"), ) parser.add_argument( "--time-durs-file", default=None, - help= - ("segment to duration in secs file, if durations are not in segments_file" - ), + help=( + "(deprecated) segment to duration in secs file, if durations are not in segments_file" + ), ) parser.add_argument( - "--bpe-model", - default=None, - help=("bpe model for the text label"), + "--bpe-model", default=None, help=("bpe model for the text label"), ) parser.add_argument( @@ -418,32 +415,31 @@ def add_class_args(parser, prefix=None, skip=set()): "--return-segment-info", default=None, nargs="+", - help= - ("list of columns of the segment file which should be returned as supervisions" - ), + help=( + "list of columns of the segment file which should be returned as supervisions" + ), ) parser.add_argument( "--return-orig", default=False, action=ActionYesNo, - help= - ("when using augmentation, whether or not to return also the original audio" - ), + help=( + "when using augmentation, whether or not to return also the original audio" + ), ) parser.add_argument( "--target-sample-freq", default=None, type=int, - help= - ("target sampling frequencey, if not None all audios are converted to this sample freq" - ), + help=( + "target sampling frequencey, if not None all audios are converted to this sample freq" + ), ) AR.add_class_args(parser) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) # help='audio 
dataset options') add_argparse_args = add_class_args diff --git a/hyperion/utils/feature_set.py b/hyperion/utils/feature_set.py index 2b2f0aaf..7e40dfd6 100644 --- a/hyperion/utils/feature_set.py +++ b/hyperion/utils/feature_set.py @@ -9,6 +9,7 @@ import pandas as pd from .info_table import InfoTable +from .misc import PathLike class FeatureSet(InfoTable): @@ -16,6 +17,9 @@ def __init__(self, df): super().__init__(df) assert "storage_path" in df + def add_prefix_to_storage_path(self, prefix: PathLike): + self.df["storage_path"] = self.df["storage_path"].apply(lambda x: f"{prefix}{x}") + def save(self, file_path, sep=None): """Saves info table to file @@ -31,14 +35,14 @@ def save(self, file_path, sep=None): from .scp_list import SCPList offset = self.df["storage_byte"] if "storage_byte" in self.df else None - range = None + range_spec = None if "start" and "num_frames" in self.df: - range = [ + range_spec = [ np.array([s, n], dtype=np.int64) for s, n in self.df[["start", "num_frames"]] ] scp = SCPList( - self.df["id"].values, self.df["storage_path"].values, offset, range + self.df["id"].values, self.df["storage_path"].values, offset, range_spec ) scp.save(file_path) return @@ -67,9 +71,9 @@ def load(cls, file_path, sep=None): if scp.offset is not None: df["storage_byte"] = scp.offset - if scp.range is not None: - df["start"] = [r[0] for r in scp.range] - df["num_frames"] = [r[0] for r in scp.range] + if scp.range_spec is not None: + df["start"] = [r[0] for r in scp.range_spec] + df["num_frames"] = [r[1] for r in scp.range_spec] return cls(df) diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index a3a1da27..5a4f27d2 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -22,6 +22,7 @@ class InfoTable: Attributes: df: pandas dataframe. """ + def __init__(self, df): self.df = df assert "id" in df, f"info_table={df}" @@ -137,10 +138,7 @@ def load(cls, file_path, sep=None, name="class_id"): sep=" ", header=None, names=["id", name], - dtype={ - "id": np.str, - name: np.str - }, + dtype={"id": np.str, name: np.str}, ) else: if sep is None: @@ -163,17 +161,16 @@ def split(self, idx, num_parts, group_by=None): Args: idx: Part to return from 1 to num_parts. num_parts: Number of parts to split the list.
- group_by_field: All the lines with the same value in column + group_by: All the lines with the same value in column group_by go to the same part Returns: - Sub Utt2Info object + Sub InfoTable object """ - if group_by is None: + if group_by is None or group_by == "id": _, idx1 = split_list(self.df["id"], idx, num_parts) else: - _, idx1 = split_list_group_by_key(self.df[group_by], idx, - num_parts) + _, idx1 = split_list_group_by_key(self.df[group_by], idx, num_parts) df = self.df.iloc[idx1] return self.__class__(df) @@ -192,14 +189,10 @@ def merge(cls, tables): df = pd.concat(df_list) return cls(df) - def filter(self, - items=None, - iindex=None, - columns=None, - by="id", - keep=True): - assert (items is None or iindex is None - ), "items and iindex cannot be not None at the same time" + def filter(self, items=None, iindex=None, columns=None, by="id", keep=True): + assert ( + items is None or iindex is None + ), "items and iindex cannot be not None at the same time" df = self.df if not keep: diff --git a/hyperion/utils/segment_set.py b/hyperion/utils/segment_set.py index f9da69fa..d51edc34 100644 --- a/hyperion/utils/segment_set.py +++ b/hyperion/utils/segment_set.py @@ -9,9 +9,36 @@ class SegmentSet(InfoTable): def __init__(self, df): super().__init__(df) + if "start" in df and "recording_id" not in df: + df["recording_id"] = df["id"] + + if "start" not in df and "recording_id" in df: + df["start"] = 0.0 + + @property + def has_time_marks(self): + return ( + "recording_id" in self.df and "start" in self.df and "duration" in self.df + ) + + @property + def has_recording_ids(self): + return "recording_id" in self.df def recording_ids(self, ids): if "recording_id" in self.df: return self.df.loc[ids, "recording_id"] return ids + + def recording_time_marks(self, ids): + if "recording_id" in self.df: + rec_col = "recording_id" + else: + rec_col = "id" + + assert "duration" in self.df + if "start" not in self.df: + self.df["start"] = 0.0 + + return self.df.loc[ids, [rec_col, "start", "duration"]] diff --git a/hyperion/utils/utt2info.py b/hyperion/utils/utt2info.py index 9785d021..edf2c23a 100644 --- a/hyperion/utils/utt2info.py +++ b/hyperion/utils/utt2info.py @@ -142,7 +142,7 @@ def save(self, file_path, sep=" "): self.utt_info.to_csv(file_path, sep=sep, header=False, index=False) @classmethod - def load(cls, file_path, sep=" ", dtype={0: np.str, 1: np.str}): + def load(cls, file_path, sep=" ", dtype={0: np.str_, 1: np.str_}): """Loads utt2info list from text file.
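The `SegmentSet` changes above are what let `AudioDataset` choose between whole-recording reads and `(recording_id, start, duration)` chunk reads. A toy table (invented data) showing when `has_time_marks` holds:

```python
# Toy SegmentSet-style table (invented data) with the new time-mark columns.
import pandas as pd

df = pd.DataFrame(
    {
        "id": ["seg1", "seg2"],
        "recording_id": ["rec1", "rec1"],
        "start": [0.0, 4.0],
        "duration": [4.0, 3.5],
    }
)
# With recording_id, start, and duration all present, has_time_marks is True,
# so AudioDataset hands the segments to the AudioReader and each read maps
# seg_id -> (recording_id, start, duration) instead of a full recording.
has_time_marks = all(c in df for c in ("recording_id", "start", "duration"))
assert has_time_marks
```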
Args: From c408f7428b7443761a0142a7b010dacf16aeaf2b Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 11 May 2023 14:15:47 -0400 Subject: [PATCH 100/154] some fixes in sre21 --- ...rain_ecapatdnn2048x4_xvec_stage1_v1.0.yaml | 102 +++++++++++++ ...rain_ecapatdnn2048x4_xvec_stage2_v1.0.yaml | 66 +++++++++ ...statsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | 47 +----- .../v1.16k/local/make_voxceleb1cat_v2.pl | 4 +- egs/sre21-av-a/v1.16k/run_002_compute_evad.sh | 39 ----- .../v1.16k/run_011_train_xvector.sh | 53 ++++++- egs/voxceleb/v1.1/README.md | 52 ++++--- ...train_res2net50w26s4_xvec_stage1_v3.0.yaml | 72 +++++++++ ...train_res2net50w26s4_xvec_stage2_v3.0.yaml | 69 +++++++++ ...train_res2net50w26s8_xvec_stage1_v3.0.yaml | 72 +++++++++ ...train_res2net50w26s8_xvec_stage2_v3.0.yaml | 69 +++++++++ .../train_tseresnet34_xvec_stage1_v3.0.yaml | 4 +- .../config_fbank80_stmn_tseresnet34.v3.0.sh | 2 +- hyperion/np/classifiers/svmc.py | 138 +++++++++--------- hyperion/np/np_model.py | 5 + hyperion/torch/layer_blocks/res2net_blocks.py | 3 - hyperion/torch/trainers/xvector_trainer.py | 29 ---- 17 files changed, 608 insertions(+), 218 deletions(-) create mode 100644 egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml diff --git a/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..01cfa082 --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml @@ -0,0 +1,102 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + resnet_enc: + in_feats: 80 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + multilayer: true + multilayer_concat: true + endpoint_channels: 8192 + dropout_rate: 0.0 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +trainer: + optim: + opt_type: adam + lr: 0.02 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: + lrsch_type: 
exp_lr + decay_rate: 0.5 + decay_steps: 10000 + hold_steps: 35000 + min_lr: 1.0e-05 + warmup_steps: 1000 + update_lr_on_opt_step: true + grad_clip: 250 + swa_start: 65 + swa_anneal_epochs: 5 + swa_lr: 1e-3 + use_amp: true + log_interval: 1000 + epochs: 75 + eff_batch_size: 512 diff --git a/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..24b1c081 --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 15.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 15.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.5 + margin_warmup_epochs: 3 + intertop_margin: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 1e-5 + lrsched: + lrsch_type: cos_lr + t: 2500 + t_mul: 2 + warm_restarts: true + gamma: 0.75 + min_lr: 1e-4 + warmup_steps: 100 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 7 + eff_batch_size: 128 + diff --git a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh index c8732c36..1b7c3764 100644 --- a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh +++ b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh @@ -9,72 +9,34 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxcelebcat_sre_alllangs_mixfs -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=16 eff_batch_size=512 # effective batch size -ipe=1 -min_chunk=4 -max_chunk=4 lr=0.02 nnet_type=resnet1d -block_type=seres2bn # squeeze-excitation res2net bottleneck -channels=2048 -ep_channels=8192 -width_factor=1 -scale=8 -se_r=16 dropout=0 -attstats_inner=128 embed_dim=256 s=30 margin_warmup=20 margin=0.3 -nnet_opt="--resnet_enc.in-feats 80 \ - --resnet_enc.in-conv-channels $channels \ - --resnet_enc.in-kernel-size 5 \ - --resnet_enc.in-stride 1 \ - --resnet_enc.resb-type $block_type \ - --resnet_enc.resb-repeats 1 1 1 1 \ - --resnet_enc.resb-channels $channels \ - --resnet_enc.resb-kernel-sizes 3 \ - --resnet_enc.resb-dilations 2 3 4 5 \ - --resnet_enc.resb-strides 1 \ - --resnet_enc.res2net-width-factor $width_factor \ - --resnet_enc.res2net-scale $scale \ - --resnet_enc.se-r $se_r \ - --resnet_enc.multilayer \ - --resnet_enc.multilayer-concat \ - --resnet_enc.endpoint-channels $ep_channels \ - --pool_net.pool-type ch-wise-att-mean+stddev \ - 
--pool_net.inner-feats $attstats_inner \ - --embed-dim $embed_dim" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 65 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 35000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - +nnet_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml nnet_name=${feat_type}_ecapatdnn2048x4_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 nnet_num_epochs=75 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth nnet=$nnet_dir/swa_model_ep0076.pth - +nnet=$nnet_dir/model_ep0004.pth # xvector full net finetuning with out-of-domain -ft_batch_size_1gpu=8 ft_eff_batch_size=128 # effective batch size ft_min_chunk=10 ft_max_chunk=15 -ft_ipe=1 ft_lr=0.01 ft_nnet_num_epochs=15 ft_margin=0.5 -ft_margin_warmup=3 -ft_opt_opt="--optim.opt-type sgd --optim.lr $ft_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" +ft_nnet_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name ft_nnet=$ft_nnet_dir/model_ep0007.pth @@ -88,7 +50,4 @@ else plda_data=voxceleb2cat_train_augx${plda_num_augs} fi plda_type=splda -# lda_dim=200 -# plda_y_dim=150 -# plda_z_dim=200 diff --git a/egs/sre21-av-a/v1.16k/local/make_voxceleb1cat_v2.pl b/egs/sre21-av-a/v1.16k/local/make_voxceleb1cat_v2.pl index 27b1f152..18b6d40c 100755 --- a/egs/sre21-av-a/v1.16k/local/make_voxceleb1cat_v2.pl +++ b/egs/sre21-av-a/v1.16k/local/make_voxceleb1cat_v2.pl @@ -31,7 +31,7 @@ my $meta_path = "$data_base/vox1_meta.csv"; if (! -e "$meta_path") { $meta_path = "$out_dir/vox1_meta.csv"; - system("wget -O $meta_path $meta_url"); + system("wget --no-check-certificate -O $meta_path $meta_url"); } open(META_IN, "<", "$meta_path") or die "Could not open the meta data file $meta_path"; @@ -53,7 +53,7 @@ my $lid_path = "$data_base/lang_vox1_final.csv"; if (! -e "$lid_path") { $lid_path = "$out_dir/lang_vox1_final.csv"; - system("wget -O $lid_path $lid_url"); + system("wget --no-check-certificate -O $lid_path $lid_url"); } open(LID_IN, "<", "$lid_path") or die "Could not open the output file $lid_path"; my %utt2lang = (); diff --git a/egs/sre21-av-a/v1.16k/run_002_compute_evad.sh b/egs/sre21-av-a/v1.16k/run_002_compute_evad.sh index f7aa7828..08f655ea 100755 --- a/egs/sre21-av-a/v1.16k/run_002_compute_evad.sh +++ b/egs/sre21-av-a/v1.16k/run_002_compute_evad.sh @@ -9,7 +9,6 @@ set -e nodes=fs01 storage_name=$(date +'%m_%d_%H_%M') vaddir=`pwd`/exp/vad_e -vad_config=conf/vad_16k.yaml stage=1 config_file=default_config.sh @@ -75,41 +74,3 @@ if [ $stage -le 3 ];then done fi -# #Enroll multi-speaker Datasets with time marks -# if [ $stage -le 3 ];then -# for name in sre18_dev_enroll_vast sre18_eval_enroll_vast sre19_av_a_dev_enroll sre19_av_a_eval_enroll -# do -# num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') -# nj=$(($num_spk < 40 ? 
$num_spk:40)) -# # we just run energy vad to get the utt2num_frames file -# hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ -# --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ -# data/${name} exp/make_vad/$name $vaddir -# utils/fix_data_dir.sh data/${name} -# local/sre18_diar_to_vad.sh data/${name} exp/make_vad $vaddir -# utils/fix_data_dir.sh data/${name} -# done -# fi - -# #Dihard Datasets -# if [ $stage -le 4 ];then -# for name in dihard2_train_dev dihard2_train_eval -# do -# num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') -# nj=$(($num_spk < 40 ? $num_spk:40)) -# # we just run energy vad to get the utt2num_frames file -# hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ -# --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ -# data/${name} exp/make_vad/$name $vaddir -# hyp_utils/rttm_to_bin_vad.sh --nj 5 data/$name/vad.rttm data/$name $vaddir -# utils/fix_data_dir.sh data/${name} -# done - -# fi - -# if [ $stage -le 5 ];then -# utils/combine_data.sh --extra-files "utt2num_frames" data/dihard2_train data/dihard2_train_dev data/dihard2_train_eval -# utils/fix_data_dir.sh data/dihard2_train -# fi - - diff --git a/egs/sre21-av-a/v1.16k/run_011_train_xvector.sh b/egs/sre21-av-a/v1.16k/run_011_train_xvector.sh index 0608929c..7f405952 100755 --- a/egs/sre21-av-a/v1.16k/run_011_train_xvector.sh +++ b/egs/sre21-av-a/v1.16k/run_011_train_xvector.sh @@ -10,28 +10,67 @@ set -e stage=1 ngpu=4 config_file=default_config.sh -resume=false interactive=false -num_workers=8 +num_workers="" . parse_options.sh || exit 1; . $config_file . datapath.sh -batch_size=$(($batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) list_dir=data/${nnet_data}_proc_audio_no_sil args="" -if [ "$resume" == "true" ];then - args="--resume" +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" fi if [ "$interactive" == "true" ];then export cuda_cmd=run.pl fi +# Network Training +if [ $stage -le 1 ]; then + + mkdir -p $nnet_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_xvector_from_wav.py $nnet_type \ + --cfg $nnet_base_cfg $nnet_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ + --trainer.exp-path $nnet_dir \ + --num-gpus $ngpu \ + +fi + +# Large Margin Fine-tuning +if [ $stage -le 2 ]; then + mkdir -p $ft_nnet_dir/log + $cuda_cmd \ + --gpu $ngpu $ft_nnet_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_xvector_from_wav.py $nnet_type \ + --cfg $ft_nnet_base_cfg $ft_nnet_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ + 
--in-model-file $nnet \ + --trainer.exp-path $ft_nnet_dir \ + --num-gpus $ngpu \ + +fi +exit + # Network Training if [ $stage -le 1 ]; then diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index 23e0a26f..73b9bb4e 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -104,12 +104,12 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.77 | 0.48 | 0.077 | | | | | Cosine + AS-Norm | 0.68 | 0.040 | 0.062| | | | | Cosine + QMF | 0.62 | 0.036 | 0.063 | -| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | -| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.78 | 0.053 | 0.082 | +| | | | Cosine + AS-Norm | 0.70 | 0.043 | 0.076 | +| | | | Cosine + QMF | 0.63 | 0.042 | 0.071 | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.78 | 0.051 | 0.095 | +| | | | Cosine + AS-Norm | 0.72 | 0.046 | 0.070 | +| | | | Cosine + QMF | 0.67 | 0.039 | 0.074 | | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.56 | 0.040 | 0.065 | | | | | Cosine + AS-Norm | 0.52 | 0.33 | 0.045 | | | | | Cosine + QMF | 0.45 | 0.027 | 0.043 | @@ -134,16 +134,18 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.83 | 0.053 | 0.098 | | | | | Cosine + AS-Norm | 0.78 | 0.047| 0.085 | | | | | Cosine + QMF | 0.74 | 0.045 | 0.081 | -| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | -| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.91 | 0.057 | 0.100 | +| | | | Cosine + AS-Norm | 0.85 | 0.052 | 0.089 | +| | | | Cosine + QMF | 0.81 | 0.049 | 0.085 | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.94 | 0.059 | 0.105 | +| | | | Cosine + AS-Norm | 0.88 | 0.053 | 0.093 | +| | | | Cosine + QMF | 0.84 | 0.051 | 0.088 | | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.71 | 0.044 | 0.076| | | | | Cosine + AS-Norm | 0.66 | 0.040 | 0.069 | | | | | Cosine + QMF | 0.63 | 0.037 | 0.067 | + + ### VoxCeleb 1 Hard-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -163,16 +165,18 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.59 | 0.096 | 0.165 | | | | | Cosine + AS-Norm | 1.41 | 0.083 | 0.143 | | | | | Cosine + QMF | 1.34 | 0.079 | 0.136 | -| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | -| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.75 | 0.104 | 0.171 | +| | | | Cosine + AS-Norm | 1.56 | 0.091 | 0.152 | +| | | | Cosine + QMF | 1.50 | 0.087 | 0.145 | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.76 | 0.104 | 0.174 | +| | | | Cosine + AS-Norm | 1.58 | 0.092 | 0.152 | +| | | | Cosine + QMF | 1.51 | 0.089 | 0.149 | | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.30 | 0.076 | 0.125 | | | | | Cosine + AS-Norm | 1.15 | 0.066 | 0.109 | | | | | Cosine + QMF | 1.11 | 0.065 | 0.103 | + + ### VoxSRC2022 dev | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -192,12 +196,12 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.25 | 0.136 | 0.239 | | | | | Cosine + AS-Norm | 1.99 | 0.127 | 0.232 | | | | | Cosine + QMF | 1.87 | 0.119 | 0.216 | -| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | -| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.36 | 0.153 | 0.259 | +| | | | Cosine + AS-Norm | 2.18 | 0.139 | 0.249 | +| | | | Cosine + QMF | 2.08 | 0.128 | 0.222 | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.49 | 0.158 | 0.265 | +| | | | Cosine + AS-Norm | 2.29 | 0.145 | 0.251 | +| | | | Cosine + QMF | 2.17 | 0.133 | 0.230 | | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.92 | 0.124 | 0.208 | | | | | Cosine + AS-Norm | 1.71 | 0.109 | 0.212 | | | | | Cosine + QMF | 1.62 | 0.103 | 0.192 | diff --git a/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..5dda7913 --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage1_v3.0.yaml @@ -0,0 +1,72 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: res2net50 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 1.625 + res2net_scale: 4 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..469e166b --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage1_v3.0.yaml 
b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..40fb362e --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage1_v3.0.yaml @@ -0,0 +1,72 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +feats: fbank80_specaug1_stmn_16k.yaml +model: + resnet_type: res2net50 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 3.25 + res2net_scale: 8 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..469e166b --- /dev/null +++ b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage1_v3.0.yaml index 1d864080..31dcaf9a 100644 --- a/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage1_v3.0.yaml @@ -47,7 +47,7 @@ model: dropout_rate: 0.1 
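The two new Res2Net50 configs differ only in `res2net_scale` and `res2net_width_factor`. Under our reading (an assumption; the 64-channel base width is not stated in the patch), the width factor rescales the standard ResNet width so that each scale branch keeps 26 channels, matching the 26w×s4 / 26w×s8 naming:

```python
# Sanity check of the res2net width arithmetic (our reading of the configs;
# the 64-channel base width is an assumption).
base_width = 64
for scale, width_factor in [(4, 1.625), (8, 3.25)]:
    per_branch = base_width * width_factor / scale
    print(f"scale={scale}: width per branch = {per_branch:g}")  # -> 26 in both cases
```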
norm_before: false hid_act: swish - se_r: 128 + se_r: 256 trainer: optim: opt_type: adam @@ -67,5 +67,5 @@ trainer: grad_clip: 250 use_amp: true log_interval: 1000 - epochs: 35 + epochs: 25 eff_batch_size: 256 diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh index 42af2d52..00622772 100644 --- a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh @@ -17,7 +17,7 @@ nnet_name=${feat_type}_tseresnet34.v3.0 nnet_s1_base_cfg=conf/train_tseresnet34_xvec_stage1_v3.0.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0035.pth +nnet_s1=$nnet_s1_dir/model_ep0025.pth nnet_s2_base_cfg=conf/train_tseresnet34_xvec_stage2_v3.0.yaml nnet_s2_name=${nnet_name}.s2 diff --git a/hyperion/np/classifiers/svmc.py b/hyperion/np/classifiers/svmc.py index 9311b8e8..6b54034b 100644 --- a/hyperion/np/classifiers/svmc.py +++ b/hyperion/np/classifiers/svmc.py @@ -9,20 +9,24 @@ import numpy as np from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from sklearn.svm import SVC as SVC +from sklearn.svm import SVC from ...hyp_defs import float_cpu from ...utils.math import softmax +from ...utils.misc import filter_func_args from ..np_model import NPModel -class GaussianSVMC(NPModel): +class SVMC(NPModel): """Gaussian Support Vector Machine for Classification.""" def __init__( self, C=1.0, + kernel="rbf", + degree=3, gamma="scale", + coef0=0.0, shrinking=True, probability=True, tol=0.0001, @@ -32,7 +36,6 @@ def __init__( class_weight=None, random_state=None, max_iter=100, - model=None, verbose=0, balance_class_weight=True, lr_seed=1024, @@ -48,25 +51,38 @@ def __init__( if random_state is None: random_state = np.random.RandomState(seed=lr_seed) + self.C = C + self.kernel = kernel + self.degree = degree + self.gamma = gamma + self.coef0 = coef0 + self.shrinking = shrinking + self.probability = probability + self.tol = tol + self.cache_size = cache_size + self.multi_class = multi_class + self.break_ties = break_ties + self.class_weight = class_weight + self.balance_class_weight = balance_class_weight - if model is None: - self.svm = SVC( - C=C, - kernel="rbf", - gamma=gamma, - shrinking=shrinking, - probability=probability, - tol=tol, - cache_size=cache_size, - class_weight=class_weight, - verbose=verbose, - max_iter=max_iter, - decision_function_shape=multi_class, - break_ties=break_ties, - random_state=random_state, - ) - else: - self.svm = model + self.svm = SVC( + C=C, + kernel=kernel, + gamma=gamma, + degree=degree, + coef0=coef0, + shrinking=shrinking, + probability=probability, + tol=tol, + cache_size=cache_size, + class_weight=class_weight, + verbose=verbose, + max_iter=max_iter, + decision_function_shape=multi_class, + break_ties=break_ties, + random_state=random_state, + ) + self.set_labels(labels) @property @@ -84,6 +100,18 @@ def get_config(self): Dictionary with config hyperparams. 
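The refactored `SVMC` forwards its constructor arguments straight to sklearn's `SVC` instead of hard-coding an RBF kernel. A minimal fit-and-score sketch with toy data and the same parameters the wrapper passes through (this illustrates the wrapped estimator, not the wrapper's exact API):

```python
# Toy fit/score with the sklearn SVC parameters the SVMC wrapper forwards.
import numpy as np
from sklearn.svm import SVC

rng = np.random.default_rng(0)
x = rng.standard_normal((120, 10))
y = rng.integers(0, 3, size=120)

svm = SVC(
    C=1.0,
    kernel="rbf",
    gamma="scale",
    probability=True,
    tol=1e-4,
    decision_function_shape="ovr",
    max_iter=100,          # may stop before convergence, as fit() warns above
    random_state=0,
)
svm.fit(x, y)
scores = svm.decision_function(x)  # (120, 3) one-vs-rest scores
probs = svm.predict_proba(x)       # (120, 3) posteriors, needs probability=True
```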
""" config = { + "C": self.C, + "kernel": self.kernel, + "gamma": self.gamma, + "degree": self.degree, + "coef0": self.coef0, + "shrinking": self.shrinking, + "probability": self.probability, + "tol": self.tol, + "cache_size": self.cache_size, + "multi_class": self.multi_class, + "break_ties": self.break_ties, + "class_weight": self.class_weight, "balance_class_weight": self.balance_class_weight, "labels": self.labels, } @@ -135,7 +163,6 @@ def fit(self, x, class_ids, sample_weight=None): class_ids: class integer [0, num_classes-1] identifier (num_samples,) sample_weight: weight of each sample in the estimation (num_samples,) """ - print("--------------", type(x[3, 2]), type(class_ids[20]), "--------------") self.svm.fit(x, class_ids) if self.svm.fit_status_: logging.warning("SVM did not converge") @@ -153,9 +180,6 @@ def save(self, file_path): if not split_path[-1] == "sav": file_path = "".join(split_path[0] + ".sav") with open(file_path, "wb") as f: - # with h5py.File(file_path, "w") as f: - # config = self.to_json() - # f.create_dataset("config", data=np.array(config, dtype="S")) self.save_params(f) @classmethod @@ -169,27 +193,17 @@ def load(cls, file_path): Model object. """ split_path = os.path.splitext(file_path) - if not split_path[-1] == "sav": - file_path = "".join(split_path[0] + ".sav") + if not split_path[-1] == "pkl": + file_path = "".join(split_path[0] + ".pkl") - # with h5py.File(file_path, "r") as f: with open(file_path, "rb") as f: - # json_str = str(np.asarray(f["config"]).astype("U")) - # config = cls.load_config_from_json(json_str) - config = None - return cls.load_params(f, config) + return pickle.load(f) def save_params(self, f): - # params = {"A": self.A, "b": self.b} - # self._save_params_from_dict(f, params) pickle.dump(self, f) @classmethod - def load_params(cls, f, config): - # param_list = ["A", "b"] - # params = cls._load_params_to_dict(f, config["name"], param_list) - # kwargs = dict(list(config.items()) + list(params.items())) - # return cls(**kwargs) + def load_params(cls, f): svmc = pickle.load(f) return svmc @@ -200,27 +214,7 @@ def filter_class_args(**kwargs): Returns: Hyperparamter dictionary to initialize the class. 
""" - valid_args = ( - "nu", - "gamma", - "shrinking", - "probability", - "tol", - "cache_size", - "multi_class", - "break_ties", - "class_weight", - "random_state", - "max_iter", - "verbose", - "balance_class_weight", - "lr_seed", - "model", - "labels", - ) - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - filter_train_args = filter_class_args + return filter_func_args(SVMC.__init__, **kwargs) @staticmethod def add_class_args(parser, prefix=None): @@ -240,17 +234,27 @@ def add_class_args(parser, prefix=None): type=float, help="inverse of regularization strength", ) - # parser.add_argument( - # "--class_weight", - # default=None, - # help="Class weights", - # ) + parser.add_argument( + "--kernel", + default="rbf", + choices=["linear", "poly", "rbf", "sigmoid", "precomputed"], + help="kernel for svm", + ) + parser.add_argument( + "--degree", defaut=3, type=int, help="degree of polynomial kernel" + ) parser.add_argument( "--gamma", default="scale", choices=["scale", "auto"], help="Kernel coefficient for ‘rbf’", ) + parser.add_argument( + "--coef0", + default=0.0, + type=float, + help="independent term of poly and sigmoid kernels", + ) parser.add_argument( "--shrinking", default=True, @@ -264,7 +268,7 @@ def add_class_args(parser, prefix=None): help="Whether to enable probability estimates", ) parser.add_argument( - "--break_ties", + "--break-ties", default=True, type=bool, help="If true, predict will break ties according to the confidence values of decision_function; otherwise \ @@ -293,7 +297,7 @@ def add_class_args(parser, prefix=None): ), ) parser.add_argument( - "--cache_size", + "--cache-size", default=600, type=int, help="Specify the size of the kernel cache (in MB)", diff --git a/hyperion/np/np_model.py b/hyperion/np/np_model.py index ee464161..aa635fc5 100644 --- a/hyperion/np/np_model.py +++ b/hyperion/np/np_model.py @@ -99,6 +99,8 @@ def _save_params_from_dict(self, f, params, dtypes=None): """ if dtypes is None: dtypes = dict((k, float_save()) for k in params) + elif isinstance(dtypes, type): + dtypes = dict((k, dtypes) for k in params) if self.name is None: prefix = "" @@ -174,6 +176,9 @@ def _load_params_to_dict(f, name, params, dtypes=None): """ if dtypes is None: dtypes = dict((k, float_cpu()) for k in params) + elif isinstance(dtypes, type): + dtypes = dict((k, dtypes) for k in params) + if name is None: prefix = "" else: diff --git a/hyperion/torch/layer_blocks/res2net_blocks.py b/hyperion/torch/layer_blocks/res2net_blocks.py index 73255a24..8de700c4 100644 --- a/hyperion/torch/layer_blocks/res2net_blocks.py +++ b/hyperion/torch/layer_blocks/res2net_blocks.py @@ -410,9 +410,6 @@ def forward(self, x, x_mask=None): x += residual - if not self.norm_before: - x = self.bn3(x) - if self.dropout_rate > 0: x = self.dropout(x) diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index a9a9d98f..eddf47a7 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -88,35 +88,6 @@ def __init__( super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # loss, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # 
use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - @record def train_epoch(self, data_loader): """Training epoch loop From 27d579cb7247bba1983cd4abac42e836e796355a Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Sat, 20 May 2023 17:42:13 -0400 Subject: [PATCH 101/154] sre21 16k recipe finished --- egs/sre21-av-a/v1.16k/README.md | 51 ++- .../v1.16k/conf/lresnet34_lid_v1.yaml | 59 --- ...rain_ecapatdnn2048x4_xvec_stage1_v1.0.yaml | 2 + ...rain_ecapatdnn2048x4_xvec_stage2_v1.0.yaml | 2 +- .../v1.16k/conf/train_lresnet34_lid_v1.yaml | 78 ++++ ...train_res2net50w26s4_xvec_stage1_v1.0.yaml | 80 ++++ ...train_res2net50w26s4_xvec_stage2_v1.0.yaml | 66 +++ ...train_res2net50w26s8_xvec_stage1_v1.0.yaml | 82 ++++ ...train_res2net50w26s8_xvec_stage2_v1.0.yaml | 66 +++ ...in_tseres2net50w26s4_xvec_stage1_v1.0.yaml | 83 ++++ ...in_tseres2net50w26s4_xvec_stage2_v1.0.yaml | 66 +++ ...statsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | 2 +- ...cs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh | 32 +- ...cs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh | 67 ---- ...statsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | 71 +--- ...statsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | 25 +- ...et50w26s8_arcs30m0.3_adam_lr0.02_amp.v1.sh | 0 ...cs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh | 49 +++ ...et50w26s4_arcs30m0.3_adam_lr0.02_amp.v1.sh | 0 .../v1.16k/run_011_train_xvector.sh | 1 - .../v1.16k/run_012_finetune_xvector.sh | 61 --- egs/sre21-av-a/v1.16k/run_014_train_lid.sh | 34 +- egs/sre21-av-a/v1.16k/run_041_eval_be_v2.sh | 2 +- ...rain_ecapatdnn2048x4_xvec_stage1_v1.0.yaml | 104 +++++ ...rain_ecapatdnn2048x4_xvec_stage2_v1.0.yaml | 66 +++ ...train_res2net50w26s8_xvec_stage1_v1.0.yaml | 82 ++++ ...train_res2net50w26s8_xvec_stage2_v1.0.yaml | 65 +++ ...in_tseres2net50w26s4_xvec_stage1_v1.0.yaml | 83 ++++ ...in_tseres2net50w26s4_xvec_stage2_v1.0.yaml | 66 +++ egs/sre21-av-a/v1.8k/default_config.sh | 2 +- ...statsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | 50 +-- ...statsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | 48 +++ ...statsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh | 68 ---- ...statsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | 58 +++ ...statsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh | 76 ---- egs/sre21-av-a/v1.8k/run_011_train_xvector.sh | 54 ++- .../v1.8k/run_012_finetune_xvector.sh | 61 --- egs/voxceleb/v1.1/run_002_compute_evad.sh | 1 - egs/voxceleb/v1.2/run_001_prepare_data.sh | 14 +- hyp_utils/create_data_link.pl | 132 ++++++ hyp_utils/create_data_split_dirs.sh | 46 +++ hyp_utils/create_data_split_links.sh | 23 ++ hyp_utils/create_split_dir.pl | 92 +++++ ...l_xvec_cosine_scoring_from_adv_test_wav.py | 8 +- ...osine_scoring_from_adv_test_wav_wavegan.py | 14 +- ...l_xvec_cosine_scoring_from_art_test_wav.py | 2 +- .../eval_xvec_cosine_scoring_from_test_wav.py | 4 +- ...sine_scoring_from_transfer_adv_test_wav.py | 2 +- ...sine_scoring_from_transfer_art_test_wav.py | 3 +- hyperion/bin/eval_xvec_logits_from_wav.py | 48 +-- hyperion/bin/extract_xvectors_from_wav.py | 2 +- .../bin/extract_xvectors_slidwin_from_wav.py | 14 +- hyperion/bin/finetune_xvector_from_wav.py | 77 +--- hyperion/data_prep/__init__.py | 2 +- hyperion/data_prep/voxceleb1.py | 338 ++++++++++++++++ hyperion/data_prep/voxceleb2.py | 12 +- hyperion/np/classifiers/__init__.py | 2 +- hyperion/torch/layers/global_pool.py | 7 +- hyperion/torch/models/xvectors/xvector.py | 6 +- 
hyperion/torch/narchs/audio_feats_mvn.py | 9 +- hyperion/torch/narchs/classif_head.py | 2 +- hyperion/torch/narchs/dc1d_decoder.py | 4 +- hyperion/torch/narchs/dc1d_encoder.py | 4 +- hyperion/torch/narchs/dc2d_decoder.py | 4 +- hyperion/torch/narchs/dc2d_encoder.py | 4 +- hyperion/torch/narchs/fcnet.py | 2 +- hyperion/torch/narchs/resnet.py | 16 +- hyperion/torch/narchs/resnet1d_decoder.py | 14 +- hyperion/torch/narchs/resnet1d_encoder.py | 20 +- hyperion/torch/narchs/resnet2d_decoder.py | 14 +- hyperion/torch/narchs/resnet2d_encoder.py | 19 +- hyperion/torch/narchs/resnet_factory.py | 4 +- hyperion/torch/narchs/spinenet.py | 16 +- hyperion/torch/narchs/spinenet_factory.py | 4 +- hyperion/torch/narchs/tdnn_factory.py | 4 +- .../torch/narchs/transformer_encoder_v1.py | 4 +- .../trainers/xvector_trainer_from_wav.py | 8 +- hyperion/torch/utils/masking.py | 7 +- hyperion/utils/__init__.py | 2 + hyperion/utils/dataset.py | 379 +++++++++++++++--- hyperion/utils/enrollment_map.py | 86 ++++ hyperion/utils/info_table.py | 12 +- hyperion/utils/segment_set.py | 4 + hyperion/utils/sparse_trial_key.py | 58 +++ hyperion/utils/trial_key.py | 82 +++- 85 files changed, 2625 insertions(+), 868 deletions(-) delete mode 100644 egs/sre21-av-a/v1.16k/conf/lresnet34_lid_v1.yaml create mode 100644 egs/sre21-av-a/v1.16k/conf/train_lresnet34_lid_v1.yaml create mode 100644 egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage1_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml delete mode 100644 egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh rename egs/sre21-av-a/v1.16k/global_conf/{ => deprecated}/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.02_amp.v1.sh (100%) create mode 100644 egs/sre21-av-a/v1.16k/global_conf/deprecated/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh rename egs/sre21-av-a/v1.16k/global_conf/{ => deprecated}/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.02_amp.v1.sh (100%) delete mode 100755 egs/sre21-av-a/v1.16k/run_012_finetune_xvector.sh create mode 100644 egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml create mode 100644 egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh delete mode 100644 egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh create mode 100644 egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh delete mode 100644 
egs/sre21-av-a/v1.8k/global_conf/config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh delete mode 100755 egs/sre21-av-a/v1.8k/run_012_finetune_xvector.sh create mode 100755 hyp_utils/create_data_link.pl create mode 100755 hyp_utils/create_data_split_dirs.sh create mode 100755 hyp_utils/create_data_split_links.sh create mode 100755 hyp_utils/create_split_dir.pl create mode 100644 hyperion/data_prep/voxceleb1.py create mode 100644 hyperion/utils/enrollment_map.py diff --git a/egs/sre21-av-a/v1.16k/README.md b/egs/sre21-av-a/v1.16k/README.md index e35577d7..0f5d09ad 100644 --- a/egs/sre21-av-a/v1.16k/README.md +++ b/egs/sre21-av-a/v1.16k/README.md @@ -88,8 +88,6 @@ run_0xx_....sh --config-file global_conf/config_fbank80_stmn_res2net50w26s8_arcs - `run_011_train_xvector.sh` - Trains the x-vector network on 4sec chunks - - - `run_012_finetune_xvector.sh` - Fine-tune x-vector network on 10-15 secs utts - `run_013_prepare_langid_train_data.sh` @@ -110,8 +108,8 @@ run_0xx_....sh --config-file global_conf/config_fbank80_stmn_res2net50w26s8_arcs - `run_040_eval_be_v1.sh, run_041_eval_be_v2.sh, run_042_eval_be_v3.sh, run_042b_eval_be_v3.sh` - Evals different back-end versions: - V1: Back-end trained on all data without adaptation - - V2: Centering + PCA + LNorm + PLDA (+S-Norm), Centering adapted to source and langauge, global PLDA adapted to SRE-Vox-CHN - - V3: Centering + PCA + LNorm + PLDA (+S-Norm), Centering adapted to source and langauge, source dependent PLDA adapted to SRE-CHN or Vox-CHN + - V2: Centering + PCA + LNorm + PLDA (+S-Norm), Centering adapted to source and language, global PLDA adapted to SRE-Vox-CHN + - V3: Centering + PCA + LNorm + PLDA (+S-Norm), Centering adapted to source and language, source dependent PLDA adapted to SRE-CHN or Vox-CHN - V3b: V3 with hyperparmeters tuned for x-vectors trained on VoxCeleb only - `run_fus*.sh` @@ -120,4 +118,47 @@ run_0xx_....sh --config-file global_conf/config_fbank80_stmn_res2net50w26s8_arcs ## Results -TODO +The back-end used for these results is: +- back-end V2 (run_041_eval_be_v2.sh) +- Without S-Norm +- Scores are calibrated as indicated in the paper. + +## SRE16 Eval40% YUE + +| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | +| ------ | ---------- | ------------- | ------ | ------------- | ------------- | +| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 1.57 | 0.135 | 0.237 | +| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.23 | 0.136 | 0.187 | +| config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | TSE-Res2Net50 w26xs4 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.38 | 0.147 | 0.189 | + +## SRE-CTS Superset dev set + +| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | +| ------ | ---------- | ------------- | ------ | ------------- | ------------- | +| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 1.37 | 0.076 | 0.106 | +| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.19 | 0.064 | 0.089 | +| config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | TSE-Res2Net50 w26xs4 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.15 | 0.061 | 0.102 | +
+## SRE21 Audio Dev (official scoring tool) + +| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | +| ------ | ---------- | ------------- | ------ | ------------- | ------------- | +| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 5.91 | 0.393 | 0.409 | +| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 5.22 | 0.370 | 0.377 | +| config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | TSE-Res2Net50 w26xs4 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 4.79 | 0.309 | 0.325 | + +## SRE21 Audio Eval (official scoring tool) + +| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | +| ------ | ---------- | ------------- | ------ | ------------- | ------------- | +| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 5.68 | 0.395 | 0.401 | +| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 4.92 | 0.405 | 0.412 | +| config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | TSE-Res2Net50 w26xs4 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 4.80 | 0.357 | 0.360 | diff --git a/egs/sre21-av-a/v1.16k/conf/lresnet34_lid_v1.yaml b/egs/sre21-av-a/v1.16k/conf/lresnet34_lid_v1.yaml deleted file mode 100644 index 5451702f..00000000 --- a/egs/sre21-av-a/v1.16k/conf/lresnet34_lid_v1.yaml +++ /dev/null @@ -1,59 +0,0 @@ -min_chunk_length: 4.0 -max_chunk_length: 4.0 -return_fullseqs: false -wav_scale: 32767 -batch_size: 512 -var_batch_size: false -iters_per_epoch: 6.0 -train_aug_cfg: conf/reverb_noise_aug.yaml -val_aug_cfg: conf/reverb_noise_aug.yaml -feats: fbank64_stmn_nb_16k.yaml -pool_net: - pool_type: ch-wise-att-mean+stddev - inner_feats: 32 -embed_dim: 32 -num_embed_layers: 1 -hid_act: relu6 -loss_type: arc-softmax -s: 30.0 -margin: 0.3 -margin_warmup_epochs: 30.0 -dropout_rate: 0.0 -in_feats: 64 -resnet_type: lresnet34 -in_channels: 1 -conv_channels: 64 -base_channels: 64 -in_kernel_size: 3 -in_stride: 1 -in_norm: false -no_maxpool: true -optim: - opt_type: adam - lr: 0.02 - # lr: 0.01 - beta1: 0.9 - beta2: 0.95 - amsgrad: true - weight_decay: 1e-5 -lrsched: - lrsch_type: exp_lr - decay_rate: 0.5 - decay_steps: 8000 - hold_steps: 10000 - min_lr: 1.0e-05 - warmup_steps: 1000 - update_lr_on_opt_step: true -grad_acc_steps: 1 -epochs: 70 -log_interval: 100 -use_tensorboard: false -use_wandb: false -wandb: - mode: online -ddp_type: ddp -use_amp: true -swa_start: 0 -swa_lr: 0.001 -swa_anneal_epochs: 10 -num_gpus: 4 diff --git a/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml index 01cfa082..d68ea26e 100644 --- a/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml +++ b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml @@ -68,6 +68,7 @@ model: multilayer_concat: true endpoint_channels: 8192 dropout_rate: 0.0 + hid_act: relu6 pool_net: pool_type: ch-wise-att-mean+stddev inner_feats: 128 @@ -76,6 +77,7 @@ model: margin: 0.3 margin_warmup_epochs: 20.0 dropout_rate: 0.0 + hid_act: relu6 trainer: optim: opt_type: adam diff --git a/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml index 24b1c081..e7f9969b 100644 --- a/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml +++ b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml @@ -46,7 +46,7 @@ model: trainer: optim: opt_type: sgd - lr: 1e-3 + lr: 0.01 momentum: 0.9 weight_decay: 1e-5 lrsched: diff --git a/egs/sre21-av-a/v1.16k/conf/train_lresnet34_lid_v1.yaml b/egs/sre21-av-a/v1.16k/conf/train_lresnet34_lid_v1.yaml new file mode 100644 index 00000000..c46365db --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_lresnet34_lid_v1.yaml @@ -0,0 +1,78 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + data_loader: + num_workers: 8 +feats: 
fbank64_stmn_nb_16k.yaml +model: + resnet_type: lresnet34 + in_channels: 1 + in_feats: 64 + conv_channels: 64 + in_kernel_size: 3 + in_stride: 1 + in_norm: false + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 32 + embed_dim: 32 + num_embed_layers: 1 + hid_act: relu6 + loss_type: arc-softmax + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 30.0 + dropout_rate: 0.0 +trainer: + optim: + opt_type: adam + lr: 0.02 + beta1: 0.9 + beta2: 0.95 + amsgrad: true + weight_decay: 1e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 8000 + hold_steps: 10000 + min_lr: 1.0e-05 + warmup_steps: 1000 + update_lr_on_opt_step: true + epochs: 70 + log_interval: 100 + use_amp: true + diff --git a/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage1_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..7a9234b6 --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage1_v1.0.yaml @@ -0,0 +1,80 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + resnet_type: res2net50 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 1.625 + res2net_scale: 4 + pool_net: + pool_type: mean+stddev + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 +trainer: + optim: + opt_type: adam + lr: 0.05 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 10000 + hold_steps: 40000 + min_lr: 1.0e-05 + warmup_steps: 1000 + update_lr_on_opt_step: true + grad_clip: 250 + swa_start: 50 + swa_anneal_epochs: 5 + swa_lr: 1e-3 + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 512 diff --git a/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..9884bb4c --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 15.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 15.0 + 
min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.5 + margin_warmup_epochs: 3 + intertop_margin: 0.0 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 1e-5 + lrsched: + lrsch_type: cos_lr + t: 2500 + t_mul: 2 + warm_restarts: true + gamma: 0.75 + min_lr: 1e-4 + warmup_steps: 100 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 21 + eff_batch_size: 128 + diff --git a/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..4c427202 --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml @@ -0,0 +1,82 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + resnet_type: res2net50 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 3.25 + res2net_scale: 8 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 + hid_act: relu6 +trainer: + optim: + opt_type: adam + lr: 0.02 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 10000 + hold_steps: 40000 + min_lr: 1.0e-05 + warmup_steps: 1000 + update_lr_on_opt_step: true + grad_clip: 250 + swa_start: 65 + swa_anneal_epochs: 5 + swa_lr: 1e-3 + use_amp: true + log_interval: 1000 + epochs: 75 + eff_batch_size: 512 diff --git a/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..f34b4896 --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 10.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 10.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + 
seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.5 + margin_warmup_epochs: 3 + intertop_margin: 0.0 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 1e-5 + lrsched: + lrsch_type: cos_lr + t: 2500 + t_mul: 2 + warm_restarts: true + gamma: 0.75 + min_lr: 1e-4 + warmup_steps: 100 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 7 + eff_batch_size: 128 + diff --git a/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..10607607 --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml @@ -0,0 +1,83 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + resnet_type: tseres2net50 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 1.625 + res2net_scale: 4 + se_r: 256 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 + hid_act: relu6 +trainer: + optim: + opt_type: adam + lr: 0.02 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 10000 + hold_steps: 40000 + min_lr: 1.0e-05 + warmup_steps: 1000 + update_lr_on_opt_step: true + grad_clip: 250 + swa_start: 65 + swa_anneal_epochs: 5 + swa_lr: 1e-3 + use_amp: true + log_interval: 1000 + epochs: 75 + eff_batch_size: 512 diff --git a/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..f34b4896 --- /dev/null +++ b/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 10.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 10.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 
+ data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.5 + margin_warmup_epochs: 3 + intertop_margin: 0.0 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 1e-5 + lrsched: + lrsch_type: cos_lr + t: 2500 + t_mul: 2 + warm_restarts: true + gamma: 0.75 + min_lr: 1e-4 + warmup_steps: 100 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 7 + eff_batch_size: 128 + diff --git a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh index 1b7c3764..1da68697 100644 --- a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh +++ b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh @@ -27,7 +27,7 @@ nnet_num_epochs=75 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth nnet=$nnet_dir/swa_model_ep0076.pth -nnet=$nnet_dir/model_ep0004.pth + # xvector full net finetuning with out-of-domain ft_eff_batch_size=128 # effective batch size ft_min_chunk=10 diff --git a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh index 1903369e..6d14f27d 100644 --- a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh +++ b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh @@ -1,4 +1,4 @@ -# LResNet34 x-vector with mixed precision training +# Res2Net50 w26s4 x-vector with mixed precision training # acoustic features feat_config=conf/fbank80_stmn_16k.yaml @@ -9,50 +9,33 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxcelebcat -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=24 eff_batch_size=512 # effective batch size -ipe=1 min_chunk=4 max_chunk=4 lr=0.05 -nnet_type=res2net50 +nnet_type=resnet dropout=0 embed_dim=256 -width_factor=1.625 -scale=4 -ws_tag=w26s4 s=30 margin_warmup=20 margin=0.3 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 50 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1.$nnet_data -nnet_num_epochs=60 +nnet_base_cfg=conf/train_res2net50w26s4_xvec_stage1_v1.0.yaml +nnet_name=${feat_type}_res2net50w26s4_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1.$nnet_data nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0071.pth - +nnet=$nnet_dir/model_ep0061.pth # xvector full net finetuning with out-of-domain -ft_batch_size_1gpu=8 ft_eff_batch_size=128 # 
effective batch size ft_min_chunk=10 ft_max_chunk=15 -ft_ipe=1 ft_lr=0.01 -ft_nnet_num_epochs=21 ft_margin=0.5 -ft_margin_warmup=5 -ft_opt_opt="--optim.opt-type sgd --optim.lr $ft_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" +ft_nnet_base_cfg=conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name ft_nnet=$ft_nnet_dir/model_ep0021.pth @@ -61,7 +44,4 @@ ft_nnet=$ft_nnet_dir/model_ep0021.pth plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=0 plda_type=splda -# lda_dim=200 -# plda_y_dim=150 -# plda_z_dim=200 diff --git a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh deleted file mode 100644 index 344e1288..00000000 --- a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh +++ /dev/null @@ -1,67 +0,0 @@ -# LResNet34 x-vector with mixed precision training - -# acoustic features -feat_config=conf/fbank80_stmn_16k.yaml -feat_type=fbank80_stmn - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxcelebcat -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -batch_size_1gpu=24 -eff_batch_size=512 # effective batch size -ipe=1 -min_chunk=4 -max_chunk=4 -lr=0.02 - -nnet_type=res2net50 -dropout=0 -embed_dim=256 -width_factor=3.25 -scale=8 -ws_tag=w26s8 - -s=30 -margin_warmup=20 -margin=0.3 - -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 50 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1.$nnet_data -nnet_num_epochs=60 -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth -#nnet=$nnet_dir/swa_model_ep0061.pth - -# xvector full net finetuning with out-of-domain -ft_batch_size_1gpu=8 -ft_eff_batch_size=128 # effective batch size -ft_min_chunk=10 -ft_max_chunk=10 -ft_ipe=1 -ft_lr=0.01 -ft_nnet_num_epochs=15 -ft_margin=0.5 -ft_margin_warmup=3 - -ft_opt_opt="--optim.opt-type sgd --optim.lr $ft_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" -ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 -ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name -ft_nnet=$ft_nnet_dir/model_ep0007.pth - -# back-end 
-plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=0 -plda_type=splda -# lda_dim=200 -# plda_y_dim=150 -# plda_z_dim=200 - diff --git a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh index cae32b57..0b62008e 100644 --- a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh +++ b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh @@ -9,103 +9,40 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxcelebcat_sre_alllangs_mixfs -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=16 eff_batch_size=512 # effective batch size -ipe=1 min_chunk=4 max_chunk=4 lr=0.02 -nnet_type=res2net50 +nnet_type=resnet dropout=0 embed_dim=256 -width_factor=3.25 -scale=8 -ws_tag=w26s8 s=30 margin_warmup=20 margin=0.3 attstats_inner=128 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale --pool_net.pool-type ch-wise-att-mean+stddev --pool_net.inner-feats $attstats_inner" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 65 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -nnet_name=${feat_type}_${nnet_type}${ws_tag}_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=75 +nnet_base_cfg=conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml +nnet_name=${feat_type}_res2net50w26s8_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0070.pth nnet=$nnet_dir/swa_model_ep0076.pth # xvector full net finetuning with out-of-domain -ft_batch_size_1gpu=8 ft_eff_batch_size=128 # effective batch size ft_min_chunk=10 ft_max_chunk=10 -ft_ipe=1 ft_lr=0.01 -ft_nnet_num_epochs=15 ft_margin=0.5 -ft_margin_warmup=3 -ft_opt_opt="--optim.opt-type sgd --optim.lr $ft_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" +ft_nnet_base_cfg=conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name ft_nnet=$ft_nnet_dir/model_ep0007.pth - -# xvector last-layer finetuning in-domain -reg_layers_classif=0 -reg_layers_enc="0 1 2 3 4" -nnet_adapt_data=voxcelebcat_sre_alllangs_mixfs_chnspks - -# ft2_batch_size_1gpu=4 -# ft2_eff_batch_size=128 # effective batch size -# ft2_ipe=4 -# ft2_lr=0.01 -# ft2_nnet_num_epochs=12 -# ft2_margin_warmup=3 -# ft2_reg_weight_embed=0.1 -# ft2_min_chunk=10 -# ft2_max_chunk=60 - -# ft2_opt_opt="--optim.opt-type sgd --optim.lr $ft2_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 
--use-amp --var-batch-size" -# ft2_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" -# ft2_nnet_name=${ft_nnet_name}.ft_eaffine_rege_w${ft2_reg_weigth_embed}_${ft2_min_chunk}_${ft2_max_chunk}_sgdcos_lr${ft2_lr}_b${ft2_eff_batch_size}_amp.v2 -# ft2_nnet_dir=exp/xvector_nnets/$ft2_nnet_name -# ft2_nnet=$ft2_nnet_dir/model_ep0010.pth - - -# xvector full nnet finetuning -ft2_batch_size_1gpu=6 -ft2_eff_batch_size=128 # effective batch size -ft2_ipe=1 -ft2_lr=0.01 -ft2_nnet_num_epochs=15 -ft2_margin=0.5 -ft2_margin_warmup=3 -ft2_reg_weight_embed=0.1 -ft2_reg_weight_enc=0.1 -ft2_min_chunk=10 -ft2_max_chunk=10 - -ft2_opt_opt="--optim.opt-type sgd --optim.lr $ft2_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft2_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" -ft2_nnet_name=${ft_nnet_name}.ft_reg_wenc${ft2_reg_weight_enc}_we${ft2_reg_weight_embed}_${ft2_min_chunk}_${ft2_max_chunk}_sgdcos_lr${ft2_lr}_b${ft2_eff_batch_size}_amp.v1 -ft2_nnet_dir=exp/xvector_nnets/$ft2_nnet_name -ft2_nnet=$ft2_nnet_dir/model_ep0012.pth - - # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=0 plda_type=splda -# lda_dim=200 -# plda_y_dim=150 -# plda_z_dim=200 - diff --git a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh index 96475c53..a57f16d9 100644 --- a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh +++ b/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh @@ -9,21 +9,15 @@ vad_config=conf/vad_16k.yaml # x-vector training nnet_data=voxcelebcat_sre_alllangs_mixfs -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=24 eff_batch_size=512 # effective batch size -ipe=1 min_chunk=4 max_chunk=4 lr=0.02 -nnet_type=tseres2net50 +nnet_type=resnet dropout=0 embed_dim=256 -width_factor=1.625 -scale=4 -ws_tag=w26s4 se_r=256 s=30 @@ -31,13 +25,8 @@ margin_warmup=20 margin=0.3 attstats_inner=128 -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale --se-r $se_r --pool_net.pool-type ch-wise-att-mean+stddev --pool_net.inner-feats $attstats_inner" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 65 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -nnet_name=${feat_type}_${nnet_type}${ws_tag}_r${se_r}_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=75 +nnet_base_cfg=conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml 
+nnet_name=${feat_type}_tseres2net50w26s4_r${se_r}_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 nnet_dir=exp/xvector_nnets/$nnet_name nnet=$nnet_dir/model_ep0075.pth nnet=$nnet_dir/swa_model_ep0076.pth @@ -49,12 +38,9 @@ ft_min_chunk=10 ft_max_chunk=15 ft_ipe=1 ft_lr=0.01 -ft_nnet_num_epochs=15 ft_margin=0.5 -ft_margin_warmup=3 -ft_opt_opt="--optim.opt-type sgd --optim.lr $ft_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" +ft_nnet_base_cfg=conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name ft_nnet=$ft_nnet_dir/model_ep0007.pth @@ -69,7 +55,4 @@ else plda_data=voxceleb2cat_train_augx${plda_num_augs} fi plda_type=splda -# lda_dim=200 -# plda_y_dim=150 -# plda_z_dim=200 diff --git a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.02_amp.v1.sh b/egs/sre21-av-a/v1.16k/global_conf/deprecated/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.02_amp.v1.sh similarity index 100% rename from egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.02_amp.v1.sh rename to egs/sre21-av-a/v1.16k/global_conf/deprecated/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.02_amp.v1.sh diff --git a/egs/sre21-av-a/v1.16k/global_conf/deprecated/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh b/egs/sre21-av-a/v1.16k/global_conf/deprecated/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh new file mode 100644 index 00000000..b5863308 --- /dev/null +++ b/egs/sre21-av-a/v1.16k/global_conf/deprecated/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.voxcelebcat.sh @@ -0,0 +1,49 @@ +# Res2Net50 w26s8 x-vector with mixed precision training + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxcelebcat + +eff_batch_size=512 # effective batch size +min_chunk=4 +max_chunk=4 +lr=0.02 + +nnet_type=resnet +dropout=0 +embed_dim=256 + +s=30 +margin_warmup=20 +margin=0.3 +attstats_inner=128 + +nnet_base_cfg=conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml +nnet_args="--model.pool_net.pool-type mean+stddev" +nnet_name=${feat_type}_res2net50w26s8_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0070.pth +#nnet=$nnet_dir/swa_model_ep0076.pth + +# xvector full net finetuning with out-of-domain +ft_eff_batch_size=128 # effective batch size +ft_min_chunk=10 +ft_max_chunk=10 +ft_lr=0.01 +ft_margin=0.5 + +ft_nnet_base_cfg=conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml +ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 +ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name +ft_nnet=$ft_nnet_dir/model_ep0007.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +plda_type=splda diff --git a/egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.02_amp.v1.sh 
b/egs/sre21-av-a/v1.16k/global_conf/deprecated/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.02_amp.v1.sh similarity index 100% rename from egs/sre21-av-a/v1.16k/global_conf/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.02_amp.v1.sh rename to egs/sre21-av-a/v1.16k/global_conf/deprecated/config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.02_amp.v1.sh diff --git a/egs/sre21-av-a/v1.16k/run_011_train_xvector.sh b/egs/sre21-av-a/v1.16k/run_011_train_xvector.sh index 7f405952..d7ea8ed0 100755 --- a/egs/sre21-av-a/v1.16k/run_011_train_xvector.sh +++ b/egs/sre21-av-a/v1.16k/run_011_train_xvector.sh @@ -19,7 +19,6 @@ num_workers="" list_dir=data/${nnet_data}_proc_audio_no_sil -args="" if [ -n "$num_workers" ];then extra_args="--data.train.data_loader.num-workers $num_workers" fi diff --git a/egs/sre21-av-a/v1.16k/run_012_finetune_xvector.sh b/egs/sre21-av-a/v1.16k/run_012_finetune_xvector.sh deleted file mode 100755 index 58a3fdc9..00000000 --- a/egs/sre21-av-a/v1.16k/run_012_finetune_xvector.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash -# Copyright -# 2019 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -ngpu=4 -config_file=default_config.sh -resume=false -interactive=false -num_workers=3 - -. parse_options.sh || exit 1; -. $config_file -. datapath.sh - -batch_size=$(($ft_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $ft_eff_batch_size $ft_min_chunk $ft_max_chunk | awk '{ print int($2/($1*$4*2/($3+$4))+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) -list_dir=data/${nnet_data}_proc_audio_no_sil - -args="" -if [ "$resume" == "true" ];then - args="--resume" -fi - -if [ "$interactive" == "true" ];then - export cuda_cmd=run.pl -fi - -# Network Training -if [ $stage -le 1 ]; then - mkdir -p $ft_nnet_dir/log - $cuda_cmd --gpu $ngpu $ft_nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-finetune-xvec-from-wav.py --feats $feat_config $aug_opt \ - --audio-path $list_dir/wav.scp \ - --time-durs-file $list_dir/utt2dur \ - --train-list $list_dir/lists_xvec/train.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --class-file $list_dir/lists_xvec/class2int \ - --min-chunk-length $ft_min_chunk --max-chunk-length $ft_max_chunk \ - --iters-per-epoch $ft_ipe \ - --batch-size $batch_size \ - --num-workers $num_workers $ft_opt_opt $ft_lrs_opt \ - --grad-acc-steps $grad_acc_steps \ - --epochs $ft_nnet_num_epochs \ - --cos-scale $s --margin $ft_margin --margin-warmup-epochs $ft_margin_warmup \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --in-model-path $nnet \ - --train-mode ft-full \ - --exp-path $ft_nnet_dir $args - -fi - - diff --git a/egs/sre21-av-a/v1.16k/run_014_train_lid.sh b/egs/sre21-av-a/v1.16k/run_014_train_lid.sh index 6251de97..35d2c0bc 100755 --- a/egs/sre21-av-a/v1.16k/run_014_train_lid.sh +++ b/egs/sre21-av-a/v1.16k/run_014_train_lid.sh @@ -10,19 +10,17 @@ set -e stage=1 ngpu=4 config_file=default_config.sh -resume=false interactive=false -num_workers=8 -lid_ipe=1 +num_workers="" + . parse_options.sh || exit 1; . $config_file . 
datapath.sh list_dir=data/train_lid_proc_audio_no_sil -args="" -if [ "$resume" == "true" ];then - args="--resume" +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" fi if [ "$interactive" == "true" ];then @@ -33,22 +31,20 @@ lid_nnet_dir=exp/lid_nnets/lresnet34_lid_v1 # Network Training if [ $stage -le 1 ]; then - train_exec=torch-train-resnet-xvec-from-wav.py mkdir -p $lid_nnet_dir/log $cuda_cmd \ --gpu $ngpu $lid_nnet_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - $train_exec --cfg conf/lresnet34_lid_v1.yaml \ - --audio-path $list_dir/wav.scp \ - --time-durs-file $list_dir/utt2dur \ - --train-list $list_dir/lists_train_lid/train.scp \ - --val-list $list_dir/lists_train_lid/val.scp \ - --class-file $list_dir/lists_train_lid/class2int \ - --iters-per-epoch $lid_ipe \ - --num-workers $num_workers \ - --num-gpus $ngpu \ - --exp-path $lid_nnet_dir $args - + train_xvector_from_wav.py resnet \ + --cfg conf/train_lresnet34_lid_v1.yaml \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.segments-file $list_dir/lists_train_lid/train.scp \ + --data.train.dataset.class-file $list_dir/lists_train_lid/class2int \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.segments-file $list_dir/lists_train_lid/val.scp \ + --trainer.exp-path $lid_nnet_dir $extra_args \ + --num-gpus $ngpu fi -exit diff --git a/egs/sre21-av-a/v1.16k/run_041_eval_be_v2.sh b/egs/sre21-av-a/v1.16k/run_041_eval_be_v2.sh index 0941951f..73cb9a3d 100755 --- a/egs/sre21-av-a/v1.16k/run_041_eval_be_v2.sh +++ b/egs/sre21-av-a/v1.16k/run_041_eval_be_v2.sh @@ -195,7 +195,7 @@ if [ $stage -le 5 ]; then #SRE superset and 16 echo "SRE Superset Dev" steps_be/eval_be_plda_snorm_v2_cts.sh \ - --cmd "$train_cmd --mem 8G" \ + --cmd "$train_cmd --mem 12G" \ --plda_type $plda_type --ncoh $ncoh --num-parts 100 \ data/sre_cts_superset_16k_dev/trials \ data/sre_cts_superset_16k_dev/utt2enroll \ diff --git a/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..bc311234 --- /dev/null +++ b/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml @@ -0,0 +1,104 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + 
res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + multilayer: true + multilayer_concat: true + endpoint_channels: 8192 + dropout_rate: 0.0 + hid_act: relu6 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 + hid_act: relu6 +trainer: + optim: + opt_type: adam + lr: 0.02 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 10000 + hold_steps: 30000 + min_lr: 1.0e-05 + warmup_steps: 1000 + update_lr_on_opt_step: true + grad_clip: 250 + swa_start: 65 + swa_anneal_epochs: 5 + swa_lr: 1e-3 + use_amp: true + log_interval: 1000 + epochs: 75 + eff_batch_size: 512 diff --git a/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..031e9ca3 --- /dev/null +++ b/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 15.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 15.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + cos_scale: 30.0 + margin: 0.5 + margin_warmup_epochs: 3 + intertop_margin: 0.0 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 1e-5 + lrsched: + lrsch_type: cos_lr + t: 2500 + t_mul: 2 + warm_restarts: true + gamma: 0.75 + min_lr: 1e-4 + warmup_steps: 100 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 7 + eff_batch_size: 128 + diff --git a/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..416926d0 --- /dev/null +++ b/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml @@ -0,0 +1,82 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_type: res2net50 + in_channels: 1 + in_feats: 64 + in_kernel_size: 3 + 
in_stride: 1 + no_maxpool: true + res2net_width_factor: 3.25 + res2net_scale: 8 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 + hid_act: relu6 +trainer: + optim: + opt_type: adam + lr: 0.02 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 10000 + hold_steps: 35000 + min_lr: 1.0e-05 + warmup_steps: 1000 + update_lr_on_opt_step: true + grad_clip: 250 + swa_start: 65 + swa_anneal_epochs: 5 + swa_lr: 1e-3 + use_amp: true + log_interval: 1000 + epochs: 75 + eff_batch_size: 512 diff --git a/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..16203033 --- /dev/null +++ b/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml @@ -0,0 +1,65 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 10.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 10.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + cos_scale: 30.0 + margin: 0.5 + margin_warmup_epochs: 3 + intertop_margin: 0.0 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 1e-5 + lrsched: + lrsch_type: cos_lr + t: 2500 + t_mul: 2 + warm_restarts: true + gamma: 0.75 + min_lr: 1e-4 + warmup_steps: 100 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 7 + eff_batch_size: 128 diff --git a/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..2d74799c --- /dev/null +++ b/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml @@ -0,0 +1,83 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_type: tseres2net50 + in_channels: 1 + in_feats: 64 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 1.625 + res2net_scale: 4 
+ se_r: 256 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 256 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 20.0 + dropout_rate: 0.0 + hid_act: relu6 +trainer: + optim: + opt_type: adam + lr: 0.02 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 1.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 10000 + hold_steps: 40000 + min_lr: 1.0e-05 + warmup_steps: 1000 + update_lr_on_opt_step: true + grad_clip: 250 + swa_start: 65 + swa_anneal_epochs: 5 + swa_lr: 1e-3 + use_amp: true + log_interval: 1000 + epochs: 75 + eff_batch_size: 512 diff --git a/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..f34b4896 --- /dev/null +++ b/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml @@ -0,0 +1,66 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 10.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 8 + max_chunk_length: 10.0 + min_chunk_length: 10.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: uniform + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +feats: fbank80_stmn_16k.yaml +model: + cos_scale: 30.0 + margin: 0.5 + margin_warmup_epochs: 3 + intertop_margin: 0.0 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 1e-5 + lrsched: + lrsch_type: cos_lr + t: 2500 + t_mul: 2 + warm_restarts: true + gamma: 0.75 + min_lr: 1e-4 + warmup_steps: 100 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 7 + eff_batch_size: 128 + diff --git a/egs/sre21-av-a/v1.8k/default_config.sh b/egs/sre21-av-a/v1.8k/default_config.sh index 91a20745..74b76b0a 120000 --- a/egs/sre21-av-a/v1.8k/default_config.sh +++ b/egs/sre21-av-a/v1.8k/default_config.sh @@ -1 +1 @@ -global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh \ No newline at end of file +global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh \ No newline at end of file diff --git a/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh b/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh index 69ad025b..65c2c924 100644 --- a/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh +++ b/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh @@ -9,53 +9,19 @@ vad_config=conf/vad_8k.yaml # x-vector training nnet_data=voxcelebcat_sre_alllangs_mixfs -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" -batch_size_1gpu=16 eff_batch_size=512 # effective batch size -ipe=1 -min_chunk=4 -max_chunk=4 lr=0.02 nnet_type=resnet1d -block_type=seres2bn # squeeze-excitation res2net 
bottleneck -channels=2048 -ep_channels=8192 -width_factor=1 -scale=8 -se_r=16 dropout=0 -attstats_inner=128 embed_dim=256 s=30 margin_warmup=20 margin=0.3 -nnet_opt="--resnet_enc.in-feats 64 \ - --resnet_enc.in-conv-channels $channels \ - --resnet_enc.in-kernel-size 5 \ - --resnet_enc.in-stride 1 \ - --resnet_enc.resb-type $block_type \ - --resnet_enc.resb-repeats 1 1 1 1 \ - --resnet_enc.resb-channels $channels \ - --resnet_enc.resb-kernel-sizes 3 \ - --resnet_enc.resb-dilations 2 3 4 5 \ - --resnet_enc.resb-strides 1 \ - --resnet_enc.res2net-width-factor $width_factor \ - --resnet_enc.res2net-scale $scale \ - --resnet_enc.se-r $se_r \ - --resnet_enc.multilayer \ - --resnet_enc.multilayer-concat \ - --resnet_enc.endpoint-channels $ep_channels \ - --pool_net.pool-type ch-wise-att-mean+stddev \ - --pool_net.inner-feats $attstats_inner \ - --embed-dim $embed_dim" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 65 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 30000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - +nnet_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml nnet_name=${feat_type}_ecapatdnn2048x4_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 nnet_num_epochs=75 nnet_dir=exp/xvector_nnets/$nnet_name @@ -63,18 +29,14 @@ nnet=$nnet_dir/model_ep0070.pth nnet=$nnet_dir/swa_model_ep0076.pth # xvector full net finetuning with out-of-domain -ft_batch_size_1gpu=8 ft_eff_batch_size=128 # effective batch size ft_min_chunk=10 ft_max_chunk=15 -ft_ipe=1 ft_lr=0.01 ft_nnet_num_epochs=15 ft_margin=0.5 -ft_margin_warmup=3 -ft_opt_opt="--optim.opt-type sgd --optim.lr $ft_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" +ft_nnet_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name ft_nnet=$ft_nnet_dir/model_ep0007.pth @@ -82,8 +44,10 @@ ft_nnet=$ft_nnet_dir/model_ep0007.pth # back-end plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi plda_type=splda -# lda_dim=200 -# plda_y_dim=150 -# plda_z_dim=200 diff --git a/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh b/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh new file mode 100644 index 00000000..824361d0 --- /dev/null +++ b/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh @@ -0,0 +1,48 @@ +# Res2Net50 w26s8 x-vector with mixed precision training + +# acoustic features +feat_config=conf/fbank64_stmn_8k.yaml +feat_type=fbank64_stmn + +#vad +vad_config=conf/vad_8k.yaml + +# x-vector training +nnet_data=voxcelebcat_sre_alllangs_mixfs + +eff_batch_size=512 # effective batch size +min_chunk=4 +max_chunk=4 +lr=0.02 + 
+nnet_type=resnet +dropout=0 +embed_dim=256 + +s=30 +margin_warmup=20 +margin=0.3 +attstats_inner=128 + +nnet_base_cfg=conf/train_res2net50w26s8_xvec_stage1_v1.0.yaml +nnet_name=${feat_type}_res2net50w26s8_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0070.pth +nnet=$nnet_dir/swa_model_ep0076.pth + +# xvector full net finetuning with out-of-domain +ft_eff_batch_size=128 # effective batch size +ft_min_chunk=10 +ft_max_chunk=10 +ft_lr=0.01 +ft_margin=0.5 + +ft_nnet_base_cfg=conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml +ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 +ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name +ft_nnet=$ft_nnet_dir/model_ep0007.pth + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +plda_type=splda diff --git a/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh deleted file mode 100644 index e1a923d7..00000000 --- a/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ /dev/null @@ -1,68 +0,0 @@ -# LResNet34 x-vector with mixed precision training - -# acoustic features -feat_config=conf/fbank64_stmn_8k.yaml -feat_type=fbank64_stmn - -#vad -vad_config=conf/vad_8k.yaml - -# x-vector training -nnet_data=voxcelebcat_sre_alllangs_mixfs -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -batch_size_1gpu=16 -eff_batch_size=512 # effective batch size -ipe=1 -min_chunk=4 -max_chunk=4 -lr=0.02 - -nnet_type=res2net50 -dropout=0 -embed_dim=256 -width_factor=3.25 -scale=8 -ws_tag=w26s8 - -s=30 -margin_warmup=20 -margin=0.3 -attstats_inner=128 - -nnet_opt="--resnet-type $nnet_type --in-feats 64 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale --pool_net.pool-type ch-wise-att-mean+stddev --pool_net.inner-feats $attstats_inner" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 65 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 35000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -nnet_name=${feat_type}_${nnet_type}${ws_tag}_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=75 -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0070.pth -nnet=$nnet_dir/swa_model_ep0076.pth - -# xvector full net finetuning with out-of-domain -ft_batch_size_1gpu=8 -ft_eff_batch_size=128 # effective batch size -ft_min_chunk=10 -ft_max_chunk=10 -ft_ipe=1 -ft_lr=0.01 -ft_nnet_num_epochs=15 -ft_margin=0.5 -ft_margin_warmup=3 - -ft_opt_opt="--optim.opt-type sgd --optim.lr $ft_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" 
-ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 -ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name -ft_nnet=$ft_nnet_dir/model_ep0007.pth - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=0 -plda_type=splda -# lda_dim=200 -# plda_y_dim=150 -# plda_z_dim=200 - diff --git a/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh b/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh new file mode 100644 index 00000000..58010842 --- /dev/null +++ b/egs/sre21-av-a/v1.8k/global_conf/config_fbank64_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh @@ -0,0 +1,58 @@ +# Time SE Res2Net50 w26s4 x-vector with mixed precision training + +# acoustic features +feat_config=conf/fbank64_stmn_8k.yaml +feat_type=fbank64_stmn + +#vad +vad_config=conf/vad_8k.yaml + +# x-vector training +nnet_data=voxcelebcat_sre_alllangs_mixfs + +eff_batch_size=512 # effective batch size +min_chunk=4 +max_chunk=4 +lr=0.02 + +nnet_type=resnet +dropout=0 +embed_dim=256 +se_r=256 + +s=30 +margin_warmup=20 +margin=0.3 +attstats_inner=128 + +nnet_base_cfg=conf/train_tseres2net50w26s4_xvec_stage1_v1.0.yaml +nnet_name=${feat_type}_tseres2net50w26s4_r${se_r}_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/model_ep0075.pth +nnet=$nnet_dir/swa_model_ep0076.pth + +# xvector full net finetuning with out-of-domain +ft_batch_size_1gpu=8 +ft_eff_batch_size=128 # effective batch size +ft_min_chunk=10 +ft_max_chunk=10 +ft_ipe=1 +ft_lr=0.01 +ft_margin=0.5 + +ft_nnet_base_cfg=conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml +ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 +ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name +ft_nnet=$ft_nnet_dir/model_ep0007.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda + diff --git a/egs/sre21-av-a/v1.8k/global_conf/config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh b/egs/sre21-av-a/v1.8k/global_conf/config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh deleted file mode 100644 index 9f5c8e70..00000000 --- a/egs/sre21-av-a/v1.8k/global_conf/config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.05_amp.v1.sh +++ /dev/null @@ -1,76 +0,0 @@ -# Time SE Res2Net50 w26s4 x-vector with mixed precision training - -# acoustic features -feat_config=conf/fbank80_stmn_8k.yaml -feat_type=fbank80_stmn - -#vad -vad_config=conf/vad_8k.yaml - -# x-vector training -nnet_data=voxcelebcat_sre_alllangs_mixfs -aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" - -batch_size_1gpu=24 -eff_batch_size=512 # effective batch size -ipe=1 -min_chunk=4 -max_chunk=4 -lr=0.02 - -nnet_type=tseres2net50 -dropout=0 -embed_dim=256 -width_factor=1.625 -scale=4 -ws_tag=w26s4 -se_r=256 - -s=30 -margin_warmup=20 -margin=0.3 -attstats_inner=128 - -nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale --se-r $se_r 
--pool_net.pool-type ch-wise-att-mean+stddev --pool_net.inner-feats $attstats_inner" - -opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 65 --swa-lr 1e-3 --swa-anneal-epochs 5" -lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 10000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step" - -nnet_name=${feat_type}_${nnet_type}${ws_tag}_r${se_r}_chattstatsi128_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1 -nnet_num_epochs=75 -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0075.pth -nnet=$nnet_dir/swa_model_ep0076.pth - -# xvector full net finetuning with out-of-domain -ft_batch_size_1gpu=8 -ft_eff_batch_size=128 # effective batch size -ft_min_chunk=10 -ft_max_chunk=15 -ft_ipe=1 -ft_lr=0.01 -ft_nnet_num_epochs=21 -ft_nnet_num_epochs=45 -ft_margin=0.5 -ft_margin_warmup=3 - -ft_opt_opt="--optim.opt-type sgd --optim.lr $ft_lr --optim.momentum 0.9 --optim.weight-decay 1e-5 --use-amp --var-batch-size" -ft_lrs_opt="--lrsched.lrsch-type cos_lr --lrsched.t 2500 --lrsched.t-mul 2 --lrsched.warm-restarts --lrsched.gamma 0.75 --lrsched.min-lr 1e-4 --lrsched.warmup-steps 100 --lrsched.update-lr-on-opt-step" -ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v1 -ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name -ft_nnet=$ft_nnet_dir/model_ep0014.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=0 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -# lda_dim=200 -# plda_y_dim=150 -# plda_z_dim=200 - diff --git a/egs/sre21-av-a/v1.8k/run_011_train_xvector.sh b/egs/sre21-av-a/v1.8k/run_011_train_xvector.sh index 9891e812..d7ea8ed0 100755 --- a/egs/sre21-av-a/v1.8k/run_011_train_xvector.sh +++ b/egs/sre21-av-a/v1.8k/run_011_train_xvector.sh @@ -10,22 +10,17 @@ set -e stage=1 ngpu=4 config_file=default_config.sh -resume=false interactive=false -num_workers=8 +num_workers="" . parse_options.sh || exit 1; . $config_file . 
datapath.sh -batch_size=$(($batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $eff_batch_size | awk '{ print int($2/$1+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) list_dir=data/${nnet_data}_proc_audio_no_sil -args="" -if [ "$resume" == "true" ];then - args="--resume" +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" fi if [ "$interactive" == "true" ];then @@ -35,6 +30,49 @@ fi # Network Training if [ $stage -le 1 ]; then + mkdir -p $nnet_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_xvector_from_wav.py $nnet_type \ + --cfg $nnet_base_cfg $nnet_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ + --trainer.exp-path $nnet_dir \ + --num-gpus $ngpu \ + +fi + +# Large Margin Fine-tuning +if [ $stage -le 2 ]; then + mkdir -p $ft_nnet_dir/log + $cuda_cmd \ + --gpu $ngpu $ft_nnet_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_xvector_from_wav.py $nnet_type \ + --cfg $ft_nnet_base_cfg $ft_nnet_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.time-durs-file $list_dir/utt2dur \ + --data.train.dataset.segments-file $list_dir/lists_xvec/train.scp \ + --data.train.dataset.class-files $list_dir/lists_xvec/class2int \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.time-durs-file $list_dir/utt2dur \ + --data.val.dataset.segments-file $list_dir/lists_xvec/val.scp \ + --in-model-file $nnet \ + --trainer.exp-path $ft_nnet_dir \ + --num-gpus $ngpu \ + +fi +exit + +# Network Training +if [ $stage -le 1 ]; then + if [[ ${nnet_type} =~ resnet1d ]]; then train_exec=torch-train-resnet1d-xvec-from-wav.py elif [[ ${nnet_type} =~ resnet ]] || [[ ${nnet_type} =~ resnext ]] || [[ ${nnet_type} =~ res2net ]] || [[ ${nnet_type} =~ res2next ]]; then diff --git a/egs/sre21-av-a/v1.8k/run_012_finetune_xvector.sh b/egs/sre21-av-a/v1.8k/run_012_finetune_xvector.sh deleted file mode 100755 index 58a3fdc9..00000000 --- a/egs/sre21-av-a/v1.8k/run_012_finetune_xvector.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash -# Copyright -# 2019 Johns Hopkins University (Author: Jesus Villalba) -# Apache 2.0. -# -. ./cmd.sh -. ./path.sh -set -e - -stage=1 -ngpu=4 -config_file=default_config.sh -resume=false -interactive=false -num_workers=3 - -. parse_options.sh || exit 1; -. $config_file -. 
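Note: the rewritten training stages load a YAML base config with --cfg and override individual nested values with dotted flags, which is how the optional num-workers override above reaches the data loader. A minimal jsonargparse sketch of the mechanism (argument names are illustrative, not the trainer's full schema):

    from jsonargparse import ActionConfigFile, ArgumentParser

    parser = ArgumentParser()
    parser.add_argument("--cfg", action=ActionConfigFile)  # YAML base config
    parser.add_argument("--data.train.data_loader.num-workers", type=int, default=5)

    # command-line flags win over values loaded from the YAML file
    args = parser.parse_args(["--data.train.data_loader.num-workers", "8"])
    print(args.data.train.data_loader.num_workers)  # -> 8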
datapath.sh - -batch_size=$(($ft_batch_size_1gpu*$ngpu)) -grad_acc_steps=$(echo $batch_size $ft_eff_batch_size $ft_min_chunk $ft_max_chunk | awk '{ print int($2/($1*$4*2/($3+$4))+0.5)}') -log_interval=$(echo 100*$grad_acc_steps | bc) -list_dir=data/${nnet_data}_proc_audio_no_sil - -args="" -if [ "$resume" == "true" ];then - args="--resume" -fi - -if [ "$interactive" == "true" ];then - export cuda_cmd=run.pl -fi - -# Network Training -if [ $stage -le 1 ]; then - mkdir -p $ft_nnet_dir/log - $cuda_cmd --gpu $ngpu $ft_nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - torch-finetune-xvec-from-wav.py --feats $feat_config $aug_opt \ - --audio-path $list_dir/wav.scp \ - --time-durs-file $list_dir/utt2dur \ - --train-list $list_dir/lists_xvec/train.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --class-file $list_dir/lists_xvec/class2int \ - --min-chunk-length $ft_min_chunk --max-chunk-length $ft_max_chunk \ - --iters-per-epoch $ft_ipe \ - --batch-size $batch_size \ - --num-workers $num_workers $ft_opt_opt $ft_lrs_opt \ - --grad-acc-steps $grad_acc_steps \ - --epochs $ft_nnet_num_epochs \ - --cos-scale $s --margin $ft_margin --margin-warmup-epochs $ft_margin_warmup \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --in-model-path $nnet \ - --train-mode ft-full \ - --exp-path $ft_nnet_dir $args - -fi - - diff --git a/egs/voxceleb/v1.1/run_002_compute_evad.sh b/egs/voxceleb/v1.1/run_002_compute_evad.sh index 7a2a9be5..4e82a87a 100755 --- a/egs/voxceleb/v1.1/run_002_compute_evad.sh +++ b/egs/voxceleb/v1.1/run_002_compute_evad.sh @@ -41,7 +41,6 @@ if [ $stage -le 1 ]; then fi fi -#Train datasets if [ $stage -le 2 ];then if [ "$do_voxsrc22" == "true" ];then extra_data="voxsrc22_dev" diff --git a/egs/voxceleb/v1.2/run_001_prepare_data.sh b/egs/voxceleb/v1.2/run_001_prepare_data.sh index 831eb1bc..f956bc8c 100755 --- a/egs/voxceleb/v1.2/run_001_prepare_data.sh +++ b/egs/voxceleb/v1.2/run_001_prepare_data.sh @@ -20,17 +20,17 @@ if [ $stage -le 1 ];then prepare_data.py voxceleb2 --subset dev --corpus-dir $voxceleb2_root \ --cat-videos --use-kaldi-ids \ --output-dir data/voxceleb2cat_train - #local/make_voxceleb2cat.pl $voxceleb2_root dev 16 data/voxceleb2cat_train fi -exit + if [ $stage -le 2 ];then # prepare voxceleb1 for test - # This script is for the old version of the dataset - # local/make_voxceleb1_oeh.pl $voxceleb1_root data - # Use this for the newer version of voxceleb1: - local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data + #hyp_utils/conda_env.sh \ + prepare_data.py voxceleb1 --task test --corpus-dir $voxceleb1_root \ + --use-kaldi-ids \ + --output-dir data/voxceleb1_test + #local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data fi - +exit if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then local/prepare_voxsrc22_dev.py \ --vox1-corpus-dir $voxceleb1_root \ diff --git a/hyp_utils/create_data_link.pl b/hyp_utils/create_data_link.pl new file mode 100755 index 00000000..850f29f0 --- /dev/null +++ b/hyp_utils/create_data_link.pl @@ -0,0 +1,132 @@ +#!/usr/bin/env perl + +# Copyright 2013 Guoguo Chen +# 2014 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0. +# +# This script distributes data onto different file systems by making symbolic +# links. It is supposed to use together with utils/create_split_dir.pl, which +# creates a "storage" directory that links to different file systems. +# +# If a sub-directory egs/storage does not exist, it does nothing. 
If it exists,
+# then it selects pseudo-randomly a number from those available in egs/storage/*
+# creates a link such as
+#
+# egs/egs.3.4.ark -> storage/4/egs.3.4.ark
+#
+use strict;
+use warnings;
+use File::Basename;
+use File::Spec;
+use Getopt::Long;
+
+sub GetGCD {
+  my ($a, $b) = @_;
+  while ($a != $b) {
+    if ($a > $b) {
+      $a = $a - $b;
+    } else {
+      $b = $b - $a;
+    }
+  }
+  return $a;
+}
+
+my $Usage = <<EOU;
+create_data_link.pl:
+This script distributes data onto different file systems by making symbolic
+links. It is supposed to use together with utils/create_split_dir.pl, which
+creates a "storage" directory that links to different file systems.
+
+If a sub-directory egs/storage does not exist, it does nothing. If it exists,
+then it selects pseudo-randomly a number from those available in egs/storage/*
+creates a link such as
+
+  egs/egs.3.4.ark -> storage/4/egs.3.4.ark
+
+Usage: utils/create_data_link.pl <data-archive1> [<data-archive2> ... ]
+ e.g.: utils/create_data_link.pl foo/bar/egs.3.4.ark foo/bar/egs.3.5.ark
+ (note: the dirname, e.g. foo/bar/, must be the same in all cases).
+
+See also utils/remove_data_links.sh
+EOU
+
+GetOptions();
+
+if (@ARGV == 0) {
+  die $Usage;
+}
+
+my $example_fullpath = $ARGV[0];
+
+# Check if the storage has been created. If so, do nothing.
+my $dirname = dirname($example_fullpath);
+if (! -d "$dirname/storage") {
+  exit(0);
+}
+
+# Storage exists, create symbolic links in the next few steps.
+
+# First, get a list of the available storage directories, and check if they are
+# properly created.
+opendir(my $dh, "$dirname/storage/") || die "$0: Fail to open $dirname/storage/\n";
+my @storage_dirs = grep(/^[0-9]*$/, readdir($dh));
+closedir($dh);
+my $num_storage = scalar(@storage_dirs);
+for (my $x = 1; $x <= $num_storage; $x++) {
+  (-d "$dirname/storage/$x") || die "$0: $dirname/storage/$x does not exist\n";
+}
+
+# Second, get the coprime list.
+my @coprimes;
+for (my $n = 1; $n <= $num_storage; $n++) {
+  if (GetGCD($n, $num_storage) == 1) {
+    push(@coprimes, $n);
+  }
+}
+
+my $ret = 0;
+
+foreach my $fullpath (@ARGV) {
+  if ($dirname ne dirname($fullpath)) {
+    die "Mismatch in directory names of arguments: $example_fullpath versus $fullpath";
+  }
+
+  # Finally, work out the directory index where we should put the data to.
+  my $basename = basename($fullpath);
+  my $filename_numbers = $basename;
+  $filename_numbers =~ s/[^0-9]+/ /g;
+  my @filename_numbers = split(" ", $filename_numbers);
+  my $total = 0;
+  my $index = 0;
+  foreach my $x (@filename_numbers) {
+    if ($index >= scalar(@coprimes)) {
+      $index = 0;
+    }
+    $total += $x * $coprimes[$index];
+    $index++;
+  }
+  my $dir_index = $total % $num_storage + 1;
+
+  # Make the symbolic link.
+  if (-e $fullpath) {
+    unlink($fullpath);
+  }
+  if (symlink("storage/$dir_index/$basename", $fullpath) != 1) { # failure
+    $ret = 1; # will exit with error status.
+  }
+}
+
+exit($ret);
+
+## testing:
+# rm -rf foo bar
+# mkdir -p bar/{1,2,3,4}
+# mkdir -p foo/storage
+# for x in 1 2 3 4; do ln -s ../../bar/$x foo/storage/$x; done
+# utils/create_data_link.pl foo/1.3.ark foo/2.3.ark
+# ls -l foo
+# total 0
+# lrwxrwxrwx 1 dpovey fax 17 Sep 2 17:41 1.3.ark -> storage/3/1.3.ark
+# lrwxrwxrwx 1 dpovey fax 17 Sep 2 17:41 2.3.ark -> storage/4/2.3.ark
+# drwxr-xr-x 2 dpovey fax 38 Sep 2 17:40 storage
diff --git a/hyp_utils/create_data_split_dirs.sh b/hyp_utils/create_data_split_dirs.sh
new file mode 100755
index 00000000..877b9e3f
--- /dev/null
+++ b/hyp_utils/create_data_split_dirs.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Copyright
+#           2023 Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+# Creates links to distribute data into multiple nodes in clsp grid
+
+storage_name=$(date +'%m_%d_%H_%M')
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
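Note: the storage index above is a deterministic hash of the digits in the file name, weighted by the coprimes of num_storage so that consecutive archive numbers spread across the available disks. An illustrative Python transcription of the Perl logic:

    import math
    import re

    def storage_index(basename, num_storage):
        # coprimes of num_storage, as computed with GetGCD above
        coprimes = [n for n in range(1, num_storage + 1)
                    if math.gcd(n, num_storage) == 1]
        numbers = [int(x) for x in re.sub(r"[^0-9]+", " ", basename).split()]
        total = sum(x * coprimes[i % len(coprimes)] for i, x in enumerate(numbers))
        return total % num_storage + 1  # 1-based storage sub-directory

    print(storage_index("egs.3.4.ark", 4))  # -> 4, i.e. egs.3.4.ark -> storage/4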
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 <output-dir> <storage-dir> <nodes>"
+  echo "$0 exp/vad_dir $USER/hyp-data/voxceleb/v1/vad/storage b0"
+fi
+output_dir=$1
+storage_dir=$2
+nodes=$3
+
+link_dir=$output_dir/storage
+
+if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $link_dir ]; then
+  echo "Preparing to distribute data over multiple nodes ($nodes)"
+  dir_name=$storage_dir/$storage_name/storage
+  if [ "$nodes" == "b0" ];then
+    hyp_utils/create_split_dir.pl \
+	/export/b{04,05,06,07}/$dir_name $link_dir
+  elif [ "$nodes" == "b1" ];then
+    hyp_utils/create_split_dir.pl \
+	/export/b{14,15,16,17}/$dir_name $link_dir
+  elif [ "$nodes" == "c0" ];then
+    hyp_utils/create_split_dir.pl \
+	/export/c{06,07,08,09}/$dir_name $link_dir
+  elif [ "$nodes" == "fs01" ];then
+    hyp_utils/create_split_dir.pl \
+	/export/fs01/$dir_name $link_dir
+  else
+    echo "we don't distribute data between multiple machines"
+  fi
+fi
+
+
diff --git a/hyp_utils/create_data_split_links.sh b/hyp_utils/create_data_split_links.sh
new file mode 100755
index 00000000..fb5b8ca0
--- /dev/null
+++ b/hyp_utils/create_data_split_links.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Copyright
+#           2023 Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+# Creates links to distribute data into multiple nodes in clsp grid
+
+storage_name=$(date +'%m_%d_%H_%M')
+
+echo "$0 $@"  # Print the command line for logging
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <output-file-pattern> <num-jobs>"
+  echo "$0 exp/vad_dir/vad.JOB.ark 40"
+fi
+output_file_pattern=$1
+nj=$2
+
+for n in $(seq $nj); do
+  # the next command does nothing unless output_dir/storage exists, see
+  # hyp_utils/create_data_link.pl for more info.
+  output_file=$(echo $output_file_pattern | sed 's@\.JOB\.@.'$n'.@')
+  hyp_utils/create_data_link.pl $output_file
+done
+
diff --git a/hyp_utils/create_split_dir.pl b/hyp_utils/create_split_dir.pl
new file mode 100755
index 00000000..ab952357
--- /dev/null
+++ b/hyp_utils/create_split_dir.pl
@@ -0,0 +1,92 @@
+#!/usr/bin/env perl
+
+# Copyright 2013  Guoguo Chen
+# Apache 2.0.
+#
+# This script creates storage directories on different file systems, and creates
+# symbolic links to those directories. For example, a command
+#
+# utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage
+#
+# will mkdir -p all of those directories, and will create links
+#
+# egs/storage/1 -> /export/gpu-03/egs/storage
+# egs/storage/2 -> /export/gpu-04/egs/storage
+# ...
+#
+use strict;
+use warnings;
+use File::Spec;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+create_split_dir.pl:
+This script creates storage directories on different file systems, and creates
+symbolic links to those directories.
+
+Usage: utils/create_split_dir.pl <actual-storage-dirs> <pseudo-storage-dir>
+ e.g.: utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage
+
+Allowed options:
+  --suffix <suffix> : Common suffix to <actual-storage-dirs> (string, default = "")
+
+See also create_data_link.pl, which is intended to work with the resulting
+directory structure, and remove_data_links.sh
+EOU
+
+my $suffix="";
+GetOptions('suffix=s' => \$suffix);
+
+if (@ARGV < 2) {
+  die $Usage;
+}
+
+my $ans = 1;
+
+my $dir = pop(@ARGV);
+system("mkdir -p $dir 2>/dev/null");
+
+my @all_actual_storage = ();
+foreach my $file (@ARGV) {
+  push @all_actual_storage, File::Spec->rel2abs($file . "/" . $suffix);
+}
+
+my $index = 1;
+foreach my $actual_storage (@all_actual_storage) {
+  my $pseudo_storage = "$dir/$index";
+
+  # If the symbolic link already exists, delete it.
+  if (-l $pseudo_storage) {
+    print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n";
+    $index++;
+    next;
+  }
+
+  # Create the destination directory and make the link.
+  system("mkdir -p $actual_storage 2>/dev/null");
+  if ($?
!= 0) { + print STDERR "$0: error creating directory $actual_storage\n"; + exit(1); + } + { # create a README file for easier deletion. + open(R, ">$actual_storage/README.txt"); + my $storage_dir = File::Spec->rel2abs($dir); + print R "# This directory is linked from $storage_dir, as part of Kaldi striped data\n"; + print R "# The full list of directories where this data resides is:\n"; + foreach my $d (@all_actual_storage) { + print R "$d\n"; + } + close(R); + } + my $ret = symlink($actual_storage, $pseudo_storage); + + # Process the returned values + $ans = $ans && $ret; + if (! $ret) { + print STDERR "Error linking $actual_storage to $pseudo_storage\n"; + } + + $index++; +} + +exit($ans == 1 ? 0 : 1); diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py index 10ea491c..50fd5088 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py @@ -49,11 +49,11 @@ def __init__( self.sigma = sigma def forward(self, s_t): - # print('sigma0=', self.sigma) + if self.sigma > 0: s_t = s_t + self.sigma * torch.randn_like(s_t) - # print('sigma1=', self.sigma) - f_t = self.feat_extractor(s_t) + + f_t, _ = self.feat_extractor(s_t) if self.vad_t is not None: n_vad_frames = len(self.vad_t) n_feat_frames = f_t.shape[1] @@ -320,7 +320,7 @@ def eval_cosine_scoring( ) s.save_txt(score_file) - logging.info("saving stats to %s" % (stats_file)) + logging.info("saving stats to %s", stats_file) attack_stats.to_csv(stats_file) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py index a6f535b3..5697404d 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py @@ -84,7 +84,7 @@ def forward(self, s_t): s_t = self.wav_scale * s_t # End of pre-processing defense - f_t = self.feat_extractor(s_t) + f_t, _ = self.feat_extractor(s_t) if self.vad_t is not None: n_vad_frames = len(self.vad_t) n_feat_frames = f_t.shape[1] @@ -289,13 +289,11 @@ def eval_cosine_scoring_wavegan( vad = torch.tensor(vad, dtype=torch.bool).to(device) model.vad_t = vad logging.info( - "utt %s detected %d/%d (%.2f %%) speech frames" - % ( - key.seg_set[j], - speech_frames, - tot_frames, - speech_frames / tot_frames * 100, - ) + "utt %s detected %d/%d (%.2f %%) speech frames", + key.seg_set[j], + speech_frames, + tot_frames, + speech_frames / tot_frames * 100, ) t2 = time.time() diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py index 5ba42477..0ca1f740 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py @@ -113,7 +113,7 @@ def forward(self, s_t): s_t = s_t[0, 0] f_t = s_t - f_t = self.feat_extractor(s_t) + f_t, _ = self.feat_extractor(s_t) if self.vad_t is not None: n_vad_frames = len(self.vad_t) n_feat_frames = f_t.shape[1] diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py index c3732bd3..49a762af 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py @@ -125,7 +125,7 @@ def eval_cosine_scoring( audio_reader = AR(test_wav_file, **audio_args) if vad_spec is not None: - logging.info("opening VAD 
stream: %s" % (vad_spec)) + logging.info("opening VAD stream: %s", vad_spec) v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) scores = np.zeros((ndx.num_models, ndx.num_tests), dtype="float32") @@ -144,7 +144,7 @@ def eval_cosine_scoring( t2 = time.time() s = torch.as_tensor(s[None, :], dtype=torch.get_default_dtype()).to(device) - x_t = feat_extractor(s) + x_t, _ = feat_extractor(s) t4 = time.time() tot_frames = x_t.shape[1] if vad_spec is not None: diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py index c00cf286..b2c111ca 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py @@ -45,7 +45,7 @@ def __init__( def forward(self, s_t): f_t = s_t - f_t = self.feat_extractor(s_t) + f_t, _ = self.feat_extractor(s_t) if self.vad_t is not None: n_vad_frames = len(self.vad_t) n_feat_frames = f_t.shape[1] diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py index 4f2b82ab..8b6c8dae 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py @@ -59,8 +59,7 @@ def __init__( self.threshold = threshold def forward(self, s_t): - f_t = s_t - f_t = self.feat_extractor(s_t) + f_t, _ = self.feat_extractor(s_t) if self.vad_t is not None: n_vad_frames = len(self.vad_t) n_feat_frames = f_t.shape[1] diff --git a/hyperion/bin/eval_xvec_logits_from_wav.py b/hyperion/bin/eval_xvec_logits_from_wav.py index 2f5cf3da..98ba76b5 100755 --- a/hyperion/bin/eval_xvec_logits_from_wav.py +++ b/hyperion/bin/eval_xvec_logits_from_wav.py @@ -137,7 +137,7 @@ def eval_xvec( with AR(input_spec, **ar_args) as reader: if vad_spec is not None: - logging.info("opening VAD stream: %s" % (vad_spec)) + logging.info("opening VAD stream: %s", vad_spec) v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) while not reader.eof(): @@ -160,7 +160,7 @@ def eval_xvec( x[None, :], dtype=torch.get_default_dtype() ).to(device) - x = feat_extractor(x) + x, _ = feat_extractor(x) t5 = time.time() tot_frames = x.shape[1] if vad_spec is not None: @@ -169,13 +169,11 @@ def eval_xvec( x = x[:, vad] logging.info( - "utt %s detected %d/%d (%.2f %%) speech frames" - % ( - key, - x.shape[1], - tot_frames, - x.shape[1] / tot_frames * 100, - ) + "utt %s detected %d/%d (%.2f %%) speech frames", + key, + x.shape[1], + tot_frames, + x.shape[1] / tot_frames * 100, ) if random_utt_length: @@ -200,27 +198,23 @@ def eval_xvec( read_time = t2 - t1 tot_time = read_time + t8 - t3 logging.info( - ( - "utt %s total-time=%.3f read-time=%.3f " - "aug-time=%.3f feat-time=%.3f " - "vad-time=%.3f embed-time=%.3f write-time=%.3f " - "rt-factor=%.2f" - ) - % ( - key, - tot_time, - read_time, - t4 - t3, - t5 - t4, - t6 - t5, - t7 - t6, - t8 - t7, - x0.shape[0] / fs[0] / tot_time, - ) + "utt %s total-time=%.3f read-time=%.3f " + "aug-time=%.3f feat-time=%.3f " + "vad-time=%.3f embed-time=%.3f write-time=%.3f " + "rt-factor=%.2f", + key, + tot_time, + read_time, + t4 - t3, + t5 - t4, + t6 - t5, + t7 - t6, + t8 - t7, + x0.shape[0] / fs[0] / tot_time, ) if write_num_frames_spec is not None: - logging.info("writing num-frames to %s" % (write_num_frames_spec)) + logging.info("writing num-frames to %s", write_num_frames_spec) u2nf = Utt2Info.create(keys, info) 
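Note: these logging changes all make the same fix: pass arguments to logging instead of %-formatting the message eagerly. The difference, in two lines of standard-library behavior:

    import logging

    x = 3.14159
    logging.info("value=%.3f" % x)  # eager: string is built even if INFO is disabled
    logging.info("value=%.3f", x)   # lazy: built only if the record is emitted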
u2nf.save(write_num_frames_spec) diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py index 1da1ac05..f49a5fb0 100755 --- a/hyperion/bin/extract_xvectors_from_wav.py +++ b/hyperion/bin/extract_xvectors_from_wav.py @@ -163,7 +163,7 @@ def extract_xvectors( x[None, :], dtype=torch.get_default_dtype() ).to(device) - x = feat_extractor(x) + x, _ = feat_extractor(x) t5 = time.time() tot_frames = x.shape[1] if vad_spec is not None: diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py index a31bd614..9dc0aa2c 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_wav.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py @@ -155,7 +155,7 @@ def extract_xvectors( x[None, :], dtype=torch.get_default_dtype() ).to(device) - x = feat_extractor(x) + x, _ = feat_extractor(x) t5 = time.time() tot_frames = x.shape[1] if vad_spec is not None: @@ -164,13 +164,11 @@ def extract_xvectors( x = x[:, vad] logging.info( - "utt %s detected %d/%d (%.2f %%) speech frames" - % ( - key, - x.shape[1], - tot_frames, - x.shape[1] / tot_frames * 100, - ) + "utt %s detected %d/%d (%.2f %%) speech frames", + key, + x.shape[1], + tot_frames, + x.shape[1] / tot_frames * 100, ) t6 = time.time() diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py index 227892ea..7d602709 100755 --- a/hyperion/bin/finetune_xvector_from_wav.py +++ b/hyperion/bin/finetune_xvector_from_wav.py @@ -10,8 +10,12 @@ import time from pathlib import Path -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) import torch from hyperion.hyp_defs import config_logger, set_float_cpu @@ -239,72 +243,3 @@ def make_parser(xvec_class): # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") train_xvec(gpu_id, args_sc) - - -# if __name__ == "__main__": - -# parser = ArgumentParser(description="Fine-tune x-vector model from audio files") -# parser.add_argument("--cfg", action=ActionConfigFile) - -# train_parser = ArgumentParser(prog="") -# AD.add_class_args(train_parser, prefix="dataset", skip={}) -# Sampler.add_class_args(train_parser, prefix="sampler") -# train_parser.add_argument( -# "--data_loader.num-workers", -# type=int, -# default=5, -# help="num_workers of data loader", -# ) - -# val_parser = ArgumentParser(prog="") -# AD.add_class_args(val_parser, prefix="dataset", skip={}) -# Sampler.add_class_args(val_parser, prefix="sampler") -# val_parser.add_argument( -# "--data_loader.num-workers", -# type=int, -# default=5, -# help="num_workers of data loader", -# ) -# data_parser = ArgumentParser(prog="") -# data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) -# data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) -# parser.add_argument("--data", action=ActionParser(parser=data_parser)) -# parser.link_arguments( -# "data.train.dataset.class_file", "data.val.dataset.class_file" -# ) -# parser.link_arguments( -# "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" -# ) -# parser.link_arguments( -# "data.train.sampler.batch_size", "data.val.sampler.batch_size" -# ) - -# AF.add_class_args(parser, prefix="feats") -# parser.add_argument("--in-model-path", required=True) - -# XVec.add_finetune_args(parser, prefix="model") -# Trainer.add_class_args( -# parser, 
prefix="trainer", train_modes=XVec.valid_train_modes()
-# )
-# ddp.add_ddp_args(parser)
-
-# parser.add_argument("--seed", type=int, default=1123581321, help="random seed")
-# parser.add_argument(
-#     "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
-# )
-# parser.add_argument("--local_rank", default=0, type=int)
-
-# args = parser.parse_args()
-# gpu_id = args.local_rank
-# del args.local_rank
-
-# if gpu_id == 0:
-#     try:
-#         config_file = Path(args.exp_path) / "config.yaml"
-#         parser.save(args, str(config_file), format="yaml", overwrite=True)
-#     except:
-#         pass
-
-# # torch docs recommend using forkserver
-# multiprocessing.set_start_method("forkserver")
-# train_xvec(gpu_id, args)
diff --git a/hyperion/data_prep/__init__.py b/hyperion/data_prep/__init__.py
index 7caae8c4..9ae59246 100644
--- a/hyperion/data_prep/__init__.py
+++ b/hyperion/data_prep/__init__.py
@@ -3,6 +3,6 @@
   Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
-# from .data_prep import data_prep_registry
 from .data_prep import DataPrep
 from .voxceleb2 import VoxCeleb2DataPrep
+from .voxceleb1 import VoxCeleb1DataPrep
diff --git a/hyperion/data_prep/voxceleb1.py b/hyperion/data_prep/voxceleb1.py
new file mode 100644
index 00000000..00b2e380
--- /dev/null
+++ b/hyperion/data_prep/voxceleb1.py
@@ -0,0 +1,338 @@
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+import re
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+import glob
+
+import numpy as np
+import pandas as pd
+from jsonargparse import ActionYesNo
+from tqdm import tqdm
+
+from ..utils import ClassInfo, Dataset, RecordingSet, SegmentSet
+from ..utils.misc import PathLike, urlretrieve_progress
+from .data_prep import DataPrep
+
+
+class VoxCeleb1DataPrep(DataPrep):
+    """Class for preparing the VoxCeleb1 database into tables.
+    It prepares the full VoxCeleb1 either for training, or for testing
+    with the Original/Entire/Hard trial lists.
+    We don't consider preparing dev for training and test for the
+    Original-test protocol separately.
+
+    Attributes:
+      corpus_dir: input data directory
+      task: train/test
+      cat_videos: concatenate utterances from the same video.
+      output_dir: output data directory
+      use_kaldi_ids: puts speaker-id in front of segment id like Kaldi
+      target_sample_freq: target sampling frequency to convert the audios to.
+    """
+
+    def __init__(
+        self,
+        corpus_dir: PathLike,
+        task: str,
+        cat_videos: bool,
+        output_dir: PathLike,
+        use_kaldi_ids: bool,
+        target_sample_freq: int,
+        num_threads: int = 10,
+    ):
+        use_kaldi_ids = True
+        super().__init__(
+            corpus_dir, output_dir, use_kaldi_ids, target_sample_freq, num_threads
+        )
+
+        self.task = task
+        assert (
+            cat_videos == False or task == "train"
+        ), "cat-videos is only available for train task"
+
+        self.cat_videos = cat_videos
+
+    @staticmethod
+    def dataset_name():
+        return "voxceleb1"
+
+    @staticmethod
+    def add_class_args(parser):
+        DataPrep.add_class_args(parser)
+        parser.add_argument(
+            "--task",
+            default="test",
+            choices=["test", "train"],
+            help="""whether we prepare the data for [test, train]""",
+        )
+        parser.add_argument(
+            "--cat-videos",
+            default=False,
+            action=ActionYesNo,
+            help="""concatenate utterances from the same video.""",
+        )
+
+    def _get_metadata(self):
+        file_name = "vox1_meta.csv"
+        file_path = self.corpus_dir / file_name
+        if not file_path.exists():
+            file_path = self.output_dir / file_name
+            if not file_path.exists():
+                url = "https://www.openslr.org/resources/49/vox1_meta.csv"
+                file_path, _ = urlretrieve_progress(url, file_path, desc=file_name)
+
+        df_meta = pd.read_csv(file_path, sep="\t")
+        df_meta.rename(columns=str.strip, inplace=True)
+        df_meta = df_meta.applymap(lambda x: str.strip(x) if isinstance(x, str) else x)
+        df_meta.set_index("VoxCeleb1 ID", inplace=True)
+        return df_meta
+
+    def _get_langs_est(self):
+        file_name = "lang_vox1_final.csv"
+        file_path = self.corpus_dir / file_name
+        if not file_path.exists():
+            file_path = self.output_dir / file_name
+            if not file_path.exists():
+                url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv"
+                file_path, _ = urlretrieve_progress(url, file_path, desc=file_name)
+
+        df_lang = pd.read_csv(file_path, sep=",")
+
+        if self.cat_videos:
+
+            def get_video(x):
+                x = re.sub("/[^/]*.wav$", "", x)
+                return re.sub("/", "-", x)
+
+        elif self.use_kaldi_ids:
+
+            def get_video(x):
+                x = re.sub(".wav$", "", x)
+                return re.sub("/", "-", x)
+
+        else:
+
+            def get_video(x):
+                x = re.sub(".wav$", "", x)
+                x = re.sub("^[^/]*/", "", x)
+                return re.sub("/", "-", x)
+
+        df_lang["id"] = df_lang["filename"].apply(get_video)
+        df_lang.drop(["filename"], axis=1, inplace=True)
+        df_lang.drop_duplicates(inplace=True)
+        df_lang.set_index("id", inplace=True)
+        df_lang["lang"] = df_lang["lang"].apply(str.lower)
+        return df_lang
+
+    @staticmethod
+    def make_cat_list(lists_cat_dir, rec_id, rec_files, video_idx, i):
+        list_file = lists_cat_dir / f"{rec_id}.txt"
+        with open(list_file, "w") as fw:
+            rec_idx = (video_idx == i).nonzero()[0]
+            recs_i = [f"file {rec_files[j]}" for j in rec_idx]
+            recs_i.sort()
+            recs_i = "\n".join(recs_i)
+            fw.write(f"{recs_i}\n")
+
+        file_path = (
+            f"ffmpeg -v 8 -f concat -safe 0 -i {list_file} -f wav -acodec pcm_s16le -|"
+        )
+        return file_path
+
+    def make_trials(self):
+        url_base = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta"
+        trials_file_names = [
+            "veri_test2.txt",
+            "list_test_hard2.txt",
+            "list_test_all2.txt",
+        ]
+        trials_names = ["trials_o", "trials_h", "trials_e"]
+
+        trials = {}
+        dfs = []
+        logging.info("making trials")
+        for trial_name, file_name in zip(trials_names, trials_file_names):
+            file_path = self.corpus_dir / file_name
+            if not file_path.exists():
+                file_path = self.output_dir / file_name
+                if not file_path.exists():
+                    url = f"{url_base}/{file_name}"
+                    file_path, _ = urlretrieve_progress(url, file_path,
desc=file_name) + + df_in = pd.read_csv( + file_path, + header=None, + sep=" ", + names=["key", "enroll_file", "test_file"], + ) + key = ["target" if k == 1 else "nontarget" for k in df_in["key"]] + + def get_modelid(s): + s = re.sub(r"\.wav", "", s) + return re.sub(r"/", "-", s) + + if self.use_kaldi_ids: + get_segmentid = get_modelid + else: + + def get_segmentid(s): + s = get_modelid(s) + return re.sub(r"[^-]*-", "", s) + + modelid = [get_modelid(f) for f in df_in["enroll_file"]] + segmentid = [get_segmentid(f) for f in df_in["test_file"]] + df_out = pd.DataFrame( + {"modelid": modelid, "segmentid": segmentid, "targettype": key} + ) + df_out.sort_values(by=["modelid", "segmentid"], inplace=True) + file_path = self.output_dir / f"{trial_name}.csv" + df_out.to_csv(file_path, index=False) + dfs.append(df_out) + trials[trial_name] = file_path + + df_out = pd.concat(dfs, ignore_index=True) + df_out.sort_values(by=["modelid", "segmentid"], inplace=True) + file_path = self.output_dir / "trials.csv" + df_out.to_csv(file_path, index=False) + trials["trials"] = file_path + + logging.info("making enrollment map") + modelid = df_out["modelid"].sort_values().unique() + if self.use_kaldi_ids: + segmentid = modelid + else: + segmentid = [re.sub(r"[^-]*-", "", s) for s in modelid] + + df_out = pd.DataFrame({"modelid": modelid, "segmentid": segmentid}) + file_path = self.output_dir / "enrollment.csv" + df_out.to_csv(file_path, index=False) + enrollments = {"enrollment": file_path} + + return enrollments, trials + + def prepare(self): + + logging.info("getting audio meta-data") + df_meta = self._get_metadata() + logging.info("getting language estimations") + df_lang = self._get_langs_est() + rec_dir = self.corpus_dir + logging.info("searching audio files in %s", str(rec_dir)) + rec_files = list(rec_dir.glob("**/*.wav")) + if not rec_files: + # symlinks? 
try glob
+            rec_files = [
+                Path(f) for f in glob.iglob(f"{rec_dir}/**/*.wav", recursive=True)
+            ]
+
+        speakers = [f.parents[1].name for f in rec_files]
+        video_ids = [f.parent.name for f in rec_files]
+        if self.cat_videos:
+            lists_cat_dir = self.output_dir / "lists_cat"
+            lists_cat_dir.mkdir(exist_ok=True, parents=True)
+            uniq_video_ids, uniq_video_idx, video_idx = np.unique(
+                video_ids, return_index=True, return_inverse=True
+            )
+            rec_ids = uniq_video_ids
+            speakers = [speakers[i] for i in uniq_video_idx]
+            rec_ids = [f"{s}-{v}" for s, v in zip(speakers, uniq_video_ids)]
+
+            file_paths = []
+            futures = []
+            logging.info("making video cat lists")
+            logging.info("submitting threads...")
+            with ThreadPoolExecutor(max_workers=self.num_threads) as pool:
+                for i, rec_id in tqdm(enumerate(rec_ids)):
+                    future = pool.submit(
+                        VoxCeleb1DataPrep.make_cat_list,
+                        lists_cat_dir,
+                        rec_id,
+                        rec_files,
+                        video_idx,
+                        i,
+                    )
+                    futures.append(future)
+
+            logging.info("waiting for threads...")
+            file_paths = [f.result() for f in tqdm(futures)]
+            video_ids = uniq_video_ids
+
+        else:
+            file_names = [f.with_suffix("").name for f in rec_files]
+            if self.use_kaldi_ids:
+                rec_ids = [
+                    f"{s}-{v}-{f}" for s, v, f in zip(speakers, video_ids, file_names)
+                ]
+            else:
+                rec_ids = [f"{v}-{f}" for v, f in zip(video_ids, file_names)]
+
+            file_paths = [str(r) for r in rec_files]
+
+        logging.info("making RecordingSet")
+        recs = pd.DataFrame({"id": rec_ids, "storage_path": file_paths})
+        recs = RecordingSet(recs)
+        recs.sort()
+
+        logging.info("getting recording durations")
+        self.get_recording_duration(recs)
+        if self.target_sample_freq:
+            recs["target_sample_freq"] = self.target_sample_freq
+
+        logging.info("making SegmentsSet")
+        segments = pd.DataFrame(
+            {
+                "id": rec_ids,
+                "video_ids": video_ids,
+                "speaker": speakers,
+                "gender": df_meta.loc[speakers, "Gender"],
+                "nationality": df_meta.loc[speakers, "Nationality"],
+                "language_est": [
+                    df_lang.loc[r, "lang"] if r in df_lang.index else "N/A"
+                    for r in rec_ids
+                ],
+                "language_est_conf": [
+                    df_lang.loc[r, "confidence"] if r in df_lang.index else "N/A"
+                    for r in rec_ids
+                ],
+                "duration": recs.loc[rec_ids, "duration"].values,
+            }
+        )
+        segments = SegmentSet(segments)
+        segments.sort()
+
+        logging.info("making speaker info file")
+        uniq_speakers = np.unique(speakers)
+        speakers = pd.DataFrame(
+            {
+                "id": uniq_speakers,
+                "vgg_id": df_meta.loc[uniq_speakers, "VGGFace1 ID"],
+                "gender": df_meta.loc[uniq_speakers, "Gender"],
+                "nationality": df_meta.loc[uniq_speakers, "Nationality"],
+            }
+        )
+        speakers = ClassInfo(speakers)
+
+        logging.info("making language info file")
+        languages = np.unique(df_lang["lang"])
+        languages = ClassInfo(pd.DataFrame({"id": languages}))
+
+        if self.task == "test":
+            enrollments, trials = self.make_trials()
+
+        logging.info("making dataset")
+        dataset = Dataset(
+            segments,
+            classes={"speaker": speakers, "languages": languages},
+            recordings={"recordings": recs},
+            enrollments=enrollments,
+            trials=trials,
+            sparse_trials=False,
+        )
+        logging.info("saving dataset at %s", self.output_dir)
+        dataset.save(self.output_dir)
+        logging.info(
+            "dataset contains %d segments, %d speakers", len(segments), len(speakers)
+        )
diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py
index a1a9f0c3..1a32420f 100644
--- a/hyperion/data_prep/voxceleb2.py
+++ b/hyperion/data_prep/voxceleb2.py
@@ -3,6 +3,7 @@
   Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
 import logging
+import glob
 import re
 from concurrent.futures
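Note: the id schemes used by the preparation code can be summarized with a short sketch; the speaker/video/file names are hypothetical and the regexes are simplified variants of get_modelid/get_segmentid above:

    import re

    spk, video, utt = "id10001", "1zcIwhmdeo4", "00001"
    kaldi_id = f"{spk}-{video}-{utt}"  # use_kaldi_ids=True
    plain_id = f"{video}-{utt}"        # use_kaldi_ids=False
    cat_id = f"{spk}-{video}"          # cat_videos=True: one id per video

    # trial lists store paths like "id10001/1zcIwhmdeo4/00001.wav"
    path = f"{spk}/{video}/{utt}.wav"
    modelid = re.sub(r"/", "-", re.sub(r"\.wav$", "", path))
    assert modelid == kaldi_id
    segmentid = re.sub(r"[^-]*-", "", modelid, count=1)  # drop the speaker prefix
    assert segmentid == plain_id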
import ThreadPoolExecutor from pathlib import Path @@ -39,8 +40,7 @@ def __init__( target_sample_freq: int, num_threads: int = 10, ): - if cat_videos: - use_kaldi_ids = True + use_kaldi_ids = True super().__init__( corpus_dir, output_dir, use_kaldi_ids, target_sample_freq, num_threads ) @@ -143,6 +143,12 @@ def prepare(self): rec_dir = self.corpus_dir / self.subset logging.info("searching audio files in %s", str(rec_dir)) rec_files = list(rec_dir.glob("**/*.m4a")) + if not rec_files: + # symlinks? try glob + rec_files = [ + Path(f) for f in glob.iglob(f"{rec_dir}/**/*.wav", recursive=True) + ] + speakers = [f.parents[1].name for f in rec_files] video_ids = [f.parent.name for f in rec_files] if self.cat_videos: @@ -176,7 +182,7 @@ def prepare(self): video_ids = uniq_video_ids else: - file_names = [f.name for f in rec_files] + file_names = [f.with_suffix("").name for f in rec_files] if self.use_kaldi_ids: rec_ids = [ f"{s}-{v}-{f}" for s, v, f in zip(speakers, video_ids, file_names) diff --git a/hyperion/np/classifiers/__init__.py b/hyperion/np/classifiers/__init__.py index d9d02ed0..60582016 100644 --- a/hyperion/np/classifiers/__init__.py +++ b/hyperion/np/classifiers/__init__.py @@ -10,4 +10,4 @@ from .linear_svmc import LinearSVMC from .logistic_regression import LogisticRegression from .q_scoring_homo_gbe import QScoringHomoGBE -from .svmc import GaussianSVMC +from .svmc import SVMC diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index 5e38494f..8fe67792 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -42,8 +42,9 @@ def _standardize_weights(self, x, x_lengths=None, weights=None): multiplied by the input data. """ if weights is None: + time_dim = self.dim if self.dim >= 0 else x.dim() + self.dim return seq_lengths_to_mask( - x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=self.dim + x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=time_dim ) if weights.dim() == x.dim(): @@ -599,7 +600,7 @@ def _standardize_weights(self, x, x_lengths=None, weights=None): """standardizes the weights to have shape (batch, max_length).""" if weights is None: return seq_lengths_to_mask( - x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=1 + x_lengths, x.size(self.dim), dtype=x.dtype, time_dim=2 ) if weights.dim() == x.dim(): @@ -797,7 +798,7 @@ def forward(self, x, x_lengths=None, weights=None): if attn.dtype == torch.half: min_value = -65504 else: - min_value = -1e200 + min_value = -1e20 mask = weights.eq(0) attn = attn.masked_fill(mask, min_value) diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 8556104a..d67785d2 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -355,7 +355,7 @@ def forward_hid_feats( max_in_length = x.size(-1) x = self._pre_enc(x) h_enc, x = self.encoder_net.forward_hid_feats( - x, return_enc_layers, return_logits=True + x, return_enc_layers, return_output=True ) output = {"h_enc": h_enc} if not return_logits and return_classif_layers is None: @@ -363,7 +363,7 @@ def forward_hid_feats( x, x_lengths = self._post_enc(x, x_lengths, max_in_length) p = self.pool_net(x, x_lengths=x_lengths) - h_classif, y_pred = self.classif_net.forward_hid_feats( + h_classif = self.classif_net.forward_hid_feats( p, y, return_classif_layers, return_logits=return_logits ) if return_logits: @@ -750,7 +750,7 @@ def add_class_args(parser, prefix=None, skip=set()): ) try: - parser.add_argument("--hid-act", 
default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/audio_feats_mvn.py b/hyperion/torch/narchs/audio_feats_mvn.py index 160ee61b..a9ad224e 100644 --- a/hyperion/torch/narchs/audio_feats_mvn.py +++ b/hyperion/torch/narchs/audio_feats_mvn.py @@ -32,7 +32,12 @@ def __init__( if mvn is not None: mvn = MVN.filter_args(**mvn) self.mvn_cfg = mvn - if mvn["norm_mean"] or mvn["norm_var"]: + if ( + ("norm_mean" in mvn) + and mvn["norm_mean"] + or ("norm_var" in mvn) + and mvn["norm_var"] + ): self.mvn = MVN(**mvn) self.spec_augment = None @@ -79,7 +84,7 @@ def forward(self, x, x_lengths=None): if self.trans: f = f.transpose(1, 2).contiguous() - return f + return f, f_lengths def get_config(self): config = { diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index 9f9b280b..e5d90f4f 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -402,7 +402,7 @@ def add_class_args(parser, prefix=None): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/dc1d_decoder.py b/hyperion/torch/narchs/dc1d_decoder.py index f5ab74d5..172a3d70 100644 --- a/hyperion/torch/narchs/dc1d_decoder.py +++ b/hyperion/torch/narchs/dc1d_decoder.py @@ -31,7 +31,7 @@ def __init__( conv_strides=2, conv_dilations=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, use_norm=True, @@ -389,7 +389,7 @@ def add_class_args(parser, prefix=None, head_channels=False): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/dc1d_encoder.py b/hyperion/torch/narchs/dc1d_encoder.py index 0c331a5e..6cf7f4ca 100644 --- a/hyperion/torch/narchs/dc1d_encoder.py +++ b/hyperion/torch/narchs/dc1d_encoder.py @@ -28,7 +28,7 @@ def __init__( conv_strides=2, conv_dilations=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, use_norm=True, @@ -362,7 +362,7 @@ def add_class_args(parser, prefix=None, head_channels=False, in_feats=False): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/dc2d_decoder.py b/hyperion/torch/narchs/dc2d_decoder.py index 4106cbfd..68679e0b 100644 --- a/hyperion/torch/narchs/dc2d_decoder.py +++ b/hyperion/torch/narchs/dc2d_decoder.py @@ -31,7 +31,7 @@ def __init__( conv_strides=2, conv_dilations=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, use_norm=True, @@ -410,7 +410,7 @@ def add_class_args(parser, prefix=None, head_channels=False): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/dc2d_encoder.py b/hyperion/torch/narchs/dc2d_encoder.py index ce7b9677..bc7e4b33 100644 --- a/hyperion/torch/narchs/dc2d_encoder.py +++ b/hyperion/torch/narchs/dc2d_encoder.py @@ -29,7 +29,7 @@ def __init__( conv_strides=2, conv_dilations=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, 
use_norm=True, @@ -367,7 +367,7 @@ def add_class_args(parser, prefix=None, head_channels=False): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/fcnet.py b/hyperion/torch/narchs/fcnet.py index cdbf1940..a47f304e 100644 --- a/hyperion/torch/narchs/fcnet.py +++ b/hyperion/torch/narchs/fcnet.py @@ -125,7 +125,7 @@ def __init__( in_units, hid_units, out_units=0, - hid_act={"name": "relu6", "inplace": True}, + hid_act={"name": "relu", "inplace": True}, out_act=None, dropout_rate=0, norm_layer=None, diff --git a/hyperion/torch/narchs/resnet.py b/hyperion/torch/narchs/resnet.py index 858cf4ea..5d3b9793 100644 --- a/hyperion/torch/narchs/resnet.py +++ b/hyperion/torch/narchs/resnet.py @@ -10,10 +10,16 @@ import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear -from ..layer_blocks import (Res2NetBasicBlock, Res2NetBNBlock, - ResNetBasicBlock, ResNetBNBlock, - ResNetEndpointBlock, ResNetInputBlock, - SEResNetBasicBlock, SEResNetBNBlock) +from ..layer_blocks import ( + Res2NetBasicBlock, + Res2NetBNBlock, + ResNetBasicBlock, + ResNetBNBlock, + ResNetEndpointBlock, + ResNetInputBlock, + SEResNetBasicBlock, + SEResNetBNBlock, +) from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF from ..utils import scale_seq_lengths, seq_lengths_to_mask @@ -69,7 +75,7 @@ def __init__( conv_channels=64, base_channels=64, out_units=0, - hid_act={"name": "relu6", "inplace": True}, + hid_act={"name": "relu", "inplace": True}, out_act=None, in_kernel_size=7, in_stride=2, diff --git a/hyperion/torch/narchs/resnet1d_decoder.py b/hyperion/torch/narchs/resnet1d_decoder.py index 0c577174..9332724f 100644 --- a/hyperion/torch/narchs/resnet1d_decoder.py +++ b/hyperion/torch/narchs/resnet1d_decoder.py @@ -9,9 +9,13 @@ import torch import torch.nn as nn -from ..layer_blocks import (DC1dDecBlock, ResNet1dBasicDecBlock, - ResNet1dBNDecBlock, SEResNet1dBasicDecBlock, - SEResNet1dBNDecBlock) +from ..layer_blocks import ( + DC1dDecBlock, + ResNet1dBasicDecBlock, + ResNet1dBNDecBlock, + SEResNet1dBasicDecBlock, + SEResNet1dBNDecBlock, +) from ..layers import ActivationFactory as AF from ..layers import ICNR1d from ..layers import NormLayer1dFactory as NLF @@ -34,7 +38,7 @@ def __init__( resb_dilations=1, resb_groups=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, se_r=16, @@ -450,7 +454,7 @@ def add_class_args(parser, prefix=None): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/resnet1d_encoder.py b/hyperion/torch/narchs/resnet1d_encoder.py index 5bdad186..97b244f3 100644 --- a/hyperion/torch/narchs/resnet1d_encoder.py +++ b/hyperion/torch/narchs/resnet1d_encoder.py @@ -12,10 +12,16 @@ import torch import torch.nn as nn -from ..layer_blocks import (DC1dEncBlock, Res2Net1dBasicBlock, - Res2Net1dBNBlock, ResNet1dBasicBlock, - ResNet1dBNBlock, ResNet1dEndpoint, - SEResNet1dBasicBlock, SEResNet1dBNBlock) +from ..layer_blocks import ( + DC1dEncBlock, + Res2Net1dBasicBlock, + Res2Net1dBNBlock, + ResNet1dBasicBlock, + ResNet1dBNBlock, + ResNet1dEndpoint, + SEResNet1dBasicBlock, + SEResNet1dBNBlock, +) from ..layers import ActivationFactory as AF from ..layers import NormLayer1dFactory as NLF from ..utils import seq_lengths_to_mask 
@@ -37,7 +43,7 @@ def __init__( resb_dilations=1, resb_groups=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, drop_connect_rate=0, @@ -472,7 +478,7 @@ def forward_hid_feats(self, x, x_lengths=None, layers=None, return_output=False) if self.head_channels > 0: x = self.head_block(x) - return x + return h, x def get_config(self): @@ -675,7 +681,7 @@ def add_class_args(parser, prefix=None, skip=set(["in_feats"])): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/resnet2d_decoder.py b/hyperion/torch/narchs/resnet2d_decoder.py index 426b37f5..0afa1acc 100644 --- a/hyperion/torch/narchs/resnet2d_decoder.py +++ b/hyperion/torch/narchs/resnet2d_decoder.py @@ -10,9 +10,13 @@ import torch import torch.nn as nn -from ..layer_blocks import (DC2dDecBlock, ResNet2dBasicDecBlock, - ResNet2dBNDecBlock, SEResNet2dBasicDecBlock, - SEResNet2dBNDecBlock) +from ..layer_blocks import ( + DC2dDecBlock, + ResNet2dBasicDecBlock, + ResNet2dBNDecBlock, + SEResNet2dBasicDecBlock, + SEResNet2dBNDecBlock, +) from ..layers import ActivationFactory as AF from ..layers import ICNR2d from ..layers import NormLayer2dFactory as NLF @@ -35,7 +39,7 @@ def __init__( resb_dilations=1, resb_groups=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, se_r=16, @@ -457,7 +461,7 @@ def add_class_args(parser, prefix=None): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/resnet2d_encoder.py b/hyperion/torch/narchs/resnet2d_encoder.py index 84e6599e..a7fd047e 100644 --- a/hyperion/torch/narchs/resnet2d_encoder.py +++ b/hyperion/torch/narchs/resnet2d_encoder.py @@ -11,10 +11,15 @@ import torch import torch.nn as nn -from ..layer_blocks import (DC2dEncBlock, Res2Net2dBasicBlock, - Res2Net2dBNBlock, ResNet2dBasicBlock, - ResNet2dBNBlock, SEResNet2dBasicBlock, - SEResNet2dBNBlock) +from ..layer_blocks import ( + DC2dEncBlock, + Res2Net2dBasicBlock, + Res2Net2dBNBlock, + ResNet2dBasicBlock, + ResNet2dBNBlock, + SEResNet2dBasicBlock, + SEResNet2dBNBlock, +) from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF from ..utils import seq_lengths_to_mask @@ -38,7 +43,7 @@ class ResNet2dEncoder(NetArch): resb_dilations=1, resb_groups=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, se_r=16, @@ -65,7 +70,7 @@ def __init__( resb_dilations=1, resb_groups=1, head_channels=0, - hid_act="relu6", + hid_act="relu", head_act=None, dropout_rate=0, se_r=16, @@ -511,7 +516,7 @@ def add_class_args(parser, prefix=None, skip=set()): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/resnet_factory.py b/hyperion/torch/narchs/resnet_factory.py index 2d17a6d7..35ed9af0 100644 --- a/hyperion/torch/narchs/resnet_factory.py +++ b/hyperion/torch/narchs/resnet_factory.py @@ -146,7 +146,7 @@ def create( conv_channels=64, base_channels=64, out_units=0, - hid_act={"name": "relu6", "inplace": True}, + hid_act={"name": "relu", "inplace": True}, out_act=None, in_kernel_size=7, in_stride=2, @@ -341,7 +341,7 @@ def add_class_args(parser, prefix=None): ) try: 
- parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/spinenet.py b/hyperion/torch/narchs/spinenet.py index 117c0733..4349dbe1 100644 --- a/hyperion/torch/narchs/spinenet.py +++ b/hyperion/torch/narchs/spinenet.py @@ -11,9 +11,17 @@ import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear -from ..layer_blocks import (BlockSpec, Res2NetBasicBlock, Res2NetBNBlock, - ResNetBasicBlock, ResNetBNBlock, ResNetInputBlock, - SpineConv, SpineEndpoints, SpineResample) +from ..layer_blocks import ( + BlockSpec, + Res2NetBasicBlock, + Res2NetBNBlock, + ResNetBasicBlock, + ResNetBNBlock, + ResNetInputBlock, + SpineConv, + SpineEndpoints, + SpineResample, +) from ..layers import ActivationFactory as AF from ..layers import NormLayer2dFactory as NLF from .net_arch import NetArch @@ -111,7 +119,7 @@ def __init__( do_endpoint_conv=True, concat_ax=3, upsampling_type="nearest", - hid_act={"name": "relu6", "inplace": True}, + hid_act={"name": "relu", "inplace": True}, out_act=None, in_kernel_size=7, in_stride=2, diff --git a/hyperion/torch/narchs/spinenet_factory.py b/hyperion/torch/narchs/spinenet_factory.py index 092cbd0e..871b37e9 100644 --- a/hyperion/torch/narchs/spinenet_factory.py +++ b/hyperion/torch/narchs/spinenet_factory.py @@ -44,7 +44,7 @@ def create( conv_channels=64, base_channels=64, out_units=0, - hid_act={"name": "relu6", "inplace": True}, + hid_act={"name": "relu", "inplace": True}, out_act=None, in_kernel_size=7, in_stride=2, @@ -243,7 +243,7 @@ def add_class_args(parser, prefix=None): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/tdnn_factory.py b/hyperion/torch/narchs/tdnn_factory.py index 901cc9d0..77f69b9c 100644 --- a/hyperion/torch/narchs/tdnn_factory.py +++ b/hyperion/torch/narchs/tdnn_factory.py @@ -21,7 +21,7 @@ def create( kernel_size=3, dilation=1, dilation_factor=1, - hid_act={"name": "relu6", "inplace": True}, + hid_act={"name": "relu", "inplace": True}, out_units=0, out_act=None, dropout_rate=0, @@ -194,7 +194,7 @@ def add_class_args(parser, prefix=None): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/narchs/transformer_encoder_v1.py b/hyperion/torch/narchs/transformer_encoder_v1.py index 4468185e..f8b50491 100644 --- a/hyperion/torch/narchs/transformer_encoder_v1.py +++ b/hyperion/torch/narchs/transformer_encoder_v1.py @@ -64,7 +64,7 @@ def __init__( in_layer_type="conv2d-sub", rel_pos_enc=False, causal_pos_enc=False, - hid_act="relu6", + hid_act="relu", norm_before=True, concat_after=False, padding_idx=-1, @@ -408,7 +408,7 @@ def add_class_args(parser, prefix=None, in_feats=False): ) try: - parser.add_argument("--hid-act", default="relu6", help="hidden activation") + parser.add_argument("--hid-act", default="relu", help="hidden activation") except: pass diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 9541d7b0..52474baa 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -109,10 +109,10 @@ def train_epoch(self, data_loader): input_data, target = 
tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) with torch.no_grad(): - feats = self.feat_extractor(input_data) + feats, feats_lengths = self.feat_extractor(input_data) with amp.autocast(enabled=self.use_amp): - output = self.model(feats, y=target) + output = self.model(feats, feats_lengths, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: @@ -162,9 +162,9 @@ def validation_epoch(self, data_loader, swa_update_bn=False): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - feats = self.feat_extractor(input_data) + feats, feats_lengths = self.feat_extractor(input_data) with amp.autocast(enabled=self.use_amp): - output = self.model(feats) + output = self.model(feats, feats_lengths) loss = self.loss(output, target) batch_metrics["loss"] = loss.mean().item() diff --git a/hyperion/torch/utils/masking.py b/hyperion/torch/utils/masking.py index fb93b439..934b4b90 100644 --- a/hyperion/torch/utils/masking.py +++ b/hyperion/torch/utils/masking.py @@ -17,9 +17,7 @@ def scale_seq_lengths(lengths, max_out_length, max_in_length=None): if max_in_length == max_out_length: return lengths - return torch.div(lengths * max_out_length, - max_in_length, - rounding_mode="floor") + return torch.div(lengths * max_out_length, max_in_length, rounding_mode="floor") def seq_lengths_to_mask(lengths, max_length=None, dtype=None, time_dim=1): @@ -29,7 +27,7 @@ def seq_lengths_to_mask(lengths, max_length=None, dtype=None, time_dim=1): lengths: sequence lengths with shape=(batch,). If None, it returns None max_length: maximum length of the sequence. dtype: dtype for the mask. - time_dim: dimension corresponding to time in the mask. This will + time_dim: dimension > 0 corresponding to time in the mask. This will return a view of the mask which will adapt to the shape of the tensor where we want to apply the mask. This has to be a positive integer. 
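The hunks above switch the trainers to consuming (feats, feats_lengths) pairs and tighten the masking helpers they rely on. A minimal usage sketch of the two helpers (illustrative only; it assumes just the signatures and docstring shown in this diff, and the exact mask shape is implementation-defined):

```python
import torch

from hyperion.torch.utils.masking import scale_seq_lengths, seq_lengths_to_mask

x = torch.randn(4, 100, 80)                # (batch, time, feat_dim)
lengths = torch.tensor([100, 80, 60, 30])  # valid frames per sequence

# Mask with time on dimension 1; per the docstring it is returned as a view
# that broadcasts against x, so padded frames can be zeroed directly.
mask = seq_lengths_to_mask(lengths, max_length=x.size(1), dtype=x.dtype, time_dim=1)
x_masked = x * mask

# If a network downsamples time 4x (100 -> 25 frames), rescale lengths to match:
# floor(lengths * 25 / 100) -> tensor([25, 20, 15, 7])
out_lengths = scale_seq_lengths(lengths, max_out_length=25, max_in_length=100)
```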
@@ -40,6 +38,7 @@ def seq_lengths_to_mask(lengths, max_length=None, dtype=None, time_dim=1): if lengths is None: return None + assert time_dim > 0 assert lengths.dim() == 1 if max_length is None: diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index db035987..51b476aa 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -5,6 +5,7 @@ from .class_info import ClassInfo from .dataset import Dataset +from .enrollment_map import EnrollmentMap from .feature_set import FeatureSet from .hyp_dataclass import HypDataClass from .kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix @@ -12,6 +13,7 @@ from .recording_set import RecordingSet from .rttm import RTTM from .scp_list import SCPList + # from .ext_segment_list import ExtSegmentList from .segment_list import SegmentList from .segment_set import SegmentSet diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py index efb7c114..e6c9e861 100644 --- a/hyperion/utils/dataset.py +++ b/hyperion/utils/dataset.py @@ -4,7 +4,7 @@ """ from pathlib import Path -from typing import Dict, Optional +from typing import Dict, Optional, Union import yaml @@ -13,41 +13,184 @@ from .misc import PathLike from .recording_set import RecordingSet from .segment_set import SegmentSet +from .enrollment_map import EnrollmentMap +from .trial_key import TrialKey +from .trial_ndx import TrialNdx +from .sparse_trial_key import SparseTrialKey class Dataset: """ Class that contains all objects (segments, recordings, features, class_infos) that form a dataset + + Attributes: + segments: SegmentSet object or path to it. + classes: Dictionary of ClassInfo objects or paths to them + recordings: Dictionary of RecordingSet objects or paths to them + features: Dictionary of FeatureSet objects or paths to them + enrollments: Dictionary of EnrollmentMap objects or paths to them + trials: Dictionary of TrialKey/TrialNdx/SparseTrialKey objects + or paths to them + sparse_trials: load trial keys using the SparseTrialKey class instead + of the TrialKey class.
+ table_sep: Column separator when reading/writing tables + """ def __init__( self, - segments: SegmentSet, - classes: Optional[Dict[str, ClassInfo]] = None, - recordings: Optional[Dict[str, RecordingSet]] = None, - features: Optional[Dict[str, FeatureSet]] = None, + segments: Union[SegmentSet, PathLike], + classes: Optional[Dict[str, Union[ClassInfo, PathLike]]] = None, + recordings: Optional[Dict[str, Union[RecordingSet, PathLike]]] = None, + features: Optional[Dict[str, Union[FeatureSet, PathLike]]] = None, + enrollments: Optional[Dict[str, Union[EnrollmentMap, PathLike]]] = None, + trials: Optional[ + Dict[str, Union[TrialKey, TrialNdx, SparseTrialKey, PathLike]] + ] = None, + sparse_trials: bool = False, + table_sep: Optional[str] = None, ): - self._segments = segments - self._classes = classes - self._recordings = recordings - self._features = features - @property - def segments(self): + if isinstance(segments, SegmentSet): + self._segments = segments + self._segments_path = None + else: + assert isinstance(segments, (str, Path)) + self._segments = None + self._segments_path = Path(segments) + + self._classes, self._classes_paths = self._parse_dict_args(classes, ClassInfo) + + self._recordings, self._recordings_paths = self._parse_dict_args( + recordings, RecordingSet + ) + + self._features, self._features_paths = self._parse_dict_args( + features, FeatureSet + ) + self._enrollments, self._enrollments_paths = self._parse_dict_args( + enrollments, EnrollmentMap, + ) + self._trials, self._trials_paths = self._parse_dict_args( + trials, (TrialKey, TrialNdx, SparseTrialKey), + ) + + self.sparse_trials = sparse_trials + self.table_sep = table_sep + + def _parse_dict_args(self, data, types): + if data is None: + return None, None + + assert isinstance(data, dict) + objects = {k: (v if isinstance(v, types) else None) for k, v in data.items()} + paths = { + k: (v if isinstance(v, (str, Path)) else None) for k, v in data.items() + } + + return objects, paths + + def segments(self, keep_loaded: bool = True): + if self._segments is None: + assert self._segments_path is not None + segments = SegmentSet.load(self._segments_path, sep=self.table_sep) + if keep_loaded: + self._segments = segments + return segments + return self._segments - @property - def recordings(self): - return self._recordings + def recordings_value(self, key: str, keep_loaded: bool = True): + if self._recordings[key] is None: + assert self._recordings_paths[key] is not None + recordings = RecordingSet.load( + self._recordings_paths[key], sep=self.table_sep + ) + if keep_loaded: + self._recordings[key] = recordings + + return self._recordings[key] - @property - def features(self): - return self._features + def features_value(self, key: str, keep_loaded: bool = True): + if self._features[key] is None: + assert self._features_paths[key] is not None + features = FeatureSet.load(self._features_paths[key], sep=self.table_sep) + if keep_loaded: + self._features[key] = features + + return self._features[key] + + def classes_value(self, key: str, keep_loaded: bool = True): + if self._classes[key] is None: + assert self._classes_paths[key] is not None + classes = ClassInfo.load(self._classes_paths[key], sep=self.table_sep) + if keep_loaded: + self._classes[key] = classes + + return self._classes[key] + + def enrollments_value(self, key: str, keep_loaded: bool = True): + if self._enrollments[key] is None: + assert self._enrollments_paths[key] is not None + enrollments = EnrollmentMap.load( + self._enrollments_paths[key], sep=self.table_sep
) + if keep_loaded: + self._enrollments[key] = enrollments + + return self._enrollments[key] + + def trials_value(self, key: str, keep_loaded: bool = True): + if self._trials[key] is None: + assert self._trials_paths[key] is not None + try: + if self.sparse_trials: + trials = SparseTrialKey.load(self._trials_paths[key]) + else: + trials = TrialKey.load(self._trials_paths[key]) + except: + trials = TrialNdx.load(self._trials_paths[key]) + + if keep_loaded: + self._trials[key] = trials + + return self._trials[key] + + def recordings(self, keep_loaded: bool = True): + if self._recordings is None: + yield from () + else: + for key in self._recordings.keys(): + yield key, self.recordings_value(key, keep_loaded) + + def features(self, keep_loaded: bool = True): + if self._features is None: + yield from () + else: + for key in self._features.keys(): + yield key, self.features_value(key, keep_loaded) + + def classes(self, keep_loaded: bool = True): + if self._classes is None: + yield from () + else: + for key in self._classes.keys(): + yield key, self.classes_value(key, keep_loaded) + + def enrollments(self, keep_loaded: bool = True): + if self._enrollments is None: + yield from () + else: + for key in self._enrollments.keys(): + yield key, self.enrollments_value(key, keep_loaded) - @property - def classes(self): - return self._classes + def trials(self, keep_loaded: bool = True): + if self._trials is None: + yield from () + else: + for key in self._trials.keys(): + yield key, self.trials_value(key, keep_loaded) @staticmethod def resolve_dataset_path(dataset_path): @@ -69,64 +212,128 @@ def resolve_file_path(dataset_dir, file_path): return dataset_dir / file_path - def save(self, dataset_path: PathLike): + def save( + self, + dataset_path: PathLike, + update_paths: bool = True, + table_sep: Optional[str] = None, + ): """Saves all the dataset objects. Args: - dataset_path: str/Path indicating directory - to save the dataset or .yaml file to save - the dataset info. + dataset_path: str/Path indicating directory + to save the dataset or .yaml file to save + the dataset info. 
+ update_paths: whether to update the file_paths in the + data structures in the Dataset object """ + table_sep = self.table_sep if table_sep is None else table_sep + if update_paths: + self.table_sep = table_sep + + table_ext = ".tsv" if table_sep == "\t" else ".csv" dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path) dataset = {} - if self.segments is not None: - file_name = "segments.csv" - dataset["segments"] = file_name - file_path = dataset_dir / file_name - self.segments.save(file_path) + file_name = f"segments{table_ext}" + dataset["segments"] = file_name + file_path = dataset_dir / file_name + self.segments().save(file_path, sep=table_sep) + if update_paths: + self._segments_path = file_path - if self.recordings is not None: - file_names = {} - for k, v in self.recordings.items(): - file_name = k + ".csv" - file_names[k] = file_name - file_path = dataset_dir / file_name - v.save(file_path) + file_names = {} + for k, v in self.recordings(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + v.save(file_path, sep=table_sep) + if update_paths: + self._recordings_paths[k] = file_path + if file_names: dataset["recordings"] = file_names - if self.features is not None: - file_names = {} - for k, v in self.features.items(): - file_name = k + ".csv" - file_names[k] = file_name - file_path = dataset_dir / file_name - v.save(file_path) + file_names = {} + for k, v in self.features(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + v.save(file_path, sep=table_sep) + if update_paths: + self._features_paths[k] = file_path + if file_names: dataset["features"] = file_names - if self.classes is not None: - file_names = {} - for k, v in self.classes.items(): - file_name = k + ".csv" - file_names[k] = file_name - file_path = dataset_dir / file_name - v.save(file_path) + file_names = {} + for k, v in self.classes(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + v.save(file_path, sep=table_sep) + if update_paths: + self._classes_paths[k] = file_path + if file_names: dataset["classes"] = file_names + file_names = {} + for k, v in self.enrollments(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + v.save(file_path, sep=table_sep) + if update_paths: + self._enrollments_paths[k] = file_path + + if file_names: + dataset["enrollments"] = file_names + + file_names = {} + for k, v in self.trials(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + v.save(file_path) + if update_paths: + self._trials_paths[k] = file_path + + if file_names: + dataset["trials"] = file_names + with open(dataset_file, "w") as f: yaml.dump(dataset, f) + def update_from_disk(self): + self.segments() + for k, v in self.recordings(): + pass + + for k, v in self.features(): + pass + + for k, v in self.classes(): + pass + + for k, v in self.enrollments(): + pass + + for k, v in self.trials(): + pass + @classmethod - def load(cls, dataset_path: PathLike): + def load( + cls, dataset_path: PathLike, lazy: bool = True, sparse_trials: bool = False + ): """Loads all the dataset objects. Args: dataset_path: str/Path indicating directory to save the dataset or .yaml file to save the dataset info. + lazy: load data structures lazily when they are needed.
+ sparse_trials: load trial keys using the SparseTrialKey class instead of the TrialKey class """ dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path) @@ -134,27 +341,79 @@ def load(cls, dataset_path: PathLike): dataset = yaml.safe_load(f) assert "segments" in dataset - segments = SegmentSet.load( - Dataset.resolve_file_path(dataset_dir, dataset["segments"]) - ) + segments = Dataset.resolve_file_path(dataset_dir, dataset["segments"]) classes = None recordings = None features = None + enrollments = None + trials = None if "classes" in dataset: classes = {} for k, v in dataset["classes"].items(): - classes[k] = ClassInfo.load(Dataset.resolve_file_path(dataset_dir, v)) + classes[k] = Dataset.resolve_file_path(dataset_dir, v) if "recordings" in dataset: recordings = {} for k, v in dataset["recordings"].items(): - recordings[k] = RecordingSet.load( - Dataset.resolve_file_path(dataset_dir, v) - ) + recordings[k] = Dataset.resolve_file_path(dataset_dir, v) if "features" in dataset: features = {} for k, v in dataset["features"].items(): - features[k] = FeatureSet.load(Dataset.resolve_file_path(dataset_dir, v)) + features[k] = Dataset.resolve_file_path(dataset_dir, v) + + if "enrollments" in dataset: + enrollments = {} + for k, v in dataset["enrollments"].items(): + enrollments[k] = Dataset.resolve_file_path(dataset_dir, v) + + if "trials" in dataset: + trials = {} + for k, v in dataset["trials"].items(): + trials[k] = Dataset.resolve_file_path(dataset_dir, v) + + dataset = cls( + segments, + classes, + recordings, + features, + enrollments, + trials, + sparse_trials=sparse_trials, + ) + if not lazy: + dataset.update_from_disk() + + return dataset + + # dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path) + # with open(dataset_file, "w") as f: + # dataset = yaml.safe_load(f) + + # assert "segments" in dataset + # segments = SegmentSet.load( + # Dataset.resolve_file_path(dataset_dir, dataset["segments"]) + # ) + # classes = None + # recordings = None + # features = None + # if "classes" in dataset: + # classes = {} + # for k, v in dataset["classes"]: + # classes[k] = ClassInfo.load(Dataset.resolve_file_path(dataset_dir, v)) + + # if "recordings" in dataset: + # recordings = {} + # for k, v in dataset["recordings"]: + # recordings[k] = RecordingSet.load( + # Dataset.resolve_file_path(dataset_dir, v) + # ) + + # if "features" in dataset: + # features = {} + # for k, v in dataset["features"]: + # features[k] = FeatureSet.load(Dataset.resolve_file_path(dataset_dir, v)) - return cls(segments, classes, recordings, features) + # dataset = cls(segments, classes, recordings, features) + # if not lazy: + # dataset.update_from_disk() diff --git a/hyperion/utils/enrollment_map.py b/hyperion/utils/enrollment_map.py new file mode 100644 index 00000000..024e5b74 --- /dev/null +++ b/hyperion/utils/enrollment_map.py @@ -0,0 +1,86 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import re +from collections import OrderedDict +from copy import deepcopy +from pathlib import Path + +import numpy as np +import pandas as pd + +from .list_utils import split_list, split_list_group_by_key +from .info_table import InfoTable + + +class EnrollmentMap(InfoTable): + """Class to store the mapping between enrollment ids + and segment ids + """ + + def __init__(self, df): + if "modelid" in df: + df.rename(columns={"modelid": "id"}, inplace=True) + super().__init__(df) + + def split(self, idx, num_parts): + """Splits the mapping into
num_parts and returns part idx. + + Args: + idx: Part to return from 1 to num_parts. + num_parts: Number of parts to split the list. + Lines with the same value in the "id" column + go to the same part. + + Returns: + EnrollmentMap object + """ + _, idx1 = split_list_group_by_key(self.df["id"], idx, num_parts) + + df = self.df.iloc[idx1] + return EnrollmentMap(df) + + def save(self, file_path, sep=None, nist_compatible=True): + if nist_compatible: + # For compatibility with NIST SRE files the index column "id" + # is saved as modelid + self.df.rename(columns={"id": "modelid"}, inplace=True) + + super().save(file_path, sep) + if nist_compatible: + self.df.rename(columns={"modelid": "id"}, inplace=True) + + @classmethod + def load(cls, file_path, sep=None): + """Loads EnrollmentMap from file. + + Args: + file_path: File to read the list. + sep: Column separator. If None, it is inferred + from the file extension. + Returns: + EnrollmentMap object + """ + file_path = Path(file_path) + ext = file_path.suffix + if ext in ["", ".scp"]: + # if no extension we load as kaldi utt2spk file + df = pd.read_csv( + file_path, + sep=" ", + header=None, + names=["segmentid", "modelid"], + dtype={"segmentid": str, "modelid": str}, + ) + df = df[["modelid", "segmentid"]] + else: + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + df = pd.read_csv(file_path, sep=sep) + + return cls(df) diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 5a4f27d2..6bcd4aca 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -119,7 +119,7 @@ def from_dict(cls, df_dict): @classmethod def load(cls, file_path, sep=None, name="class_id"): - """Loads utt2info list from text file. + """Loads table from file. Args: file_path: File to read the list. @@ -127,7 +127,7 @@ def load(cls, file_path, sep=None, name="class_id"): dtype: Dictionary with the dtypes of each column. name: name for the data to be loaded Returns: - Utt2Info object + InfoTable object """ file_path = Path(file_path) ext = file_path.suffix @@ -156,7 +156,7 @@ def sort(self, column="id", ascending=True): self.df.sort_values(by=column, inplace=True, ascending=ascending) def split(self, idx, num_parts, group_by=None): - """Splits SCPList into num_parts and return part idx. + """Splits the table into num_parts and returns part idx. Args: idx: Part to return from 1 to num_parts. @@ -177,13 +177,13 @@ def split(self, idx, num_parts, group_by=None): @classmethod def merge(cls, tables): - """Merges several Utt2Info tables. + """Merges several tables. Args: - info_lists: List of Utt2Info + info_lists: List of InfoTables Returns: - Utt2Info object concatenation the info_lists. + InfoTable object concatenating the info_lists. """ df_list = [table.df for table in tables] df = pd.concat(df_list) diff --git a/hyperion/utils/segment_set.py b/hyperion/utils/segment_set.py index d51edc34..1852d25d 100644 --- a/hyperion/utils/segment_set.py +++ b/hyperion/utils/segment_set.py @@ -7,6 +7,10 @@ class SegmentSet(InfoTable): + """Class to store information about speech segments. + Internally, it uses a pandas table.
+ """ + def __init__(self, df): super().__init__(df) if "start" in df and "recording_id" not in df: diff --git a/hyperion/utils/sparse_trial_key.py b/hyperion/utils/sparse_trial_key.py index 5afc72a0..1bc321a7 100644 --- a/hyperion/utils/sparse_trial_key.py +++ b/hyperion/utils/sparse_trial_key.py @@ -5,8 +5,10 @@ import copy import os.path as path +from pathlib import Path import numpy as np +import pandas as pd import scipy.sparse as sparse from .list_utils import * @@ -79,6 +81,28 @@ def save_txt(self, file_path): for r, c in zip(non.row, non.col): f.write("%s %s nontarget\n" % (self.model_set[r], self.seg_set[c])) + def save_table(self, file_path, sep=None): + """Saves object to txt file. + + Args: + file_path: File to write the list. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + with open(file_path, "w", encoding="utf-8") as f: + f.write(f"modelid{sep}segmentid{sep}targettype\n") + self.tar.eliminate_zeros() + self.non.eliminate_zeros() + tar = self.tar.tocoo() + for r, c in zip(tar.row, tar.col): + f.write(f"{self.model_set[r]}{sep}{self.seg_set[c]}{sep}target\n") + non = self.non.tocoo() + for r, c in zip(non.row, non.col): + f.write(f"{self.model_set[r]}{sep}{self.seg_set[c]}{sep}nontarget\n") + @classmethod def load_h5(cls, file_path): raise NotImplementedError() @@ -113,6 +137,40 @@ def load_txt(cls, file_path): non[item[0], item[1]] = True return cls(model_set, seg_set, tar.tocsr(), non.tocsr()) + @classmethod + def load_table(cls, file_path, sep=None): + """Loads object from txt file + + Args: + file_path: File to read the list. + + Returns: + TrialKey object. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + df = pd.read_csv(file_path, sep=sep) + models = df["modelid"].values + segments = df["segmentid"].values + is_tar = (df["targettype"] == "target").values + model_set, _, model_idx = np.unique( + models, return_index=True, return_inverse=True + ) + seg_set, _, seg_idx = np.unique( + segments, return_index=True, return_inverse=True + ) + tar = sparse.lil_matrix((len(model_set), len(seg_set)), dtype="bool") + non = sparse.lil_matrix((len(model_set), len(seg_set)), dtype="bool") + for item in zip(model_idx, seg_idx, is_tar): + if item[2]: + tar[item[0], item[1]] = True + else: + non[item[0], item[1]] = True + return cls(model_set, seg_set, tar.tocsr(), non.tocsr()) + @classmethod def merge(cls, key_list): raise NotImplementedError() diff --git a/hyperion/utils/trial_key.py b/hyperion/utils/trial_key.py index 9552d7c0..4a99461b 100644 --- a/hyperion/utils/trial_key.py +++ b/hyperion/utils/trial_key.py @@ -5,9 +5,11 @@ import copy import os.path as path +from pathlib import Path import h5py import numpy as np +import pandas as pd from .list_utils import * from .trial_ndx import TrialNdx @@ -82,18 +84,20 @@ def sort(self): if self.trial_cond is not None: self.trial_cond = self.trial_cond[:, ix] - def save(self, file_path): + def save(self, file_path, sep=None): """Saves object to txt/h5 file. Args: file_path: File to write the list. """ - - file_base, file_ext = path.splitext(file_path) - if file_ext == ".h5" or file_ext == ".hdf5": + file_path = Path(file_path) + ext = file_path.suffix + if ext in (".h5", ".hdf5"): self.save_h5(file_path) - else: + elif ext in ("", ".txt"): self.save_txt(file_path) + else: + self.save_table(file_path, sep) def save_h5(self, file_path): """Saves object to h5 file. 
@@ -132,20 +136,40 @@ def save_txt(self, file_path): file_path: File to write the list. """ with open(file_path, "w") as f: - idx = (self.tar.T == True).nonzero() + idx = (self.tar.T).nonzero() for item in zip(idx[0], idx[1]): f.write( "%s %s target\n" % (self.model_set[item[1]], self.seg_set[item[0]]) ) - idx = (self.non.T == True).nonzero() + idx = (self.non.T).nonzero() for item in zip(idx[0], idx[1]): f.write( "%s %s nontarget\n" % (self.model_set[item[1]], self.seg_set[item[0]]) ) + def save_table(self, file_path, sep=None): + """Saves object to txt file. + + Args: + file_path: File to write the list. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + with open(file_path, "w", encoding="utf-8") as f: + f.write(f"modelid{sep}segmentid{sep}targettype\n") + I, J = np.logical_or(self.tar, self.non).nonzero() + for i, j in zip(I, J): + target_type = "target" if self.tar[i, j] else "nontarget" + f.write( + f"{self.model_set[i]}{sep}{self.seg_set[j]}{sep}{target_type}\n" + ) + @classmethod - def load(cls, file_path): + def load(cls, file_path, sep=None): """Loads object from txt/h5 file Args: @@ -154,11 +178,13 @@ def load(cls, file_path): Returns: TrialKey object. """ - file_base, file_ext = path.splitext(file_path) - if file_ext == ".h5" or file_ext == ".hdf5": + _, file_ext = path.splitext(file_path) + if file_ext in (".h5", ".hdf5"): return cls.load_h5(file_path) - else: + elif file_ext in ("", ".txt"): return cls.load_txt(file_path) + else: + return cls.load_table(file_path, sep) @classmethod def load_h5(cls, file_path): @@ -240,6 +266,40 @@ def load_txt(cls, file_path): non[item[0], item[1]] = True return cls(model_set, seg_set, tar, non) + @classmethod + def load_table(cls, file_path, sep=None): + """Loads object from txt file + + Args: + file_path: File to read the list. + + Returns: + TrialKey object. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + df = pd.read_csv(file_path, sep=sep) + models = df["modelid"].values + segments = df["segmentid"].values + is_tar = (df["targettype"] == "target").values + model_set, _, model_idx = np.unique( + models, return_index=True, return_inverse=True + ) + seg_set, _, seg_idx = np.unique( + segments, return_index=True, return_inverse=True + ) + tar = np.zeros((len(model_set), len(seg_set)), dtype="bool") + non = np.zeros((len(model_set), len(seg_set)), dtype="bool") + for i, j, target_type in zip(model_idx, seg_idx, is_tar): + if target_type: + tar[i, j] = True + else: + non[i, j] = True + return cls(model_set, seg_set, tar, non) + @classmethod def merge(cls, key_list): """Merges several key objects. 
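The trial-key changes above add a csv/tsv table format (columns modelid, segmentid, targettype) next to the legacy txt and h5 formats, with the separator inferred from the file extension. A small round-trip sketch of the dispatch logic shown in this diff (file name and contents are made up; illustrative only):

```python
from hyperion.utils.trial_key import TrialKey

# Write a tiny trial list in the new table format.
with open("trials.csv", "w") as f:
    f.write("modelid,segmentid,targettype\n")
    f.write("spk1,utt1,target\n")
    f.write("spk1,utt2,nontarget\n")

# .csv is neither .h5/.hdf5 nor empty/.txt, so load() falls through to
# load_table(), which infers sep="," from the suffix.
key = TrialKey.load("trials.csv")
print(key.model_set, key.seg_set)  # ['spk1'] ['utt1' 'utt2']

# A .tsv suffix dispatches save() to save_table() with a tab separator.
key.save("trials.tsv")
```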
From 27878914b1bc20b2dbeb5c1139b6d23f2857cd07 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 25 May 2023 09:19:23 -0400 Subject: [PATCH 102/154] sre21 8k adapted to persephone branch --- egs/sre21-av-a/v1.16k/README.md | 22 +- .../v1.16k/local/score_sre21_official.sh | 2 +- egs/sre21-av-a/v1.8k/README.md | 53 ++- egs/sre21-av-a/v1.8k/run_040_eval_be_v1.sh | 2 +- egs/sre21-av-a/v1.8k/run_041_eval_be_v2.sh | 4 +- egs/sre21-av-a/v1.8k/run_042_eval_be_v3.sh | 2 +- egs/voxceleb/v1.1/local | 1 - .../{v1 => v1.1}/local/attack_analysis.py | 0 .../{v1 => v1.1}/local/attack_analysis.sh | 0 .../local/calibrate_voxceleb1_o_clean.sh | 0 egs/voxceleb/{v1 => v1.1}/local/make_musan.py | 0 egs/voxceleb/{v1 => v1.1}/local/make_musan.sh | 0 .../{v1 => v1.1}/local/make_rirs_data.sh | 0 .../{v1 => v1.1}/local/make_some_figs.py | 0 .../make_train_lists_sup_embed_with_augm.sh | 0 .../{v1 => v1.1}/local/make_trials_subset.py | 0 .../{v1 => v1.1}/local/make_vox2_trials.py | 0 .../{v1 => v1.1}/local/make_voxceleb1_o.pl | 0 .../{v1 => v1.1}/local/make_voxceleb1_oeh.pl | 0 .../{v1 => v1.1}/local/make_voxceleb1_old.pl | 0 .../{v1 => v1.1}/local/make_voxceleb1_orig.pl | 0 .../local/make_voxceleb1_orig_v2.pl | 0 .../{v1 => v1.1}/local/make_voxceleb1_v2.pl | 0 .../{v1 => v1.1}/local/make_voxceleb1_v2_o.pl | 0 .../local/make_voxceleb1_v2_oeh.pl | 0 .../{v1 => v1.1}/local/make_voxceleb1cat.pl | 0 .../local/make_voxceleb1cat_v2.pl | 0 .../{v1 => v1.1}/local/make_voxceleb2.pl | 0 .../{v1 => v1.1}/local/make_voxceleb2cat.pl | 0 .../local/prepare_voxsrc22_dev.py | 0 .../local/prepare_voxsrc22_test.py | 0 egs/voxceleb/{v1 => v1.1}/local/score_dcf.py | 0 .../{v1 => v1.1}/local/score_voxceleb1.sh | 0 .../local/score_voxceleb1_o_clean.sh | 0 .../local/score_voxceleb1_single_cond.sh | 0 .../{v1 => v1.1}/local/score_voxsrc22_dev.sh | 0 egs/voxceleb/v1.1/run_002_compute_evad.sh | 1 - egs/voxceleb/v1.2/hyp_utils | 1 + ...aseplus_ecapatdnn512x3_phase1_default.yaml | 6 - ...aseplus_ecapatdnn512x3_phase2_default.yaml | 12 - ...aseplus_ecapatdnn512x3_phase3_default.yaml | 11 - ...lmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml | 24 -- ...nn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh | 55 ---- egs/voxceleb/v2/local | 2 +- egs/voxceleb/v2/run_001_prepare_data.sh | 20 +- egs/voxceleb/v2/run_002_compute_evad.sh | 63 ++-- .../v2/run_003_prepare_noises_rirs.sh | 67 ++++ hyp_utils/conda_env.sh | 2 +- hyp_utils/create_data_split_dirs.sh | 3 +- hyperion/bin/hyperion_dataset.py | 93 ++++++ hyperion/bin/hyperion_tables.py | 129 ++++++++ hyperion/bin/train_xvector_from_wav.py | 10 +- hyperion/data_prep/data_prep.py | 1 - hyperion/data_prep/voxceleb1.py | 7 +- hyperion/data_prep/voxceleb2.py | 11 +- hyperion/data_prep/voxsrc22.py | 212 ++++++++++++ hyperion/torch/trainers/torch_trainer.py | 178 +++++----- hyperion/torch/trainers/xvector_trainer.py | 8 +- .../trainers/xvector_trainer_from_wav.py | 12 +- hyperion/utils/class_info.py | 27 +- hyperion/utils/dataset.py | 306 ++++++++++++++---- hyperion/utils/enrollment_map.py | 17 +- hyperion/utils/info_table.py | 7 +- 63 files changed, 1024 insertions(+), 347 deletions(-) delete mode 120000 egs/voxceleb/v1.1/local rename egs/voxceleb/{v1 => v1.1}/local/attack_analysis.py (100%) rename egs/voxceleb/{v1 => v1.1}/local/attack_analysis.sh (100%) rename egs/voxceleb/{v1 => v1.1}/local/calibrate_voxceleb1_o_clean.sh (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_musan.py (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_musan.sh (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_rirs_data.sh (100%)
rename egs/voxceleb/{v1 => v1.1}/local/make_some_figs.py (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_train_lists_sup_embed_with_augm.sh (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_trials_subset.py (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_vox2_trials.py (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1_o.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1_oeh.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1_old.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1_orig.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1_orig_v2.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1_v2.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1_v2_o.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1_v2_oeh.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1cat.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb1cat_v2.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb2.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/make_voxceleb2cat.pl (100%) rename egs/voxceleb/{v1 => v1.1}/local/prepare_voxsrc22_dev.py (100%) rename egs/voxceleb/{v1 => v1.1}/local/prepare_voxsrc22_test.py (100%) rename egs/voxceleb/{v1 => v1.1}/local/score_dcf.py (100%) rename egs/voxceleb/{v1 => v1.1}/local/score_voxceleb1.sh (100%) rename egs/voxceleb/{v1 => v1.1}/local/score_voxceleb1_o_clean.sh (100%) rename egs/voxceleb/{v1 => v1.1}/local/score_voxceleb1_single_cond.sh (100%) rename egs/voxceleb/{v1 => v1.1}/local/score_voxsrc22_dev.sh (100%) create mode 120000 egs/voxceleb/v1.2/hyp_utils delete mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml delete mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml delete mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml delete mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh create mode 100755 egs/voxceleb/v2/run_003_prepare_noises_rirs.sh create mode 100644 hyperion/bin/hyperion_dataset.py create mode 100755 hyperion/bin/hyperion_tables.py create mode 100644 hyperion/data_prep/voxsrc22.py diff --git a/egs/sre21-av-a/v1.16k/README.md b/egs/sre21-av-a/v1.16k/README.md index 0f5d09ad..d90dc0a4 100644 --- a/egs/sre21-av-a/v1.16k/README.md +++ b/egs/sre21-av-a/v1.16k/README.md @@ -7,6 +7,20 @@ The systems runs at 16 kHz, telephone data is upsampled to 16k using SoX This recipe is based on these works ``` +@inproceedings{Villalba2022, +author = {Jes\'us Villalba and Bengt J Borgstrom and Saurabh Kataria and Magdalena Rybicka and Carlos D Castillo and Jaejin Cho and L. Paola García-Perera and Pedro A. 
Torres-Carrasquillo and Najim Dehak}, +city = {ISCA}, +doi = {10.21437/Odyssey.2022-30}, +issue = {July}, +journal = {The Speaker and Language Recognition Workshop (Odyssey 2022)}, +month = {6}, +pages = {213-220}, +publisher = {ISCA}, +title = {Advances in Cross-Lingual and Cross-Source Audio-Visual Speaker Recognition: The JHU-MIT System for NIST SRE21}, +url = {https://www.isca-speech.org/archive/odyssey_2022/villalba22b_odyssey.html}, +year = {2022}, +} + @inproceedings{Villalba2020, address = {Tokyo, Japan}, author = {Villalba, Jes{\'{u}}s and Garcia-Romero, Daniel and Chen, Nanxin and Sell, Gregory and Borgstrom, Jonas and McCree, Alan and {Garcia Perera}, Leibny Paola and Kataria, Saurabh and Nidadavolu, Phani Sankar and Torres-Carrasquiilo, Pedro and Dehak, Najim}, @@ -139,14 +153,6 @@ The back-end used for these results is: | config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.19 | 0.64 | 0.089 | | config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | TSE-Res2Net50 w26xs4 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.15 | 0.61 | 0.102 | -## SRE-CTS Superset dev set - -| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | -| ------ | ---------- | ------------- | ------ | ------------- | ------------- | -| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 1.37 | 0.076 | 0.106 | -| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.19 | 0.64 | 0.089 | -| config_fbank80_stmn_tseres2net50w26s4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | TSE-Res2Net50 w26xs4 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.15 | 0.61 | 0.102 | - ## SRE21 Audio Dev (official scoring tool) | Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | diff --git a/egs/sre21-av-a/v1.16k/local/score_sre21_official.sh b/egs/sre21-av-a/v1.16k/local/score_sre21_official.sh index a5bc03eb..e56906f6 100755 --- a/egs/sre21-av-a/v1.16k/local/score_sre21_official.sh +++ b/egs/sre21-av-a/v1.16k/local/score_sre21_official.sh @@ -18,7 +18,7 @@ echo "Score SRE21 ${track} ${subset} for $score_dir" soft_dir=./sre21/scoring_software -if [ ! -f $s_dir/sre_scorer.py ];then +if [ ! -f $soft_dir/sre_scorer.py ];then echo "downloading scoring tool" local/download_sre21_scoring_tool.sh fi diff --git a/egs/sre21-av-a/v1.8k/README.md b/egs/sre21-av-a/v1.8k/README.md index a105128c..b55f9bf0 100644 --- a/egs/sre21-av-a/v1.8k/README.md +++ b/egs/sre21-av-a/v1.8k/README.md @@ -10,6 +10,20 @@ copy the utt2est_lang files from the 16k data dirs to the VoxCeleb and SRE21 dat This recipe is based on these works ``` +@inproceedings{Villalba2022, +author = {Jes\'us Villalba and Bengt J Borgstrom and Saurabh Kataria and Magdalena Rybicka and Carlos D Castillo and Jaejin Cho and L. Paola García-Perera and Pedro A. Torres-Carrasquillo and Najim Dehak}, +city = {ISCA}, +doi = {10.21437/Odyssey.2022-30}, +issue = {July}, +journal = {The Speaker and Language Recognition Workshop (Odyssey 2022)}, +month = {6}, +pages = {213-220}, +publisher = {ISCA}, +title = {Advances in Cross-Lingual and Cross-Source Audio-Visual Speaker Recognition: The JHU-MIT System for NIST SRE21}, +url = {https://www.isca-speech.org/archive/odyssey_2022/villalba22b_odyssey.html}, +year = {2022}, +} + @inproceedings{Villalba2020, address = {Tokyo, Japan}, author = {Villalba, Jes{\'{u}}s and Garcia-Romero, Daniel and Chen, Nanxin and Sell, Gregory and Borgstrom, Jonas and McCree, Alan and {Garcia Perera}, Leibny Paola and Kataria, Saurabh and Nidadavolu, Phani Sankar and Torres-Carrasquiilo, Pedro and Dehak, Najim}, @@ -91,8 +105,6 @@ run_0xx_....sh --config-file global_conf/config_fbank80_stmn_res2net50w26s8_arcs - `run_011_train_xvector.sh` - Trains the x-vector network on 4sec chunks - - - `run_012_finetune_xvector.sh` - Fine-tune x-vector network on 10-15 secs utts - `run_030_extract_xvectors.sh` @@ -111,4 +123,39 @@ run_0xx_....sh --config-file global_conf/config_fbank80_stmn_res2net50w26s8_arcs ## Results -TODO +The back-end used for these results is: +- back-end V2 (run_041_eval_be_v2.sh) +- Without S-Norm +- Scores are calibrated as indicated in the paper. + +## SRE16 Eval40% YUE + +| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | +| ------ | ---------- | ------------- | ------ | ------------- | ------------- | +| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 1.922 | 0.154 | 0.200 | +| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.168 | 0.127 | 0.134 | + + +## SRE-CTS Superset dev set + +| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | +| ------ | ---------- | ------------- | ------ | ------------- | ------------- | +| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 1.39 | 0.072 | 0.095 | +| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 1.175 | 0.057 | 0.069 | + + +## SRE21 Audio Dev (official scoring tool) + +| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | +| ------ | ---------- | ------------- | ------ | ------------- | ------------- | +| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 6.65 | 0.418 | 0.436 | +| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 3.73 | 0.319 | 0.325 | + + +## SRE21 Audio Eval (official scoring tool) + +| Config | Model Type | Model Details | EER(%) | Min. Cprimary | Act. Cprimary | +| ------ | ---------- | ------------- | ------ | ------------- | ------------- | +| config_fbank80_stmn_ecapatdnn2048x4_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | ECAPA-TDNN 2048x4 | fine-tuned 10-15secs
AAM-Softmax margin=0.5 | 5.44 | 0.388 | 0.390 | +| config_fbank80_stmn_res2net50w26s8_chattstatsi128_arcs30m0.3_adam_lr0.02_amp.v1.sh | Res2Net50 w26xs8 | fine-tuned 10 secs
AAM-Softmax margin=0.5 | 4.21 | 0.356 | 0.377 | + diff --git a/egs/sre21-av-a/v1.8k/run_040_eval_be_v1.sh b/egs/sre21-av-a/v1.8k/run_040_eval_be_v1.sh index a55761ae..92cbd887 100755 --- a/egs/sre21-av-a/v1.8k/run_040_eval_be_v1.sh +++ b/egs/sre21-av-a/v1.8k/run_040_eval_be_v1.sh @@ -153,7 +153,7 @@ fi if [ $stage -le 4 ];then local/calibrate_sre21av_v1.sh --cmd "$train_cmd" $score_plda_dir local/score_sre16.sh data/sre16_eval40_yue_test eval40_yue ${score_plda_dir}_cal_v1 - local/score_sre_cts_superset.sh data/sre_cts_superset_16k_dev ${score_plda_dir}_cal_v1 + local/score_sre_cts_superset.sh data/sre_cts_superset_8k_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio_dev_test audio_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio-visual_dev_test audio-visual_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio_eval_test audio_eval ${score_plda_dir}_cal_v1 diff --git a/egs/sre21-av-a/v1.8k/run_041_eval_be_v2.sh b/egs/sre21-av-a/v1.8k/run_041_eval_be_v2.sh index f8eae0a1..6890eba9 100755 --- a/egs/sre21-av-a/v1.8k/run_041_eval_be_v2.sh +++ b/egs/sre21-av-a/v1.8k/run_041_eval_be_v2.sh @@ -187,7 +187,7 @@ fi if [ $stage -le 4 ];then local/calibrate_sre21av_v1.sh --cmd "$train_cmd" $score_plda_dir local/score_sre16.sh data/sre16_eval40_yue_test eval40_yue ${score_plda_dir}_cal_v1 - local/score_sre_cts_superset.sh data/sre_cts_superset_16k_dev ${score_plda_dir}_cal_v1 + local/score_sre_cts_superset.sh data/sre_cts_superset_8k_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio_dev_test audio_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio-visual_dev_test audio-visual_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio_eval_test audio_eval ${score_plda_dir}_cal_v1 @@ -311,7 +311,7 @@ fi if [ $stage -le 7 ];then local/calibrate_sre21av_v1.sh --cmd "$train_cmd" $score_plda_dir local/score_sre16.sh data/sre16_eval40_yue_test eval40_yue ${score_plda_dir}_cal_v1 - local/score_sre_cts_superset.sh data/sre_cts_superset_16k_dev ${score_plda_dir}_cal_v1 + local/score_sre_cts_superset.sh data/sre_cts_superset_8k_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio_dev_test audio_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio-visual_dev_test audio-visual_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio_eval_test audio_eval ${score_plda_dir}_cal_v1 diff --git a/egs/sre21-av-a/v1.8k/run_042_eval_be_v3.sh b/egs/sre21-av-a/v1.8k/run_042_eval_be_v3.sh index 263d7bbe..35afbb27 100755 --- a/egs/sre21-av-a/v1.8k/run_042_eval_be_v3.sh +++ b/egs/sre21-av-a/v1.8k/run_042_eval_be_v3.sh @@ -185,7 +185,7 @@ fi if [ $stage -le 4 ];then local/calibrate_sre21av_v1.sh --cmd "$train_cmd" $score_plda_dir local/score_sre16.sh data/sre16_eval40_yue_test eval40_yue ${score_plda_dir}_cal_v1 - local/score_sre_cts_superset.sh data/sre_cts_superset_16k_dev ${score_plda_dir}_cal_v1 + local/score_sre_cts_superset.sh data/sre_cts_superset_8k_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio_dev_test audio_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio-visual_dev_test audio-visual_dev ${score_plda_dir}_cal_v1 local/score_sre21.sh data/sre21_audio_eval_test audio_eval ${score_plda_dir}_cal_v1 diff --git a/egs/voxceleb/v1.1/local b/egs/voxceleb/v1.1/local deleted file mode 120000 index 740b697d..00000000 --- a/egs/voxceleb/v1.1/local +++ /dev/null @@ -1 +0,0 @@ -../v1/local/ \ No newline at end of file diff --git a/egs/voxceleb/v1/local/attack_analysis.py 
b/egs/voxceleb/v1.1/local/attack_analysis.py similarity index 100% rename from egs/voxceleb/v1/local/attack_analysis.py rename to egs/voxceleb/v1.1/local/attack_analysis.py diff --git a/egs/voxceleb/v1/local/attack_analysis.sh b/egs/voxceleb/v1.1/local/attack_analysis.sh similarity index 100% rename from egs/voxceleb/v1/local/attack_analysis.sh rename to egs/voxceleb/v1.1/local/attack_analysis.sh diff --git a/egs/voxceleb/v1/local/calibrate_voxceleb1_o_clean.sh b/egs/voxceleb/v1.1/local/calibrate_voxceleb1_o_clean.sh similarity index 100% rename from egs/voxceleb/v1/local/calibrate_voxceleb1_o_clean.sh rename to egs/voxceleb/v1.1/local/calibrate_voxceleb1_o_clean.sh diff --git a/egs/voxceleb/v1/local/make_musan.py b/egs/voxceleb/v1.1/local/make_musan.py similarity index 100% rename from egs/voxceleb/v1/local/make_musan.py rename to egs/voxceleb/v1.1/local/make_musan.py diff --git a/egs/voxceleb/v1/local/make_musan.sh b/egs/voxceleb/v1.1/local/make_musan.sh similarity index 100% rename from egs/voxceleb/v1/local/make_musan.sh rename to egs/voxceleb/v1.1/local/make_musan.sh diff --git a/egs/voxceleb/v1/local/make_rirs_data.sh b/egs/voxceleb/v1.1/local/make_rirs_data.sh similarity index 100% rename from egs/voxceleb/v1/local/make_rirs_data.sh rename to egs/voxceleb/v1.1/local/make_rirs_data.sh diff --git a/egs/voxceleb/v1/local/make_some_figs.py b/egs/voxceleb/v1.1/local/make_some_figs.py similarity index 100% rename from egs/voxceleb/v1/local/make_some_figs.py rename to egs/voxceleb/v1.1/local/make_some_figs.py diff --git a/egs/voxceleb/v1/local/make_train_lists_sup_embed_with_augm.sh b/egs/voxceleb/v1.1/local/make_train_lists_sup_embed_with_augm.sh similarity index 100% rename from egs/voxceleb/v1/local/make_train_lists_sup_embed_with_augm.sh rename to egs/voxceleb/v1.1/local/make_train_lists_sup_embed_with_augm.sh diff --git a/egs/voxceleb/v1/local/make_trials_subset.py b/egs/voxceleb/v1.1/local/make_trials_subset.py similarity index 100% rename from egs/voxceleb/v1/local/make_trials_subset.py rename to egs/voxceleb/v1.1/local/make_trials_subset.py diff --git a/egs/voxceleb/v1/local/make_vox2_trials.py b/egs/voxceleb/v1.1/local/make_vox2_trials.py similarity index 100% rename from egs/voxceleb/v1/local/make_vox2_trials.py rename to egs/voxceleb/v1.1/local/make_vox2_trials.py diff --git a/egs/voxceleb/v1/local/make_voxceleb1_o.pl b/egs/voxceleb/v1.1/local/make_voxceleb1_o.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1_o.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1_o.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb1_oeh.pl b/egs/voxceleb/v1.1/local/make_voxceleb1_oeh.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1_oeh.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1_oeh.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb1_old.pl b/egs/voxceleb/v1.1/local/make_voxceleb1_old.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1_old.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1_old.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb1_orig.pl b/egs/voxceleb/v1.1/local/make_voxceleb1_orig.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1_orig.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1_orig.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb1_orig_v2.pl b/egs/voxceleb/v1.1/local/make_voxceleb1_orig_v2.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1_orig_v2.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1_orig_v2.pl diff --git 
a/egs/voxceleb/v1/local/make_voxceleb1_v2.pl b/egs/voxceleb/v1.1/local/make_voxceleb1_v2.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1_v2.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1_v2.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb1_v2_o.pl b/egs/voxceleb/v1.1/local/make_voxceleb1_v2_o.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1_v2_o.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1_v2_o.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb1_v2_oeh.pl b/egs/voxceleb/v1.1/local/make_voxceleb1_v2_oeh.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1_v2_oeh.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1_v2_oeh.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb1cat.pl b/egs/voxceleb/v1.1/local/make_voxceleb1cat.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1cat.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1cat.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb1cat_v2.pl b/egs/voxceleb/v1.1/local/make_voxceleb1cat_v2.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb1cat_v2.pl rename to egs/voxceleb/v1.1/local/make_voxceleb1cat_v2.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb2.pl b/egs/voxceleb/v1.1/local/make_voxceleb2.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb2.pl rename to egs/voxceleb/v1.1/local/make_voxceleb2.pl diff --git a/egs/voxceleb/v1/local/make_voxceleb2cat.pl b/egs/voxceleb/v1.1/local/make_voxceleb2cat.pl similarity index 100% rename from egs/voxceleb/v1/local/make_voxceleb2cat.pl rename to egs/voxceleb/v1.1/local/make_voxceleb2cat.pl diff --git a/egs/voxceleb/v1/local/prepare_voxsrc22_dev.py b/egs/voxceleb/v1.1/local/prepare_voxsrc22_dev.py similarity index 100% rename from egs/voxceleb/v1/local/prepare_voxsrc22_dev.py rename to egs/voxceleb/v1.1/local/prepare_voxsrc22_dev.py diff --git a/egs/voxceleb/v1/local/prepare_voxsrc22_test.py b/egs/voxceleb/v1.1/local/prepare_voxsrc22_test.py similarity index 100% rename from egs/voxceleb/v1/local/prepare_voxsrc22_test.py rename to egs/voxceleb/v1.1/local/prepare_voxsrc22_test.py diff --git a/egs/voxceleb/v1/local/score_dcf.py b/egs/voxceleb/v1.1/local/score_dcf.py similarity index 100% rename from egs/voxceleb/v1/local/score_dcf.py rename to egs/voxceleb/v1.1/local/score_dcf.py diff --git a/egs/voxceleb/v1/local/score_voxceleb1.sh b/egs/voxceleb/v1.1/local/score_voxceleb1.sh similarity index 100% rename from egs/voxceleb/v1/local/score_voxceleb1.sh rename to egs/voxceleb/v1.1/local/score_voxceleb1.sh diff --git a/egs/voxceleb/v1/local/score_voxceleb1_o_clean.sh b/egs/voxceleb/v1.1/local/score_voxceleb1_o_clean.sh similarity index 100% rename from egs/voxceleb/v1/local/score_voxceleb1_o_clean.sh rename to egs/voxceleb/v1.1/local/score_voxceleb1_o_clean.sh diff --git a/egs/voxceleb/v1/local/score_voxceleb1_single_cond.sh b/egs/voxceleb/v1.1/local/score_voxceleb1_single_cond.sh similarity index 100% rename from egs/voxceleb/v1/local/score_voxceleb1_single_cond.sh rename to egs/voxceleb/v1.1/local/score_voxceleb1_single_cond.sh diff --git a/egs/voxceleb/v1/local/score_voxsrc22_dev.sh b/egs/voxceleb/v1.1/local/score_voxsrc22_dev.sh similarity index 100% rename from egs/voxceleb/v1/local/score_voxsrc22_dev.sh rename to egs/voxceleb/v1.1/local/score_voxsrc22_dev.sh diff --git a/egs/voxceleb/v1.1/run_002_compute_evad.sh b/egs/voxceleb/v1.1/run_002_compute_evad.sh index 4e82a87a..27260be3 100755 --- a/egs/voxceleb/v1.1/run_002_compute_evad.sh +++ 
b/egs/voxceleb/v1.1/run_002_compute_evad.sh @@ -24,7 +24,6 @@ if [ $stage -le 1 ]; then dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage if [ "$nodes" == "b0" ];then utils/create_split_dir.pl \ - utils/create_split_dir.pl \ /export/b{04,05,06,07}/$dir_name $vaddir/storage elif [ "$nodes" == "b1" ];then utils/create_split_dir.pl \ diff --git a/egs/voxceleb/v1.2/hyp_utils b/egs/voxceleb/v1.2/hyp_utils new file mode 120000 index 00000000..f6d1eb7a --- /dev/null +++ b/egs/voxceleb/v1.2/hyp_utils @@ -0,0 +1 @@ +../../../hyp_utils \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml deleted file mode 100644 index 8574a1cf..00000000 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase1_default.yaml +++ /dev/null @@ -1,6 +0,0 @@ -data: - train: train_data_default.yaml - val: val_data_default.yaml -model: wavlmbaseplus_ecapatdnn512x3.yaml -trainer: trainer_phase1_sgd_default.yaml - \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml deleted file mode 100644 index 87b01a1f..00000000 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase2_default.yaml +++ /dev/null @@ -1,12 +0,0 @@ -data: - train: train_data_default.yaml - val: val_data_default.yaml -model: - xvector: - cos_scale: 32.0 - margin: 0.2 - margin_warmup_epochs: 0 - intertop_k: 5 - intertop_margin: 0.1 -trainer: trainer_phase2_sgd_default.yaml - \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml deleted file mode 100644 index d13931e0..00000000 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_phase3_default.yaml +++ /dev/null @@ -1,11 +0,0 @@ -data: - train: train_data_default.yaml - val: val_data_default.yaml -model: - xvector: - cos_scale: 32.0 - margin: 0.4 - margin_warmup_epochs: 0 - intertop_margin: 0. 
-trainer: trainer_phase3_sgd_default.yaml - \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml index 34c6e8dc..d4db70a7 100644 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v1.0.yaml @@ -41,29 +41,6 @@ data: num_hard_prototypes: 0 data_loader: num_workers: 8 - -train: - dataset: - max_chunk_length: 3.0 - min_chunk_length: 3.0 - aug_cfg: conf/reverb_noise_aug.yaml - wav_scale: 1 - sampler: - batch_size: 32 - iters_per_epoch: 6 - data_loader: - num_workers: 8 - val: - dataset: - max_chunk_length: 4.0 - min_chunk_length: 4.0 - aug_cfg: conf/reverb_noise_aug.yaml - wav_scale: 1 - sampler: - batch_size: 32 - iters_per_epoch: 6 - data_loader: - num_workers: 8 model: wavlmbaseplus_ecapatdnn512x3.yaml trainer: optim: @@ -84,5 +61,4 @@ trainer: epochs: 60 eff_batch_size: 1024 train_mode: hf-feats-frozen-nograd - \ No newline at end of file diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh deleted file mode 100644 index 942fb336..00000000 --- a/egs/voxceleb/v2/global_conf/config_wav2vec2base_ecapatdnn512x2_arcs30m0.3_adam_lr0.001_amp.v12.sh +++ /dev/null @@ -1,55 +0,0 @@ -# Wav2vec2 base trained on 960h LibriSpeech + ECAPA-TDNN 512x2 - -# hugging face model -hf_model_name=wav2vec2base - -#vad -vad_config=conf/vad_16k.yaml - -# x-vector training -nnet_data=voxceleb2cat_train - -# x-vector cfg - -nnet_type=hf_wav2vec2resnet1d - -batch_size_1gpu=32 -eff_batch_size=512 # effective batch size -dropout=0 -embed_dim=256 -lr=0.05 -s=30 -margin_warmup=20 -margin=0.3 -nnet_num_epochs=70 - - -lr=0.001 -#lr=0.005 -xvec_train_base_cfg=conf/train_wav2vec2base_ecapatdnn512x2_default.yaml -xvec_train_args="--data.train.sampler.batch-size $batch_size_1gpu --trainer.optim.lr $lr --trainer.lrsched.warmup-steps 20000 --trainer.lrsched.hold-steps 20000 --trainer.lrsched.min-lr 1e-6 --trainer.epochs 75 --model conf/wav2vec2base_specaug5_ecapatdnn512x2.yaml --data.train.dataset.max-chunk-length 2 --data.train.dataset.min-chunk-length 2" - -nnet_name=${hf_model_name}_ecapatdnn512x2_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v12 #v1 - -nnet_dir=exp/xvector_nnets/$nnet_name -nnet=$nnet_dir/model_ep0060.pth -nnet=$nnet_dir/swa_model_ep0076.pth -nnet=$nnet_dir/model_ep0060.pth -nnet=$nnet_dir/model_ep0030.pth -nnet=$nnet_dir/model_ep0040.pth -nnet=$nnet_dir/model_ep0020.pth - - -# back-end -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=6 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 - diff --git a/egs/voxceleb/v2/local b/egs/voxceleb/v2/local index 740b697d..2ac14857 120000 --- a/egs/voxceleb/v2/local +++ b/egs/voxceleb/v2/local @@ -1 +1 @@ -../v1/local/ \ No newline at end of file +../v1.1/local \ No newline at end of file diff --git a/egs/voxceleb/v2/run_001_prepare_data.sh b/egs/voxceleb/v2/run_001_prepare_data.sh index 7bf15448..44385610 100755 --- a/egs/voxceleb/v2/run_001_prepare_data.sh +++ b/egs/voxceleb/v2/run_001_prepare_data.sh @@ -12,7 +12,7 @@ config_file=default_config.sh . parse_options.sh || exit 1; . datapath.sh - +. 
$config_file if [ $stage -le 1 ];then # Prepare the VoxCeleb2 dataset for training. @@ -26,3 +26,21 @@ if [ $stage -le 2 ];then # Use this for the newer version of voxceleb1: local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data fi + +if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then + local/prepare_voxsrc22_dev.py \ + --vox1-corpus-dir $voxceleb1_root \ + --voxsrc22-corpus-dir $voxsrc22_root \ + --output-dir data/voxsrc22_dev +fi + +# if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then +# local/prepare_voxsrc22_test.py \ +# --corpus-dir $voxsrc22_root \ +# --output-dir data/voxsrc22_test +# fi + +if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then + # # split vox2 into 2 parts, for cohort and qmf training + local/make_vox2_trials.py --data-dir data/voxceleb2cat_train +fi diff --git a/egs/voxceleb/v2/run_002_compute_evad.sh b/egs/voxceleb/v2/run_002_compute_evad.sh index eeae00ac..1248ad39 100755 --- a/egs/voxceleb/v2/run_002_compute_evad.sh +++ b/egs/voxceleb/v2/run_002_compute_evad.sh @@ -19,39 +19,40 @@ config_file=default_config.sh if [ $stage -le 1 ]; then - # Prepare to distribute data over multiple machines - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $vaddir/storage ]; then - dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage - if [ "$nodes" == "b0" ];then - utils/create_split_dir.pl \ - utils/create_split_dir.pl \ - /export/b{04,05,06,07}/$dir_name $vaddir/storage - elif [ "$nodes" == "b1" ];then - utils/create_split_dir.pl \ - /export/b{14,15,16,17}/$dir_name $vaddir/storage - elif [ "$nodes" == "c0" ];then - utils/create_split_dir.pl \ - /export/c{06,07,08,09}/$dir_name $vaddir/storage - elif [ "$nodes" == "fs01" ];then - utils/create_split_dir.pl \ - /export/fs01/$dir_name $vaddir/storage - else - echo "we don't distribute data between multiple machines" - fi + # Prepare to distribute data over multiple machines + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $vaddir/storage ]; then + dir_name=$USER/hyp-data/voxceleb/v1/$storage_name/vad/storage + if [ "$nodes" == "b0" ];then + utils/create_split_dir.pl \ + /export/b{04,05,06,07}/$dir_name $vaddir/storage + elif [ "$nodes" == "b1" ];then + utils/create_split_dir.pl \ + /export/b{14,15,16,17}/$dir_name $vaddir/storage + elif [ "$nodes" == "c0" ];then + utils/create_split_dir.pl \ + /export/c{06,07,08,09}/$dir_name $vaddir/storage + elif [ "$nodes" == "fs01" ];then + utils/create_split_dir.pl \ + /export/fs01/$dir_name $vaddir/storage + else + echo "we don't distribute data between multiple machines" fi + fi fi -#Train datasets -if [ $stage -le 2 ];then - for name in voxceleb2cat_train voxceleb1_test - do - num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') - nj=$(($num_spk < 40 ? $num_spk:40)) - hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ - --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ - data/${name} exp/make_vad/$name $vaddir - utils/fix_data_dir.sh data/${name} - done +if [ $stage -le 2 ];then + if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" + fi + for name in voxceleb2cat_train voxceleb1_test $extra_data + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 40 ? 
$num_spk:40)) + hyp_utils/feats/make_evad.sh \ + --write-utt2num-frames true \ + --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ + data/${name} exp/make_vad/$name $vaddir + utils/fix_data_dir.sh data/${name} + done fi - diff --git a/egs/voxceleb/v2/run_003_prepare_noises_rirs.sh b/egs/voxceleb/v2/run_003_prepare_noises_rirs.sh new file mode 100755 index 00000000..a448af9a --- /dev/null +++ b/egs/voxceleb/v2/run_003_prepare_noises_rirs.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +# We prepare the noise files and RIR for online speech augmentation + +if [ $stage -le 1 ]; then + + # Prepare the MUSAN corpus, which consists of music, speech, and noise + # suitable for augmentation. + local/make_musan.sh $musan_root 16 data + + for name in musan_noise musan_music + do + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ + --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_proc_audio exp/${name}_proc_audio + utils/fix_data_dir.sh data/${name}_proc_audio + done + +fi + +if [ $stage -le 2 ]; then + + # Create Babble noise from MUSAN speech files + for name in musan_speech + do + steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ + --storage_name voxceleb-v1.1-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_babble exp/${name}_babble + # utils/fix_data_dir.sh data/${name}_babble + done +fi + +if [ $stage -le 3 ]; then + if [ ! -d "RIRS_NOISES" ]; then + if [ -d ../../sre19-cmn2/v1/RIRS_NOISES ];then + ln -s ../../sre19-cmn2/v1/RIRS_NOISES + else + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + fi + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/smallroom 16 data/rirs_smallroom + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/mediumroom 16 data/rirs_mediumroom + local/make_rirs_data.sh RIRS_NOISES/real_rirs_isotropic_noises 16 data/rirs_real + for rirs in rirs_smallroom rirs_mediumroom rirs_real + do + #pack all rirs in h5 files + steps_xvec/pack_rirs_for_nnet_train.sh data/$rirs data/$rirs exp/rirs/$rirs + done + +fi + + diff --git a/hyp_utils/conda_env.sh b/hyp_utils/conda_env.sh index ceee4e93..8d5c67c1 100755 --- a/hyp_utils/conda_env.sh +++ b/hyp_utils/conda_env.sh @@ -79,7 +79,7 @@ if [ $num_gpus -gt 0 ];then #export TORCH_DISTRIBUTED_DEBUG=DETAIL #variable to find unused parameters if [ $num_gpus -gt 1 ];then - [[ $(type -P "$torchrun") ]] && command="torchrun" \ + [[ $(type -P "torchrun") ]] && command="torchrun" \ || command="python -m torch.distributed.run" command="$command --nproc_per_node=$num_gpus --standalone --nnodes=1" fi diff --git a/hyp_utils/create_data_split_dirs.sh b/hyp_utils/create_data_split_dirs.sh index 877b9e3f..06c30779 100755 --- a/hyp_utils/create_data_split_dirs.sh +++ b/hyp_utils/create_data_split_dirs.sh @@ -25,8 +25,7 @@ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $linkdir ]; then
     echo "Prepare to distribute data over multiple $nodes nodes"
     dir_name=$storage_dir/$storage_name/storage
     if [ "$nodes" == "b0" ];then
-	utils/create_split_dir.pl \
-	    hyp_utils/create_split_dir.pl \
+	hyp_utils/create_split_dir.pl \
 	    /export/b{04,05,06,07}/$dir_name $link_dir
     elif [ "$nodes" == "b1" ];then
 	hyp_utils/create_split_dir.pl \
diff --git a/hyperion/bin/hyperion_dataset.py b/hyperion/bin/hyperion_dataset.py
new file mode 100644
index 00000000..9e7bac5c
--- /dev/null
+++ b/hyperion/bin/hyperion_dataset.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+from typing import Optional, Union, List
+from pathlib import Path
+
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ArgumentParser,
+    namespace_to_dict,
+)
+
+from hyperion.hyp_defs import config_logger
+from hyperion.utils import (
+    PathLike,
+    Dataset,
+    InfoTable,
+    RecordingSet,
+    FeatureSet,
+    ClassInfo,
+    EnrollmentMap,
+    SegmentSet,
+)
+
+subcommands = ["add_features"]
+# table_dict = {
+#     "segments": SegmentSet,
+#     "recordings": RecordingSet,
+#     "features": FeatureSet,
+#     "classes": ClassInfo,
+#     "enrollments": EnrollmentMap,
+#     "generic": InfoTable,
+# }
+
+
+def add_common_args(parser):
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        dest="verbose",
+        default=1,
+        choices=[0, 1, 2, 3],
+        type=int,
+    )
+
+
+def make_add_features_parser():
+    parser = ArgumentParser()
+    parser.add_argument("--cfg", action=ActionConfigFile)
+    parser.add_argument(
+        "--dataset", required=True, help="""dataset dir or .yaml file"""
+    )
+    parser.add_argument(
+        "--features-name", required=True, help="""name of the feature"""
+    )
+    parser.add_argument("--features-file", required=True, help="""feature set file""")
+
+    add_common_args(parser)
+    return parser
+
+
+def add_features(
+    dataset: PathLike,
+    features_name: str,
+    features_file: PathLike,
+):
+    dataset_path = dataset
+    dataset = Dataset.load(dataset_path, lazy=True)
+    dataset.add_features(features_name, features_file)
+    dataset.save(dataset_path)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Tool to manipulate the Hyperion dataset")
+    parser.add_argument("--cfg", action=ActionConfigFile)
+
+    subcommand_action = parser.add_subcommands()
+    for subcommand in subcommands:
+        parser_func = f"make_{subcommand}_parser"
+        subparser = globals()[parser_func]()
+        subcommand_action.add_subcommand(subcommand, subparser)
+
+    args = parser.parse_args()
+    subcommand = args.subcommand
+    kwargs = namespace_to_dict(args)[args.subcommand]
+    config_logger(kwargs["verbose"])
+    del kwargs["verbose"]
+
+    globals()[subcommand](**kwargs)
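Both new CLI tools (hyperion_dataset.py above and hyperion_tables.py below) use the same jsonargparse subcommand-dispatch pattern: each subcommand name is looked up in globals() to find both its parser factory and its handler function. A minimal self-contained sketch of the pattern follows; the "greet" subcommand and its argument are made up for illustration.

# Minimal sketch of the subcommand-dispatch pattern used by the new
# CLI tools; the "greet" subcommand is hypothetical.
from jsonargparse import ArgumentParser, namespace_to_dict

subcommand_list = ["greet"]


def make_greet_parser():
    parser = ArgumentParser()
    parser.add_argument("--name", required=True, help="who to greet")
    return parser


def greet(name: str):
    print(f"hello {name}")


if __name__ == "__main__":
    parser = ArgumentParser(description="subcommand dispatch sketch")
    subcommand_action = parser.add_subcommands()
    for subcommand in subcommand_list:
        # look up make_<subcommand>_parser in the module globals
        subparser = globals()[f"make_{subcommand}_parser"]()
        subcommand_action.add_subcommand(subcommand, subparser)

    args = parser.parse_args()
    # dispatch to the function named after the chosen subcommand
    kwargs = namespace_to_dict(args)[args.subcommand]
    globals()[args.subcommand](**kwargs)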
diff --git a/hyperion/bin/hyperion_tables.py b/hyperion/bin/hyperion_tables.py
new file mode 100755
index 00000000..a79a1dca
--- /dev/null
+++ b/hyperion/bin/hyperion_tables.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+from typing import Optional, Union, List
+from pathlib import Path
+
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ArgumentParser,
+    namespace_to_dict,
+)
+
+from hyperion.hyp_defs import config_logger
+from hyperion.utils import (
+    PathLike,
+    InfoTable,
+    RecordingSet,
+    FeatureSet,
+    ClassInfo,
+    EnrollmentMap,
+    SegmentSet,
+)
+
+subcommands = ["cat"]
+table_dict = {
+    "segments": SegmentSet,
+    "recordings": RecordingSet,
+    "features": FeatureSet,
+    "classes": ClassInfo,
+    "enrollments": EnrollmentMap,
+    "generic": InfoTable,
+}
+
+
+def add_common_args(parser):
+    parser.add_argument(
+        "--table-type",
+        default="generic",
+        choices=list(table_dict.keys()),
+        help=f"Type of table in {list(table_dict.keys())}",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        dest="verbose",
+        default=1,
+        choices=[0, 1, 2, 3],
+        type=int,
+    )
+
+
+def make_cat_parser():
+    parser = ArgumentParser()
+    parser.add_argument("--cfg", action=ActionConfigFile)
+    parser.add_argument(
+        "--input-files", default=None, nargs="+", help="optional list of input files"
+    )
+    parser.add_argument(
+        "--output-file",
+        required=True,
+        help="""output file, if input-files is None, input file names are derived from it""",
+    )
+    parser.add_argument(
+        "--num-tables",
+        default=0,
+        type=int,
+        help="""number of jobs we used to create the individual tables""",
+    )
+    parser.add_argument(
+        "--base-idx",
+        default=1,
+        type=int,
+        help="""index of the first job, typically 0 or 1""",
+    )
+
+    add_common_args(parser)
+    return parser
+
+
+def cat(
+    table_type: str,
+    input_files: Union[List[PathLike], None],
+    output_file: PathLike,
+    num_tables: int,
+    base_idx: int = 1,
+):
+
+    assert input_files is not None or num_tables != 0
+    output_file = Path(output_file)
+    if input_files is None:
+        ext = output_file.suffix
+        input_file_base = output_file.with_suffix("")
+        input_files = []
+        for i in range(num_tables):
+            idx = base_idx + i
+            input_file_i = input_file_base.with_suffix(f".{idx}{ext}")
+            input_files.append(input_file_i)
+
+    table_class = table_dict[table_type]
+    tables = []
+    for file_path in input_files:
+        tables.append(table_class.load(file_path))
+
+    output_table = table_class.cat(tables)
+    output_table.save(output_file)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Tool to manipulate the Hyperion data tables")
+    parser.add_argument("--cfg", action=ActionConfigFile)
+
+    subcommand_action = parser.add_subcommands()
+    for subcommand in subcommands:
+        parser_func = f"make_{subcommand}_parser"
+        subparser = globals()[parser_func]()
+        subcommand_action.add_subcommand(subcommand, subparser)
+
+    args = parser.parse_args()
+    subcommand = args.subcommand
+    kwargs = namespace_to_dict(args)[args.subcommand]
+    config_logger(kwargs["verbose"])
+    del kwargs["verbose"]
+
+    globals()[subcommand](**kwargs)
diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py
index 5c999dd1..a210d429 100755
--- a/hyperion/bin/train_xvector_from_wav.py
+++ b/hyperion/bin/train_xvector_from_wav.py
@@ -6,12 +6,14 @@
 import logging
 import multiprocessing
 import os
-import sys
-import time
 from pathlib import Path
 
-from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
-                          namespace_to_dict)
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ArgumentParser,
+    namespace_to_dict,
+)
 import torch
 
 from hyperion.hyp_defs import config_logger, set_float_cpu
diff --git a/hyperion/data_prep/data_prep.py b/hyperion/data_prep/data_prep.py
index 19420761..d9828674 100644
--- a/hyperion/data_prep/data_prep.py
+++ b/hyperion/data_prep/data_prep.py
@@ -69,7 +69,6 @@ def get_recording_duration(self, recording_set):
         import itertools
         from ..utils import SCPList
 
-        # scp = SCPList(recording_set["id"].values, recording_set["storage_path"].values)
         futures = []
         logging.info("submitting threads...")
         with ThreadPoolExecutor(max_workers=self.num_threads) as pool:
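The get_recording_duration hunk above keeps the thread-pool fan-out that probes the duration of every recording. A stripped-down sketch of that submit-then-collect pattern, with a dummy probe_duration standing in for the real audio-header reader, might look like this:

# Sketch of the thread-pool fan-out used by get_recording_duration;
# probe_duration is a placeholder for the real audio-metadata reader.
from concurrent.futures import ThreadPoolExecutor


def probe_duration(path: str) -> float:
    # placeholder: a real implementation would read the file header
    return 1.0


def get_durations(paths, num_threads: int = 10):
    futures = []
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        for path in paths:
            futures.append(pool.submit(probe_duration, path))
    # futures keep submission order, so results align with paths
    return [f.result() for f in futures]


if __name__ == "__main__":
    print(get_durations(["a.wav", "b.wav"]))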
diff --git a/hyperion/data_prep/voxceleb1.py b/hyperion/data_prep/voxceleb1.py
index 00b2e380..c23b64ff 100644
--- a/hyperion/data_prep/voxceleb1.py
+++ b/hyperion/data_prep/voxceleb1.py
@@ -214,7 +214,12 @@ def get_segmentid(s):
         return enrollments, trials
 
     def prepare(self):
-
+        logging.info(
+            "Preparing VoxCeleb1 for %s corpus_dir:%s -> data_dir:%s",
+            self.task,
+            self.corpus_dir,
+            self.output_dir,
+        )
         logging.info("getting audio meta-data")
         df_meta = self._get_metadata()
         logging.info("getting language estimations")
diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py
index 1a32420f..bef34ec9 100644
--- a/hyperion/data_prep/voxceleb2.py
+++ b/hyperion/data_prep/voxceleb2.py
@@ -136,6 +136,12 @@ def make_cat_list(lists_cat_dir, rec_id, rec_files, video_idx, i):
         return file_path
 
     def prepare(self):
+        logging.info(
+            "Preparing VoxCeleb2 %s corpus_dir:%s -> data_dir:%s",
+            self.subset,
+            self.corpus_dir,
+            self.output_dir,
+        )
         logging.info("getting audio meta-data")
         df_meta = self._get_metadata()
         logging.info("getting language estimations")
@@ -224,11 +230,6 @@ def prepare(self):
                 "duration": recs.loc[rec_ids, "duration"].values,
             }
         )
-        # print(
-        #     recs.loc[rec_ids, "duration"],
-        #     len(segments),
-        #     len(recs.loc[rec_ids, "duration"]),
-        # )
 
         segments = SegmentSet(segments)
         segments.sort()
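The new VoxSRC22 preparer added below builds its trials and enrollment CSVs from the official voxsrc2022_dev.txt pairs file. A condensed pandas sketch of just that conversion (file and column names taken from the code that follows, paths simplified) is:

# Condensed sketch of the trials-file conversion done in
# prepare_track12_dev below; file names follow that code.
import pandas as pd

df_in = pd.read_csv(
    "voxsrc2022_dev.txt",
    header=None,
    sep=" ",
    names=["key", "enroll_file", "test_file"],
)
# key 1 -> target trial, key 0 -> nontarget trial
df_trials = pd.DataFrame(
    {
        "modelid": df_in["enroll_file"],
        "segmentid": df_in["test_file"],
        "targettype": ["target" if k == 1 else "nontarget" for k in df_in["key"]],
    }
)
df_trials.sort_values(by=["modelid", "segmentid"], inplace=True)
df_trials.to_csv("trials.csv", index=False)

# each enrollment model is a single segment, so the map is the identity
modelid = df_trials["modelid"].sort_values().unique()
pd.DataFrame({"modelid": modelid, "segmentid": modelid}).to_csv(
    "enrollment.csv", index=False
)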
diff --git a/hyperion/data_prep/voxsrc22.py b/hyperion/data_prep/voxsrc22.py
new file mode 100644
index 00000000..1999262a
--- /dev/null
+++ b/hyperion/data_prep/voxsrc22.py
@@ -0,0 +1,212 @@
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+import glob
+import re
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from jsonargparse import ActionYesNo
+from tqdm import tqdm
+
+from ..utils import ClassInfo, Dataset, RecordingSet, SegmentSet
+from ..utils.misc import PathLike, urlretrieve_progress
+from .data_prep import DataPrep
+
+
+class VoxSRC22DataPrep(DataPrep):
+    """Class to prepare VoxSRC22 dev/test data
+    Attributes:
+      corpus_dir: input data directory
+      vox1_corpus_dir: input data directory for VoxCeleb1
+      subset: subset of the data, dev or test
+      output_dir: output data directory
+      target_sample_freq: target sampling frequency to convert the audios to.
+    """
+
+    def __init__(
+        self,
+        corpus_dir: PathLike,
+        vox1_corpus_dir: PathLike,
+        subset: str,
+        output_dir: PathLike,
+        use_kaldi_ids: bool,
+        target_sample_freq: int,
+        num_threads: int = 10,
+    ):
+        use_kaldi_ids = False
+        super().__init__(
+            corpus_dir, output_dir, use_kaldi_ids, target_sample_freq, num_threads
+        )
+
+        assert (
+            vox1_corpus_dir is not None or subset == "test"
+        ), "dev set needs the VoxCeleb1 corpus dir"
+        self.subset = subset
+        self.vox1_corpus_dir = (
+            None if vox1_corpus_dir is None else Path(vox1_corpus_dir)
+        )
+
+    @staticmethod
+    def dataset_name():
+        return "voxsrc22"
+
+    @staticmethod
+    def add_class_args(parser):
+        DataPrep.add_class_args(parser)
+        parser.add_argument(
+            "--subset",
+            default="dev",
+            choices=["dev", "test"],
+            help="""voxsrc22 subset in [dev, test]""",
+        )
+        parser.add_argument(
+            "--vox1-corpus-dir",
+            default=None,
+            help="""corpus directory of voxceleb 1.""",
+        )
+
+    def prepare_track12_dev(self):
+        logging.info(
+            "Preparing VoxSRC22 %s corpus:%s + %s -> %s",
+            self.subset,
+            self.corpus_dir,
+            self.vox1_corpus_dir,
+            self.output_dir,
+        )
+        logging.info("making trials")
+        trials_file = self.corpus_dir / "voxsrc2022_dev.txt"
+        df_in = pd.read_csv(
+            trials_file,
+            header=None,
+            sep=" ",
+            names=["key", "enroll_file", "test_file"],
+        )
+        key = ["target" if k == 1 else "nontarget" for k in df_in["key"]]
+
+        modelid = df_in["enroll_file"]
+        segmentid = df_in["test_file"]
+        df_trials = pd.DataFrame(
+            {"modelid": modelid, "segmentid": segmentid, "targettype": key}
+        )
+        df_trials.sort_values(by=["modelid", "segmentid"], inplace=True)
+        file_path = self.output_dir / "trials.csv"
+        df_trials.to_csv(file_path, index=False)
+        trials = {"trials": file_path}
+        modelid = df_trials["modelid"].sort_values().unique()
+        uniq_segmentid = df_trials["segmentid"].sort_values().unique()
+        uniq_segmentid = np.unique(np.concatenate((uniq_segmentid, modelid), axis=0))
+
+        logging.info("making enrollment map")
+        df_enroll = pd.DataFrame({"modelid": modelid, "segmentid": modelid})
+        file_path = self.output_dir / "enrollment.csv"
+        df_enroll.to_csv(file_path, index=False)
+        enrollments = {"enrollment": file_path}
+
+        logging.info("making RecordingSet")
+        vox1_segmentid = []
+        vox22_segmentid = []
+        for s in uniq_segmentid:
+            if "VoxSRC2022_dev" in s:
+                vox22_segmentid.append(s)
+            else:
+                vox1_segmentid.append(s)
+
+        vox1_rec_files = [
+            glob.glob(f"{self.vox1_corpus_dir}/**/{s}", recursive=True)[0]
+            for s in vox1_segmentid
+        ]
+        vox22_rec_files = [
+            glob.glob(f"{self.corpus_dir}/**/{s}", recursive=True)[0]
+            for s in vox22_segmentid
+        ]
+        rec_ids = vox22_segmentid + vox1_segmentid
+        rec_files = vox22_rec_files + vox1_rec_files
+
+        recs = pd.DataFrame({"id": rec_ids, "storage_path": rec_files})
+        recs = RecordingSet(recs)
+        recs.sort()
+
+        logging.info("getting recording durations")
+        self.get_recording_duration(recs)
+        if self.target_sample_freq:
+            recs["target_sample_freq"] = self.target_sample_freq
+
+        logging.info("making SegmentsSet")
+        segments = pd.DataFrame({"id": rec_ids,})
+        segments = SegmentSet(segments)
+        segments.sort()
+
+        logging.info("making dataset")
+        dataset = Dataset(
+            segments,
+            recordings={"recordings": recs},
+            enrollments=enrollments,
+            trials=trials,
+            sparse_trials=False,
+        )
+        logging.info("saving dataset at %s", self.output_dir)
+        dataset.save(self.output_dir)
+        logging.info(
+            "dataset contains %d segments", len(segments),
+        )
+
+        # wav_file = voxsrc22_corpus_dir / file_id
+        # wav_file = vox1_corpus_dir / "wav" / file_id
+        # logging.info("searching audio files in %s", 
self.vox1_corpus_dir) + # vox1_rec_files = list(self.vox1_corpus_dir.glob("**/*.wav")) + # if not vox1_rec_files: + # # symlinks? try glob + # vox1_rec_files = [ + # Path(f) for f in glob.iglob(f"{self.vox1_corpus_dir}/**/*.wav", recursive=True) + # ] + + # vox1_rec_ids = [ f.parent.parent.name / f.parent.name / f.name for f in vox1_rec_files] + # rec_files = + + # rec_files = list(self.corpus_dir.glob("**/*.wav")) + # if not rec_files: + # # symlinks? try glob + # rec_files = [ + # Path(f) for f in glob.iglob(f"{self.corpus_dir}/**/*.wav", recursive=True) + # ] + + # u2s_file = output_dir / "utt2spk" + # logging.info("creating utt2spk file %s", u2s_file) + # file_ids = np.unique(np.concatenate((df_trials["enroll"], df_trials["test"]))) + # with open(u2s_file, "w") as f: + # for file_id in file_ids: + # f.write("%s %s\n" % (file_id, file_id)) + + # s2u_file = output_dir / "spk2utt" + # logging.info("creating spk2utt file %s", s2u_file) + # with open(s2u_file, "w") as f: + # for file_id in file_ids: + # f.write("%s %s\n" % (file_id, file_id)) + + # wav_file = output_dir / "wav.scp" + # logging.info("creating wav.scp file %s", wav_file) + # with open(wav_file, "w") as f: + # for file_id in file_ids: + # if "VoxSRC2022_dev" in file_id: + # wav_file = voxsrc22_corpus_dir / file_id + # else: + # wav_file = vox1_corpus_dir / "wav" / file_id + + # f.write("%s %s\n" % (file_id, wav_file)) + + def prepare_track12_test(self): + logging.info( + "Preparing VoxSRC22 %s corpus:%s -> %s", + self.subset, + self.corpus_dir, + self.output_dir, + ) + + def prepare(self): + if self.subset == "dev": + self.prepare_track12_dev() + else: + self.prepare_track12_test() diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index a6f20a8e..c8565d1d 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -21,13 +21,17 @@ from torch.optim.swa_utils import SWALR, AveragedModel from ...utils.misc import filter_func_args -from ..loggers import (CSVLogger, LoggerList, ProgLogger, TensorBoardLogger, - WAndBLogger) +from ..loggers import CSVLogger, LoggerList, ProgLogger, TensorBoardLogger, WAndBLogger from ..lr_schedulers import LRScheduler as LRS from ..lr_schedulers import LRSchedulerFactory as LRSF from ..optim import OptimizerFactory as OF -from ..utils import (FairFullyShardedDDP, FairShardedDDP, MetricAcc, TorchDDP, - tensors_subset) +from ..utils import ( + FairFullyShardedDDP, + FairShardedDDP, + MetricAcc, + TorchDDP, + tensors_subset, +) class DDPType(str, Enum): @@ -72,6 +76,7 @@ class TorchTrainer(object): input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
""" + def __init__( self, model, @@ -113,8 +118,9 @@ def __init__( self.exp_path = Path(exp_path) if loggers is None: - self.loggers = self._default_loggers(log_interval, use_tensorboard, - use_wandb, wandb) + self.loggers = self._default_loggers( + log_interval, use_tensorboard, use_wandb, wandb + ) elif isinstance(loggers, list): self.loggers = LoggerList(loggers) else: @@ -149,29 +155,23 @@ def __init__( self.rank = dist.get_rank() self.world_size = dist.get_world_size() if ddp_type == DDPType.DDP or ddp_type == DDPType.OSS_DDP: - self.model = nn.SyncBatchNorm.convert_sync_batchnorm( - self.model) + self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) if self.rank == 0: logging.info( "training in multiple gpus with distributed-data-parallel" ) oss = False if ddp_type == DDPType.DDP else True - self.optimizer = self._make_optimizer(optim, - self.model, - oss=oss) + self.optimizer = self._make_optimizer(optim, self.model, oss=oss) self.model = TorchDDP( self.model, device_ids=[device], output_device=device, ) elif ddp_type == DDPType.OSS_SHARDED_DDP: - self.model = nn.SyncBatchNorm.convert_sync_batchnorm( - self.model) + self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) if self.rank == 0: logging.info( "training in multiple gpus with fair sharded-distributed-data-parallel" ) - self.optimizer = self._make_optimizer(optim, - self.model, - oss=True) + self.optimizer = self._make_optimizer(optim, self.model, oss=True) self.model = FairShardedDDP(self.model, self.optimizer) else: if self.rank == 0: @@ -184,9 +184,7 @@ def __init__( mixed_precision=self.use_amp, move_params_to_cpu=cpu_offload, ) - self.optimizer = self._make_optimizer(optim, - self.model, - oss=False) + self.optimizer = self._make_optimizer(optim, self.model, oss=False) else: self.optimizer = self._make_optimizer(optim, self.model) @@ -216,9 +214,9 @@ def __init__( if self.rank == 0: logging.info("init SWA model") self.swa_model = AveragedModel(self.model) - self.swa_scheduler = SWALR(self.optimizer, - swa_lr=self.swa_lr, - anneal_epochs=self.swa_anneal_epochs) + self.swa_scheduler = SWALR( + self.optimizer, swa_lr=self.swa_lr, anneal_epochs=self.swa_anneal_epochs + ) def set_epoch(self, data_loader): try: @@ -252,8 +250,7 @@ def fit(self, train_data, val_data=None): if self.lr_scheduler is not None: # this is needed by cosine scheduler epoch_updates = int(len(train_data) / self.grad_acc_steps) - self.lr_scheduler.on_epoch_begin(epoch, - epoch_updates=epoch_updates) + self.lr_scheduler.on_epoch_begin(epoch, epoch_updates=epoch_updates) logs = self.train_epoch(train_data) if val_data is not None: @@ -275,8 +272,7 @@ def fit(self, train_data, val_data=None): self.save_checkpoint(logs) if self.in_swa: - self.loggers.on_epoch_begin(self.cur_epoch, - batches=len(train_data)) + self.loggers.on_epoch_begin(self.cur_epoch, batches=len(train_data)) self.model = self.swa_model.module logs = self.bn_update_epoch(train_data) @@ -351,16 +347,16 @@ def validation_epoch(self, data_loader, swa_update_bn=False): with torch.no_grad(): if swa_update_bn: log_tag = "train_" - self.train() + self.model.train() else: log_tag = "val_" self.model.eval() for batch, data in enumerate(data_loader): - input_data, target = tensors_subset(data, batch_keys, self.device) - batch_size = input_data.size(0) + x, target = tensors_subset(data, batch_keys, self.device) + batch_size = x.size(0) with amp.autocast(enabled=self.use_amp): - output = self.model(input_data) + output = self.model(x) loss = self.loss(output, target) 
batch_metrics["loss"] = loss.mean().item()
@@ -381,9 +377,9 @@ def bn_update_epoch(self, data_loader):
 
     def _clip_grad_norm(self, model, optim, grad_clip, grad_clip_norm):
         if self.ddp:
             if self.ddp_type == DDPType.DDP:
-                nn.utils.clip_grad_norm_(model.parameters(),
-                                         grad_clip,
-                                         norm_type=grad_clip_norm)
+                nn.utils.clip_grad_norm_(
+                    model.parameters(), grad_clip, norm_type=grad_clip_norm
+                )
                 return
             if self.ddp_type == DDPType.FULLY_SHARDED_DDP:
                 # we have to use the member function in FullyShardedDDP class
@@ -395,24 +391,26 @@ def _clip_grad_norm(self, model, optim, grad_clip, grad_clip_norm):
                 optim.clip_grad_norm(grad_clip, norm_type=grad_clip_norm)
 
         # if no DDP clip normally
-        nn.utils.clip_grad_norm_(model.parameters(),
-                                 grad_clip,
-                                 norm_type=grad_clip_norm)
+        nn.utils.clip_grad_norm_(
+            model.parameters(), grad_clip, norm_type=grad_clip_norm
+        )
 
     def update_model(self):
         """Updates the model and does gradient clipping."""
         if self.use_amp:
             if self.grad_clip > 0:
                 self.grad_scaler.unscale_(self.optimizer)
-                self._clip_grad_norm(self.model, self.optimizer,
-                                     self.grad_clip, self.grad_clip_norm)
+                self._clip_grad_norm(
+                    self.model, self.optimizer, self.grad_clip, self.grad_clip_norm
+                )
             self.grad_scaler.step(self.optimizer)
             self.grad_scaler.update()
         else:
             if self.grad_clip > 0:
-                self._clip_grad_norm(self.model, self.optimizer,
-                                     self.grad_clip, self.grad_clip_norm)
+                self._clip_grad_norm(
+                    self.model, self.optimizer, self.grad_clip, self.grad_clip_norm
+                )
 
             self.optimizer.step()
@@ -441,20 +439,21 @@ def _make_lr_sched(self, lr_sched, optim):
             lr_sched = LRSF.create(optim, **args)
         return lr_sched
 
-    def _default_loggers(self, log_interval, use_tensorboard, use_wandb,
-                         wandb):
+    def _default_loggers(self, log_interval, use_tensorboard, use_wandb, wandb):
         """Creates the default loggers"""
         prog_log = ProgLogger(interval=log_interval)
         csv_log = CSVLogger(self.exp_path / "train.log", append=True)
         loggers = [prog_log, csv_log]
         if use_tensorboard:
             loggers.append(
-                TensorBoardLogger(self.exp_path / "tb", interval=log_interval))
+                TensorBoardLogger(self.exp_path / "tb", interval=log_interval)
+            )
         if use_wandb:
             loggers.append(
-                WAndBLogger(**wandb,
-                            path=self.exp_path / "wandb",
-                            interval=log_interval))
+                WAndBLogger(
+                    **wandb, path=self.exp_path / "wandb", interval=log_interval
+                )
+            )
         return LoggerList(loggers)
 
     def _get_lr(self):
@@ -478,7 +477,8 @@ def _compute_grad_acc_steps(self, data_loader):
                 return
 
         self.grad_acc_steps = int(
-            math.ceil(self.eff_batch_size / batch_size / self.world_size))
+            math.ceil(self.eff_batch_size / batch_size / self.world_size)
+        )
         logging.info(
             "Setting grad_acc_steps=%d for "
             "eff_batch_size=%d, avg_batch_size=%d, world_size=%d",
@@ -502,30 +502,24 @@ def checkpoint(self, logs=None):
           logs: logs containing the current value of the metrics.
""" checkpoint = { - "epoch": - self.cur_epoch, - "rng_state": - torch.get_rng_state(), - "model_cfg": - self.model.get_config(), - "model_state_dict": - self.model.state_dict(), - "optimizer_state_dict": - self.optimizer.state_dict(), - "loss_state_dict": - self.loss.state_dict() if self.loss is not None else None, + "epoch": self.cur_epoch, + "rng_state": torch.get_rng_state(), + "model_cfg": self.model.get_config(), + "model_state_dict": self.model.state_dict(), + "optimizer_state_dict": self.optimizer.state_dict(), + "loss_state_dict": self.loss.state_dict() + if self.loss is not None + else None, } if self.lr_scheduler is not None: - checkpoint[ - "lr_scheduler_state_dict"] = self.lr_scheduler.state_dict() + checkpoint["lr_scheduler_state_dict"] = self.lr_scheduler.state_dict() if logs is not None: checkpoint["logs"] = logs if self.in_swa: checkpoint["swa_model_state_dict"] = self.swa_model.state_dict() - checkpoint[ - "swa_scheduler_state_dict"] = self.swa_scheduler.state_dict() + checkpoint["swa_scheduler_state_dict"] = self.swa_scheduler.state_dict() return checkpoint @@ -535,8 +529,9 @@ def save_checkpoint(self, logs=None): Args: logs: logs containing the current value of the metrics. """ - if self.ddp and (self.ddp_type == DDPType.OSS_DDP - or self.ddp_type == DDPType.OSS_SHARDED_DDP): + if self.ddp and ( + self.ddp_type == DDPType.OSS_DDP or self.ddp_type == DDPType.OSS_SHARDED_DDP + ): # Not sure what this does, just copying from the example in # https://github.com/facebookresearch/fairscale/blob/master/benchmarks/oss.py # Check the checkpointing in the case of the OSS optimizer @@ -591,17 +586,16 @@ def load_checkpoint(self, file_path): if self.loss is not None: self.loss.load_state_dict(checkpoint["loss_state_dict"]) if self.lr_scheduler is not None: - self.lr_scheduler.load_state_dict( - checkpoint["lr_scheduler_state_dict"]) + self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"]) # if self.use_amp: # amp.load_state_dict(checkpoint['amp']) if self.do_swa: if "swa_model_state_dict" in checkpoint: - self.swa_model.load_state_dict( - checkpoint["swa_model_state_dict"]) + self.swa_model.load_state_dict(checkpoint["swa_model_state_dict"]) self.swa_scheduler.load_state_dict( - checkpoint["swa_scheduler_state_dict"]) + checkpoint["swa_scheduler_state_dict"] + ) else: self.swa_scheduler = SWALR( self.optimizer, @@ -681,13 +675,9 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): "--eff-batch-size", type=int, default=None, - help= - "effective total batch size, if given, it overrides grad_acc_steps", + help="effective total batch size, if given, it overrides grad_acc_steps", ) - parser.add_argument("--epochs", - type=int, - default=200, - help="number of epochs") + parser.add_argument("--epochs", type=int, default=200, help="number of epochs") if train_modes is not None: parser.add_argument( "--train-mode", @@ -707,19 +697,12 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): default=False, help="use tensorboard logger", ) - parser.add_argument("--use-wandb", - action="store_true", - default=False, - help="use wandb logger") - parser.add_argument("--wandb.project", - default=None, - help="wandb project name") - parser.add_argument("--wandb.group", - default=None, - help="wandb group name") - parser.add_argument("--wandb.name", - default=None, - help="wandb display name") + parser.add_argument( + "--use-wandb", action="store_true", default=False, help="use wandb logger" + ) + parser.add_argument("--wandb.project", 
default=None, help="wandb project name") + parser.add_argument("--wandb.group", default=None, help="wandb group name") + parser.add_argument("--wandb.name", default=None, help="wandb display name") # parser.add_argument( # '--wandb.path', default=None, # help='wandb directory') @@ -748,10 +731,9 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): default=False, help="CPU offload of gradients when using fully_sharded_ddp", ) - parser.add_argument("--grad-clip", - type=float, - default=0, - help="gradient clipping norm value") + parser.add_argument( + "--grad-clip", type=float, default=0, help="gradient clipping norm value" + ) parser.add_argument( "--grad-clip-norm", default=2, @@ -764,10 +746,9 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): default=0, help="start epoch for SWA, if 0 it does not use SWA", ) - parser.add_argument("--swa-lr", - type=float, - default=1e-3, - help="learning rate for SWA phase") + parser.add_argument( + "--swa-lr", type=float, default=1e-3, help="learning rate for SWA phase" + ) parser.add_argument( "--swa-anneal-epochs", type=int, @@ -786,7 +767,6 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) add_argparse_args = add_class_args diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index eddf47a7..a59cbe14 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -101,16 +101,16 @@ def train_epoch(self, data_loader): metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() - for batch, (data, target) in enumerate(data_loader): + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - input_data, target = tensors_subset(data, batch_keys, self.device) - batch_size = input_data.size(0) + x, target = tensors_subset(data, batch_keys, self.device) + batch_size = x.size(0) with amp.autocast(enabled=self.use_amp): - output = self.model(input_data, y=target) + output = self.model(x, y=target) loss = self.loss(output, target).mean() / self.grad_acc_steps if self.use_amp: diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 52474baa..0f6ccd9b 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -106,10 +106,10 @@ def train_epoch(self, data_loader): if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - input_data, target = tensors_subset(data, batch_keys, self.device) - batch_size = input_data.size(0) + audio, target = tensors_subset(data, batch_keys, self.device) + batch_size = audio.size(0) with torch.no_grad(): - feats, feats_lengths = self.feat_extractor(input_data) + feats, feats_lengths = self.feat_extractor(audio) with amp.autocast(enabled=self.use_amp): output = self.model(feats, feats_lengths, y=target) @@ -159,10 +159,10 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.eval() for batch, data in enumerate(data_loader): - input_data, target = tensors_subset(data, batch_keys, self.device) - batch_size = input_data.size(0) + audio, target = tensors_subset(data, batch_keys, self.device) + batch_size = audio.size(0) - 
feats, feats_lengths = self.feat_extractor(input_data)
+            feats, feats_lengths = self.feat_extractor(audio)
             with amp.autocast(enabled=self.use_amp):
                 output = self.model(feats, feats_lengths)
                 loss = self.loss(output, target)
diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py
index 70ee82c8..fe72339f 100644
--- a/hyperion/utils/class_info.py
+++ b/hyperion/utils/class_info.py
@@ -70,8 +70,33 @@ def load(cls, file_path, sep=None):
             if ext == "":
                 # if no extension we load as kaldi utt2spk file
                 df = pd.read_csv(
-                    file_path, sep=" ", header=None, names=["id"], dtype={"id": np.str},
+                    file_path,
+                    sep=" ",
+                    header=None,
+                    names=["id"],
+                    dtype={"id": np.str},
                 )
                 return cls(df)
 
         return super().load(file_path, sep)
+
+    @classmethod
+    def cat(cls, tables):
+        """Concatenates several tables.
+
+        Args:
+          info_lists: List of InfoTables
+
+        Returns:
+          InfoTable object concatenating the info_lists.
+        """
+        df_list = [table.df for table in tables]
+        df = pd.concat(df_list)
+        assert df["id"].is_unique, """there are duplicated ids in original tables"""
+        if not df["class_idx"].is_unique:
+            logging.warning(
+                """class_idx in concat tables are not unique,
+                we will assign new class_idx"""
+            )
+            df.drop(columns=["class_idx"], inplace=True)
+        return cls(df)
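The new ClassInfo.cat above enforces unique ids and drops the per-table class_idx column when the indices collide, so that a consistent numbering can be assigned later. A tiny illustration of that policy with plain DataFrames (not the actual ClassInfo API):

# Illustration of the ClassInfo.cat concatenation policy with plain
# pandas DataFrames; the data values are made up.
import logging

import pandas as pd

t1 = pd.DataFrame({"id": ["spk1", "spk2"], "class_idx": [0, 1]})
t2 = pd.DataFrame({"id": ["spk3", "spk4"], "class_idx": [0, 1]})

df = pd.concat([t1, t2])
assert df["id"].is_unique, "there are duplicated ids in the original tables"
if not df["class_idx"].is_unique:
    # per-table indices collide, so drop them; the real class can then
    # re-assign a fresh, consistent numbering over the merged table
    logging.warning("class_idx in concatenated tables are not unique")
    df = df.drop(columns=["class_idx"])

print(df)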
diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py
index e6c9e861..0ef81ab6 100644
--- a/hyperion/utils/dataset.py
+++ b/hyperion/utils/dataset.py
@@ -20,21 +20,21 @@
 
 class Dataset:
-    """ Class that contains all objects
-    (segments, recordings, features, class_infos) that
-    conform a dataset
-
-    Attributes:
-      segments: SegmentSet object or path to it.
-      classes: Dictionary of ClassInfo objects or paths to then
-      recordings: Dictionary of RecordingSet objects or paths to then
-      features: Dictionary of FeatureSet objects or paths to then
-      enrollments: Dictionary of EnrollmentMap objects or paths to then
-      trials: Dictionary of TrialKey/TrialNdx/SparseTrialKey objects
-        or paths to then
-      sparse_trials: load trial keys using the SparseTrialKey class instead
-        of TrialKey class.
-      table_sep: Column separator when reading/writting tables
+    """Class that contains all objects
+    (segments, recordings, features, class_infos) that
+    form a dataset
+
+    Attributes:
+      segments: SegmentSet object or path to it.
+      classes: Dictionary of ClassInfo objects or paths to them
+      recordings: Dictionary of RecordingSet objects or paths to them
+      features: Dictionary of FeatureSet objects or paths to them
+      enrollments: Dictionary of EnrollmentMap objects or paths to them
+      trials: Dictionary of TrialKey/TrialNdx/SparseTrialKey objects
+        or paths to them
+      sparse_trials: load trial keys using the SparseTrialKey class instead
+        of TrialKey class.
+      table_sep: Column separator when reading/writing tables
     """
 
@@ -70,10 +70,12 @@ def __init__(
             features, FeatureSet
         )
         self._enrollments, self._enrollments_paths = self._parse_dict_args(
-            enrollments, EnrollmentMap,
+            enrollments,
+            EnrollmentMap,
         )
         self._trials, self._trials_paths = self._parse_dict_args(
-            trials, (TrialKey, TrialNdx, SparseTrialKey),
+            trials,
+            (TrialKey, TrialNdx, SparseTrialKey),
         )
         self.sparse_trials = sparse_trials
 
@@ -217,16 +219,41 @@ def save(
         dataset_path: PathLike,
         update_paths: bool = True,
         table_sep: Optional[str] = None,
+        force_save_all: bool = False,
     ):
-        """Saves all the dataset objects.
+        """Saves the dataset to disk.
 
         Args:
-          dataset_path: str/Path indicating directory
-          to save the dataset or .yaml file to save the dataset info.
-          update_paths: whether to update the file_paths in the
-          data structures in the DateSet object
+          dataset_path: str/Path indicating directory
+            to save the dataset or .yaml file to save
+            the dataset info.
+          update_paths: whether to update the file_paths in the
+            data structures in the DataSet object
+          force_save_all: forces saving all tables even if they haven't changed,
+            otherwise, it only saves tables loaded in memory
+            and those that are not in the data directory
+        """
+        if force_save_all:
+            self.save_all(dataset_path, update_paths, table_sep)
+        else:
+            self.save_changed(dataset_path, update_paths, table_sep)
 
+    def save_changed(
+        self,
+        dataset_path: PathLike,
+        update_paths: bool = True,
+        table_sep: Optional[str] = None,
+        force_save_all: bool = False,
+    ):
+        """Saves the tables that changed on disk or tables
+        that are not in the output directory.
+
+        Args:
+          dataset_path: str/Path indicating directory
+            to save the dataset or .yaml file to save
+            the dataset info.
+          update_paths: whether to update the file_paths in the
+            data structures in the DataSet object
         """
         table_sep = self.table_sep if table_sep is None else table_sep
         if update_paths:
@@ -238,12 +265,139 @@ def save(
         file_name = f"segments{table_ext}"
         dataset["segments"] = file_name
         file_path = dataset_dir / file_name
-        self.segments().save(file_path, sep=table_sep)
+        if (
+            self._segments is not None
+            or file_path != self._segments_path
+            or not file_path.exists()
+        ):
+            self.segments(keep_loaded=False).save(file_path, sep=table_sep)
+            if update_paths:
+                self._segments_path = file_path
+
+        file_names = {}
+        for k in self._recordings.keys():
+            file_name = k + table_ext
+            file_names[k] = file_name
+            file_path = dataset_dir / file_name
+            if (
+                self._recordings[k] is not None
+                or file_path != self._recordings_paths[k]
+                or not file_path.exists()
+            ):
+                v = self.recordings_value(k, keep_loaded=False)
+                v.save(file_path, sep=table_sep)
+                if update_paths:
+                    self._recordings_paths[k] = file_path
+
+        if file_names:
+            dataset["recordings"] = file_names
+
+        file_names = {}
+        for k in self._features.keys():
+            file_name = k + table_ext
+            file_names[k] = file_name
+            file_path = dataset_dir / file_name
+            if (
+                self._features[k] is not None
+                or file_path != self._features_paths[k]
+                or not file_path.exists()
+            ):
+                v = self.features_value(k, keep_loaded=False)
+                v.save(file_path, sep=table_sep)
+                if update_paths:
+                    self._features_paths[k] = file_path
+
+        if file_names:
+            dataset["features"] = file_names
+
+        file_names = {}
+        for k in self._classes.keys():
+            file_name = k + table_ext
+            file_names[k] = file_name
+            file_path = dataset_dir / file_name
+            if (
+                self._classes[k] is not None
+                or file_path != self._classes_paths[k]
+                or not file_path.exists()
+            ):
+                v = self.classes_value(k, keep_loaded=False)
+                v.save(file_path, sep=table_sep)
+                if update_paths:
+                    self._classes_paths[k] = file_path
+
+        if file_names:
+            dataset["classes"] = file_names
+
+        file_names = {}
+        for k in self._enrollments.keys():
+            file_name = k + table_ext
+            file_names[k] = file_name
+            file_path = dataset_dir / file_name
+            if (
+                self._enrollments[k] is not None
+                or file_path != self._enrollments_paths[k]
+                or not file_path.exists()
+            ):
+                v = self.enrollments_value(k, keep_loaded=False)
+                v.save(file_path, sep=table_sep)
+                if update_paths:
+                    self._enrollments_paths[k] = file_path
+
+        if file_names:
+            dataset["enrollments"] = file_names
+
+        file_names = {}
+        for k in self._trials.keys():
+            file_name = k + table_ext
+            file_names[k] = file_name
+            file_path = dataset_dir / file_name
+            if (
+                self._trials[k] is not None
+                or file_path != self._trials_paths[k]
+                or not file_path.exists()
+            ):
+                v = self.trials_value(k, keep_loaded=False)
+                v.save(file_path)
+                if update_paths:
+                    self._trials_paths[k] = file_path
+
+        if file_names:
+            dataset["trials"] = file_names
+
+        with open(dataset_file, "w") as f:
+            yaml.dump(dataset, f)
+
+    def save_all(
+        self,
+        dataset_path: PathLike,
+        update_paths: bool = True,
+        table_sep: Optional[str] = None,
+    ):
+        """Saves all the dataset objects.
+
+        Args:
+          dataset_path: str/Path indicating directory
+            to save the dataset or .yaml file to save
+            the dataset info.
+          update_paths: whether to update the file_paths in the
+            data structures in the DataSet object
+        """
+        table_sep = self.table_sep if table_sep is None else table_sep
+        if update_paths:
+            self.table_sep = table_sep
+
+        table_ext = ".tsv" if table_sep == "\t" else ".csv"
+        dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path)
+        dataset = {}
+        file_name = f"segments{table_ext}"
+        dataset["segments"] = file_name
+        file_path = dataset_dir / file_name
+        self.segments(keep_loaded=False).save(file_path, sep=table_sep)
         if update_paths:
             self._segments_path = file_path
 
         file_names = {}
-        for k, v in self.recordings():
+        for k, v in self.recordings(keep_loaded=False):
             file_name = k + table_ext
             file_names[k] = file_name
             file_path = dataset_dir / file_name
@@ -255,7 +409,7 @@ def save(
             dataset["recordings"] = file_names
 
         file_names = {}
-        for k, v in self.features():
+        for k, v in self.features(keep_loaded=False):
             file_name = k + table_ext
             file_names[k] = file_name
             file_path = dataset_dir / file_name
@@ -267,7 +421,7 @@ def save(
             dataset["features"] = file_names
 
         file_names = {}
-        for k, v in self.classes():
+        for k, v in self.classes(keep_loaded=False):
             file_name = k + table_ext
             file_names[k] = file_name
             file_path = dataset_dir / file_name
@@ -279,7 +433,7 @@ def save(
             dataset["classes"] = file_names
 
         file_names = {}
-        for k, v in self.enrollments():
+        for k, v in self.enrollments(keep_loaded=False):
             file_name = k + table_ext
             file_names[k] = file_name
             file_path = dataset_dir / file_name
@@ -291,7 +445,7 @@ def save(
             dataset["enrollments"] = file_names
 
         file_names = {}
-        for k, v in self.trials():
+        for k, v in self.trials(keep_loaded=False):
             file_name = k + table_ext
             file_names[k] = file_name
             file_path = dataset_dir / file_name
@@ -329,8 +483,8 @@ def load(
         """Loads all the dataset objects.
 
         Args:
-          dataset_path: str/Path indicating directory
-          to save the dataset or .yaml file to save
+          dataset_path: str/Path indicating directory
+            to load the dataset or .yaml file with
           the dataset info.
           lazy: load data structures lazily when they are needed.
sparse_trials: load trial keys using the SparseTrialKey class instead
           of TrialKey class
@@ -386,34 +540,64 @@ def load(
 
         return dataset
 
-    # dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path)
-    # with open(dataset_file, "w") as f:
-    #     dataset = yaml.safe_load(f)
-
-    # assert "segments" in dataset
-    # segments = SegmentSet.load(
-    #     Dataset.resolve_file_path(dataset_dir, dataset["segments"])
-    # )
-    # classes = None
-    # recordings = None
-    # features = None
-    # if "classes" in dataset:
-    #     classes = {}
-    #     for k, v in dataset["classes"]:
-    #         classes[k] = ClassInfo.load(Dataset.resolve_file_path(dataset_dir, v))
-
-    # if "recordings" in dataset:
-    #     recordings = {}
-    #     for k, v in dataset["recordings"]:
-    #         recordings[k] = RecordingSet.load(
-    #             Dataset.resolve_file_path(dataset_dir, v)
-    #         )
-
-    # if "features" in dataset:
-    #     features = {}
-    #     for k, v in dataset["features"]:
-    #         features[k] = FeatureSet.load(Dataset.resolve_file_path(dataset_dir, v))
-
-    # dataset = cls(segments, classes, recordings, features)
-    # if not lazy:
-    #     dataset.update_from_disk()
+    def add_features(self, features_name: str, features: Union[PathLike, FeatureSet]):
+        if isinstance(features, (str, Path)):
+            self._features[features_name] = None
+            self._features_paths[features_name] = features
+        elif isinstance(features, FeatureSet):
+            self._features[features_name] = features
+            self._features_paths[features_name] = None
+        else:
+            raise ValueError()
+
+    def add_recordings(
+        self,
+        recordings_name: str,
+        recordings: Union[PathLike, RecordingSet],
+    ):
+        if isinstance(recordings, (str, Path)):
+            self._recordings[recordings_name] = None
+            self._recordings_paths[recordings_name] = recordings
+        elif isinstance(recordings, RecordingSet):
+            self._recordings[recordings_name] = recordings
+            self._recordings_paths[recordings_name] = None
+        else:
+            raise ValueError()
+
+    def add_classes(self, classes_name: str, classes: Union[PathLike, ClassInfo]):
+        if isinstance(classes, (str, Path)):
+            self._classes[classes_name] = None
+            self._classes_paths[classes_name] = classes
+        elif isinstance(classes, ClassInfo):
+            self._classes[classes_name] = classes
+            self._classes_paths[classes_name] = None
+        else:
+            raise ValueError()
+
+    def add_enrollments(
+        self,
+        enrollments_name: str,
+        enrollments: Union[PathLike, EnrollmentMap],
+    ):
+        if isinstance(enrollments, (str, Path)):
+            self._enrollments[enrollments_name] = None
+            self._enrollments_paths[enrollments_name] = enrollments
+        elif isinstance(enrollments, EnrollmentMap):
+            self._enrollments[enrollments_name] = enrollments
+            self._enrollments_paths[enrollments_name] = None
+        else:
+            raise ValueError()
+
+    def add_trials(
+        self,
+        trials_name: str,
+        trials: Union[PathLike, TrialKey, TrialNdx, SparseTrialKey],
+    ):
+        if isinstance(trials, (str, Path)):
+            self._trials[trials_name] = None
+            self._trials_paths[trials_name] = trials
+        elif isinstance(trials, (TrialKey, TrialNdx, SparseTrialKey)):
+            self._trials[trials_name] = trials
+            self._trials_paths[trials_name] = None
+        else:
+            raise ValueError()
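All of the add_* methods above follow one registration pattern: each table name maps either to an in-memory object (path left as None) or to a path that is loaded lazily on first use. A stripped-down sketch of that pattern, with a generic Table placeholder standing in for RecordingSet/FeatureSet/ClassInfo/etc., is:

# Stripped-down sketch of the Dataset.add_* registration pattern;
# Table is a placeholder for the real Hyperion table classes.
from pathlib import Path
from typing import Dict, Optional, Union


class Table:
    pass


class MiniDataset:
    def __init__(self):
        self._features: Dict[str, Optional[Table]] = {}
        self._features_paths: Dict[str, Optional[Path]] = {}

    def add_features(self, name: str, features: Union[str, Path, Table]):
        if isinstance(features, (str, Path)):
            # lazy: remember the path, load the table on first use
            self._features[name] = None
            self._features_paths[name] = Path(features)
        elif isinstance(features, Table):
            # eager: keep the object, there is no backing file yet
            self._features[name] = features
            self._features_paths[name] = None
        else:
            raise ValueError(f"unsupported type {type(features)}")


ds = MiniDataset()
ds.add_features("mfcc", Table())
ds.add_features("vad", "data/vad.csv")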
diff --git a/hyperion/utils/enrollment_map.py b/hyperion/utils/enrollment_map.py
index 024e5b74..4af69144 100644
--- a/hyperion/utils/enrollment_map.py
+++ b/hyperion/utils/enrollment_map.py
@@ -18,12 +18,13 @@
 
 class EnrollmentMap(InfoTable):
     """Class to store the mapping between enrollment id
-    and segmentids
+    and segmentids
     """
 
     def __init__(self, df):
         if "modelid" in df:
             df.rename(columns={"modelid": "id"}, inplace=True)
+        assert "segmentid" in df
         super().__init__(df)
 
     def split(self, idx, num_parts):
@@ -84,3 +85,17 @@ def load(cls, file_path, sep=None):
             df = pd.read_csv(file_path, sep=sep)
 
         return cls(df)
+
+    @classmethod
+    def cat(cls, tables):
+        """Concatenates several tables.
+
+        Args:
+          info_lists: List of InfoTables
+
+        Returns:
+          InfoTable object concatenating the info_lists.
+        """
+        df_list = [table.df for table in tables]
+        df = pd.concat(df_list)
+        return cls(df)
diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py
index 6bcd4aca..45eab05f 100644
--- a/hyperion/utils/info_table.py
+++ b/hyperion/utils/info_table.py
@@ -176,8 +176,8 @@ def split(self, idx, num_parts, group_by=None):
         return self.__class__(df)
 
     @classmethod
-    def merge(cls, tables):
-        """Merges several tables.
+    def cat(cls, tables):
+        """Concatenates several tables.
 
         Args:
           info_lists: List of InfoTables
@@ -187,6 +187,9 @@
         """
         df_list = [table.df for table in tables]
         df = pd.concat(df_list)
+        assert df[
+            "id"
+        ].is_unique, """there are duplicated ids in the tables we are concatenating"""
         return cls(df)
 
     def filter(self, items=None, iindex=None, columns=None, by="id", keep=True):

From 63a2bd994c961b6c438bda454cc66a8695d1b797 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Fri, 30 Jun 2023 09:38:03 -0400
Subject: [PATCH 103/154] added config 2.0 to vox v2

---
 ...un_031_attack_type_verif_and_noveltydet.sh |   2 +-
 egs/voxceleb/v1.1/README.md                   |  18 +-
 ...rain_idrnd_resnet100_xvec_stage1_v3.0.yaml |   2 +-
 ...train_res2net50w26s8_xvec_stage1_v3.0.yaml |   2 +-
 ...train_res2net50w26s8_xvec_stage2_v3.0.yaml |   3 +-
 egs/voxceleb/v1.1/run_030_extract_xvectors.sh |   4 +-
 egs/voxceleb/v1.1/run_040_eval_be.sh          |   2 +-
 egs/voxceleb/v1.2/run_001_prepare_data.sh     |  34 +-
 egs/voxceleb/v2/README.md                     | 149 +-----
 ...lmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml |  59 +++
 ...lmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml |  63 +++
 ...lmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml |  73 +++
 .../wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml  |  45 ++
 .../wavlmbaseplus_ecapatdnn512x3_v2.0.yaml    |  44 ++
 ...onfig_wavlmbaseplus_ecapatdnn512x3_v2.0.sh |  54 ++
 egs/voxceleb/v2/run_030_extract_xvectors.sh   |  16 +-
 egs/voxceleb/v2/run_040_eval_be.sh            | 294 +++++++++-
 hyperion/bin/adv_finetune_xvector_from_wav.py |   5 +-
 hyperion/bin/apply_mvn_select_frames.py       |   9 +-
 hyperion/bin/audio_to_duration.py             |   5 +-
 hyperion/bin/compute_energy_vad.py            |   9 +-
 hyperion/bin/compute_mfcc_feats.py            |   9 +-
 hyperion/bin/copy_feats.py                    |   1 -
 hyperion/bin/decode_wav2transducer.py         |  12 +-
 hyperion/bin/decode_wav2vec2rnn_transducer.py |   5 +-
 ...l_xvec_cosine_scoring_from_adv_test_wav.py |   9 +-
 ...osine_scoring_from_adv_test_wav_wavegan.py |  10 +-
 ...l_xvec_cosine_scoring_from_art_test_wav.py |  18 +-
 .../eval_xvec_cosine_scoring_from_test_wav.py |   9 +-
 ...sine_scoring_from_transfer_adv_test_wav.py |   5 +-
 ...sine_scoring_from_transfer_art_test_wav.py |  18 +-
 hyperion/bin/eval_xvec_logits_from_wav.py     |   9 +-
 hyperion/bin/extract_wav2vec2xvectors.py      |  38 +-
 hyperion/bin/extract_xvectors_from_feats.py   |   5 +-
 hyperion/bin/extract_xvectors_from_wav.py     |   9 +-
 .../extract_xvectors_slidwin_from_feats.py    |  11 +-
 .../bin/extract_xvectors_slidwin_from_wav.py  |  11 +-
 hyperion/bin/finetune_wav2vec2transducer.py   |   5 +-
 hyperion/bin/finetune_wav2vec2xvector.py      |  18 +-
 .../bin/finetune_xvector_dfr_from_feats.py    |   5 +-
 hyperion/bin/finetune_xvector_dfr_from_wav.py |   5 +-
 hyperion/bin/finetune_xvector_from_feats.py   |   5 +-
 hyperion/bin/finetune_xvector_from_wav.py     |   9 +-
 .../generate_adv_attacks_xvector_classif.py   |  11 +-
.../bin/generate_adv_attacks_xvector_verif.py | 11 +- hyperion/bin/hyperion_dataset.py | 23 +- hyperion/bin/hyperion_tables.py | 22 +- hyperion/bin/make_babble_noise_audio_files.py | 7 +- hyperion/bin/pack_wav_rirs.py | 9 +- hyperion/bin/plot_embedding_tsne.py | 5 +- hyperion/bin/plot_embedding_tsne_per_class.py | 5 +- hyperion/bin/prepare_data.py | 5 +- hyperion/bin/preprocess_audio_files.py | 7 +- .../split_dataset_into_trials_and_cohort.py | 68 +++ hyperion/bin/train_wav2rnn_transducer.py | 5 +- hyperion/bin/train_wav2vec2rnn_transducer.py | 5 +- hyperion/bin/train_wav2vec2transducer.py | 5 +- hyperion/bin/train_wav2vec2xvector.py | 5 +- hyperion/bin/train_xvector_from_feats.py | 5 +- hyperion/bin/train_xvector_from_wav.py | 9 +- hyperion/data_prep/__init__.py | 1 + hyperion/data_prep/voxceleb1.py | 2 +- hyperion/data_prep/voxceleb2.py | 2 +- hyperion/data_prep/voxsrc22.py | 21 +- .../data/class_weighted_seg_chunk_sampler.py | 2 +- .../models/wav2xvectors/hf_wav2xvector.py | 100 ++-- hyperion/torch/torch_model.py | 34 +- hyperion/torch/tpm/hf/hf_hubert.py | 32 ++ hyperion/torch/tpm/hf/hf_wav2vec2.py | 6 + hyperion/torch/tpm/hf/hf_wav2vec_base.py | 84 ++- hyperion/torch/tpm/hf/hf_wavlm.py | 32 ++ hyperion/torch/trainers/torch_trainer.py | 12 +- hyperion/utils/dataset.py | 500 ++++++++++++++---- hyperion/utils/segment_set.py | 10 +- 74 files changed, 1535 insertions(+), 628 deletions(-) create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh create mode 100755 hyperion/bin/split_dataset_into_trials_and_cohort.py diff --git a/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh b/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh index 4ce703ba..3b93fabd 100755 --- a/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh +++ b/egs/voxceleb/adv.v2/run_031_attack_type_verif_and_noveltydet.sh @@ -293,7 +293,7 @@ if [ $stage -le 13 ]; then awk '!/benign/' $list_someknown_dir/train/utt2spk > $list_someknown_dir/train_nobenign/utt2spk steps_backend/train_be_v1.sh --cmd "$train_cmd" \ --plda-type splda \ - --y-dim 6 \ + --y-dim 5 \ $sign_dir/train/xvector.scp \ $list_someknown_dir/train_nobenign \ $be_dir diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index 73b9bb4e..3b9eeaa9 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -111,8 +111,11 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | | | | Cosine + AS-Norm | 0.72 | 0.046 | 0.070 | | | | | Cosine + QMF | 0.67 | 0.039 | 0.074 | | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.56 | 0.040 | 0.065 | -| | | | Cosine + AS-Norm | 0.52 | 0.33 | 0.045 | +| | | | Cosine + AS-Norm | 0.52 | 0.033 | 0.045 | | | | | Cosine + QMF | 0.45 | 0.027 | 0.043 | +| config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.60 | 0.043 | 0.071 | +| | | | Cosine + AS-Norm | 0.53 | 0.034 | 0.063 | +| | | | Cosine + QMF | 0.49 | 0.033 | 0.054 | ### VoxCeleb 1 Entire-Clean trial list @@ -143,8 +146,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.71 | 0.044 | 0.076| | | | | Cosine + AS-Norm | 0.66 | 0.040 | 0.069 | | | | | Cosine + QMF | 0.63 | 0.037 | 0.067 | - - +| config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.75 | 0.047 | 0.077 | +| | | | Cosine + AS-Norm | 0.70 | 0.042 | 0.072 | +| | | | Cosine + QMF | 0.68 | 0.040 | 0.069 | ### VoxCeleb 1 Hard-Clean trial list @@ -174,7 +178,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.30 | 0.076 | 0.125 | | | | | Cosine + AS-Norm | 1.15 | 0.066 | 0.109 | | | | | Cosine + QMF | 1.11 | 0.065 | 0.103 | - +| config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.41 | 0.081 | 0.132 | +| | | | Cosine + AS-Norm | 1.28 | 0.071 | 0.116 | +| | | | Cosine + QMF | 1.21 | 0.069 | 0.113 | ### VoxSRC2022 dev @@ -205,6 +211,10 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.92 | 0.124 | 0.208 |
 | | | | Cosine + AS-Norm | 1.71 | 0.109 | 0.212 |
 | | | | Cosine + QMF | 1.62 | 0.103 | 0.192 |
+| config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.96 | 0.124 | 0.211 |
+| | | | Cosine + AS-Norm | 1.79 | 0.118 | 0.239 |
+| | | | Cosine + QMF | 1.68 | 0.114 | 0.216 |
+
 
 ## Results before 2023
 
diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml
index 9e302200..1016087d 100644
--- a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml
+++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml
@@ -68,5 +68,5 @@ trainer:
   grad_clip: 250
   use_amp: true
   log_interval: 1000
-  epochs: 35
+  epochs: 30
   eff_batch_size: 256
diff --git a/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage1_v3.0.yaml
index 40fb362e..e98d6c13 100644
--- a/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage1_v3.0.yaml
+++ b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage1_v3.0.yaml
@@ -68,5 +68,5 @@ trainer:
   grad_clip: 250
   use_amp: true
   log_interval: 1000
-  epochs: 35
+  epochs: 30
   eff_batch_size: 256
diff --git a/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml
index 469e166b..5c9af011 100644
--- a/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml
+++ b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml
@@ -44,7 +44,8 @@ model:
     margin_warmup_epochs: 0
     intertop_margin: 0.1
   override_dropouts: true
-  dropout_rate: 0.0
+  # dropout_rate: 0.0
+  dropout_rate: 0.2
 trainer:
   optim:
     opt_type: sgd
diff --git a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh
index 8c0949f4..f933a7b2 100755
--- a/egs/voxceleb/v1.1/run_030_extract_xvectors.sh
+++ b/egs/voxceleb/v1.1/run_030_extract_xvectors.sh
@@ -8,7 +8,7 @@ set -e
 
 stage=1
-nnet_stage=1
+nnet_stage=2
 config_file=default_config.sh
 use_gpu=false
 xvec_chunk_length=12800
@@ -85,4 +85,4 @@ if [ $stage -le 2 ]; then
     done
 fi
 
-exit
+
diff --git a/egs/voxceleb/v1.1/run_040_eval_be.sh b/egs/voxceleb/v1.1/run_040_eval_be.sh
index 0780584c..6bdbdf92 100755
--- a/egs/voxceleb/v1.1/run_040_eval_be.sh
+++ b/egs/voxceleb/v1.1/run_040_eval_be.sh
@@ -8,7 +8,7 @@ set -e
 
 stage=1
-nnet_stage=1
+nnet_stage=2
 config_file=default_config.sh
 
diff --git a/egs/voxceleb/v1.2/run_001_prepare_data.sh b/egs/voxceleb/v1.2/run_001_prepare_data.sh
index f956bc8c..c151e270 100755
--- a/egs/voxceleb/v1.2/run_001_prepare_data.sh
+++ b/egs/voxceleb/v1.2/run_001_prepare_data.sh
@@ -16,26 +16,31 @@ config_file=default_config.sh
 
 if [ $stage -le 1 ];then
   # Prepare the VoxCeleb2 dataset for training.
- hyp_utils/conda_env.sh \ - prepare_data.py voxceleb2 --subset dev --corpus-dir $voxceleb2_root \ - --cat-videos --use-kaldi-ids \ - --output-dir data/voxceleb2cat_train + prepare_data.py voxceleb2 --subset dev --corpus-dir $voxceleb2_root \ + --cat-videos --use-kaldi-ids \ + --output-dir data/voxceleb2cat_train fi if [ $stage -le 2 ];then # prepare voxceleb1 for test - #hyp_utils/conda_env.sh \ - prepare_data.py voxceleb1 --task test --corpus-dir $voxceleb1_root \ - --use-kaldi-ids \ - --output-dir data/voxceleb1_test + # hyp_utils/conda_env.sh + prepare_data.py voxceleb1 --task test --corpus-dir $voxceleb1_root \ + --use-kaldi-ids \ + --output-dir data/voxceleb1_test #local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data fi -exit + if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then - local/prepare_voxsrc22_dev.py \ - --vox1-corpus-dir $voxceleb1_root \ - --voxsrc22-corpus-dir $voxsrc22_root \ - --output-dir data/voxsrc22_dev + prepare_data.py voxsrc22 --subset dev --corpus-dir $voxsrc22_root \ + --vox1-corpus-dir $voxceleb1_root \ + --output-dir data/voxsrc22_dev + # local/prepare_voxsrc22_dev.py \ + # --vox1-corpus-dir $voxceleb1_root \ + # --voxsrc22-corpus-dir $voxsrc22_root \ + # --output-dir data/voxsrc22_dev + prepare_data.py voxsrc22 --subset test --corpus-dir $voxsrc22_root \ + --vox1-corpus-dir $voxceleb1_root \ + --output-dir data/voxsrc22_test fi # if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then @@ -46,5 +51,6 @@ fi if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then # # split vox2 into 2 parts, for cohort and qmf training - local/make_vox2_trials.py --data-dir data/voxceleb2cat_train + split_dataset_into_trials_and_cohort.py --data-dir data/voxceleb2cat_train + #local/make_vox2_trials.py --data-dir data/voxceleb2cat_train fi diff --git a/egs/voxceleb/v2/README.md b/egs/voxceleb/v2/README.md index 5b5b93e5..c64a4b41 100644 --- a/egs/voxceleb/v2/README.md +++ b/egs/voxceleb/v2/README.md @@ -1,24 +1,9 @@ -# VoxCeleb V1.1 +# VoxCeleb V2 -Recipe for the VoxCeleb Speaker Verification Task +Recipe for the VoxCeleb Speaker Verification Task using Wav2Vec2, WavLM or Hubert models from HuggingFace as feature extractors ## Differences w.r.t VoxCeleb V1 recipe -In recipe version V1: - - We compute speech augmentations and acoustic features offline and dump them to disk. - - Augmentation is performed using Kaldi scripts and wav-reverbate tool - - Babble noise is created on-the-fly when computing features by mixing 3-7 single speaker files. - -In this recipe: - - We compute speech augmentations and acoustic features are computed always on-the-fly, - we don't dump any features to disk. - - Augmentation is performed using Hyperin SpeechAugment class. - - The behavior of this class is controlled - by the the configuration file `conf/reverb_noise_aug.yml`, - which mimics the proportions of noise and RIR types, and SNRs used in the V1 or the recipe. - - Babble noise is created offline by mixing 3-10 single speaker files. 
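Stage 5 of `run_001_prepare_data.sh` above now calls `split_dataset_into_trials_and_cohort.py`, so that VoxCeleb2 can serve both as QMF training trials and as cohort data. A minimal sketch of a speaker-disjoint split of this kind, assuming a Kaldi-style `utt2spk` file; the function name, the 50/50 fraction, and the return layout are illustrative assumptions, not the script's actual interface:

```python
import numpy as np
import pandas as pd

def split_trials_and_cohort(utt2spk_file, cohort_fraction=0.5, seed=1234):
    """Split a Kaldi-style utt2spk into speaker-disjoint trial and cohort
    subsets, so cohort speakers never appear in the QMF training trials."""
    df = pd.read_csv(utt2spk_file, sep=" ", names=["utt", "spk"])
    spks = df["spk"].unique()
    rng = np.random.default_rng(seed)
    rng.shuffle(spks)
    cohort_spks = set(spks[: int(cohort_fraction * len(spks))])
    is_cohort = df["spk"].isin(cohort_spks)
    # trial utterances (for QMF training trials) and cohort utterances (for normalization)
    return df[~is_cohort], df[is_cohort]
```

Keeping the two speaker sets disjoint matters: cohort speakers that also appear in the trials would bias both the normalization statistics and the QMF calibration.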
- - ## Citing ## Training Data @@ -41,15 +26,14 @@ In this recipe: ## Usage - Run the run_0*.sh scripts in sequence - - By default it will use Light ResNet (16 base channels) - - For better performance use full ResNet (64 base channels) using `config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh` file as + - By default it will use the configuration in `default_config.sh` + - To use a different configuration, pass its file to each script as ```bash run_011_train_xvector.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh run_030_extract_xvectors.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh --use-gpu true run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh ``` - - To train with mixed precision training use config file `config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh` ## Recipe Steps: @@ -73,7 +57,9 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr - Creates training and validation lists for x-vector training - `run_011_train_xvector.sh` - - Trains the x-vector network + - Trains the x-vector model on frozen wav2vec features + - Finetunes wav2vec+x-vector model + - Large margin finetuning of wav2vec+x-vector model - `run_030_extract_xvectors.sh` - Extracts x-vectors for VoxCeleb2 or VoxCeleb2+augmentation for PLDA training @@ -89,117 +75,30 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.00 | 0.129 | 0.216 | -| | | | Cosine | 2.04 | 0.138 | 0.210 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.35 | 0.091 | 0.159 | -| | | | Cosine | 1.22 | 0.082 | 0.129 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 1.19 | 0.074 | 0.124 | -| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 1.30 | 0.090 | 0.160 | -| | | | Cosine | 1.44 | 0.100 | 0.173 | -| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.091 | 0.143 | -| | | | Cosine | 1.17 | 0.081 | 0.110 | -| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | 1.37 | 0.104 | 0.179 | -| | | | Cosine | 1.31 | 0.080 | 0.139 | -| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | 1.29 | 0.088 | 0.129 | -| | | | Cosine | 1.23 | 0.083 | 0.136 | -| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 1.20 | 0.095 | 0.156 | -| | | | Cosine | 1.29 | 0.089 | 0.146 | -| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.20 | 0.084 | 0.136 | -| | | | Cosine | 1.18 | 0.078 | 0.115 | -| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.11 | 0.084 | 0.145 | -| | | | Cosine | 1.12 | 0.073 | 0.131 | -| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16
ArcFace s=30/m=0.3 | PLDA | 1.53 | 0.104 | 0.189 | -| | | | Cosine | 1.31 | 0.084 | 0.132 | -| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 0.98 | 0.066 | 0.116 | -| | | | Cosine | 1.12 | 0.071 | 0.103 | -| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 1.05 | 0.077 | 0.123 | -| | | | Cosine | 0.96 | 0.065 | 0.110 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.04 | 0.071 | 0.118 | -| | | | Cosine | 0.93 | 0.067 | 0.108 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1_swa.sh | Res2Net50 width=26x8 | + SWA | PLDA | 0.90 | 0.067 | 0.118 | -| | | | Cosine | 0.85 | 0.060 | 0.094 | -| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.44 | 0.102 | 0.169 | -| | | | Cosine | 1.29 | 0.084 | 0.140 | -| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.071 | 0.116 | -| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.074 | 0.116 | -| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.09 | 0.081 | 0.150 | - +| config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.84 | 0.060 | 0.116 | +| | | | Cosine + AS-Norm | 0.81 | 0.058 | 0.108 | +| | | | Cosine + QMF | 0.75 | 0.054 | 0.086 | ### VoxCeleb 1 Entire-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.86 | 0.124 | 0.210 | -| | | | Cosine | 1.93 | 0.122 | 0.201 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.43 | 0.091 | 0.159 | -| | | | Cosine | 1.24 | 0.080 | 0.136 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 1.19 | 0.077 | 0.132 | -| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 1.27 | 0.084 | 0.150 | -| | | | Cosine | 1.30 | 0.082 | 0.150 | -| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.30 | 0.083 | 0.146 | -| | | | Cosine | 1.09 | 0.071 | 0.124 | -| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | 1.45 | 0.097 | 0.165 | -| | | | Cosine | 1.15 | 0.076 | 0.132 | -| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | 1.47 | 0.094 | 0.165 | -| | | | Cosine | 1.27 | 0.082 | 0.148 | -| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 1.31 | 0.086 | 0.149 | -| | | | Cosine | 1.22 | 0.079 | 0.134 | -| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.27 | 0.082 | 0.145 | -| | | | Cosine | 1.16 | 0.074 | 0.130 | -| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.077 | 0.136 | -| | | | Cosine | 1.11 | 0.071 | 0.125 | -| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16
ArcFace s=30/m=0.3 | PLDA | 1.46 | 0.097 | 0.173 | -| | | | Cosine | 1.24 | 0.080 | 0.140 | -| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 1.11 | 0.071 | 0.127 | -| | | | Cosine | 1.05 | 0.067 | 0.117 | -| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.078 | 0.134 | -| | | | Cosine | 1.05 | 0.069 | 0.121 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.18 | 0.075 | 0.131 | -| | | | Cosine | 0.98 | 0.063 | 0.110 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 1.17 | 0.072 | 0.123 | -| | | | Cosine | 0.94 | 0.061 | 0.107 | -| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.56 | 0.095 | 0.166 | -| | | | Cosine | 1.27 | 0.079 | 0.142 | -| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.19 | 0.077 | 0.137 | -| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.073 | 0.129 | -| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.068 | 0.120 | - +| config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.81 | 0.051 | 0.087 | +| | | | Cosine + AS-Norm | 0.78 | 0.047 | 0.083 | +| | | | Cosine + QMF | 0.75 | 0.046 | 0.076 | ### VoxCeleb 1 Hard-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 3.29 | 0.195 | 0.318 | -| | | | Cosine | 3.27 | 0.188 | 0.303 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.66 | 0.160 | 0.258 | -| | | | Cosine | 2.32 | 0.139 | 0.232 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 2.19 | 0.133 | 0.215 | -| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 2.33 | 0.139 | 0.227 | -| | | | Cosine | 2.33 | 0.142 | 0.235 | -| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.46 | 0.142 | 0.237 | -| | | | Cosine | 2.14 | 0.126 | 0.203 | -| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | 2.57 | 0.153 | 0.255 | -| | | | Cosine | 2.11 | 0.127 | 0.205 | -| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | 2.64 | 0.157 | 0.244 | -| | | | Cosine | 2.33 | 0.141 | 0.232 | -| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 2.42 | 0.144 | 0.245 | -| | | | Cosine | 2.26 | 0.133 | 0.224 -| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 2.39 | 0.141 | 0.235 | -| | | | Cosine | 2.17 | 0.128 | 0.215 -| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 2.28 | 0.131 | 0.225 | -| | | | Cosine | 2.11 | 0.124 | 0.204 | -| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16
ArcFace s=30/m=0.3 | PLDA | 2.77 | 0.172 | 0.271 | -| | | | Cosine | 2.45 | 0.141 | 0.225 | -| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 2.07 | 0.124 | 0.201 | -| | | | Cosine | 1.95 | 0.113 | 0.181 | -| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 2.34 | 0.136 | 0.230 | -| | | | Cosine | 1.99 | 0.119 | 0.196 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 2.18 | 0.127 | 0.211 | -| | | | Cosine | 1.89 | 0.112 | 0.184 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1_swa.sh | Res2Net50 width=26x8 | + SWA | PLDA | 2.14 | 0.125 | 0.209 | -| | | | Cosine | 1.84 | 0.110 | 0.186 | -| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 2.78 | 0.156 | 0.252 | -| | | | Cosine | 2.26 | 0.134 | 0.214 | -| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 2.24 | 0.134 | 0.221 | -| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.20 | 0.132 | 0.219 | -| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.02 | 0.123 | 0.203 | +| config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.73 | 0.113 | 0.182 | +| | | | Cosine + AS-Norm | 1.63 | 0.100 | 0.160 | +| | | | Cosine + QMF | 1.56 | 0.096 | 0.155 | + +### VoxSRC2022 dev + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.60 | 0.163 | 0.257 | +| | | | Cosine + AS-Norm | 2.43 | 0.150 | 0.244 | +| | | | Cosine + QMF | 2.31 | 0.143 | 0.232 | diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..bd3e7f86 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmbaseplus_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml new file mode 
100644 index 00000000..eed0ad1f --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml @@ -0,0 +1,63 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5e-2 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 5e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 30 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml new file mode 100644 index 00000000..d66d6877 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 1e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 8 + eff_batch_size: 256 + train_mode: full diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..d7e3388f --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,45 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus + drop_layers_gt: 9 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + 
resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..b2430d97 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,44 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..373535c2 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/run_030_extract_xvectors.sh b/egs/voxceleb/v2/run_030_extract_xvectors.sh index 67122f85..16f29841 100755 --- a/egs/voxceleb/v2/run_030_extract_xvectors.sh +++ b/egs/voxceleb/v2/run_030_extract_xvectors.sh @@ -7,10 +7,10 @@ . 
./path.sh set -e -stage=2 +stage=1 +nnet_stage=3 config_file=default_config.sh use_gpu=false -nnet_stage=3 hf_chunk_length=120 #seconds xvec_chunk_length=120 #seconds . parse_options.sh || exit 1; @@ -36,20 +36,20 @@ fi xvector_dir=exp/xvectors/$nnet_name -if [ $stage -le 1 ]; then +if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qmf" == "true" || "$do_pca" == "true") ]]; then # Extract xvectors for training LDA/PLDA for name in voxceleb2cat_train do if [ $plda_num_augs -eq 0 ]; then steps_xvec/extract_wav2vec2xvectors.sh \ --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ - --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ + --random-utt-length true --min-utt-length 2 --max-utt-length 30 \ $nnet data/${name} \ $xvector_dir/${name} else steps_xvec/extract_wav2vec2xvectors.sh \ --cmd "$xvec_cmd" --nj 300 ${xvec_args} \ - --random-utt-length true --min-utt-length 4 --max-utt-length 140 \ + --random-utt-length true --min-utt-length 2 --max-utt-length 30 \ --aug-config $plda_aug_config --num-augs $plda_num_augs \ $nnet data/${name} \ $xvector_dir/${name}_augx${plda_num_augs} \ @@ -60,7 +60,10 @@ fi if [ $stage -le 2 ]; then # Extracts x-vectors for evaluation - for name in voxceleb1_test + if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" + fi + for name in voxceleb1_test $extra_data do num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') nj=$(($num_spk < 100 ? $num_spk:100)) @@ -71,4 +74,3 @@ if [ $stage -le 2 ]; then done fi -exit diff --git a/egs/voxceleb/v2/run_040_eval_be.sh b/egs/voxceleb/v2/run_040_eval_be.sh index ac561344..0982abeb 100755 --- a/egs/voxceleb/v2/run_040_eval_be.sh +++ b/egs/voxceleb/v2/run_040_eval_be.sh @@ -7,10 +7,10 @@ . ./path.sh set -e -# By default we evaluate the nnet after finetuning stage 3 and only with cosine scoring -stage=3 -config_file=default_config.sh +stage=1 nnet_stage=3 +config_file=default_config.sh + . parse_options.sh || exit 1; . 
$config_file @@ -25,6 +25,15 @@ elif [ $nnet_stage -eq 2 ];then elif [ $nnet_stage -eq 3 ];then nnet=$nnet_s3 nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name fi plda_label=${plda_type}y${plda_y_dim}_v1 @@ -35,8 +44,12 @@ be_dir=exp/be/$nnet_name/$be_name score_dir=exp/scores/$nnet_name/${be_name} score_plda_dir=$score_dir/plda score_cosine_dir=exp/scores/$nnet_name/cosine +score_cosine_snorm_dir=exp/scores/$nnet_name/cosine_snorm +score_cosine_qmf_dir=exp/scores/$nnet_name/cosine_qmf -if [ $stage -le 1 ]; then + +if [ "$do_plda" == "true" ];then + if [ $stage -le 1 ]; then echo "Train PLDA on Voxceleb2" steps_be/train_be_v1.sh \ --cmd "$train_cmd" \ @@ -45,14 +58,12 @@ if [ $stage -le 1 ]; then --y_dim $plda_y_dim --z_dim $plda_z_dim \ $xvector_dir/$plda_data/xvector.scp \ data/$plda_data \ - $be_dir & - - wait -fi - - -if [ $stage -le 2 ];then - + $be_dir + + fi + + + if [ $stage -le 2 ];then echo "Eval Voxceleb 1 with LDA+CentWhiten+LNorm+PLDA" steps_be/eval_be_v1.sh \ --cmd "$train_cmd" --plda_type $plda_type \ @@ -62,7 +73,7 @@ if [ $stage -le 2 ];then $be_dir/lda_lnorm.h5 \ $be_dir/plda.h5 \ $score_plda_dir/voxceleb1_scores - + $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ local/score_voxceleb1.sh data/voxceleb1_test $score_plda_dir @@ -72,32 +83,267 @@ if [ $stage -le 2 ];then cat $f echo "" done - + fi fi -score_plda_dir=$score_cosine_dir + if [ $stage -le 3 ];then - echo "Eval Voxceleb 1 with Cosine scoring" + echo "Eval Voxceleb 1 with Cosine scoring" + steps_be/eval_be_cos.sh \ + --cmd "$train_cmd" \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $score_cosine_dir/voxceleb1_scores + + $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_dir + + for f in $(ls $score_cosine_dir/*_results); + do + echo $f + cat $f + echo "" + done + +fi + +if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then + + echo "Eval voxsrc22 with Cosine scoring" steps_be/eval_be_cos.sh --cmd "$train_cmd" \ - data/voxceleb1_test/trials \ - data/voxceleb1_test/utt2model \ - $xvector_dir/voxceleb1_test/xvector.scp \ - $score_plda_dir/voxceleb1_scores + data/voxsrc22_dev/trials \ + data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + $score_cosine_dir/voxsrc22_dev_scores & - $train_cmd --mem 10G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ + # steps_be/eval_be_cos.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # $score_cosine_dir/voxsrc22_test_scores - for f in $(ls $score_plda_dir/*_results); + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_dir + + for f in $(ls $score_cosine_dir/voxsrc22_dev_results); do echo $f cat $f echo "" done + +fi + + +if [ "$do_snorm" == "true" ];then + if [ $stage -le 5 ];then + echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm" + steps_be/eval_be_cos_snorm.sh \ + --cmd "$train_cmd --mem 22G" --coh-nbest 1000 \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + 
$xvector_dir/voxceleb1_test/xvector.scp \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_snorm_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_cosine_snorm_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_snorm_dir + + for f in $(ls $score_cosine_snorm_dir/*_results); + do + echo $f + cat $f + echo "" + done + fi + + if [ $stage -le 6 ];then + echo "Eval voxsrc22 with Cosine scoring + Adaptive SNorm" + steps_be/eval_be_cos_snorm.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxsrc22_dev/trials \ + data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_snorm_dir/voxsrc22_dev_scores & + + # steps_be/eval_be_cos_snorm.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # data/voxceleb2cat_train/utt2spk \ + # $xvector_dir/voxceleb2cat_train/xvector.scp \ + # $score_cosine_snorm_dir/voxsrc22_test_scores + + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_snorm_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_snorm_dir + + for f in $(ls $score_cosine_snorm_dir/voxsrc22_dev_results); + do + echo $f + cat $f + echo "" + done + fi +fi + + +if [ "$do_qmf" == "true" ];then + if [ $stage -le 7 ];then + awk '{ print $1, $2*100}' \ + $xvector_dir/voxceleb2cat_train/utt2speech_dur \ + > $xvector_dir/voxceleb2cat_train/utt2num_frames + + echo "Train QMF on Vox2" + steps_be/train_be_cos_qmf.sh \ + --cmd "$train_cmd" --coh-nbest 1000 \ + data/voxceleb2cat_train/trials \ + data/voxceleb2cat_train/utt2model \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $xvector_dir/voxceleb2cat_train/utt2num_frames \ + data/voxceleb2cat_train/snorm_utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/voxceleb2_qmf_scores + + fi + + if [ $stage -le 8 ];then + awk '{ print $1, $2*100}' \ + $xvector_dir/voxceleb1_test/utt2speech_dur \ + > $xvector_dir/voxceleb1_test/utt2num_frames + + echo "Eval Voxceleb 1 with Cosine scoring + QMF" + steps_be/eval_be_cos_qmf.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $xvector_dir/voxceleb1_test/utt2num_frames \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/qmf.h5 \ + $score_cosine_qmf_dir/voxceleb1_scores + + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1_snorm.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir _snorm + $train_cmd --mem 10G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1_qmf.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_qmf_dir _qmf + + for f in $(ls $score_cosine_qmf_dir/voxceleb1{,_snorm,_qmf}_[oeh]_clean_results); + do + echo $f + cat $f + echo "" + done + + fi + + if [ $stage -le 9 ];then + awk '{ print $1, $2*100}' \ + $xvector_dir/voxsrc22_dev/utt2speech_dur \ + > $xvector_dir/voxsrc22_dev/utt2num_frames + + echo "Eval voxsrc22 with Cosine scoring + QMF" + steps_be/eval_be_cos_qmf.sh \ + --cmd "$train_cmd --mem 20G" --coh-nbest 1000 \ + data/voxsrc22_dev/trials \ + 
data/voxsrc22_dev/utt2model \ + $xvector_dir/voxsrc22_dev/xvector.scp \ + $xvector_dir/voxsrc22_dev/utt2num_frames \ + data/voxceleb2cat_train/utt2spk \ + $xvector_dir/voxceleb2cat_train/xvector.scp \ + $score_cosine_qmf_dir/qmf.h5 \ + $score_cosine_qmf_dir/voxsrc22_dev_scores & + + # awk '{ print $1, $2*100}' \ + # $xvector_dir/voxsrc22_test/utt2speech_dur \ + # > $xvector_dir/voxsrc22_test/utt2num_frames + # steps_be/eval_be_cos_qmf.sh --cmd "$train_cmd" \ + # data/voxsrc22_test/trials \ + # data/voxsrc22_test/utt2model \ + # $xvector_dir/voxsrc22_test/xvector.scp \ + # $xvector_dir/voxsrc22_test/utt2num_frames \ + # data/voxceleb2cat_train/utt2spk \ + # $xvector_dir/voxceleb2cat_train/xvector.scp \ + # $score_cosine_qmf_dir/qmf.h5 \ + # $score_cosine_qmf_dir/voxsrc22_test_scores + + wait + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev_snorm.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir _snorm + $train_cmd --mem 10G --num-threads 1 $score_cosine_qmf_dir/log/score_voxsrc22_dev_qmf.log \ + local/score_voxsrc22_dev.sh data/voxsrc22_dev $score_cosine_qmf_dir _qmf + + for f in $(ls $score_cosine_qmf_dir/voxsrc22_dev{,_snorm,_qmf}_results); do echo $f cat $f echo "" done + fi + +fi + +if [ "$do_pca" != "true" ];then + exit 0 +fi + + +be_name=pca_r${pca_var_r} + +xvector_dir=exp/xvectors/$nnet_name +be_dir=exp/be/$nnet_name/$be_name +score_dir=exp/scores/$nnet_name/${be_name} +score_cosine_dir=exp/scores/$nnet_name/$be_name/cosine +score_cosine_snorm_dir=exp/scores/$nnet_name/$be_name/cosine_snorm +score_cosine_qmf_dir=exp/scores/$nnet_name/$be_name/cosine_qmf + +be_dir=exp/be/$nnet_name/ +score_be_dir=$score_dir/pca_r${pca_var_r} + +if [ $stage -le 10 ]; then + echo "Train projection on Voxceleb2" + $train_cmd $be_dir/log/train_be.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_proj_v1.py \ + --v-file scp:$xvector_dir/$plda_data/xvector.scp \ + --train-list data/$plda_data/utt2spk \ + --output-path $be_dir \ + --pca.pca-var-r $pca_var_r fi -exit +if [ $stage -le 11 ];then + + echo "Eval Voxceleb 1 with Cosine scoring" + steps_be/eval_be_cos.sh \ + --cmd "$train_cmd" \ + --preproc-file $be_dir/preproc.h5 \ + data/voxceleb1_test/trials \ + data/voxceleb1_test/utt2model \ + $xvector_dir/voxceleb1_test/xvector.scp \ + $score_cosine_dir/voxceleb1_scores + $train_cmd --mem 10G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ + local/score_voxceleb1.sh data/voxceleb1_test $score_cosine_dir + + for f in $(ls $score_cosine_dir/*_results); + do + echo $f + cat $f + echo "" + done + +fi diff --git a/hyperion/bin/adv_finetune_xvector_from_wav.py b/hyperion/bin/adv_finetune_xvector_from_wav.py index 7be882e0..f45b84a0 100755 --- a/hyperion/bin/adv_finetune_xvector_from_wav.py +++ b/hyperion/bin/adv_finetune_xvector_from_wav.py @@ -11,9 +11,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -32,6 +29,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorAdvTrainerFromWav as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) xvec_dict = { 
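    # registry mapping x-vector architecture names (e.g., "resnet") to their model classes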
"resnet": RXVec, diff --git a/hyperion/bin/apply_mvn_select_frames.py b/hyperion/bin/apply_mvn_select_frames.py index f5a3ce15..bdf53786 100755 --- a/hyperion/bin/apply_mvn_select_frames.py +++ b/hyperion/bin/apply_mvn_select_frames.py @@ -10,13 +10,6 @@ import time import numpy as np -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import RandomAccessDataReaderFactory as RDRF @@ -25,6 +18,8 @@ from hyperion.np.feats import MeanVarianceNorm as MVN from hyperion.utils import Utt2Info from hyperion.utils.kaldi_matrix import compression_methods +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def process_feats( diff --git a/hyperion/bin/audio_to_duration.py b/hyperion/bin/audio_to_duration.py index 38e8dff2..ac8852a4 100755 --- a/hyperion/bin/audio_to_duration.py +++ b/hyperion/bin/audio_to_duration.py @@ -9,12 +9,11 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - from hyperion.hyp_defs import config_logger from hyperion.io import SequentialAudioReader as AR from hyperion.utils import SegmentSet +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def audio_to_duration(audio_file, output_file, **kwargs): diff --git a/hyperion/bin/compute_energy_vad.py b/hyperion/bin/compute_energy_vad.py index 058f982a..e9773fff 100755 --- a/hyperion/bin/compute_energy_vad.py +++ b/hyperion/bin/compute_energy_vad.py @@ -9,17 +9,12 @@ import time import numpy as np -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.np.feats import EnergyVAD +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def compute_vad(input_path, output_path, write_num_frames, **kwargs): diff --git a/hyperion/bin/compute_mfcc_feats.py b/hyperion/bin/compute_mfcc_feats.py index ca6e26f7..442e4141 100755 --- a/hyperion/bin/compute_mfcc_feats.py +++ b/hyperion/bin/compute_mfcc_feats.py @@ -9,19 +9,14 @@ import time import numpy as np -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import compression_methods from hyperion.np.feats import MFCC +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def compute_mfcc_feats( diff --git a/hyperion/bin/copy_feats.py b/hyperion/bin/copy_feats.py index 0385cc55..4549caec 100755 --- a/hyperion/bin/copy_feats.py +++ b/hyperion/bin/copy_feats.py @@ -12,7 +12,6 @@ import time import numpy as np - from hyperion.hyp_defs import config_logger from hyperion.io import CopyFeats as CF diff --git a/hyperion/bin/decode_wav2transducer.py b/hyperion/bin/decode_wav2transducer.py index c7de38f1..972b247c 100755 --- a/hyperion/bin/decode_wav2transducer.py +++ b/hyperion/bin/decode_wav2transducer.py @@ -13,13 +13,6 @@ import numpy as np import pandas as pd import sentencepiece as spm -from jsonargparse import ( 
- ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -27,10 +20,13 @@ from hyperion.io import SequentialAudioReader as AR from hyperion.np.augment import SpeechAugment from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.models.wav2transducer.beam_search import beam_search, greedy_search +from hyperion.torch.models.wav2transducer.beam_search import (beam_search, + greedy_search) from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/decode_wav2vec2rnn_transducer.py b/hyperion/bin/decode_wav2vec2rnn_transducer.py index 8ef8d414..4fdc3140 100755 --- a/hyperion/bin/decode_wav2vec2rnn_transducer.py +++ b/hyperion/bin/decode_wav2vec2rnn_transducer.py @@ -13,9 +13,6 @@ import numpy as np import pandas as pd import sentencepiece as spm -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -29,6 +26,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py index 50fd5088..7c9d4104 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py @@ -10,13 +10,6 @@ import numpy as np import pandas as pd -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -33,6 +26,8 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) class MyModel(nn.Module): diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py index 5697404d..fb0d402c 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py @@ -7,19 +7,11 @@ import os import sys import time - # [Added Sonal May21] from pathlib import Path import numpy as np import pandas as pd -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -37,6 +29,8 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) torch.backends.cudnn.enabled = False diff --git 
a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py index 0ca1f740..2d5baa17 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py @@ -11,17 +11,10 @@ import numpy as np import pandas as pd -from art.classifiers import PyTorchClassifier -from art.estimators.classification import PyTorchClassifier -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch import torch.nn as nn +from art.classifiers import PyTorchClassifier +from art.estimators.classification import PyTorchClassifier from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -29,15 +22,16 @@ from hyperion.io import VADReaderFactory as VRF from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.adv_attacks.art_attack_factory import ( - ARTAttackFactory as AttackFactory, -) +from hyperion.torch.adv_attacks.art_attack_factory import \ + ARTAttackFactory as AttackFactory from hyperion.torch.layers import LinBinCalibrator as Calibrator from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py index 49a762af..76af5d75 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py @@ -10,13 +10,6 @@ import time import numpy as np -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -31,6 +24,8 @@ from hyperion.torch.utils.misc import l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py index b2c111ca..f33402a1 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py @@ -10,9 +10,6 @@ import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu @@ -29,6 +26,8 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) class MyModel(nn.Module): diff --git 
a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py index 8b6c8dae..f94dc497 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py @@ -11,17 +11,10 @@ import numpy as np import pandas as pd -from art.classifiers import PyTorchClassifier -from art.estimators.classification import PyTorchClassifier -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch import torch.nn as nn +from art.classifiers import PyTorchClassifier +from art.estimators.classification import PyTorchClassifier from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -29,15 +22,16 @@ from hyperion.io import VADReaderFactory as VRF from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.adv_attacks.art_attack_factory import ( - ARTAttackFactory as AttackFactory, -) +from hyperion.torch.adv_attacks.art_attack_factory import \ + ARTAttackFactory as AttackFactory from hyperion.torch.layers import LinBinCalibrator as Calibrator from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) class MyModel(nn.Module): diff --git a/hyperion/bin/eval_xvec_logits_from_wav.py b/hyperion/bin/eval_xvec_logits_from_wav.py index 98ba76b5..9efbd6dd 100755 --- a/hyperion/bin/eval_xvec_logits_from_wav.py +++ b/hyperion/bin/eval_xvec_logits_from_wav.py @@ -11,13 +11,6 @@ import numpy as np import pandas as pd -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF @@ -28,6 +21,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py index c4c4676f..6f7d269e 100755 --- a/hyperion/bin/extract_wav2vec2xvectors.py +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -11,15 +11,8 @@ import numpy as np import pandas as pd -import torchaudio.transforms as tat -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch +import torchaudio.transforms as tat from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR @@ -28,26 +21,8 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info - -resamplers = {} - - -def get_resampler(source_fs, target_fs): - if source_fs in resamplers: - return resamplers[source_fs] - - resampler = tat.Resample( - int(source_fs), - int(target_fs), - 
lowpass_filter_width=64, - rolloff=0.9475937167399596, - resampling_method="kaiser_window", - beta=14.769656459379492, - ) - resampler_f = lambda x: resampler(torch.from_numpy(x)).numpy() - resamplers[source_fs] = resampler_f - return resampler_f - +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) resamplers = {} @@ -168,7 +143,10 @@ def extract_xvectors( if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) + v_reader = VRF.create( + vad_spec, + path_prefix=vad_path_prefix, + ) while not reader.eof(): t1 = time.time() @@ -240,7 +218,7 @@ def extract_xvectors( writer.write([key], [y]) if write_speech_dur is not None: keys.append(key) - info.append(str(x.shape[1] * fs)) + info.append(str(x.shape[1] / fs)) t8 = time.time() read_time = t2 - t1 diff --git a/hyperion/bin/extract_xvectors_from_feats.py b/hyperion/bin/extract_xvectors_from_feats.py index 926e0bcc..13ad4277 100755 --- a/hyperion/bin/extract_xvectors_from_feats.py +++ b/hyperion/bin/extract_xvectors_from_feats.py @@ -10,9 +10,6 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF @@ -22,6 +19,8 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py index f49a5fb0..577bbae7 100755 --- a/hyperion/bin/extract_xvectors_from_wav.py +++ b/hyperion/bin/extract_xvectors_from_wav.py @@ -11,13 +11,6 @@ import numpy as np import pandas as pd -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF @@ -28,6 +21,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/extract_xvectors_slidwin_from_feats.py b/hyperion/bin/extract_xvectors_slidwin_from_feats.py index eaf0a5cc..a54c4d64 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_feats.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_feats.py @@ -10,15 +10,8 @@ import time import numpy as np -import yaml -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch +import yaml from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF @@ -27,6 +20,8 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py index 9dc0aa2c..8939ba91 100755 --- 
a/hyperion/bin/extract_xvectors_slidwin_from_wav.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py @@ -11,15 +11,8 @@ import numpy as np import pandas as pd -import yaml -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch +import yaml from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR @@ -29,6 +22,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_device(use_gpu): diff --git a/hyperion/bin/finetune_wav2vec2transducer.py b/hyperion/bin/finetune_wav2vec2transducer.py index df267e72..6f17f800 100755 --- a/hyperion/bin/finetune_wav2vec2transducer.py +++ b/hyperion/bin/finetune_wav2vec2transducer.py @@ -12,9 +12,6 @@ import k2 import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -25,6 +22,8 @@ from hyperion.torch.models import HFWav2Vec2Transducer from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) from torch.nn.utils.rnn import pad_sequence model_dict = { diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py index b3edd9b5..fc3c7084 100755 --- a/hyperion/bin/finetune_wav2vec2xvector.py +++ b/hyperion/bin/finetune_wav2vec2xvector.py @@ -11,9 +11,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -26,6 +23,8 @@ HFWavLM2ResNet1dXVector) from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) model_dict = { "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, @@ -79,7 +78,12 @@ def init_model(num_classes, in_model_file, rank, **kwargs): def init_hard_prototype_mining(model, train_loader, val_loader, rank): - if not train_loader.batch_sampler.hard_prototype_mining: + try: + hard_prototype_mining = train_loader.batch_sampler.hard_prototype_mining + except: + hard_prototype_mining = False + + if not hard_prototype_mining: return if rank == 0: @@ -118,7 +122,11 @@ def train_model(gpu_id, args): logging.info("trainer args={}".format(trn_args)) metrics = {"acc": CategoricalAccuracy()} trainer = Trainer( - model, device=device, metrics=metrics, ddp=world_size > 1, **trn_args, + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, ) trainer.load_last_checkpoint() trainer.fit(train_loader, val_loader) diff --git a/hyperion/bin/finetune_xvector_dfr_from_feats.py b/hyperion/bin/finetune_xvector_dfr_from_feats.py index 2ac01025..17cafb85 100755 --- a/hyperion/bin/finetune_xvector_dfr_from_feats.py +++ b/hyperion/bin/finetune_xvector_dfr_from_feats.py @@ -12,9 +12,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import 
torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -25,6 +22,8 @@ from hyperion.torch.models import XVector as XVec from hyperion.torch.trainers import XVectorTrainerDeepFeatReg as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin/finetune_xvector_dfr_from_wav.py b/hyperion/bin/finetune_xvector_dfr_from_wav.py index ff97d3ca..f7832a47 100755 --- a/hyperion/bin/finetune_xvector_dfr_from_wav.py +++ b/hyperion/bin/finetune_xvector_dfr_from_wav.py @@ -10,9 +10,6 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -24,6 +21,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainerDeepFeatRegFromWav as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_data( diff --git a/hyperion/bin/finetune_xvector_from_feats.py b/hyperion/bin/finetune_xvector_from_feats.py index 7a1fb5a9..ac9c2d0b 100755 --- a/hyperion/bin/finetune_xvector_from_feats.py +++ b/hyperion/bin/finetune_xvector_from_feats.py @@ -11,9 +11,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML @@ -23,6 +20,8 @@ from hyperion.torch.models import XVector as XVec from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp, open_device +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py index 7d602709..1c7cbe58 100755 --- a/hyperion/bin/finetune_xvector_from_wav.py +++ b/hyperion/bin/finetune_xvector_from_wav.py @@ -10,13 +10,6 @@ import time from pathlib import Path -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML @@ -32,6 +25,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) xvec_dict = { "resnet": RXVec, diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py index 8c6f38a6..209915c5 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_classif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py @@ -11,16 +11,9 @@ import numpy as np import pandas as pd -import yaml -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch import torch.nn as nn +import yaml from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as 
AW from hyperion.io import RandomAccessAudioReader as AR @@ -31,6 +24,8 @@ from hyperion.torch.utils import open_device from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialNdx, Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def read_utt_list(list_file, class2int_file, part_idx, num_parts): diff --git a/hyperion/bin/generate_adv_attacks_xvector_verif.py b/hyperion/bin/generate_adv_attacks_xvector_verif.py index fbd3a5fb..363e3afc 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_verif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_verif.py @@ -11,16 +11,9 @@ import numpy as np import pandas as pd -import yaml -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch import torch.nn as nn +import yaml from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -35,6 +28,8 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) class MyModel(nn.Module): diff --git a/hyperion/bin/hyperion_dataset.py b/hyperion/bin/hyperion_dataset.py index 9e7bac5c..c5a3f6b9 100644 --- a/hyperion/bin/hyperion_dataset.py +++ b/hyperion/bin/hyperion_dataset.py @@ -4,27 +4,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from typing import Optional, Union, List from pathlib import Path - -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) +from typing import List, Optional, Union from hyperion.hyp_defs import config_logger -from hyperion.utils import ( - PathLike, - Dataset, - InfoTable, - RecordingSet, - FeatureSet, - ClassInfo, - EnrollmentMap, - SegmentSet, -) +from hyperion.utils import (ClassInfo, Dataset, EnrollmentMap, FeatureSet, + InfoTable, PathLike, RecordingSet, SegmentSet) +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) subcommands = ["add_features"] # table_dict = { diff --git a/hyperion/bin/hyperion_tables.py b/hyperion/bin/hyperion_tables.py index a79a1dca..5a5f0b4f 100755 --- a/hyperion/bin/hyperion_tables.py +++ b/hyperion/bin/hyperion_tables.py @@ -4,26 +4,14 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from typing import Optional, Union, List from pathlib import Path - -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) +from typing import List, Optional, Union from hyperion.hyp_defs import config_logger -from hyperion.utils import ( - PathLike, - InfoTable, - RecordingSet, - FeatureSet, - ClassInfo, - EnrollmentMap, - SegmentSet, -) +from hyperion.utils import (ClassInfo, EnrollmentMap, FeatureSet, InfoTable, + PathLike, RecordingSet, SegmentSet) +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) subcommands = ["cat"] table_dict = { diff --git a/hyperion/bin/make_babble_noise_audio_files.py b/hyperion/bin/make_babble_noise_audio_files.py index 972ff01f..4a356037 100755 --- a/hyperion/bin/make_babble_noise_audio_files.py +++ b/hyperion/bin/make_babble_noise_audio_files.py @@ -10,15 +10,14 @@ import time import numpy as 
np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) -from scipy import ndimage, signal - from hyperion.hyp_defs import config_logger from hyperion.io import AudioWriter as Writer from hyperion.io import RandomAccessAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) +from scipy import ndimage, signal def make_noise(xs): diff --git a/hyperion/bin/pack_wav_rirs.py b/hyperion/bin/pack_wav_rirs.py index 4aafa075..78ac59c1 100755 --- a/hyperion/bin/pack_wav_rirs.py +++ b/hyperion/bin/pack_wav_rirs.py @@ -10,16 +10,11 @@ import time import numpy as np -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def pack_wav_rirs(input_path, output_spec, **kwargs): diff --git a/hyperion/bin/plot_embedding_tsne.py b/hyperion/bin/plot_embedding_tsne.py index e011dfe8..e2157e3e 100755 --- a/hyperion/bin/plot_embedding_tsne.py +++ b/hyperion/bin/plot_embedding_tsne.py @@ -13,13 +13,12 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo, - ArgumentParser, namespace_to_dict) - from hyperion.hyp_defs import config_logger from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.np.transforms import PCA, LNorm, SklTSNE from hyperion.utils import SegmentSet +from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo, + ArgumentParser, namespace_to_dict) matplotlib.use("Agg") colors = ["b", "g", "r", "c", "m", "y", "k"] diff --git a/hyperion/bin/plot_embedding_tsne_per_class.py b/hyperion/bin/plot_embedding_tsne_per_class.py index 6f35f074..6af0202c 100755 --- a/hyperion/bin/plot_embedding_tsne_per_class.py +++ b/hyperion/bin/plot_embedding_tsne_per_class.py @@ -13,15 +13,14 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo, - ArgumentParser, namespace_to_dict) - from hyperion.hyp_defs import config_logger from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.np.clustering import AHC from hyperion.np.transforms import PCA, LNorm, SklTSNE from hyperion.utils import SegmentSet from hyperion.utils.math import cosine_scoring +from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo, + ArgumentParser, namespace_to_dict) matplotlib.use("Agg") colors = ["b", "g", "r", "c", "m", "y", "k"] diff --git a/hyperion/bin/prepare_data.py b/hyperion/bin/prepare_data.py index 4105f482..e90ad0f7 100755 --- a/hyperion/bin/prepare_data.py +++ b/hyperion/bin/prepare_data.py @@ -6,11 +6,10 @@ import logging from pathlib import Path -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - from hyperion.data_prep import DataPrep from hyperion.hyp_defs import config_logger +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) def make_parser(data_prep_class): diff --git a/hyperion/bin/preprocess_audio_files.py b/hyperion/bin/preprocess_audio_files.py index 2f4e5cbc..e8adfd16 100755 --- a/hyperion/bin/preprocess_audio_files.py +++ 
b/hyperion/bin/preprocess_audio_files.py @@ -10,15 +10,14 @@ import time import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) -from scipy import ndimage, signal - from hyperion.hyp_defs import config_logger from hyperion.io import AudioWriter as Writer from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.utils import Utt2Info +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) +from scipy import ndimage, signal def process_vad(vad, length, fs, dilation, erosion): diff --git a/hyperion/bin/split_dataset_into_trials_and_cohort.py b/hyperion/bin/split_dataset_into_trials_and_cohort.py new file mode 100755 index 00000000..24ec10bf --- /dev/null +++ b/hyperion/bin/split_dataset_into_trials_and_cohort.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from pathlib import Path + +from hyperion.hyp_defs import config_logger +from hyperion.utils import Dataset +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ActionYesNo, + ArgumentParser, + namespace_to_dict, +) + +if __name__ == "__main__": + + parser = ArgumentParser( + description=( + """Split speakers in dataset into test speaker to create ASV trials and + cohort speakers for S-Norm""" + ) + ) + + parser.add_argument("--data-dir", required=True, help="Path to dataset") + parser.add_argument( + "--num-1k-tar-trials", type=int, default=30, help="thousands of target trials" + ) + parser.add_argument( + "--num-trial-speakers", + type=int, + default=1000, + help="number of speakers to create trials", + ) + parser.add_argument( + "--intra-gender", + default=True, + action=ActionYesNo, + help="Whether we create intra gender trials or not", + ) + parser.add_argument("--seed", type=int, default=1123, help="random seed") + parser.add_argument( + "--trials-dir", default=None, help="Path to output trials dataset" + ) + parser.add_argument( + "--cohort-dir", default=None, help="Path to output cohort dataset" + ) + + args = parser.parse_args() + config_logger(1) + data_dir = args.data_dir + cohort_dir = args.cohort_dir + cohort_dir = f"{data_dir}_cohort" if cohort_dir is None else cohort_dir + trials_dir = args.trials_dir + trials_dir = f"{data_dir}_trials" if trials_dir is None else trials_dir + + del args.data_dir + del args.cohort_dir + del args.trials_dir + args = namespace_to_dict(args) + + dataset = Dataset.load(data_dir) + trials_dataset, cohort_dataset = dataset.split_into_trials_and_cohort(**args) + trials_dataset.save(trials_dir) + cohort_dataset.save(cohort_dir) diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py index 26fcf72c..8930b299 100755 --- a/hyperion/bin/train_wav2rnn_transducer.py +++ b/hyperion/bin/train_wav2rnn_transducer.py @@ -12,9 +12,6 @@ import k2 import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -23,6 +20,8 @@ from hyperion.torch.models import Wav2RNNRNNTransducer from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) from torch.nn.utils.rnn import pad_sequence 
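The hunks above and below only reorder imports: isort now places jsonargparse inside the alphabetized third-party block instead of in its own group. For reference, a minimal sketch of the parser pattern these bin scripts share; the option names here are illustrative and not taken from any one script.

from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser,
                          namespace_to_dict)

# top-level parser; --cfg lets a YAML/JSON config file fill in any option
parser = ArgumentParser()
parser.add_argument("--cfg", action=ActionConfigFile)

# a nested parser mounted under a prefix, the same mechanism the
# add_class_args methods use via ActionParser
optim_parser = ArgumentParser()
optim_parser.add_argument("--lr", type=float, default=1e-3)
parser.add_argument("--optim", action=ActionParser(parser=optim_parser))

args = parser.parse_args(["--optim.lr", "0.01"])
kwargs = namespace_to_dict(args)  # plain nested dict, ready to pass as **kwargs
print(kwargs["optim"]["lr"])  # 0.01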
model_dict = { diff --git a/hyperion/bin/train_wav2vec2rnn_transducer.py b/hyperion/bin/train_wav2vec2rnn_transducer.py index 5daffb6d..7018c406 100755 --- a/hyperion/bin/train_wav2vec2rnn_transducer.py +++ b/hyperion/bin/train_wav2vec2rnn_transducer.py @@ -12,9 +12,6 @@ import k2 import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -25,6 +22,8 @@ HFWav2Vec2RNNTransducer) from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) from torch.nn.utils.rnn import pad_sequence model_dict = { diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index ce53be86..55f3b996 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -12,9 +12,6 @@ import k2 import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -24,6 +21,8 @@ from hyperion.torch.models import HFWav2Vec2Transducer from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) from torch.nn.utils.rnn import pad_sequence model_dict = { diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index 5e7ecafa..8e1653b1 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -11,9 +11,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -25,6 +22,8 @@ HFWavLM2ResNet1dXVector) from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) model_dict = { "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, diff --git a/hyperion/bin/train_xvector_from_feats.py b/hyperion/bin/train_xvector_from_feats.py index 7f4ab0fa..71bba080 100755 --- a/hyperion/bin/train_xvector_from_feats.py +++ b/hyperion/bin/train_xvector_from_feats.py @@ -11,9 +11,6 @@ from pathlib import Path import numpy as np -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - import torch import torch.nn as nn from hyperion.hyp_defs import config_logger, set_float_cpu @@ -28,6 +25,8 @@ from hyperion.torch.models import TransformerXVectorV1 as TFXVec from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) xvec_dict = { "resnet": RXVec, diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py index a210d429..b2e36cac 100755 --- a/hyperion/bin/train_xvector_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -8,13 +8,6 @@ import os from pathlib import Path -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) - import torch from hyperion.hyp_defs 
import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD @@ -29,6 +22,8 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.utils import ddp +from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, + namespace_to_dict) xvec_dict = { "resnet": RXVec, diff --git a/hyperion/data_prep/__init__.py b/hyperion/data_prep/__init__.py index 9ae59246..e978e219 100644 --- a/hyperion/data_prep/__init__.py +++ b/hyperion/data_prep/__init__.py @@ -6,3 +6,4 @@ from .data_prep import DataPrep from .voxceleb2 import VoxCeleb2DataPrep from .voxceleb1 import VoxCeleb1DataPrep +from .voxsrc22 import VoxSRC22DataPrep diff --git a/hyperion/data_prep/voxceleb1.py b/hyperion/data_prep/voxceleb1.py index c23b64ff..b3958605 100644 --- a/hyperion/data_prep/voxceleb1.py +++ b/hyperion/data_prep/voxceleb1.py @@ -330,7 +330,7 @@ def prepare(self): logging.info("making dataset") dataset = Dataset( segments, - classes={"speaker": speakers, "languages": languages}, + classes={"speaker": speakers, "language_est": languages}, recordings={"recordings": recs}, enrollments=enrollments, trials=trials, diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py index bef34ec9..29ad3e44 100644 --- a/hyperion/data_prep/voxceleb2.py +++ b/hyperion/data_prep/voxceleb2.py @@ -251,7 +251,7 @@ def prepare(self): logging.info("making dataset") dataset = Dataset( segments, - {"speaker": speakers, "languages": languages}, + {"speaker": speakers, "language_est": languages}, {"recordings": recs}, ) logging.info("saving dataset at %s", self.output_dir) diff --git a/hyperion/data_prep/voxsrc22.py b/hyperion/data_prep/voxsrc22.py index 1999262a..79369149 100644 --- a/hyperion/data_prep/voxsrc22.py +++ b/hyperion/data_prep/voxsrc22.py @@ -53,7 +53,7 @@ def __init__( @staticmethod def dataset_name(): - return "voxceleb2" + return "voxsrc22" @staticmethod def add_class_args(parser): @@ -117,11 +117,13 @@ def prepare_track12_dev(self): vox1_segmentid.append(s) vox1_rec_files = [ - glob.glob(f"{self.vox1_corpus_dir}/**/{s}") for s in vox1_segmentid - ] - vox22_rec_files = [ - glob.glob(f"{self.corpus_dir}/**/{s}") for s in vox22_segmentid + glob.glob(f"{self.vox1_corpus_dir}/**/{s}")[0] for s in vox1_segmentid ] + # vox22_rec_files = [ + # glob.glob(f"{self.corpus_dir}/**/{s}")[0] for s in vox22_segmentid + # ] + vox22_rec_files = [f"{self.corpus_dir}/{s}" for s in vox22_segmentid] + rec_ids = vox22_segmentid + vox1_segmentid rec_files = vox22_rec_files + vox1_rec_files @@ -135,7 +137,11 @@ recs["target_sample_freq"] = self.target_sample_freq logging.info("making SegmentsSet") - segments = pd.DataFrame({"id": rec_ids,}) + segments = pd.DataFrame( + { + "id": rec_ids, + } + ) segments = SegmentSet(segments) segments.sort() @@ -150,7 +156,8 @@ logging.info("saving dataset at %s", self.output_dir) dataset.save(self.output_dir) logging.info( - "datasets containts %d segments", len(segments), + "dataset contains %d segments", + len(segments), ) # wav_file = voxsrc22_corpus_dir / file_id diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 7fbfbd71..6ee00307 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -205,7 +205,7 @@ def _set_class_weights(self):
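The one-line fix in the hunk below removes a stray self that was being passed to set_weights, so the sampler object itself ended up as the weights argument. For context, in the "data-prior" mode shown here the class weights are simply each class's total speech duration, optionally flattened with an exponent; a standalone numpy sketch of that weighting, with made-up durations:

import numpy as np

durations = np.array([3600.0, 900.0, 100.0])  # total_duration per class, seconds
weight_exponent = 0.5  # exponent < 1 pushes the prior toward uniform

weights = durations ** weight_exponent
priors = weights / weights.sum()  # per-class sampling probabilities
print(priors)  # [0.6 0.3 0.1]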
self.class_info.set_uniform_weights() elif self.weight_mode == "data-prior": weights = self.class_info["total_duration"].values - self.class_info.set_weights(self, weights) + self.class_info.set_weights(weights) if self.weight_exponent != 1.0: self.class_info.exp_weights(self.weight_exponent) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index 5599fa1e..c2bcdf99 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -26,11 +26,9 @@ class HFWav2XVector(TorchModel): than one layer is used. """ - def __init__(self, - hf_feats, - xvector, - feat_fusion_start=0, - feat_fusion_method="weighted-avg"): + def __init__( + self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg" + ): super().__init__() self.hf_feats = hf_feats @@ -51,12 +49,9 @@ def _make_fuser(self): self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) elif self.feat_fusion_method == "linear": self.feat_fuser = nn.Linear(num_layers, 1, bias=False) - self.feat_fuser.weight.data = torch.ones(1, - num_layers) / num_layers + self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers elif self.feat_fusion_method == "cat": - self.feat_fuser = nn.Linear(num_layers * layer_dim, - layer_dim, - bias=False) + self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) def _fuse_hid_feats(self, hid_feats): """Fuses the hidden features from the Wav2Vec model. @@ -71,7 +66,7 @@ def _fuse_hid_feats(self, hid_feats): # There is only one layer of features return hid_feats[0] - hid_feats = hid_feats[self.feat_fusion_start:] + hid_feats = hid_feats[self.feat_fusion_start :] if self.feat_fusion_method == "weighted-avg": hid_feats = torch.stack(hid_feats, dim=-1) norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) @@ -125,14 +120,14 @@ def rebuild_output_layer( num_subcenters=num_subcenters, ) - def forward_feats(self, - x, - x_lengths, - return_feat_layers=None, - chunk_length=0, - detach_chunks=False): - return_hid_states = (False if return_feat_layers is None - and self.feat_fusion_method == "last" else True) + def forward_feats( + self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False + ): + return_hid_states = ( + False + if return_feat_layers is None and self.feat_fusion_method == "last" + else True + ) with self._hf_context: hf_output = self.hf_feats( x, @@ -154,7 +149,8 @@ def forward_feats(self, # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time) # as the hidden features of the x-vector encoder. 
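# Illustration, not part of the patch: the "weighted-avg" branch of
# _fuse_hid_feats above reduces to a softmax-weighted sum over the stacked
# layer outputs. A self-contained sketch with made-up sizes:
import torch
import torch.nn as nn

num_layers = 13
feats = [torch.randn(2, 50, 768) for _ in range(num_layers)]  # (batch, time, dim) per layer
feat_fuser = nn.Parameter(torch.zeros(num_layers))  # zeros -> uniform weights after softmax
stacked = torch.stack(feats, dim=-1)                # (batch, time, dim, layers)
w = nn.functional.softmax(feat_fuser, dim=-1)
fused = torch.sum(stacked * w, dim=-1)              # back to (batch, time, dim)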
hid_feats = [ - f.transpose(1, 2) for i, f in enumerate(hid_feats) + f.transpose(1, 2) + for i, f in enumerate(hid_feats) if i in return_feat_layers ] else: @@ -194,7 +190,8 @@ def forward( "h_classif" (list hidden classification head layers), "h_feats" (wav2vec features) """ feats, hid_feats, feat_lengths = self.forward_feats( - x, x_lengths, return_feat_layers) + x, x_lengths, return_feat_layers + ) output = self.xvector( feats, feat_lengths, @@ -230,16 +227,17 @@ def extract_embed( x, x_lengths = remove_silence(x, x_lengths) feats, _, feat_lengths = self.forward_feats( - x, - x_lengths, - chunk_length=hf_chunk_length, - detach_chunks=detach_chunks) - xvec_chunk_length = int(xvec_chunk_length * - self.hf_feats.sample_frequency * - feats.size(-1) // x.size(-1)) - return self.xvector.extract_embed(feats, feat_lengths, - xvec_chunk_length, embed_layer, - detach_chunks) + x, x_lengths, chunk_length=hf_chunk_length, detach_chunks=detach_chunks + ) + xvec_chunk_length = int( + xvec_chunk_length + * self.hf_feats.sample_frequency + * feats.size(-1) + // x.size(-1) + ) + return self.xvector.extract_embed( + feats, feat_lengths, xvec_chunk_length, embed_layer, detach_chunks + ) def freeze_feat_fuser(self): if self.feat_fuser is None: @@ -258,6 +256,23 @@ def freeze_hf_feats(self): def freeze_hf_feature_encoder(self): self.hf_feats.freeze_feature_encoder() + def has_param_groups(self): + return self.hf_feats.has_param_groups() + + def trainable_param_groups(self): + if not self.has_param_groups(): + return self.trainable_parameters() + + param_groups = self.hf_feats.trainable_param_groups() + if self.feat_fusion_method == "weighted-avg": + if self.feat_fuser.requires_grad: + param_groups.append({"params": self.feat_fuser}) + else: + param_groups.append({"params": self.feat_fuser.parameters()}) + + param_groups.append({"params": self.xvector.trainable_parameters()}) + return param_groups + def set_train_mode(self, mode): if mode == self._train_mode: return @@ -302,11 +317,11 @@ def _train(self, train_mode: str): self.hf_feats.train() self.xvector._train("ft-embed_affine") elif train_mode in [ - "ft-xvector", - "hf-feats-frozen", - "ft-xvector-nograd", - "hf-feats-frozen-nograd", - "hf-feat-extractor-frozen", + "ft-xvector", + "hf-feats-frozen", + "ft-xvector-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", ]: self.hf_feats.train() self.xvector._train("full") @@ -369,16 +384,19 @@ def add_class_args(parser, prefix=None, skip=set()): "--feat-fusion-start", default=0, type=int, - help= - ("the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" - "the wav2vec num_layers"), + help=( + "the input to x-vector model will fuse the wav2vec layers from feat_fusion_start to" + "the wav2vec num_layers" + ), ) parser.add_argument( "--feat-fusion-method", default="weighted-avg", choices=["weighted-avg", "linear", "cat", "last"], - help=("method to fuse the hidden layers from the wav2vec model " - "in [weighted-avg, cat]"), + help=( + "method to fuse the hidden layers from the wav2vec model " + "in [weighted-avg, cat]" + ), ) if prefix is not None: diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index 65e5884d..0cb887ca 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -13,8 +13,8 @@ class TorchModel(nn.Module): - """Base class for all Pytorch Models and NNet architectures - """ + """Base class for all Pytorch Models and NNet architectures""" + registry = {} def __init_subclass__(cls, **kwargs): @@ -45,6 
+45,12 @@ def non_trainable_parameters(self, recurse: bool = True): if not param.requires_grad: yield param + def has_param_groups(self): + return False + + def trainable_param_groups(self): + return self.trainable_parameters() + def freeze(self): for param in self.parameters(): param.requires_grad = False @@ -109,10 +115,9 @@ def save(self, file_path): os.makedirs(file_dir, exist_ok=True) config = self.get_config() - torch.save({ - "model_cfg": self.get_config(), - "model_state_dict": self.state_dict() - }) + torch.save( + {"model_cfg": self.get_config(), "model_state_dict": self.state_dict()}, file_path + ) @staticmethod def _load_cfg_state_dict(file_path=None, cfg=None, state_dict=None): @@ -132,8 +137,7 @@ @classmethod def load(cls, file_path=None, cfg=None, state_dict=None): - cfg, state_dict = TorchModel._load_cfg_state_dict( - file_path, cfg, state_dict) + cfg, state_dict = TorchModel._load_cfg_state_dict(file_path, cfg, state_dict) model = cls(**cfg) if state_dict is not None: @@ -148,14 +152,15 @@ def get_loss(self): @property def device(self): - devices = {param.device - for param in self.parameters() - } | {buf.device - for buf in self.buffers()} + devices = {param.device for param in self.parameters()} | { + buf.device for buf in self.buffers() + } if len(devices) != 1: raise RuntimeError( "Cannot determine device: {} different devices found".format( - len(devices))) + len(devices) + ) + ) return next(iter(devices)) @@ -217,5 +222,4 @@ def auto_load(file_path, extra_objs={}, map_location=None): # if it failed the 3 trials raise exception raise err # remove module prefix when is trained with dataparallel - state_dict = ODict( - (p.sub("", k), v) for k, v in state_dict.items()) + state_dict = ODict((p.sub("", k), v) for k, v in state_dict.items()) diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py index b2198924..2957e433 100644 --- a/hyperion/torch/tpm/hf/hf_hubert.py +++ b/hyperion/torch/tpm/hf/hf_hubert.py @@ -135,6 +135,8 @@ class HFHubert(HFWav2VecBase): chunk by chunk, if it is too long to fit in GPU. right_encoder_context: (`int`): future context frames used by the transformer encoder. sample_frequency: (`int`) waveform sample frequency used to train the model. + feat_extract_lr: learning rate for conv feature extractor, serves to set a lr different than the global one. + encoder_lr: learning rate for the wav2vec encoder, serves to set a lr different than the global one.
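        A sketch of how these two rates end up as optimizer param groups; the
        modules below are stand-ins, not the real HF submodules:

            import torch
            import torch.nn as nn

            feature_extractor = nn.Conv1d(1, 8, 3)  # stand-in for the conv front-end
            encoder = nn.Linear(8, 8)               # stand-in for the transformer encoder
            optimizer = torch.optim.AdamW(
                [
                    {"params": feature_extractor.parameters(), "lr": 1e-5},
                    {"params": encoder.parameters(), "lr": 5e-5},
                ],
                lr=1e-4,  # global lr, used by any group that does not set its own
            )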
""" def __init__( @@ -182,6 +184,8 @@ def __init__( left_encoder_context: int = 16, right_encoder_context: int = 16, sample_frequency: int = 16000, + feat_extract_lr: Optional[float] = None, + encoder_lr: Optional[float] = None, ): super().__init__( @@ -199,6 +203,8 @@ def __init__( left_encoder_context=left_encoder_context, right_encoder_context=right_encoder_context, sample_frequency=sample_frequency, + feat_extract_lr=feat_extract_lr, + encoder_lr=encoder_lr, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -287,6 +293,32 @@ def num_encoder_layers(self): def hidden_size(self): return self.hf_config.hidden_size + def change_dropouts( + self, + hidden_dropout: float = 0.1, + activation_dropout: float = 0.1, + attention_dropout: float = 0.1, + feat_proj_dropout: float = 0.1, + **kwargs, + ): + import transformers.models.hubert.modeling_hubert as t + + self.hf_model.config.hidden_dropout = hidden_dropout + self.hf_model.config.activation_dropout = activation_dropout + self.hf_model.config.attention_dropout = attention_dropout + self.hf_model.config.feat_proj_dropout = feat_proj_dropout + + self.hf_model.feature_projection.dropout.p = feat_proj_dropout + for module in self.hf_model.encoder.modules(): + if isinstance(module, nn.Dropout): + module.p = hidden_dropout + + for module in self.hf_model.encoder.modules(): + if isinstance(module, t.HubertAttention): + module.dropout = activation_dropout + if isinstance(module, t.HubertFeatureProjection): + module.intermediate_dropout.p = activation_dropout + def drop_upper_layers(self, max_layers: int): if max_layers >= self.hf_config.num_hidden_layers: return diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index e1f21153..26da7beb 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -148,6 +148,8 @@ class HFWav2Vec2(HFWav2VecBase): chunk by chunk, if it is too long to fit in GPU. right_encoder_context: (`int`): future context frames used by the transformer encoder. sample_frequency: (`int`) waveform sample frequency used to train the model. + feat_extract_lr: learning rate for conv feature extractor, serves to set a lr different than the global one. + encoder_lr: learning rate for the wav2vec encoder, serves to set a lr different than the global one. """ def __init__( @@ -200,6 +202,8 @@ def __init__( left_encoder_context: int = 16, right_encoder_context: int = 16, sample_frequency: int = 16000, + feat_extract_lr: Optional[float] = None, + encoder_lr: Optional[float] = None, ): super().__init__( @@ -217,6 +221,8 @@ def __init__( left_encoder_context=left_encoder_context, right_encoder_context=right_encoder_context, sample_frequency=sample_frequency, + feat_extract_lr=feat_extract_lr, + encoder_lr=encoder_lr, ) if pretrained_model_path is not None and not ignore_pretrained: diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index b0a815c7..a9c4ddef 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -53,6 +53,8 @@ class HFWav2VecBase(TorchModel): chunk by chunk, if it is too long to fit in GPU. right_encoder_context: (`int`): future context frames used by the transformer encoder. sample_frequency: (`int`) waveform sample frequency used to train the model. + feat_extract_lr: learning rate for conv feature extractor, serves to set a lr different than the global one. 
+ encoder_lr: learning rate for the wav2vec encoder, serves to set a lr different than the global one. """ def __init__( @@ -71,6 +73,8 @@ def __init__( left_encoder_context: int = 16, right_encoder_context: int = 16, sample_frequency: int = 16000, + feat_extract_lr: Optional[float] = None, + encoder_lr: Optional[float] = None, ): super().__init__() self.pretrained_model_path = pretrained_model_path @@ -84,6 +88,8 @@ def __init__( self.override_spec_augment = override_spec_augment self.right_encoder_context = right_encoder_context self.left_encoder_context = left_encoder_context + self.feat_extract_lr = feat_extract_lr + self.encoder_lr = encoder_lr if pretrained_model_path is not None and not ignore_pretrained: rank = ddp_get_rank() @@ -215,7 +221,14 @@ def out_shape(self, in_shape): C = self.hf_model.config.hidden_size return (in_shape[0], out_length, C) - def change_config(self, override_dropouts, override_spec_augment, **kwargs): + def change_config( + self, + override_dropouts: bool, + override_spec_augment: bool, + feat_extract_lr: Optional[float] = None, + encoder_lr: Optional[float] = None, + **kwargs, + ): if override_spec_augment: logging.info("overriding speech augment") self.change_spec_augment(**kwargs) @@ -224,6 +237,9 @@ def change_config(self, override_dropouts, override_spec_augment, **kwargs): logging.info("overriding hf model dropouts") self.change_dropouts(**kwargs) + self.feat_extract_lr = feat_extract_lr + self.encoder_lr = encoder_lr + def change_spec_augment( self, apply_spec_augment: bool = True, @@ -249,6 +265,35 @@ def change_dropouts(self, **kwargs): def freeze_feature_encoder(self): self.hf_model.freeze_feature_encoder() + def has_param_groups(self): + return self.feat_extract_lr is not None or self.encoder_lr is not None + + def trainable_param_groups(self): + if not self.has_param_groups(): + return self.trainable_parameters() + + if self.feat_extract_lr == self.encoder_lr: + return [{"params": self.trainable_parameters(), "lr": self.encoder_lr}] + + param_groups = [ + {"params": self.hf_model.feature_extractor.parameters()}, + {"params": self.hf_model.feature_projection.parameters()}, + {"params": self.hf_model.encoder.parameters()}, + ] + if self.hf_model.adapter is not None: + param_groups.append({"params": self.hf_model.adapter.parameters()}) + + if self.feat_extract_lr is not None: + param_groups[0]["lr"] = self.feat_extract_lr + param_groups[1]["lr"] = self.feat_extract_lr + + if self.encoder_lr is not None: + param_groups[2]["lr"] = self.encoder_lr + if len(param_groups) == 4: + param_groups[3]["lr"] = self.encoder_lr + + return param_groups + @property def hf_config(self): return self.hf_model.config @@ -570,7 +615,6 @@ def add_class_args(parser, prefix=None, skip=set()): help=("file path or HuggingFace Hub path to pre-trained model"), ) - parser.add_argument( "--normalize-input", default=True, @@ -659,6 +703,24 @@ def add_class_args(parser, prefix=None, skip=set()): "when the signal is evaluated chunk by chunk." ), ) + parser.add_argument( + "--feat-extractor-lr", + default=None, + type=float, + help=( + "lr for conv feature extractor, it serves to set a lr " + "different than the global one." + ), + ) + parser.add_argument( + "--encoder-lr", + default=None, + type=float, + help=( + "lr for transformer encoder, it serves to set a lr " + "different than the global one." 
+ ), + ) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) @@ -696,6 +758,24 @@ def add_finetune_args(parser, prefix=None, skip=set()): "arguments instead of the defaults in the pretrained model." ), ) + parser.add_argument( + "--feat-extractor-lr", + default=None, + type=float, + help=( + "lr for conv feature extractor, it serves to set a lr " + "different than the global one." + ), + ) + parser.add_argument( + "--encoder-lr", + default=None, + type=float, + help=( + "lr for transformer encoder, it serves to set a lr " + "different than the global one." + ), + ) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py index 0d5c5ad3..e1b67d81 100644 --- a/hyperion/torch/tpm/hf/hf_wavlm.py +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -148,6 +148,8 @@ class HFWavLM(HFWav2VecBase): chunk by chunk, if it is too long to fit in GPU. right_encoder_context: (`int`): future context frames used by the transformer encoder. sample_frequency: (`int`) waveform sample frequency used to train the model. + feat_extract_lr: learning rate for conv feature extractor, serves to set a lr different than the global one. + encoder_lr: learning rate for the wav2vec encoder, serves to set a lr different than the global one. """ def __init__( @@ -200,6 +202,8 @@ def __init__( left_encoder_context: int = 16, right_encoder_context: int = 16, sample_frequency: int = 16000, + feat_extract_lr: Optional[float] = None, + encoder_lr: Optional[float] = None, ): super().__init__( @@ -217,6 +221,8 @@ def __init__( left_encoder_context=left_encoder_context, right_encoder_context=right_encoder_context, sample_frequency=sample_frequency, + feat_extract_lr=feat_extract_lr, + encoder_lr=encoder_lr, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -310,6 +316,32 @@ def num_encoder_layers(self): def hidden_size(self): return self.hf_config.hidden_size + def change_dropouts( + self, + hidden_dropout: float = 0.1, + activation_dropout: float = 0.1, + attention_dropout: float = 0.1, + feat_proj_dropout: float = 0.1, + **kwargs, + ): + import transformers.models.wavlm.modeling_wavlm as t + + self.hf_model.config.hidden_dropout = hidden_dropout + self.hf_model.config.activation_dropout = activation_dropout + self.hf_model.config.attention_dropout = attention_dropout + self.hf_model.config.feat_proj_dropout = feat_proj_dropout + + self.hf_model.feature_projection.dropout.p = feat_proj_dropout + for module in self.hf_model.encoder.modules(): + if isinstance(module, nn.Dropout): + module.p = hidden_dropout + + for module in self.hf_model.encoder.modules(): + if isinstance(module, t.WavLMAttention): + module.dropout = activation_dropout + if isinstance(module, t.WavLMFeatureProjection): + module.intermediate_dropout.p = activation_dropout + def drop_upper_layers(self, max_layers: int): if max_layers >= self.hf_config.num_hidden_layers: return diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index c8565d1d..5e41747c 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -163,7 +163,9 @@ def __init__( oss = False if ddp_type == DDPType.DDP else True self.optimizer = self._make_optimizer(optim, self.model, oss=oss) self.model = TorchDDP( - self.model, device_ids=[device], output_device=device, + self.model, + device_ids=[device], + output_device=device, ) elif 
ddp_type == DDPType.OSS_SHARDED_DDP: self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) @@ -424,7 +426,9 @@ def _make_optimizer(self, optim, model, oss=False): opt_args["oss"] = oss if self.rank == 0: logging.info("optimizer args={}".format(opt_args)) - optimizer = OF.create(model.parameters(), **opt_args) + + # optimizer = OF.create(model.parameters(), **opt_args) + optimizer = OF.create(model.trainable_param_groups(), **opt_args) return optimizer def _make_lr_sched(self, lr_sched, optim): @@ -458,8 +462,8 @@ def _default_loggers(self, log_interval, use_tensorboard, use_wandb, wandb): def _get_lr(self): """Returns the current learning rate to show in the loggers""" - for param_group in self.optimizer.param_groups: - return param_group["lr"] + lrs = [param_group["lr"] for param_group in self.optimizer.param_groups] + return max(lrs) def _compute_grad_acc_steps(self, data_loader): if self.eff_batch_size is None: diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py index 0ef81ab6..d1d969fb 100644 --- a/hyperion/utils/dataset.py +++ b/hyperion/utils/dataset.py @@ -2,10 +2,13 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - +import logging from pathlib import Path from typing import Dict, Optional, Union - +from copy import deepcopy +import math +import numpy as np +import pandas as pd import yaml from .class_info import ClassInfo @@ -93,10 +96,13 @@ def _parse_dict_args(self, data, types): return objects, paths + def clone(self): + return deepcopy(self) + def segments(self, keep_loaded: bool = True): if self._segments is None: assert self._segments_path is not None - segments = SegmentSet.load(self.segments_path, sep=self.table_sep) + segments = SegmentSet.load(self._segments_path, sep=self.table_sep) if keep_loaded: self._segments = segments return segments @@ -111,6 +117,7 @@ def recordings_value(self, key: str, keep_loaded: bool = True): ) if keep_loaded: self._recordings[key] = recordings + return recordings return self._recordings[key] @@ -120,6 +127,7 @@ def features_value(self, key: str, keep_loaded: bool = True): features = FeatureSet.load(self._features_paths[key], sep=self.table_sep) if keep_loaded: self._features[key] = features + return features return self._features[key] @@ -129,6 +137,7 @@ def classes_value(self, key: str, keep_loaded: bool = True): classes = ClassInfo.load(self._classes_paths[key], self.table_sep) if keep_loaded: self._classes[key] = classes + return classes return self._classes[key] @@ -140,6 +149,7 @@ def enrollments_value(self, key: str, keep_loaded: bool = True): ) if keep_loaded: self._enrollments[key] = enrollments + return enrollments return self._enrollments[key] @@ -156,6 +166,7 @@ def trials_value(self, key: str, keep_loaded: bool = True): if keep_loaded: self._trials[key] = trials + return trials return self._trials[key] @@ -194,6 +205,49 @@ def trials(self, keep_loaded: bool = True): for key in self._trials.keys(): yield key, self.trials_value(key, keep_loaded) + # def add_recordings(self, recordings: Dict[str, Union[RecordingSet, PathLike]]): + # recordings, recordings_paths = self._parse_dict_args(recordings, RecordingSet) + # if self._recordings is None: + # self._recordings = self._recordings_paths = {} + # self._recordings.update(recordings) + # self._recordings_paths.update(recordings_paths) + + # def add_features(self, features: Dict[str, Union[FeatureSet, PathLike]]): + # features, features_paths = self._parse_dict_args(features, 
FeatureSet) + # if self._features is None: + # self._features = self._features_paths = {} + # self._features.update(features) + # self._features_paths.update(features_paths) + + # def add_classes(self, classes: Dict[str, Union[ClassInfo, PathLike]]): + # classes, classes_paths = self._parse_dict_args(classes, ClassInfo) + # if self._classes is None: + # self._classes = self._classes_paths = {} + # self._classes.update(classes) + # self._classes_paths.update(classes_paths) + + # def add_enrollments(self, enrollments: Dict[str, Union[EnrollmentMap, PathLike]]): + # enrollments, enrollments_paths = self._parse_dict_args( + # enrollments, + # EnrollmentMap, + # ) + # if self._enrollments is None: + # self._enrollments = self._enrollments_paths = {} + # self._enrollments.update(enrollments) + # self._enrollments_paths.update(enrollments_paths) + + # def add_trials( + # self, trials: Dict[str, Union[TrialKey, TrialNdx, SparseTrialKey, PathLike]] + # ): + # trials, trials_paths = self._parse_dict_args( + # trials, + # (TrialKey, TrialNdx, SparseTrialKey), + # ) + # if self._trials is None: + # self._trials = self._trials_paths = {} + # self._trials.update(trials) + # self._trials_paths.update(trials_paths) + @staticmethod def resolve_dataset_path(dataset_path): dataset_path = Path(dataset_path) @@ -209,6 +263,8 @@ def resolve_dataset_path(dataset_path): @staticmethod def resolve_file_path(dataset_dir, file_path): + dataset_dir = Path(dataset_dir) + file_path = Path(file_path) if file_path.is_file(): return file_path @@ -274,95 +330,100 @@ def save_changed( if update_paths: self._segments_path = file_path - file_names = {} - for k in self._recordings.keys(): - file_name = k + table_ext - file_names[k] = file_name - file_path = dataset_dir / file_name - if ( - self._recordings is not None - or file_path != self._recordings_paths[k] - or not file_path.exists() - ): - v = self.recordings_value(k, keep_loaded=False) - v.save(file_path, sep=table_sep) - if update_paths: - self._recordings_paths[k] = file_path - - if file_names: - dataset["recordings"] = file_names - - file_names = {} - for k in self._features.keys(): - file_name = k + table_ext - file_names[k] = file_name - file_path = dataset_dir / file_name - if ( - self._features is not None - or file_path != self._features_paths[k] - or not file_path.exists() - ): - v = self.features_value(k, keep_loaded=False) - v.save(file_path, sep=table_sep) - if update_paths: - self._features_paths[k] = file_path - - if file_names: - dataset["features"] = file_names - - file_names = {} - for k, v in self._classes.keys(): - file_name = k + table_ext - file_names[k] = file_name - file_path = dataset_dir / file_name - if ( - self._classes is not None - or file_path != self._classes_paths[k] - or not file_path.exists() - ): - v = self.classes_value(k, keep_loaded=False) - v.save(file_path, sep=table_sep) - if update_paths: - self._classes_paths[k] = file_path - - if file_names: - dataset["classes"] = file_names - - file_names = {} - for k, v in self._enrollments.keys(): - file_name = k + table_ext - file_names[k] = file_name - file_path = dataset_dir / file_name - if ( - self._enrollments is not None - or file_path != self._enrollments_paths[k] - or not file_path.exists() - ): - v = self.enrollments_value(k, keep_loaded=False) - v.save(file_path, sep=table_sep) - if update_paths: - self._enrollments_paths[k] = file_path - - if file_names: - dataset["enrollments"] = file_names - - file_names = {} - for k, v in self._trials.keys(): - file_name = k + table_ext - 
file_names[k] = file_name - file_path = dataset_dir / file_name - if ( - self._trials is not None - or file_path != self._trials_paths[k] - or not file_path.exists() - ): - v = self.trials_value(k, keep_loaded=False) - v.save(file_path) - if update_paths: - self._trials_paths[k] = file_path - - if file_names: - dataset["trials"] = file_names + if self._recordings is not None: + file_names = {} + for k in self._recordings.keys(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + if ( + self._recordings[k] is not None + or file_path != self._recordings_paths[k] + or not file_path.exists() + ): + v = self.recordings_value(k, keep_loaded=False) + v.save(file_path, sep=table_sep) + if update_paths: + self._recordings_paths[k] = file_path + + if file_names: + dataset["recordings"] = file_names + + if self._features is not None: + file_names = {} + for k in self._features.keys(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + if ( + self._features[k] is not None + or file_path != self._features_paths[k] + or not file_path.exists() + ): + v = self.features_value(k, keep_loaded=False) + v.save(file_path, sep=table_sep) + if update_paths: + self._features_paths[k] = file_path + + if file_names: + dataset["features"] = file_names + + if self._classes is not None: + file_names = {} + for k in self._classes.keys(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + if ( + self._classes[k] is not None + or file_path != self._classes_paths[k] + or not file_path.exists() + ): + v = self.classes_value(k, keep_loaded=False) + v.save(file_path, sep=table_sep) + if update_paths: + self._classes_paths[k] = file_path + + if file_names: + dataset["classes"] = file_names + + if self._enrollments is not None: + file_names = {} + for k in self._enrollments.keys(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + if ( + self._enrollments[k] is not None + or file_path != self._enrollments_paths[k] + or not file_path.exists() + ): + v = self.enrollments_value(k, keep_loaded=False) + v.save(file_path, sep=table_sep) + if update_paths: + self._enrollments_paths[k] = file_path + + if file_names: + dataset["enrollments"] = file_names + + if self._trials is not None: + file_names = {} + for k in self._trials.keys(): + file_name = k + table_ext + file_names[k] = file_name + file_path = dataset_dir / file_name + if ( + self._trials[k] is not None + or file_path != self._trials_paths[k] + or not file_path.exists() + ): + v = self.trials_value(k, keep_loaded=False) + v.save(file_path) + if update_paths: + self._trials_paths[k] = file_path + + if file_names: + dataset["trials"] = file_names with open(dataset_file, "w") as f: yaml.dump(dataset, f) @@ -491,7 +552,7 @@ def load( """ dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path) - with open(dataset_file, "w") as f: + with open(dataset_file, "r") as f: dataset = yaml.safe_load(f) assert "segments" in dataset @@ -503,27 +564,27 @@ def load( trials = None if "classes" in dataset: classes = {} - for k, v in dataset["classes"]: + for k, v in dataset["classes"].items(): classes[k] = Dataset.resolve_file_path(dataset_dir, v) if "recordings" in dataset: recordings = {} - for k, v in dataset["recordings"]: + for k, v in dataset["recordings"].items(): recordings[k] = Dataset.resolve_file_path(dataset_dir, v) if "features" in dataset: features = {} - for k, v in dataset["features"]: + 
for k, v in dataset["features"].items(): features[k] = Dataset.resolve_file_path(dataset_dir, v) if "enrollments" in dataset: enrollments = {} - for k, v in dataset["enrollments"]: + for k, v in dataset["enrollments"].items(): enrollments[k] = Dataset.resolve_file_path(dataset_dir, v) if "trials" in dataset: trials = {} - for k, v in dataset["trials"]: + for k, v in dataset["trials"].items(): trials[k] = Dataset.resolve_file_path(dataset_dir, v) dataset = cls( @@ -541,6 +602,10 @@ return dataset def add_features(self, features_name: str, features: Union[PathLike, FeatureSet]): + if self._features is None: + self._features = {} + self._features_paths = {} + if isinstance(features, (str, Path)): self._features[features_name] = None self._features_paths[features_name] = features @@ -555,6 +620,10 @@ def add_recordings( recordings_name: str, recordings: Union[PathLike, RecordingSet], ): + if self._recordings is None: + self._recordings = {} + self._recordings_paths = {} + if isinstance(features, (str, Path)): self._recordings[features_name] = None self._recordings_paths[recordings_name] = recordings @@ -565,6 +634,10 @@ raise ValueError() def add_classes(self, classes_name: str, classes: Union[PathLike, ClassInfo]): + if self._classes is None: + self._classes = {} + self._classes_paths = {} + if isinstance(classes, (str, Path)): self._classes[features_name] = None self._classes_paths[classes_name] = classes @@ -579,8 +652,12 @@ def add_enrollments( enrollments_name: str, enrollments: Union[PathLike, EnrollmentMap], ): - if isinstance(features, (str, Path)): - self._enrollments[features_name] = None + if self._enrollments is None: + self._enrollments = {} + self._enrollments_paths = {} + + if isinstance(enrollments, (str, Path)): + self._enrollments[enrollments_name] = None self._enrollments_paths[enrollments_name] = enrollments elif isinstance(enrollments, EnrollmentMap): self._enrollments[enrollments_name] = enrollments @@ -593,7 +670,11 @@ def add_trials( trials_name: str, trials: Union[PathLike, TrialKey, TrialNdx, SparseTrialKey], ): - if isinstance(features, (str, Path)): + if self._trials is None: + self._trials = {} + self._trials_paths = {} + + if isinstance(trials, (str, Path)): self._trials[features_name] = None self._trials_paths[trials_name] = trials elif isinstance(trials, (TrialKey, TrialNdx, SparseTrialKey)): @@ -601,3 +682,220 @@ self._trials[trials_name] = trials + self._trials_paths[trials_name] = None + else: + raise ValueError() + + def remove_features(self, features_name: str): + if self._features_paths[features_name] is not None: + file_path = Path(self._features_paths[features_name]) + if file_path.is_file(): + file_path.unlink() + + del self._features[features_name] + del self._features_paths[features_name] + + def remove_recordings( + self, + recordings_name: str, + ): + if self._recordings_paths[recordings_name] is not None: + file_path = Path(self._recordings_paths[recordings_name]) + if file_path.is_file(): + file_path.unlink() + + del self._recordings[recordings_name] + del self._recordings_paths[recordings_name] + + def remove_classes(self, classes_name: str): + if self._classes_paths[classes_name] is not None: + file_path = Path(self._classes_paths[classes_name]) + if file_path.is_file(): + file_path.unlink() + + del self._classes[classes_name] + del self._classes_paths[classes_name] + + def remove_enrollments( + self, + enrollments_name: str, + ): + if self._enrollments_paths[enrollments_name] is not None: + file_path =
Path(self._enrollments_paths[enrollments_name]) + if file_path.is_file(): + file_path.unlink() + + del self._enrollments[enrollments_name] + del self._enrollments_paths[enrollments_name] + + def remove_trials( + self, + trials_name: str, + ): + if self._trials_paths[trials_name] is not None: + file_path = Path(self._trials_paths[trials_name]) + if file_path.is_file(): + file_path.unlink() + + del self._trials[trials_name] + del self._trials_paths[trials_name] + + def set_segments(self, segments: Union[PathLike, SegmentSet]): + if isinstance(segments, SegmentSet): + self._segments = segments + else: + self._segments_path = segments + + def clean(self): + rec_ids = self.segments().recording_ids() + for k, table in self.recordings(): + table = table.loc[table["id"].isin(rec_ids)].copy() + self._recordings[k] = RecordingSet(table) + + ids = self.segments()["id"].values + for k, table in self.features(): + table = table.loc[table["id"].isin(ids)].copy() + self._features[k] = FeatureSet(table) + + for k, table in self.classes(): + class_ids = self.segments()[k].unique() + table = table[table["id"].isin(class_ids)].copy() + self._classes[k] = ClassInfo(table) + + remove_keys = [] + for k, table in self.enrollments(): + table = table.loc[table["segmentid"].isin(ids)].copy() + if len(table) > 0: + self._enrollments[k] = EnrollmentMap(table) + else: + remove_keys.append(k) + + for k in remove_keys: + self.remove_enrollments(k) + + remove_keys = [] + for k, key in self.trials(): + keep_ids = [cur_id for cur_id in key.seg_set if cur_id in ids] + if keep_ids: + key = key.filter(key.model_set, keep_ids, keep=True) + self._trials[k] = key + else: + remove_keys.append(k) + + for k in remove_keys: + self.remove_trials(k) + + def _split_into_trials_and_cohort( + self, + segments: SegmentSet, + num_tar_trials: int, + num_trial_speakers: int, + seed: int, + ): + # select test speakers + rng = np.random.RandomState(seed=seed) + + spks = segments["speaker"].unique() + trial_spks = rng.choice(spks, size=(num_trial_speakers,), replace=False) + snorm_segments = SegmentSet(segments[~segments["speaker"].isin(trial_spks)]) + + trial_segments = segments[segments["speaker"].isin(trial_spks)] + # solution of 2nd degree eq. + # num_spks * n (n-1) /2 = num_trials + num_segs_per_spk = int( + math.ceil((1 + math.sqrt(1 + 8 * num_tar_trials // num_trial_speakers)) / 2) + ) + + n = num_trial_speakers * num_segs_per_spk + seg_ids = rng.choice(trial_segments["id"], size=(n,), replace=False) + trial_segments = SegmentSet(segments[segments["id"].isin(seg_ids)]) + seg_ids = trial_segments["id"].values + class_ids = trial_segments["speaker"].values + tar = np.zeros((n - 1, n), dtype=bool) + non = np.zeros((n - 1, n), dtype=bool) + + ntar = 0 + nnon = 0 + for i in range(n - 1): + for j in range(i + 1, n): + if class_ids[i] == class_ids[j]: + tar[i, j] = True + else: + non[i, j] = True + + logging.info("Got ntar=%d and nnon=%d", tar.sum(), non.sum()) + trials = TrialKey(seg_ids[:-1], seg_ids, tar, non) + df_enr = pd.DataFrame({"id": seg_ids[:-1], "segmentid": seg_ids[:-1]}) + enrollments = EnrollmentMap(df_enr) + return trials, enrollments, snorm_segments + + def split_into_trials_and_cohort( + self, + num_1k_tar_trials: int, + num_trial_speakers: int, + intra_gender: bool = True, + trials_name="trials_qmf", + seed=1123, + ): + """When training quality measure fusion in, e.g., VoxCeleb recipe. 
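        A worked check of the segment-count formula in
        _split_into_trials_and_cohort above, with made-up numbers:

            import math

            num_tar_trials, num_trial_speakers = 15000, 500
            # each speaker with n segments yields n*(n-1)/2 target pairs, so we
            # need the smallest n with num_spks * n*(n-1)/2 >= num_tar_trials
            n = int(math.ceil((1 + math.sqrt(1 + 8 * num_tar_trials // num_trial_speakers)) / 2))
            print(n)  # 9
            assert num_trial_speakers * n * (n - 1) // 2 >= num_tar_trials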
+ + def split_into_trials_and_cohort( + self, + num_1k_tar_trials: int, + num_trial_speakers: int, + intra_gender: bool = True, + trials_name="trials_qmf", + seed=1123, + ): + """Splits the dataset for training quality-measure fusion (QMF), e.g., in the VoxCeleb recipe. + + We split the data into 2 parts: + 1) used to calculate SV scores to train the fusion + 2) cohort used to calculate the S-Norm parameters used in the QMF. + + The trial list is stored in the current dataset. + A new dataset is created with only the cohort speakers. + + Args: + num_1k_tar_trials: number of target trials, in thousands. + num_trial_speakers: number of speakers used to create trials. + intra_gender: if True, no cross-gender trials are made. + trials_name: name under which the trial list is stored (default: trials_qmf). + seed: random seed for speaker and segment selection. + + Returns: + Dataset used for trials with trial list. + Dataset used for cohort. + """ + num_tar_trials = num_1k_tar_trials * 1000 + if intra_gender: + num_tar_trials = num_tar_trials // 2 + num_trial_speakers = num_trial_speakers // 2 + segments = self.segments() + segments_male = SegmentSet(segments[segments["gender"] == "m"]) + segments_female = SegmentSet(segments[segments["gender"] == "f"]) + trials_male, enroll_male, cohort_male = self._split_into_trials_and_cohort( + segments_male, + num_tar_trials, + num_trial_speakers, + seed, + ) + ( + trials_female, + enroll_female, + cohort_female, + ) = self._split_into_trials_and_cohort( + segments_female, + num_tar_trials, + num_trial_speakers, + seed, + ) + trials = TrialKey.merge([trials_male, trials_female]) + enroll = EnrollmentMap.cat([enroll_male, enroll_female]) + cohort = SegmentSet.cat([cohort_male, cohort_female]) + else: + segments = self.segments() + trials, enroll, cohort = self._split_into_trials_and_cohort( + segments, + num_tar_trials, + num_trial_speakers, + seed, + ) + + dataset_trials = self.clone() + segments = self.segments() + trials_segments = SegmentSet(segments.loc[segments["id"].isin(trials.seg_set)]) + dataset_trials.set_segments(trials_segments) + dataset_trials.add_trials("trials", trials) + dataset_trials.add_enrollments("enrollments", enroll) + dataset_trials.clean() + + dataset_cohort = self.clone() + dataset_cohort.set_segments(cohort) + dataset_cohort.clean() + + return dataset_trials, dataset_cohort diff --git a/hyperion/utils/segment_set.py b/hyperion/utils/segment_set.py index 1852d25d..6aef5bb2 100644 --- a/hyperion/utils/segment_set.py +++ b/hyperion/utils/segment_set.py @@ -8,7 +8,7 @@ class SegmentSet(InfoTable): """Class to store information about a speech segment - Internally, it uses a pandas table. + Internally, it uses a pandas table. 
""" def __init__(self, df): @@ -29,7 +29,13 @@ def has_time_marks(self): def has_recording_ids(self): return "recording_id" in self.df - def recording_ids(self, ids): + def recording_ids(self, ids=None): + if ids is None: + if "recording_id" in self.df: + return self.df["recording_id"] + else: + return self.df["id"] + if "recording_id" in self.df: return self.df.loc[ids, "recording_id"] From c1d193abd8161a35017d316382b6025ef2c22db0 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 19 Jul 2023 16:36:06 -0400 Subject: [PATCH 104/154] finished experiments of models 2.0 in voxceleb/v2 --- egs/voxceleb/v1.2/run_001_prepare_data.sh | 18 +---- egs/voxceleb/v2/README.md | 60 +++++++++++++++ ...lsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml | 59 +++++++++++++++ ...c2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml | 59 +++++++++++++++ ...baseplus9l_ecapatdnn512x3_stage1_v2.0.yaml | 59 +++++++++++++++ ...baseplus_ecapatdnn512x3_stage1_v2.0_0.yaml | 59 +++++++++++++++ ...lmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml | 2 +- ...lmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml | 2 +- ...lmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml | 59 +++++++++++++++ ...lmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml | 63 ++++++++++++++++ ...lmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml | 73 +++++++++++++++++++ ...wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml | 59 +++++++++++++++ ...wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml | 63 ++++++++++++++++ ...wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml | 73 +++++++++++++++++++ ...v2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml | 45 ++++++++++++ .../wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml | 44 +++++++++++ .../wavlmlarge12l_ecapatdnn512x3_v2.0.yaml | 45 ++++++++++++ .../conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml | 44 +++++++++++ ...wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | 54 ++++++++++++++ ...ig_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | 54 ++++++++++++++ ...fig_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | 54 ++++++++++++++ ...onfig_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | 54 ++++++++++++++ .../config_wavlmlarge_ecapatdnn512x3_v2.0.sh | 54 ++++++++++++++ hyp_utils/create_data_split_dirs.sh | 4 +- hyp_utils/create_data_split_links.sh | 6 +- hyp_utils/feats/make_evad.sh | 2 +- hyperion/bin/compute_energy_vad.py | 37 ++++++++-- hyperion/io/ark_data_writer.py | 20 +++-- hyperion/io/audio_reader.py | 12 ++- hyperion/io/data_rw_factory.py | 8 +- hyperion/io/data_writer.py | 36 ++++++++- hyperion/io/h5_data_writer.py | 19 +++-- hyperion/io/rw_specifiers.py | 47 ++++++++---- hyperion/utils/__init__.py | 1 + 34 files changed, 1281 insertions(+), 67 deletions(-) create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0_0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml create mode 100644 
egs/voxceleb/v2/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2/global_conf/config_wavlmlarge_ecapatdnn512x3_v2.0.sh diff --git a/egs/voxceleb/v1.2/run_001_prepare_data.sh b/egs/voxceleb/v1.2/run_001_prepare_data.sh index c151e270..aef70e96 100755 --- a/egs/voxceleb/v1.2/run_001_prepare_data.sh +++ b/egs/voxceleb/v1.2/run_001_prepare_data.sh @@ -23,34 +23,24 @@ fi if [ $stage -le 2 ];then # prepare voxceleb1 for test - # hyp_utils/conda_env.sh prepare_data.py voxceleb1 --task test --corpus-dir $voxceleb1_root \ --use-kaldi-ids \ --output-dir data/voxceleb1_test - #local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data fi if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then prepare_data.py voxsrc22 --subset dev --corpus-dir $voxsrc22_root \ --vox1-corpus-dir $voxceleb1_root \ --output-dir data/voxsrc22_dev - # local/prepare_voxsrc22_dev.py \ - # --vox1-corpus-dir $voxceleb1_root \ - # --voxsrc22-corpus-dir $voxsrc22_root \ - # --output-dir data/voxsrc22_dev - prepare_data.py voxsrc22 --subset test --corpus-dir $voxsrc22_root \ - --vox1-corpus-dir $voxceleb1_root \ - --output-dir data/voxsrc22_test fi # if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then -# local/prepare_voxsrc22_test.py \ -# --corpus-dir $voxsrc22_root \ -# --output-dir data/voxsrc22_test +# prepare_data.py voxsrc22 --subset test --corpus-dir $voxsrc22_root \ +# --vox1-corpus-dir $voxceleb1_root \ +# --output-dir data/voxsrc22_test # fi if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then - # # split vox2 into 2 parts, for cohort and qmf training + # split vox2 into 2 parts, for cohort and qmf training split_dataset_into_trials_and_cohort.py --data-dir data/voxceleb2cat_train - #local/make_vox2_trials.py --data-dir data/voxceleb2cat_train fi diff --git a/egs/voxceleb/v2/README.md b/egs/voxceleb/v2/README.md index c64a4b41..a005b6e8 100644 --- a/egs/voxceleb/v2/README.md +++ b/egs/voxceleb/v2/README.md @@ -78,6 +78,21 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.84 | 0.060 | 0.116 | | | | | Cosine + AS-Norm | 0.81 | 0.058 | 0.108 | | | | | Cosine + QMF | 0.75 | 0.054 | 0.086 | +| config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | WavLM(layer=2-9)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.89 | 0.069 | 0.108 | +| | | | Cosine + AS-Norm | 0.86 | 0.067 | 0.108 | +| | | | Cosine + QMF | 0.77 | 0.066 | 0.105 | +| config_wavlmlarge_ecapatdnn512x3_v2.0.sh | WavLM-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.74 | 0.057 | 0.085 | +| | | | Cosine + AS-Norm | 0.73 | 0.055 | 0.093 | +| | | | Cosine + QMF | 0.66 | 0.051 | 0.094 | +| config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.74 | 0.053 | 
0.080 | +| | | | Cosine + AS-Norm | 0.71 | 0.050 | 0.087 | +| | | | Cosine + QMF | 0.64 | 0.045 | 0.087 | +| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.84 | 0.063 | 0.111 | +| | | | Cosine + AS-Norm | 0.68 | 0.053 | 0.090 | +| | | | Cosine + QMF | 0.63 | 0.048 | 0.071 | +| config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.14 | 0.074 | 0.107 | +| | | | Cosine + AS-Norm | 0.94 | 0.060 | 0.089 | +| | | | Cosine + QMF | 0.89 | 0.054 | 0.076 | ### VoxCeleb 1 Entire-Clean trial list @@ -86,6 +101,21 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.81 | 0.051 | 0.087 | | | | | Cosine + AS-Norm | 0.78 | 0.047 | 0.083 | | | | | Cosine + QMF | 0.75 | 0.046 | 0.076 | +| config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | WavLM(layer=2-9)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.89 | 0.056 | 0.099 | +| | | | Cosine + AS-Norm | 0.86 | 0.053 | 0.090 | +| | | | Cosine + QMF | 0.82 | 0.050 | 0.085 | +| config_wavlmlarge_ecapatdnn512x3_v2.0.sh | WavLM-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.80 | 0.049 | 0.088 | +| | | | Cosine + AS-Norm | 0.76 | 0.045 | 0.080 | +| | | | Cosine + QMF | 0.73 | 0.043 | 0.078 | +| config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.91 | 0.056 | 0.094 | +| | | | Cosine + AS-Norm | 0.87 | 0.053 | 0.090 | +| | | | Cosine + QMF | 0.83 | 0.050 | 0.086 | +| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.80 | 0.050 | 0.086 | +| | | | Cosine + AS-Norm | 0.73 | 0.045 | 0.074 | +| | | | Cosine + QMF | 0.69 | 0.042 | 0.069 | +| config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.99 | 0.058 | 0.103 | +| | | | Cosine + AS-Norm | 0.87 | 0.052 | 0.090 | +| | | | Cosine + QMF | 0.83 | 0.050 | 0.085 | ### VoxCeleb 1 Hard-Clean trial list @@ -94,6 +124,21 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.73 | 0.113 | 0.182 | | | | | Cosine + AS-Norm | 1.63 | 0.100 | 0.160 | | | | | Cosine + QMF | 1.56 | 0.096 | 0.155 | +| config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | WavLM(layer=2-9)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.88 | 0.122 | 0.200 | +| | | | Cosine + AS-Norm | 1.77 | 0.110 | 0.175 | +| | | | Cosine + QMF | 1.66 | 0.104 | 0.168 | +| config_wavlmlarge_ecapatdnn512x3_v2.0.sh | WavLM-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.67 | 0.103 | 0.165 | +| | | | Cosine + AS-Norm | 1.54 | 0.093 | 0.152 | +| | | | Cosine + QMF | 1.45 | 0.089 | 0.145 | +| config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.78 | 0.106 | 0.174 | +| | | | Cosine + AS-Norm | 1.70 | 0.099 | 0.162 | +| | | | Cosine + QMF | 1.61 | 0.094 | 0.153 | +| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.49 | 0.087 | 0.137 | +| | | | Cosine + AS-Norm | 1.29 | 0.074 | 0.117 | +| | | | Cosine + QMF | 1.22 | 0.069 | 0.111 | +| config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.84 | 0.107 | 0.172 | +| | | | Cosine + AS-Norm | 1.47 | 0.083 | 0.128 | +| | | | Cosine + QMF | 1.39 | 0.079 | 0.123 |
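The "Cosine + AS-Norm" rows in these tables apply adaptive score normalization against a cohort (do_snorm=true in the global configs below). A minimal numpy sketch of top-k adaptive S-Norm on precomputed cosine scores; this illustrates the technique and is not hyperion's exact implementation:

    import numpy as np

    def adapt_snorm(scores, enr_vs_coh, coh_vs_test, k=400):
        # scores: (n_enr, n_test), enr_vs_coh: (n_enr, n_coh), coh_vs_test: (n_coh, n_test)
        enr_top = np.sort(enr_vs_coh, axis=1)[:, -k:]    # k best cohort scores per enrollment
        mu_e = enr_top.mean(axis=1, keepdims=True)
        s_e = enr_top.std(axis=1, keepdims=True)
        test_top = np.sort(coh_vs_test, axis=0)[-k:, :]  # k best cohort scores per test segment
        mu_t = test_top.mean(axis=0, keepdims=True)
        s_t = test_top.std(axis=0, keepdims=True)
        return 0.5 * ((scores - mu_e) / s_e + (scores - mu_t) / s_t)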
### VoxSRC2022 dev @@ -102,3 +147,18 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.60 | 0.163 | 0.257 | | | | | Cosine + AS-Norm | 2.43 | 0.150 | 0.244 | | | | | Cosine + QMF | 2.31 | 0.143 | 0.232 | +| config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | WavLM(layer=2-9)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.82 | 0.183 | 0.286 | +| | | | Cosine + AS-Norm | 2.69 | 0.168 | 0.265 | +| | | | Cosine + QMF | 2.52 | 0.158 | 0.252 | +| config_wavlmlarge_ecapatdnn512x3_v2.0.sh | WavLM-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.65 | 0.176 | 0.289 | +| | | | Cosine + AS-Norm | 2.55 | 0.171 | 0.292 | +| | | | Cosine + QMF | 2.38 | 0.159 | 0.266 | +| config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.62 | 0.153 | 0.251 | +| | | | Cosine + AS-Norm | 2.53 | 0.149 | 0.247 | +| | | | Cosine + QMF | 2.42 | 0.144 | 0.231 | +| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.25 | 0.136 | 0.225 | +| | | | Cosine + AS-Norm | 2.01 | 0.125 | 0.209 | +| | | | Cosine + QMF | 1.92 | 0.117 | 0.200 | +| config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.83 | 0.175 | 0.276 | +| | | | Cosine + AS-Norm | 2.31 | 0.149 | 0.244 | +| | | | Cosine + QMF | 2.22 | 0.137 | 0.229 | diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..ad991124 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file
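The trainer block in this and the following stage-1 configs uses an exp_lr schedule: warm up for warmup_steps, hold the base lr for hold_steps, then decay by decay_rate every decay_steps optimizer steps with a min_lr floor. A rough sketch of that reading; the warmup shape and the continuous exponent are assumptions, not hyperion's exact scheduler:

    def exp_lr(step, lr=0.4, decay_rate=0.5, decay_steps=7500,
               hold_steps=2600, warmup_steps=2600, min_lr=1e-6):
        # linear warmup, then hold, then exponential decay with a floor
        if step < warmup_steps:
            return lr * step / warmup_steps
        t = step - warmup_steps
        if t < hold_steps:
            return lr
        return max(min_lr, lr * decay_rate ** ((t - hold_steps) / decay_steps))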
diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..254ff796 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..52be6db5 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0_0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0_0.yaml new file mode 100644 index 00000000..ebeedde6 --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0_0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + 
data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmbaseplus_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 4850 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-4 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 25 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml index eed0ad1f..69a8322b 100644 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml @@ -58,6 +58,6 @@ trainer: update_lr_on_opt_step: true use_amp: true log_interval: 1000 - epochs: 30 + epochs: 8 eff_batch_size: 512 train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml index d66d6877..3443591a 100644 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml @@ -68,6 +68,6 @@ trainer: update_lr_on_opt_step: true use_amp: true log_interval: 1000 - epochs: 8 + epochs: 4 eff_batch_size: 256 train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..abe5da6e --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmlarge12l_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml new file mode 100644 index 00000000..7287188c --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml @@ 
-0,0 +1,63 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5e-2 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 5e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 8 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml new file mode 100644 index 00000000..3443591a --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 1e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 4 + eff_batch_size: 256 + train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..2addaa1e --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + 
val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmlarge_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml new file mode 100644 index 00000000..69a8322b --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml @@ -0,0 +1,63 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5e-2 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 5e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 8 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml new file mode 100644 index 00000000..5e1260ad --- /dev/null +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + 
margin: 0.4 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 1e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 4 + eff_batch_size: 256 + train_mode: full diff --git a/egs/voxceleb/v2/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..c3466259 --- /dev/null +++ b/egs/voxceleb/v2/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,45 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-xls-r-300m + drop_layers_gt: 12 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..dc3737e3 --- /dev/null +++ b/egs/voxceleb/v2/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,44 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-xls-r-300m +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..5025f047 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,45 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-large + drop_layers_gt: 12 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + 
hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..0a6303f5 --- /dev/null +++ b/egs/voxceleb/v2/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,44 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-large +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..67a4665e --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# Wav2Vec2 Multilingual 300M params layers 2-12 + +# hugging face model +hf_model_name=wav2vec2xlsr300m12l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2/global_conf/config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..80ee785b --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# Wav2Vec2 Multilingual 300M params + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + 
+nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..c2b30f68 --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus9l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..530096cc --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# WavLM large trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmlarge12l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 
+nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2/global_conf/config_wavlmlarge_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2/global_conf/config_wavlmlarge_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..1b276bcd --- /dev/null +++ b/egs/voxceleb/v2/global_conf/config_wavlmlarge_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# WavLM large trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmlarge + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/hyp_utils/create_data_split_dirs.sh b/hyp_utils/create_data_split_dirs.sh index 06c30779..b8aad6c8 100755 --- a/hyp_utils/create_data_split_dirs.sh +++ b/hyp_utils/create_data_split_dirs.sh @@ -6,7 +6,7 @@ storage_name=$(date +'%m_%d_%H_%M') -echo "$0 $@" # Print the command line for logging + if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; @@ -15,6 +15,7 @@ if [ $# -ne 3 ]; then echo "Usage: $0 <output_dir> <storage_dir> <nodes>" echo "$0 exp/vad_dir $USER/hyp-data/voxceleb/v1/vad/storage b0" fi + output_dir=$1 storage_dir=$2 nodes=$3 @@ -22,6 +23,7 @@ nodes=$3 link_dir=$output_dir/storage if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $link_dir ]; then + echo "$0 $@" # Print the command line for logging echo "Prepare to distribute data over multiple $nodes nodes" dir_name=$storage_dir/$storage_name/storage if [ "$nodes" == "b0" ];then diff --git a/hyp_utils/create_data_split_links.sh b/hyp_utils/create_data_split_links.sh index fb5b8ca0..8416742e 100755 --- a/hyp_utils/create_data_split_links.sh +++ b/hyp_utils/create_data_split_links.sh @@ -6,11 +6,11 @@ storage_name=$(date +'%m_%d_%H_%M') -echo "$0 $@" # Print the command line for logging -if [ $# -ne 3 ]; then - echo "Usage: $0 < " +if [ $# -ne 2 ]; then + echo "Usage: $0 <output_file_pattern> <nj>" echo "$0 exp/vad_dir/vad.JOB.ark 40" fi +echo "$0 $@" # Print the command line for logging output_file_pattern=$1 nj=$2 diff --git a/hyp_utils/feats/make_evad.sh b/hyp_utils/feats/make_evad.sh index 373fc4a6..16ddbf74 100755 --- a/hyp_utils/feats/make_evad.sh +++ b/hyp_utils/feats/make_evad.sh @@ -87,7 +87,7 @@ fi $cmd JOB=1:$nj $logdir/make_vad_${name}.JOB.log \ hyp_utils/conda_env.sh \ compute_energy_vad.py --cfg $vad_config $opt_args \ - --input $scp --output ark,scp:$vaddir/vad_$name.JOB.ark,$vaddir/vad_$name.JOB.scp \ + --recordings-file $scp --output-spec ark,scp:$vaddir/vad_$name.JOB.ark,$vaddir/vad_$name.JOB.scp \ --part-idx JOB --num-parts $nj || exit 1 # concatenate the .scp files together. diff --git a/hyperion/bin/compute_energy_vad.py b/hyperion/bin/compute_energy_vad.py index e9773fff..9d50388c 100755 --- a/hyperion/bin/compute_energy_vad.py +++ b/hyperion/bin/compute_energy_vad.py @@ -13,19 +13,31 @@ from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.np.feats import EnergyVAD -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) -def compute_vad(input_path, output_path, write_num_frames, **kwargs): +def compute_vad(recordings_file, output_spec, write_num_frames, **kwargs): vad_args = EnergyVAD.filter_args(**kwargs) vad = EnergyVAD(**vad_args) input_args = AR.filter_args(**kwargs) - reader = AR(input_path, **input_args) + reader = AR(recordings_file, **input_args) - writer = DWF.create(output_path) + metadata_columns = [ + "frame_shift", + "frame_length", + "num_frames", + "num_speech_frames", + "prob_speech", + ] + + writer = DWF.create(output_spec, metadata_columns=metadata_columns) if write_num_frames is not None: f_num_frames = open(write_num_frames, "w") @@ -39,6 +51,7 @@ def compute_vad(input_path, output_path, write_num_frames, **kwargs): rtf = vad.frame_shift * y.shape[0] / dt num_speech_frames = np.sum(y) prob_speech = num_speech_frames / y.shape[0] * 100 + logging.info( "Extracted VAD for %s detected %d/%d (%f %%) speech frames, elapsed-time=%.2f ms. 
real-time-factor=%.2f", key, @@ -48,7 +61,14 @@ def compute_vad(input_path, output_path, write_num_frames, **kwargs): dt, rtf, ) - writer.write([key], [y]) + metadata = { + "frame_shift": vad.frame_shift, + "frame_length": vad.frame_length, + "num_frames": y.shape[0], + "num_speech_frames": num_speech_frames, + "prob_speech": prob_speech, + } + writer.write([key], [y], metadata) if write_num_frames is not None: f_num_frames.write("%s %d\n" % (key, y.shape[0])) @@ -63,9 +83,10 @@ def compute_vad(input_path, output_path, write_num_frames, **kwargs): parser = ArgumentParser(description="Compute Kaldi Energy VAD") parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--input", dest="input_path", required=True) - parser.add_argument("--output", dest="output_path", required=True) + parser.add_argument("--recordings-file", required=True) + parser.add_argument("--output-spec", required=True) parser.add_argument("--write-num-frames", default=None) + parser.add_argument("--write-stats", default=None) AR.add_class_args(parser) EnergyVAD.add_class_args(parser) diff --git a/hyperion/io/ark_data_writer.py b/hyperion/io/ark_data_writer.py index 6adf78b2..26f77112 100644 --- a/hyperion/io/ark_data_writer.py +++ b/hyperion/io/ark_data_writer.py @@ -3,10 +3,10 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from typing import Union, Optional, List +from typing import Union, Optional, List, Dict import numpy as np - +import pandas as pd from ..hyp_defs import float_save from ..utils.kaldi_io_funcs import init_kaldi_output_stream, is_token, write_token from ..utils.kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix @@ -46,7 +46,10 @@ def __init__( self.f = open(archive_path, "w") if script_path is not None and not self.script_is_scp: - row = self.script_sep.join(["id", "storage_path", "storage_byte"]) + columns = ["id", "storage_path", "storage_byte"] + if self.metadata_columns is not None: + columns += self.metadata_columns + row = self.script_sep.join(columns) self.f_script.write(f"{row}\n") def __exit__(self, exc_type, exc_value, traceback): @@ -97,6 +100,7 @@ def write( self, keys: Union[str, List[str], np.array], data: Union[np.array, List[np.array]], + metadata: Optional[Union[pd.DataFrame, Dict]] = None, ): """Writes data to file. @@ -107,9 +111,7 @@ def write( it can be a 3D numpy array. If they are vectors, it can be a 2D numpy array. 
""" - if isinstance(keys, str): - keys = [keys] - data = [data] + keys, data, metadata = self.standardize_write_args(keys, data, metadata) for i, key_i in enumerate(keys): assert is_token(key_i), "Token %s not valid" % key_i @@ -125,7 +127,11 @@ def write( if self.script_is_scp: self.f_script.write(f"{key_i} {self.archive_path}:{pos}\n") else: - row = self.script_sep.join([key_i, self.archive_path, str(pos)]) + columns = [key_i, str(self.archive_path), str(pos)] + if metadata is not None: + metadata_i = [str(m[i]) for m in metadata] + columns += metadata_i + row = self.script_sep.join(columns) self.f_script.write(f"{row}\n") if self._flush: diff --git a/hyperion/io/audio_reader.py b/hyperion/io/audio_reader.py index 1052ce8c..6c152cc5 100644 --- a/hyperion/io/audio_reader.py +++ b/hyperion/io/audio_reader.py @@ -346,7 +346,9 @@ def read(self, num_records: int = 0, time_offset: float = 0, time_durs: float = key = segment["id"] x_i, fs_i = self._read_segment(segment, offset_i, dur_i) else: - key, file_path = self.recordings.iloc[self.cur_item] + segment = self.recordings.iloc[self.cur_item] + key = segment["id"] + file_path = segment["storage_path"] x_i, fs_i = self.read_wavspecifier( file_path, self.wav_scale, offset_i, dur_i ) @@ -397,7 +399,8 @@ def add_class_args(parser, prefix: Optional[str] = None): if prefix is not None: outer_parser.add_argument( - "--" + prefix, action=ActionParser(parser=parser), + "--" + prefix, + action=ActionParser(parser=parser), ) add_argparse_args = add_class_args @@ -423,7 +426,7 @@ def read( Args: keys: List of recording/segment_ids names. time_offset: float or float list with time-offsets - time_durs: float or float list with durations + time_durs: float or float list with durations Returns: data: List of waveforms @@ -527,7 +530,8 @@ def add_class_args(parser, prefix: Optional[str] = None): ) if prefix is not None: outer_parser.add_argument( - "--" + prefix, action=ActionParser(parser=parser), + "--" + prefix, + action=ActionParser(parser=parser), ) add_argparse_args = add_class_args diff --git a/hyperion/io/data_rw_factory.py b/hyperion/io/data_rw_factory.py index b56e8c27..092f5549 100644 --- a/hyperion/io/data_rw_factory.py +++ b/hyperion/io/data_rw_factory.py @@ -30,7 +30,10 @@ class DataWriterFactory(object): @staticmethod def create( - wspecifier: PathLike, compress: bool = False, compression_method: str = "auto" + wspecifier: PathLike, + compress: bool = False, + compression_method: str = "auto", + metadata_columns: Optional[List[str]] = None, ): if isinstance(wspecifier, str): wspecifier = WSpecifier.create(wspecifier) @@ -47,6 +50,7 @@ def create( flush=wspecifier.flush, compress=compress, compression_method=compression_method, + metadata_columns=metadata_columns, ) else: return ADW( @@ -56,6 +60,7 @@ def create( flush=wspecifier.flush, compress=compress, compression_method=compression_method, + metadata_columns=metadata_columns, ) @staticmethod @@ -76,7 +81,6 @@ def add_class_args(parser, prefix: Optional[PathLike] = None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='data writer options') class SequentialDataReaderFactory(object): diff --git a/hyperion/io/data_writer.py b/hyperion/io/data_writer.py index 8adbf87a..ff35ef2a 100644 --- a/hyperion/io/data_writer.py +++ b/hyperion/io/data_writer.py @@ -5,9 +5,10 @@ import os from abc import ABCMeta, abstractmethod -from typing import Union, Optional, List +from typing import Union, Optional, List, Dict from pathlib import Path import 
numpy as np +import pandas as pd from ..utils import PathLike @@ -34,12 +35,14 @@ def __init__( flush: bool = False, compress: bool = False, compression_method: str = "auto", + metadata_columns: Optional[List[str]] = None, ): self.archive_path = Path(archive_path) self.script_path = Path(script_path) if script_path is not None else None self._flush = flush self.compress = compress self.compression_method = compression_method + self.metadata_columns = metadata_columns archive_dir = self.archive_path.parent archive_dir.mkdir(exist_ok=True, parents=True) @@ -56,9 +59,7 @@ def __init__( self.f_script = open(self.script_path, "w") else: self.script_sep = "," if script_ext == ".csv" else "\t" - self.f_script = open(self.script_path, "w", "utf-8") - row = self.script_sep.join(["id", "storage_path"]) - self.f_script.write(f"{row}\n") + self.f_script = open(self.script_path, "w", encoding="utf-8") def __enter__(self): """Function required when entering constructions of type @@ -87,11 +88,37 @@ def flush(self): """Flushes the file""" pass + def standardize_write_args( + self, + keys: Union[str, List[str], np.array], + data: Union[np.array, List[np.array]], + metadata: Optional[Union[pd.DataFrame, Dict]] = None, + ): + if isinstance(keys, str): + keys = [keys] + data = [data] + + if metadata is not None: + if isinstance(metadata, pd.DataFrame): + metadata = metadata.to_dict(orient="list") + + metadata_list = [] + for c in self.metadata_columns: + m_c = metadata[c] + if not isinstance(m_c, (list, np.ndarray)): + m_c = [m_c] + metadata_list.append(m_c) + + metadata = metadata_list + + return keys, data, metadata + @abstractmethod def write( self, keys: Union[str, List[str], np.array], data: Union[np.array, List[np.array]], + metadata: Optional[Union[pd.DataFrame, Dict]] = None, ): """Writes data to file. @@ -101,5 +128,6 @@ def write( If all the matrices have the same dimension it can be a 3D numpy array. If they are vectors, it can be a 2D numpy array. + metadata: dictionary/DataFrame with metadata """ pass
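With metadata_columns set, each write() call appends the per-key metadata values as extra columns after the storage fields of the .csv script file. A sketch of the intended behavior for the Ark writer; the values, paths, and byte offset below are made up:

    # writer = DWF.create("ark,csv:vad.1.ark,vad.1.csv",
    #                     metadata_columns=["num_frames", "prob_speech"])
    writer.write(["utt1"], [y], {"num_frames": 998, "prob_speech": 63.2})
    # vad.1.csv header: id,storage_path,storage_byte,num_frames,prob_speech
    # appended row:     utt1,vad.1.ark,12345,998,63.2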
""" - if isinstance(keys, str): - keys = [keys] - data = [data] + keys, data, metadata = self.standardize_write_args(keys, data, metadata) for i, key_i in enumerate(keys): assert is_token(key_i), "Token %s not valid" % key_i @@ -115,7 +118,11 @@ def write( if self.script_is_scp: self.f_script.write(f"{key_i} {self.archive_path}\n") else: - row = self.script_sep.join([key_i, self.archive_path]) + columns = [key_i, str(self.archive_path)] + if metadata is not None: + metadata_i = [str(m[i]) for m in metadata] + columns += metadata_i + row = self.script_sep.join(columns) self.f_script.write(f"{row}\n") if self._flush: diff --git a/hyperion/io/rw_specifiers.py b/hyperion/io/rw_specifiers.py index 37f579b4..93123247 100644 --- a/hyperion/io/rw_specifiers.py +++ b/hyperion/io/rw_specifiers.py @@ -7,6 +7,8 @@ import re from enum import Enum +from pathlib import Path +import pandas as pd class ArchiveType(Enum): @@ -174,6 +176,11 @@ def create(cls, wspecifier): archive_type = ArchiveType.AUDIO archive = archives[cur_archive] cur_archive += 1 + elif option == "csv": + assert script is None, "Repeated csv in wspecifier %s" % script + assert len(archives) > cur_archive + script = archives[cur_archive] + cur_archive += 1 elif option == "scp": assert script is None, "Repeated scp in wspecifier %s" % script assert len(archives) > cur_archive @@ -332,7 +339,7 @@ def create(cls, rspecifier): assert len(archives) == 1 spec_type = None - archive = archives[0] + archive = Path(archives[0]) archive_type = None once = False is_sorted = False @@ -361,6 +368,9 @@ def create(cls, rspecifier): assert spec_type is None spec_type = RSpecType.ARCHIVE archive_type = ArchiveType.RTTM + elif option == "csv": + assert spec_type is None + spec_type = RSpecType.SCRIPT elif option == "scp": assert spec_type is None spec_type = RSpecType.SCRIPT @@ -374,24 +384,31 @@ def create(cls, rspecifier): assert spec_type is not None, "Wrong wspecifier options %s" % fields[0] if spec_type == RSpecType.SCRIPT: - with open(archive, "r") as f: - scp_f2 = f.readline().strip().split(" ")[1] - if re.match(r".*\.h5(?:.[0-9]+:[0-9]+.)?$", scp_f2) is not None: + if archive.suffix == ".csv": + df = pd.read_csv(archive, nrows=2) + storage_path = df["storage_path"].values[0] + if re.match(r".*\.h5$", scp_f2) is not None: archive_type = ArchiveType.H5 - elif re.match(r".*\.ark:.*$", scp_f2) is not None: + elif re.match(r".*\.ark$", scp_f2) is not None: archive_type = ArchiveType.ARK - elif ( - re.match(r".*[cvg]:[0-9]+.[0-9]+:[0-9]+.$", scp_f2) is not None - ): + elif re.match(r".*[cvg]$", scp_f2) is not None: archive_type = ArchiveType.AUDIO else: - archive_type = ArchiveType.ARK - - # .split('[')[0].split(':') - # if len(scp) == 1: - # archive_type = ArchiveType.H5 - # else: - # archive_type = ArchiveType.ARK + raise ValueError(f"Unknown format for {storage_path}") + else: + with open(archive, "r") as f: + scp_f2 = f.readline().strip().split(" ")[1] + if re.match(r".*\.h5(?:.[0-9]+:[0-9]+.)?$", scp_f2) is not None: + archive_type = ArchiveType.H5 + elif re.match(r".*\.ark:.*$", scp_f2) is not None: + archive_type = ArchiveType.ARK + elif ( + re.match(r".*[cvg]:[0-9]+.[0-9]+:[0-9]+.$", scp_f2) + is not None + ): + archive_type = ArchiveType.AUDIO + else: + archive_type = ArchiveType.ARK if archive_type == ArchiveType.ARK: for option in options: diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index 51b476aa..e8ad5056 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -3,6 +3,7 @@ Apache 2.0 
(http://www.apache.org/licenses/LICENSE-2.0) """ +from .info_table import InfoTable from .class_info import ClassInfo from .dataset import Dataset from .enrollment_map import EnrollmentMap From 89efce43a3c25b1fc3284afb84823af803d92add Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 4 Sep 2023 18:59:26 -0400 Subject: [PATCH 105/154] voxceleb v1.2 works up to snorm backend --- egs/sre19-av-v/v0.1/steps_be/face_be_utils.py | 9 +- .../v1/steps_be/eval-tel-be-snorm-v2.py | 2 +- egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py | 2 +- .../v1/steps_be/train-tel-be-knn-v1.py | 2 +- .../v1/steps_be/train-tel-be-knn-v3.py | 2 +- .../v1/steps_be/train-tel-be-knn-v4.py | 2 +- .../adv.v2/steps_backend/eval-be-cos-Nvs1.py | 2 +- .../adv.v2/steps_backend/eval-be-cos.py | 2 +- egs/voxceleb/v1.1/conf/vad_16k.yaml | 1 + ...rain_ecapatdnn2048x4_xvec_stage1_v3.0.yaml | 84 +-- ...rain_ecapatdnn2048x4_xvec_stage2_v3.0.yaml | 16 +- ...train_ecapatdnn512x3_xvec_stage1_v3.0.yaml | 89 +-- ...train_ecapatdnn512x3_xvec_stage2_v3.0.yaml | 30 +- egs/voxceleb/v1.2/conf/vad_16k.yaml | 3 +- egs/voxceleb/v1.2/run_002_compute_evad.sh | 66 +++ .../v1.2/run_003_prepare_noises_rirs.sh | 102 ++++ .../v1.2/run_004_prepare_xvec_train_data.sh | 76 +++ egs/voxceleb/v1.2/run_005_train_xvector.sh | 78 +++ egs/voxceleb/v1.2/run_006_extract_xvectors.sh | 103 ++++ egs/voxceleb/v1/steps_be/eval_be_cos.py | 2 +- egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py | 2 +- egs/voxceleb/v1/steps_be/eval_be_cos_snorm.py | 2 +- hyp_utils/create_audios_split_links.sh | 27 + hyp_utils/create_data_split_links.sh | 2 - .../xvectors/extract_wav2vec2xvectors.sh | 8 +- .../xvectors/extract_xvectors_from_wav.sh | 10 +- .../make_babble_noise_for_nnet_train.sh | 22 +- .../xvectors/pack_rirs_for_nnet_train.sh | 9 - .../preprocess_audios_for_nnet_train.sh | 8 +- hyperion/bin/eval_cosine_scoring_backend.py | 200 +++++++ .../eval_cosine_scoring_backend_with_qmf.py | 472 +++++++++++++++ hyperion/bin/eval_verification_metrics.py | 96 +++ hyperion/bin/eval_xvec_logits_from_wav.py | 20 +- hyperion/bin/extract_wav2vec2xvectors.py | 41 +- hyperion/bin/extract_wav2xvectors.py | 333 +++++++++++ hyperion/bin/extract_xvectors_from_feats.py | 20 +- hyperion/bin/extract_xvectors_from_wav.py | 26 +- .../extract_xvectors_slidwin_from_feats.py | 10 +- .../bin/extract_xvectors_slidwin_from_wav.py | 10 +- hyperion/bin/finetune_wav2xvector.py | 228 ++++++++ .../generate_adv_attacks_xvector_classif.py | 8 +- hyperion/bin/hyperion_dataset.py | 406 ++++++++++++- hyperion/bin/hyperion_tables.py | 33 +- hyperion/bin/make_babble_noise_audio_files.py | 102 ++-- hyperion/bin/make_wav2xvector.py | 91 +++ hyperion/bin/merge_scores.py | 99 ++++ hyperion/bin/pack_wav_rirs.py | 17 +- hyperion/bin/plot_embedding_tsne_per_class.py | 11 +- hyperion/bin/prepare_data.py | 9 +- hyperion/bin/preprocess_audio_files.py | 163 +++--- hyperion/bin/train_wav2vec2xvector.py | 19 +- hyperion/bin/train_wav2xvector.py | 196 +++++++ hyperion/data_prep/__init__.py | 2 + hyperion/data_prep/data_prep.py | 3 +- hyperion/data_prep/musan.py | 107 ++++ hyperion/data_prep/rirs.py | 103 ++++ hyperion/data_prep/voxceleb1.py | 18 +- hyperion/data_prep/voxceleb2.py | 23 +- hyperion/data_prep/voxsrc22.py | 49 +- hyperion/helpers/trial_data_reader.py | 2 +- hyperion/helpers/vector_class_reader.py | 2 +- hyperion/io/ark_data_reader.py | 6 +- hyperion/io/audio_reader.py | 18 +- hyperion/io/audio_writer.py | 71 ++- hyperion/io/hyp_data_reader.py | 5 +- hyperion/io/packed_audio_reader.py | 6 +- hyperion/io/rw_specifiers.py | 6 
+- hyperion/np/augment/noise_augment.py | 26 +- hyperion/np/augment/reverb_augment.py | 15 +- hyperion/np/augment/speech_augment.py | 2 +- hyperion/np/augment/speed_augment.py | 22 +- .../classifiers/binary_logistic_regression.py | 2 +- hyperion/np/classifiers/greedy_fusion.py | 4 +- hyperion/np/classifiers/linear_gbe.py | 8 +- hyperion/np/classifiers/linear_gbe_up.py | 9 +- hyperion/np/classifiers/linear_svmc.py | 8 +- .../np/classifiers/logistic_regression.py | 6 +- hyperion/np/classifiers/q_scoring_homo_gbe.py | 2 +- hyperion/np/classifiers/svmc.py | 4 +- hyperion/np/feats/energy_vad.py | 40 +- hyperion/np/feats/mfcc.py | 57 +- hyperion/np/metrics/__init__.py | 7 +- hyperion/np/metrics/cllr.py | 2 +- hyperion/np/metrics/utils.py | 2 +- hyperion/np/metrics/verification_evaluator.py | 78 ++- hyperion/np/pdfs/core/normal.py | 21 +- hyperion/np/pdfs/core/normal_diag_cov.py | 11 +- hyperion/np/pdfs/hmm/hmm.py | 4 +- hyperion/np/pdfs/jfa/jfa_total.py | 9 +- .../np/pdfs/mixtures/exp_family_mixture.py | 2 +- hyperion/np/pdfs/mixtures/gmm.py | 24 +- hyperion/np/pdfs/mixtures/gmm_diag_cov.py | 13 +- .../np/pdfs/mixtures/gmm_tied_diag_cov.py | 13 +- hyperion/np/pdfs/plda/frplda.py | 4 +- hyperion/np/pdfs/plda/plda.py | 4 +- hyperion/np/pdfs/plda/splda.py | 4 +- hyperion/np/transforms/skl_tsne.py | 4 +- hyperion/torch/data/audio_dataset.py | 12 + hyperion/torch/layers/audio_feats_factory.py | 2 +- hyperion/torch/models/__init__.py | 18 +- hyperion/torch/models/plda/splda.py | 2 +- .../models/wav2xvectors/hf_wav2xvector.py | 4 +- .../wav2xvectors/wav2resnet1d_xvector.py | 18 + .../models/wav2xvectors/wav2resnet_xvector.py | 18 + .../torch/models/wav2xvectors/wav2xvector.py | 113 +++- hyperion/torch/narchs/audio_feats_mvn.py | 4 + hyperion/torch/torch_model.py | 19 +- hyperion/utils/class_info.py | 16 + hyperion/utils/dataset.py | 552 +++++++++++++----- hyperion/utils/fold_list.py | 2 +- hyperion/utils/info_table.py | 72 ++- hyperion/utils/{math.py => math_funcs.py} | 22 +- hyperion/utils/plotting.py | 3 +- hyperion/utils/scp_list.py | 2 +- hyperion/utils/segment_set.py | 42 +- hyperion/utils/sparse_trial_key.py | 18 +- hyperion/utils/sparse_trial_scores.py | 124 +++- hyperion/utils/train_val_eval_list.py | 2 +- hyperion/utils/trial_key.py | 16 +- hyperion/utils/trial_ndx.py | 84 ++- hyperion/utils/trial_scores.py | 86 ++- hyperion/utils/utt2info.py | 2 +- 122 files changed, 4509 insertions(+), 945 deletions(-) create mode 100755 egs/voxceleb/v1.2/run_002_compute_evad.sh create mode 100755 egs/voxceleb/v1.2/run_003_prepare_noises_rirs.sh create mode 100755 egs/voxceleb/v1.2/run_004_prepare_xvec_train_data.sh create mode 100755 egs/voxceleb/v1.2/run_005_train_xvector.sh create mode 100755 egs/voxceleb/v1.2/run_006_extract_xvectors.sh create mode 100755 hyp_utils/create_audios_split_links.sh create mode 100755 hyperion/bin/eval_cosine_scoring_backend.py create mode 100755 hyperion/bin/eval_cosine_scoring_backend_with_qmf.py create mode 100755 hyperion/bin/eval_verification_metrics.py create mode 100755 hyperion/bin/extract_wav2xvectors.py create mode 100755 hyperion/bin/finetune_wav2xvector.py mode change 100644 => 100755 hyperion/bin/hyperion_dataset.py create mode 100755 hyperion/bin/make_wav2xvector.py create mode 100755 hyperion/bin/merge_scores.py create mode 100755 hyperion/bin/train_wav2xvector.py create mode 100644 hyperion/data_prep/musan.py create mode 100644 hyperion/data_prep/rirs.py rename hyperion/utils/{math.py => math_funcs.py} (93%) diff --git 
a/egs/sre19-av-v/v0.1/steps_be/face_be_utils.py b/egs/sre19-av-v/v0.1/steps_be/face_be_utils.py index 14e3fc20..b6252df7 100644 --- a/egs/sre19-av-v/v0.1/steps_be/face_be_utils.py +++ b/egs/sre19-av-v/v0.1/steps_be/face_be_utils.py @@ -2,15 +2,11 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - import logging import numpy as np from hyperion.utils.utt2info import Utt2Info -from hyperion.utils.math import softmax +from hyperion.utils.math_funcs import softmax from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.np.transforms import LNorm from hyperion.np.clustering import AHC @@ -23,9 +19,6 @@ def lnorm(x): def cosine_scr(x1, x2): - # t = LNorm() - # x1 = t.predict(x1) - # x2 = t.predict(x2) x1 = lnorm(x1) x2 = lnorm(x2) return np.dot(x1, x2.T) diff --git a/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py b/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py index 907509fd..c9657a66 100755 --- a/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py +++ b/egs/sre20-cts/v1/steps_be/eval-tel-be-snorm-v2.py @@ -16,7 +16,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialNdx, TrialScores from hyperion.helpers import TrialDataReader as TDR -from hyperion.utils.math import cosine_scoring +from hyperion.utils.math_funcs import cosine_scoring from hyperion.np.pdfs import PLDA from hyperion.np.transforms import TransformList from hyperion.np.score_norm import AdaptSNorm as SNorm diff --git a/egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py b/egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py index b661cbde..24ef731b 100755 --- a/egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py +++ b/egs/sre20-cts/v1/steps_be/eval-tel-be-v2.py @@ -15,7 +15,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialNdx, TrialScores -from hyperion.utils.math import cosine_scoring +from hyperion.utils.math_funcs import cosine_scoring from hyperion.np.pdfs import PLDA from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py index 8e7715e0..bdef3fc3 100755 --- a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v1.py @@ -17,7 +17,7 @@ from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info -from hyperion.utils.math import cosine_scoring +from hyperion.utils.math_funcs import cosine_scoring from numpy.linalg import matrix_rank diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py index 12f1725b..51795676 100755 --- a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v3.py @@ -17,7 +17,7 @@ from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info -from hyperion.utils.math import cosine_scoring +from hyperion.utils.math_funcs import cosine_scoring from numpy.linalg import matrix_rank, svd diff --git a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py index 234f966c..79c1cd6f 100755 --- 
a/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py +++ b/egs/sre20-cts/v1/steps_be/train-tel-be-knn-v4.py @@ -17,7 +17,7 @@ from hyperion.np.transforms import TransformList, PCA, LDA, LNorm from hyperion.helpers import PLDAFactory as F from hyperion.utils.utt2info import Utt2Info -from hyperion.utils.math import cosine_scoring +from hyperion.utils.math_funcs import cosine_scoring from numpy.linalg import matrix_rank, svd diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-be-cos-Nvs1.py b/egs/voxceleb/adv.v2/steps_backend/eval-be-cos-Nvs1.py index 85e82149..48094d0f 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-be-cos-Nvs1.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-be-cos-Nvs1.py @@ -15,7 +15,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialNdx, TrialScores -from hyperion.utils.math import cosine_scoring +from hyperion.utils.math_funcs import cosine_scoring from hyperion.np.pdfs import PLDA from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F diff --git a/egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py b/egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py index d5cd6a55..49720cb5 100755 --- a/egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py +++ b/egs/voxceleb/adv.v2/steps_backend/eval-be-cos.py @@ -19,7 +19,7 @@ from hyperion.hyp_defs import float_cpu, config_logger from hyperion.utils import TrialNdx, TrialScores -from hyperion.utils.math import cosine_scoring +from hyperion.utils.math_funcs import cosine_scoring from hyperion.helpers import TrialDataReader as TDR from hyperion.helpers import PLDAFactory as F from hyperion.np.transforms import TransformList diff --git a/egs/voxceleb/v1.1/conf/vad_16k.yaml b/egs/voxceleb/v1.1/conf/vad_16k.yaml index 5fb0111c..a8d7b4d4 100644 --- a/egs/voxceleb/v1.1/conf/vad_16k.yaml +++ b/egs/voxceleb/v1.1/conf/vad_16k.yaml @@ -6,3 +6,4 @@ vad_energy_threshold: 5.5 vad_energy_mean_scale: 0.5 vad_proportion_threshold: 0.12 vad_frames_context: 2 +wav_scale: 32767 diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml index 1633f4a2..2cf31713 100644 --- a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml @@ -29,48 +29,50 @@ data: min_chunk_length: 2.0 data_loader: num_workers: 8 -feats: fbank80_specaug1_stmn_16k.yaml -model: - resnet_enc: - in_feats: 80 - in_conv_channels: 2048 - in_kernel_size: 5 - in_stride: 1 - resb_type: seres2bn - resb_repeats: - - 1 - - 1 - - 1 - - 1 - resb_channels: - - 2048 - resb_kernel_sizes: - - 3 - resb_dilations: - - 2 - - 3 - - 4 - - 5 - resb_strides: - - 1 - res2net_width_factor: 1 - res2net_scale: 8 - se_r: 4 - multilayer: true - multilayer_concat: true - endpoint_channels: 4096 - norm_before: false + +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + norm_before: false + dropout_rate: 0.2 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 
dropout_rate: 0.2 - hid_act: swish - pool_net: - pool_type: ch-wise-att-mean+stddev - inner_feats: 128 - embed_dim: 192 - cos_scale: 30.0 - margin: 0.2 - margin_warmup_epochs: 5.0 - dropout_rate: 0.2 - norm_before: false + norm_before: false trainer: optim: opt_type: adam diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml index 877736b3..21f0db8b 100644 --- a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml @@ -37,15 +37,15 @@ data: num_hard_prototypes: 8 data_loader: num_workers: 8 -feats: fbank80_stmn_16k.yaml model: - cos_scale: 30.0 - margin: 0.3 - margin_warmup_epochs: 0 - intertop_margin: 0.1 - resnet_enc: - override_dropouts: true - dropout_rate: 0.25 + xvector: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + resnet_enc: + override_dropouts: true + dropout_rate: 0.25 trainer: optim: opt_type: sgd diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml index f15d453d..03a7f736 100644 --- a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -17,11 +17,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -29,47 +29,48 @@ data: min_chunk_length: 2.0 data_loader: num_workers: 8 -feats: fbank80_specaug1_stmn_16k.yaml -model: - resnet_enc: - in_feats: 80 - in_conv_channels: 512 - in_kernel_size: 5 - in_stride: 1 - resb_type: seres2bn - resb_repeats: - - 1 - - 1 - - 1 - resb_channels: - - 512 - resb_kernel_sizes: - - 3 - resb_dilations: - - 2 - - 3 - - 4 - resb_strides: - - 1 - res2net_width_factor: 1 - res2net_scale: 8 - se_r: 4 - multilayer: true - multilayer_concat: true - endpoint_channels: 1536 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + dropout_rate: 0.002 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.0 norm_before: false - dropout_rate: 0.002 hid_act: swish - pool_net: - pool_type: ch-wise-att-mean+stddev - inner_feats: 128 - embed_dim: 192 - cos_scale: 30.0 - margin: 0.2 - margin_warmup_epochs: 5.0 - dropout_rate: 0.0 - norm_before: false - hid_act: swish trainer: optim: opt_type: adam @@ -91,3 +92,5 @@ trainer: log_interval: 1000 epochs: 40 eff_batch_size: 256 + target_key: speaker + train_mode: full diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml index 45e55d97..9788bb7c 
100644 --- a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml @@ -2,18 +2,18 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 6.0 min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -21,31 +21,31 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 6.0 min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: num_workers: 8 -feats: fbank80_stmn_16k.yaml model: - cos_scale: 30.0 - margin: 0.3 - margin_warmup_epochs: 0 - intertop_margin: 0.1 - resnet_enc: - override_dropouts: true - dropout_rate: 0. + xvector: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + resnet_enc: + override_dropouts: true + dropout_rate: 0. trainer: optim: opt_type: sgd @@ -67,3 +67,5 @@ trainer: swa_start: 31 swa_lr: 1e-4 swa_anneal_epochs: 2 + target_key: speaker + train_mode: full diff --git a/egs/voxceleb/v1.2/conf/vad_16k.yaml b/egs/voxceleb/v1.2/conf/vad_16k.yaml index 5fb0111c..e5a6bb82 100644 --- a/egs/voxceleb/v1.2/conf/vad_16k.yaml +++ b/egs/voxceleb/v1.2/conf/vad_16k.yaml @@ -2,7 +2,8 @@ sample_frequency: 16000 frame_shift: 10 frame_length: 25 snip_edges: false -vad_energy_threshold: 5.5 +vad_energy_threshold: -4.89 vad_energy_mean_scale: 0.5 vad_proportion_threshold: 0.12 vad_frames_context: 2 +wav_scale: 1 diff --git a/egs/voxceleb/v1.2/run_002_compute_evad.sh b/egs/voxceleb/v1.2/run_002_compute_evad.sh new file mode 100755 index 00000000..e7593df2 --- /dev/null +++ b/egs/voxceleb/v1.2/run_002_compute_evad.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e +nodes=fs01 +vad_dir=`pwd`/exp/vad_e +vad_config=conf/vad_16k.yaml +nj=40 + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. 
$config_file + +if [ -z "$vad_config" ];then + echo "We are not using VAD in this configuration" + exit 0 +fi + +if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" +fi + + +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + # This only does something at CLSP grid + for name in voxceleb2cat_train voxceleb1_test $extra_data + do + hyp_utils/create_data_split_dirs.sh \ + $vad_dir/$name \ + $USER/hyp-data/voxceleb/v1.2/vad $nodes + done +fi + +#Train datasets +if [ $stage -le 2 ];then + for name in voxceleb2cat_train voxceleb1_test $extra_data + do + # This creates links to distribute data in CLSP grid + # If you are not at CLSP grid, it does nothing and can be deleted + hyp_utils/create_data_split_links.sh $vad_dir/$name/vad.JOB.ark $nj + echo "compute vad for $name" + $train_cmd JOB=1:$nj $vad_dir/$name/log/vad.JOB.log \ + hyp_utils/conda_env.sh \ + compute_energy_vad.py --cfg $vad_config \ + --recordings-file data/$name/recordings.csv \ + --output-spec ark,csv:$vad_dir/$name/vad.JOB.ark,$vad_dir/$name/vad.JOB.csv \ + --part-idx JOB --num-parts $nj || exit 1 + + hyperion_tables.py cat \ + --table-type features \ + --output-file $vad_dir/$name/vad.csv --num-tables $nj + hyperion_dataset.py add_features \ + --dataset data/$name \ + --features-name vad \ + --features-file $vad_dir/$name/vad.csv + done +fi + + diff --git a/egs/voxceleb/v1.2/run_003_prepare_noises_rirs.sh b/egs/voxceleb/v1.2/run_003_prepare_noises_rirs.sh new file mode 100755 index 00000000..aed1dae4 --- /dev/null +++ b/egs/voxceleb/v1.2/run_003_prepare_noises_rirs.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nj=10 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh
+
+# We prepare the noise files and RIR for online speech augmentation
+if [ $stage -le 1 ]; then
+  for name in noise music speech
+  do
+    prepare_data.py musan \
+      --corpus-dir $musan_root \
+      --subset $name \
+      --output-dir data/musan_$name
+  done
+fi
+
+if [ $stage -le 2 ]; then
+  # # Prepare to distribute data over multiple machines
+  # # This only does something at CLSP grid
+  # hyp_utils/create_data_split_dirs.sh $vad_dir $USER/hyp-data/voxceleb/v1.2/vad $nodes
+
+  for name in musan_noise musan_music
+  do
+    input_data_dir=data/$name
+    output_data_dir=data/${name}_proc_audio
+    output_dir=exp/proc_audio/$name
+    $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${name}.JOB.log \
+      hyp_utils/conda_env.sh \
+      preprocess_audio_files.py \
+      --audio-format flac \
+      --part-idx JOB --num-parts $nj \
+      --recordings-file $input_data_dir/recordings.csv \
+      --output-path $output_dir \
+      --output-recordings-file $output_dir/recordings.JOB.csv
+
+    hyperion_tables.py cat \
+      --table-type recordings \
+      --output-file $output_dir/recordings.csv --num-tables $nj
+    hyperion_dataset.py set_recordings \
+      --dataset $input_data_dir \
+      --recordings-file $output_dir/recordings.csv \
+      --output-dataset $output_data_dir
+
+  done
+fi
+
+if [ $stage -le 3 ]; then
+  # Create Babble noise from MUSAN speech files
+  for name in musan_speech
+  do
+    input_data_dir=data/$name
+    output_data_dir=data/${name}_babble
+    output_dir=exp/proc_audio/${name}_babble
+    $train_cmd $output_dir/log/make_babble_noise_${name}.log \
+      hyp_utils/conda_env.sh \
+      make_babble_noise_audio_files.py \
+      --audio-format flac \
+      --min-spks 3 --max-spks 10 --num-reuses 5 \
+      --recordings-file $input_data_dir/recordings.csv \
+      --output-path $output_dir \
+      --output-recordings-file $output_data_dir/recordings.csv
+    hyperion_dataset.py make_from_recordings \
+      --dataset $output_data_dir \
+      --recordings-file $output_data_dir/recordings.csv
+  done
+fi
+
+if [ $stage -le 4 ]; then
+  if [ ! -d "RIRS_NOISES" ]; then
+    # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+    wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+    unzip rirs_noises.zip
+  fi
+  prepare_data.py rirs --corpus-dir RIRS_NOISES/simulated_rirs/smallroom --output-dir data/rirs_smallroom
+  prepare_data.py rirs --corpus-dir RIRS_NOISES/simulated_rirs/mediumroom --output-dir data/rirs_mediumroom
+  prepare_data.py rirs --corpus-dir RIRS_NOISES/real_rirs_isotropic_noises --output-dir data/rirs_real
+  for rirs in rirs_smallroom rirs_mediumroom rirs_real
+  do
+    output_dir=exp/rirs/$rirs
+    data_dir=data/$rirs
+    $train_cmd $output_dir/log/pack_rirs_${rirs}.log \
+      hyp_utils/conda_env.sh \
+      pack_wav_rirs.py --input $data_dir/recordings.csv \
+      --output h5,csv:$output_dir/rirs.h5,$output_dir/rirs.csv || exit 1;
+    hyperion_dataset.py add_features --dataset $data_dir \
+      --features-name rirs --features-file $output_dir/rirs.csv
+
+  done
+fi
+
diff --git a/egs/voxceleb/v1.2/run_004_prepare_xvec_train_data.sh b/egs/voxceleb/v1.2/run_004_prepare_xvec_train_data.sh
new file mode 100755
index 00000000..7649ff22
--- /dev/null
+++ b/egs/voxceleb/v1.2/run_004_prepare_xvec_train_data.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Copyright
+#              2020   Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+nodes=b1
+nj=40
+stage=1
+config_file=default_config.sh
+
+. parse_options.sh || exit 1;
+. 
$config_file + +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + # This only does something at CLSP grid + hyp_utils/create_data_split_dirs.sh \ + exp/xvector_audios/$nnet_data \ + $USER/hyp-data/voxceleb/v1.2/xvector_audios/$nnet_data $nodes +fi + +if [ $stage -le 2 ];then + output_dir=exp/proc_audio/$nnet_data + # This creates links to distribute data in CLSP grid + # If you are not at CLSP grid, it does nothing and can be deleted + hyp_utils/create_audios_split_links.sh $output_dir data/$nnet_data/recordings.csv flac + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$nnet_data/vad.csv" + update_durs="--update-seg-durs" + fi + + $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${nnet_data}.JOB.log \ + hyp_utils/conda_env.sh \ + preprocess_audio_files.py \ + --audio-format flac --remove-dc-offset $vad_args \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$nnet_data/recordings.csv \ + --output-path $output_dir \ + --output-recordings-file $output_dir/recordings.JOB.csv + + hyperion_tables.py cat \ + --table-type recordings \ + --output-file $output_dir/recordings.csv --num-tables $nj + + hyperion_dataset.py set_recordings $update_durs \ + --dataset data/$nnet_data \ + --recordings-file $output_dir/recordings.csv \ + --output-dataset data/${nnet_data}_proc_audio \ + --remove-features vad +fi + +if [ $stage -le 3 ];then + hyperion_dataset.py remove_short_segments \ + --dataset data/${nnet_data}_proc_audio \ + --output-dataset data/${nnet_data}_filtered \ + --length-name duration --min-length 2.0 + + hyperion_dataset.py remove_classes_few_segments \ + --dataset data/${nnet_data}_filtered \ + --class-name speaker --min-segs 4 +fi + +if [ $stage -le 4 ];then + hyperion_dataset.py split_train_val \ + --dataset data/${nnet_data}_filtered \ + --val-prob 0.03 \ + --joint-classes speaker --min-train-samples 1 \ + --seed 1123581321 \ + --train-dataset data/${nnet_data}_xvector_train \ + --val-dataset data/${nnet_data}_xvector_val +fi + diff --git a/egs/voxceleb/v1.2/run_005_train_xvector.sh b/egs/voxceleb/v1.2/run_005_train_xvector.sh new file mode 100755 index 00000000..d2f31ea1 --- /dev/null +++ b/egs/voxceleb/v1.2/run_005_train_xvector.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh + +train_data_dir=data/${nnet_data}_xvector_train +val_data_dir=data/${nnet_data}_xvector_val + +#add extra args from the command line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" +fi +if [ "$use_tb" == "true" ];then + extra_args="$extra_args --trainer.use-tensorboard" +fi +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)" +fi + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +# Network Training +if [ $stage -le 1 ]; then + + mkdir -p $nnet_s1_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_wav2xvector.py $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ + --data.train.dataset.segments-file $train_data_dir/segments.csv \ + --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ + --data.val.dataset.segments-file $val_data_dir/segments.csv \ + --trainer.exp-path $nnet_s1_dir \ + --num-gpus $ngpu \ + +fi + + +# Large Margin Fine-tuning +if [ $stage -le 2 ]; then + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" + fi + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2xvector.py $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ + --data.train.dataset.segments-file $train_data_dir/segments.csv \ + --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ + --data.val.dataset.segments-file $val_data_dir/segments.csv \ + --in-model-file $nnet_s1 \ + --trainer.exp-path $nnet_s2_dir \ + --num-gpus $ngpu \ + +fi diff --git a/egs/voxceleb/v1.2/run_006_extract_xvectors.sh b/egs/voxceleb/v1.2/run_006_extract_xvectors.sh new file mode 100755 index 00000000..09b8c8e9 --- /dev/null +++ b/egs/voxceleb/v1.2/run_006_extract_xvectors.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=2 +config_file=default_config.sh +use_gpu=false +xvec_chunk_length=120.0 +. parse_options.sh || exit 1; +. 
$config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu --chunk-length $xvec_chunk_length" + xvec_cmd="$cuda_eval_cmd --gpu 1 --mem 6G" + num_gpus=1 +else + xvec_cmd="$train_cmd --mem 12G" + num_gpus=0 +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + +xvector_dir=exp/xvectors/$nnet_name + +if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qmf" == "true" || "$do_pca" == "true") ]]; then + # Extract xvectors for training LDA/PLDA + nj=100 + for name in voxceleb2cat_train + do + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$name/vad.csv" + fi + output_dir=$xvector_dir/$name + echo "Extracting x-vectors for $name" + $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + extract_wav2xvectors.py ${xvec_args} ${vad_args} \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$name/recordings.csv \ + --random-utt-length --min-utt-length 2 --max-utt-length 30 \ + --model-path $nnet \ + --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv + hyperion_tables.py cat \ + --table-type features \ + --output-file $output_dir/xvector.csv --num-tables $nj + + done +fi + +if [ $stage -le 2 ]; then + # Extracts x-vectors for evaluation + nj=100 + if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" + fi + for name in voxceleb1_test $extra_data + do + num_segs=$(wc -l data/$name/segments.csv | awk '{ print $1-1}') + nj=$(($num_segs < 100 ? 
$num_segs:100))
+    if [ -n "$vad_config" ];then
+      vad_args="--vad csv:data/$name/vad.csv"
+    fi
+    output_dir=$xvector_dir/$name
+    echo "Extracting x-vectors for $name"
+    $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \
+      hyp_utils/conda_env.sh --num-gpus $num_gpus \
+      extract_wav2xvectors.py ${xvec_args} ${vad_args} \
+      --part-idx JOB --num-parts $nj \
+      --recordings-file data/$name/recordings.csv \
+      --model-path $nnet \
+      --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv
+    hyperion_tables.py cat \
+      --table-type features \
+      --output-file $output_dir/xvector.csv --num-tables $nj
+
+  done
+fi
+
diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos.py b/egs/voxceleb/v1/steps_be/eval_be_cos.py
index 1f9978ee..a9bc03d1 100755
--- a/egs/voxceleb/v1/steps_be/eval_be_cos.py
+++ b/egs/voxceleb/v1/steps_be/eval_be_cos.py
@@ -20,7 +20,7 @@
 from hyperion.hyp_defs import float_cpu, config_logger
 from hyperion.utils.list_utils import ismember
 from hyperion.utils import TrialNdx, TrialScores
-from hyperion.utils.math import cosine_scoring
+from hyperion.utils.math_funcs import cosine_scoring
 from hyperion.helpers import TrialDataReader as TDR
 from hyperion.helpers import PLDAFactory as F
 from hyperion.np.transforms import TransformList
diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py
index 7034126a..bf66d72b 100755
--- a/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py
+++ b/egs/voxceleb/v1/steps_be/eval_be_cos_qmf.py
@@ -19,7 +19,7 @@
 from hyperion.hyp_defs import float_cpu, config_logger
 from hyperion.utils import TrialNdx, TrialScores, Utt2Info
-from hyperion.utils.math import cosine_scoring
+from hyperion.utils.math_funcs import cosine_scoring
 from hyperion.np.pdfs import PLDA
 from hyperion.utils.list_utils import ismember
 from hyperion.helpers import TrialDataReader as TDR
diff --git a/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.py b/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.py
index dad89ced..0eca769d 100755
--- a/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.py
+++ b/egs/voxceleb/v1/steps_be/eval_be_cos_snorm.py
@@ -20,7 +20,7 @@
 from hyperion.hyp_defs import float_cpu, config_logger
 from hyperion.utils.list_utils import ismember
 from hyperion.utils import TrialNdx, TrialScores
-from hyperion.utils.math import cosine_scoring
+from hyperion.utils.math_funcs import cosine_scoring
 from hyperion.helpers import TrialDataReader as TDR
 from hyperion.helpers import PLDAFactory as F
 from hyperion.np.transforms import TransformList
diff --git a/hyp_utils/create_audios_split_links.sh b/hyp_utils/create_audios_split_links.sh
new file mode 100755
index 00000000..7125a2c4
--- /dev/null
+++ b/hyp_utils/create_audios_split_links.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright
+#              2023   Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+# Creates links to distribute data into multiple nodes in clsp grid
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 <output-dir> <recordings-file> <file-format>"
+  echo "$0 exp/xvector_audios/voxceleb data/voxceleb/recordings.csv flac"
+  exit 1
+fi
+echo "$0 $@"  # Print the command line for logging
+output_dir=$1
+rec_file=$2
+file_format=$3
+
+if [[ $(hostname -f) != *.clsp.jhu.edu ]]; then
+  exit 0
+fi
+
+for f in $(awk -F "," '$1!="id" { print $1}' $rec_file); do
+  # the next command does nothing unless $output_dir/storage/ exists, see
+  # utils/create_data_link.pl for more info. 
+ hyp_utils/create_data_link.pl $output_dir/$f.$file_format +done + + + diff --git a/hyp_utils/create_data_split_links.sh b/hyp_utils/create_data_split_links.sh index 8416742e..c7cfa3eb 100755 --- a/hyp_utils/create_data_split_links.sh +++ b/hyp_utils/create_data_split_links.sh @@ -4,8 +4,6 @@ # Apache 2.0. # Creates links to distrubute data into multiple nodes in clsp grid -storage_name=$(date +'%m_%d_%H_%M') - if [ $# -ne 2 ]; then echo "Usage: $0 " echo "$0 exp/vad_dir/vad.JOB.ark 40" diff --git a/hyp_utils/xvectors/extract_wav2vec2xvectors.sh b/hyp_utils/xvectors/extract_wav2vec2xvectors.sh index 6c6f0fdf..d8ae2e55 100755 --- a/hyp_utils/xvectors/extract_wav2vec2xvectors.sh +++ b/hyp_utils/xvectors/extract_wav2vec2xvectors.sh @@ -87,9 +87,9 @@ if [ $stage -le 0 ];then extract_wav2vec2xvectors.py \ ${args} $write_speech_dur_opt \ --part-idx JOB --num-parts $nj \ - --input $data_dir/wav.scp \ + --recordings-file $data_dir/wav.scp \ --model-path $nnet_file --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length \ - --output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp + --output-spec ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp set -e fi @@ -109,9 +109,9 @@ if [ $stage -le 1 ];then extract_wav2vec2xvectors.py \ ${args} $write_speech_dur_opt \ --part-idx $i --num-parts $nj \ - --input $data_dir/wav.scp \ + --recordings-file $data_dir/wav.scp \ --model-path $nnet_file --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length \ - --output ark,scp:$output_dir/xvector.$i.ark,$output_dir/xvector.$i.scp & + --output-spec ark,scp:$output_dir/xvector.$i.ark,$output_dir/xvector.$i.scp & fi done wait diff --git a/hyp_utils/xvectors/extract_xvectors_from_wav.sh b/hyp_utils/xvectors/extract_xvectors_from_wav.sh index 0b5227cc..b763a25c 100755 --- a/hyp_utils/xvectors/extract_xvectors_from_wav.sh +++ b/hyp_utils/xvectors/extract_xvectors_from_wav.sh @@ -87,10 +87,10 @@ if [ $stage -le 0 ];then hyp_utils/conda_env.sh --num-gpus $num_gpus \ extract_xvectors_from_wav.py \ --feats $feat_config ${args} $write_num_frames_opt \ - --part-idx JOB --num-parts $nj \ - --input $data_dir/wav.scp \ + --part-idx JOB --num-parts $nj \ + --recordings-file $data_dir/wav.scp \ --model-path $nnet_file --chunk-length $chunk_length \ - --output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp + --output-spec ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp set -e fi @@ -110,9 +110,9 @@ if [ $stage -le 1 ];then extract_xvectors_from_wav.py \ --feats $feat_config ${args} $write_num_frames_opt \ --part-idx $i --num-parts $nj \ - --input $data_dir/wav.scp \ + --recordings-file $data_dir/wav.scp \ --model-path $nnet_file --chunk-length $chunk_length \ - --output ark,scp:$output_dir/xvector.$i.ark,$output_dir/xvector.$i.scp & + --output-spec ark,scp:$output_dir/xvector.$i.ark,$output_dir/xvector.$i.scp & fi done wait diff --git a/hyp_utils/xvectors/make_babble_noise_for_nnet_train.sh b/hyp_utils/xvectors/make_babble_noise_for_nnet_train.sh index 27c77454..4530ad3b 100755 --- a/hyp_utils/xvectors/make_babble_noise_for_nnet_train.sh +++ b/hyp_utils/xvectors/make_babble_noise_for_nnet_train.sh @@ -8,9 +8,7 @@ nj=1 cmd="run.pl" stage=0 file_format=flac -nodes=b1 storage_name=$(date +'%m_%d_%H_%M') -#proc_opts="--remove-dc-offset" min_spks=3 max_spks=10 num_reuses=5 @@ -23,10 +21,8 @@ if [ $# != 3 ]; then echo "Usage: $0 " echo "e.g.: $0 data/train data/train_no_sil exp/make_xvector_features" echo "Options: " - #echo " --nj # number 
of parallel jobs" echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." echo " --file-format # Output file_format supported by soundfile (flac,ogg,wav,...)" - #echo " --proc-opts # Extra arguments for proc-audio-files.py" echo " --min-spks # max number of spks per utterance" echo " --max-spks # max number of spks per utterance" echo " --num-reuses # number of times a signal is reused to create babble" @@ -51,22 +47,12 @@ output_dir=$(utils/make_absolute.sh $dir) args="" $cmd $dir/log/make_babble_noise_${name}.log \ hyp_utils/conda_env.sh \ - make_babble_noise_audio_files.py ${args} \ - --output-audio-format $file_format $args $proc_opts \ + make_babble_noise_audio_files.py \ + --audio-format $file_format $args $proc_opts \ --min-spks $min_spks --max-spks $max_spks --num-reuses $num_reuses \ --write-time-durs $data_out/utt2dur \ - --input $data_in/wav.scp \ + --recordings-file $data_in/wav.scp \ --output-path $output_dir \ - --output-script $data_out/wav.scp - - - -# for n in $(seq $nj); do -# cat $output_dir/wav.${name}.$n.scp || exit 1; -# done > ${data_out}/wav.scp || exit 1 - -# for n in $(seq $nj); do -# cat $output_dir/utt2dur.${name}.$n || exit 1; -# done > ${data_out}/utt2dur || exit 1 + --output-recordings-file $data_out/wav.scp echo "$0: Succeeded making babble noise for $name" diff --git a/hyp_utils/xvectors/pack_rirs_for_nnet_train.sh b/hyp_utils/xvectors/pack_rirs_for_nnet_train.sh index c6634135..437cd208 100755 --- a/hyp_utils/xvectors/pack_rirs_for_nnet_train.sh +++ b/hyp_utils/xvectors/pack_rirs_for_nnet_train.sh @@ -66,13 +66,4 @@ $cmd $dir/log/pack_rirs_${name}.log \ pack_wav_rirs.py ${args} --input $data_in/wav.scp \ --output ${file_format},scp:$output_dir/rirs_${name}.${file_format},$data_out/rirs.scp || exit 1; - -# for n in $(seq $nj); do -# cat $output_dir/wav.${name}.$n.scp || exit 1; -# done > ${data_out}/wav.scp || exit 1 - -# for n in $(seq $nj); do -# cat $output_dir/utt2dur.${name}.$n || exit 1; -# done > ${data_out}/utt2dur || exit 1 - echo "$0: Succeeded packing RIRs for $name" diff --git a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh index 8321169f..aed40672 100755 --- a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh +++ b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh @@ -92,12 +92,14 @@ fi $cmd JOB=1:$nj $dir/log/preproc_audios_${name}.JOB.log \ hyp_utils/conda_env.sh \ - preprocess_audio_files.py ${args} --output-audio-format $file_format $args $proc_opts \ + preprocess_audio_files.py ${args} --audio-format $file_format $args $proc_opts \ --write-time-durs $output_dir/utt2dur.${name}.JOB \ --part-idx JOB --num-parts $nj \ - --input $data_in/wav.scp \ + # --input $data_in/wav.scp \ + --recordings-file $data_in/wav.scp \ --output-path $output_dir \ - --output-script $output_dir/wav.${name}.JOB.scp + --output-recordings-file $output_dir/wav.${name}.JOB.scp + #--output-script $output_dir/wav.${name}.JOB.scp for n in $(seq $nj); do cat $output_dir/wav.${name}.$n.scp || exit 1; diff --git a/hyperion/bin/eval_cosine_scoring_backend.py b/hyperion/bin/eval_cosine_scoring_backend.py new file mode 100755 index 00000000..1a740024 --- /dev/null +++ b/hyperion/bin/eval_cosine_scoring_backend.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +""" +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) 
+import time +import logging +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import TrialNdx, TrialKey, TrialScores, EnrollmentMap, SegmentSet +from hyperion.utils.math_funcs import cosine_scoring +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.transforms import TransformList +from hyperion.np.score_norm import AdaptSNorm + + +def load_trial_data( + enroll_map_file, + ndx_file, + enroll_feats_file, + feats_file, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, +): + test_feats_reader = DRF.create(feats_file) + if enroll_feats_file is not None and enroll_feats_file != feats_file: + enroll_feats_reader = DRF.create(enroll_feats_file) + else: + enroll_feats_reader = test_feats_reader + + enroll_map = EnrollmentMap.load(enroll_map_file) + try: + ndx = TrialNdx.load(ndx_file) + except: + ndx = TrialKey.load(ndx_file).to_ndx() + + if num_enroll_parts > 1 or num_test_parts > 1: + ndx = ndx.split( + enroll_part_idx, num_enroll_parts, test_part_idx, num_test_parts + ) + + enroll_map = enroll_map.filter(items=ndx.model_set) + x_e = enroll_feats_reader.read(enroll_map["segmentid"], squeeze=True) + x_t = test_feats_reader.read(ndx.seg_set, squeeze=True) + return enroll_map, ndx, x_e, x_t + + +def load_cohort_data(segments_file, feats_file): + + segments = SegmentSet.load(segments_file) + feats_reader = DRF.create(feats_file) + x = feats_reader.read(segments["id"], squeeze=True) + return segments, x + + +def eval_backend( + enroll_map_file, + ndx_file, + enroll_feats_file, + feats_file, + preproc_file, + score_file, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + cohort_segments_file, + cohort_feats_file, + cohort_nbest, + avg_cohort_by, +): + + logging.info("loading data") + enroll_map, ndx, x_e, x_t = load_trial_data( + enroll_map_file, + ndx_file, + enroll_feats_file, + feats_file, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + ) + enroll_set, enroll_ids = np.unique(enroll_map["id"], return_inverse=True) + + t1 = time.time() + logging.info("computing score") + if preproc_file is not None: + preprocessor = TransformList.load(preproc_file) + x_e = preprocessor(x_e) + x_t = preprocessor(x_t) + + scores = cosine_scoring(x_e, x_t, ids1=enroll_ids) + dt = time.time() - t1 + num_trials = scores.shape[0] * scores.shape[1] + logging.info( + "scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.", + dt, + dt / num_trials * 1000, + ) + + if cohort_segments_file is not None: + t1 = time.time() + cohort_segments, x_coh = load_cohort_data( + cohort_segments_file, cohort_feats_file + ) + if preproc_file is not None: + x_coh = preprocessor(x_coh) + + if avg_cohort_by is not None: + cohort_class = cohort_segments[avg_cohort_by] + _, cohort_ids = np.unique(cohort_class, return_inverse=True) + else: + cohort_ids = None + + logging.info("computing enroll vs cohort") + scores_enr_coh = cosine_scoring(x_e, x_coh, ids2=cohort_ids) + logging.info("computing cohort vs test") + scores_coh_test = cosine_scoring(x_coh, x_t, ids1=cohort_ids) + snorm = AdaptSNorm(cohort_nbest) + scores = snorm(scores, scores_coh_test, scores_enr_coh) + dt = time.time() - t1 + logging.info( + "s-norm elapsed time: %.2f s. 
elapsed time per trial: %.2f ms.",
+        dt,
+        dt / num_trials * 1000,
+    )
+
+    if num_enroll_parts > 1 or num_test_parts > 1:
+        score_file = Path(score_file)
+        new_suffix = f".{enroll_part_idx}.{test_part_idx}{score_file.suffix}"
+        score_file = score_file.with_suffix(new_suffix)
+
+    logging.info("saving scores to %s", score_file)
+    # sort scores rows to match the ndx model_set order
+    sort_idx = [np.nonzero(enroll_set == e)[0][0] for e in ndx.model_set]
+    scores = scores[sort_idx]
+    scores = TrialScores(ndx.model_set, ndx.seg_set, scores, ndx.trial_mask)
+    scores.save(score_file)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(description="Eval cosine-scoring with optional AS-Norm")
+
+    parser.add_argument("--enroll-feats-file", default=None)
+    parser.add_argument("--feats-file", required=True)
+    parser.add_argument("--ndx-file", required=True)
+    parser.add_argument("--enroll-map-file", required=True)
+    parser.add_argument("--preproc-file", default=None)
+    parser.add_argument("--cohort-segments-file", default=None)
+    parser.add_argument("--cohort-feats-file", default=None)
+    parser.add_argument("--cohort-nbest", type=int, default=1000)
+    parser.add_argument(
+        "--avg-cohort-by",
+        default=None,
+        help="segments file column to average vectors from the same class",
+    )
+    parser.add_argument("--score-file", required=True)
+    parser.add_argument(
+        "--enroll-part-idx", default=1, type=int, help="enroll part index"
+    )
+    parser.add_argument(
+        "--num-enroll-parts",
+        default=1,
+        type=int,
+        help="""number of parts in which we divide the enroll
+                list to run evaluation in parallel""",
+    )
+    parser.add_argument("--test-part-idx", default=1, type=int, help="test part index")
+    parser.add_argument(
+        "--num-test-parts",
+        default=1,
+        type=int,
+        help="""number of parts in which we divide the test list
+                to run evaluation in parallel""",
+    )
+
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    eval_backend(**namespace_to_dict(args))
diff --git a/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py b/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py
new file mode 100755
index 00000000..f567dd81
--- /dev/null
+++ b/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py
@@ -0,0 +1,472 @@
+#!/usr/bin/env python
+"""
+  Copyright 2019 Johns Hopkins University (Author: Jesus Villalba)
+  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+
+"""
+from jsonargparse import (
+    ArgumentParser,
+    ActionConfigFile,
+    ActionParser,
+    namespace_to_dict,
+)
+import time
+import logging
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+from hyperion.hyp_defs import config_logger
+from hyperion.utils import (
+    TrialNdx,
+    TrialKey,
+    TrialScores,
+    EnrollmentMap,
+    SegmentSet,
+    InfoTable,
+)
+from hyperion.utils.math_funcs import cosine_scoring, average_vectors
+from hyperion.io import RandomAccessDataReaderFactory as DRF
+from hyperion.np.transforms import TransformList
+from hyperion.np.score_norm import AdaptSNorm
+
+
+def get_precomp_qm_names(quality_measures):
+    # snorm qm will be calculated later
+    return [q for q in quality_measures if q not in ["snorm-mu", "snorm-mu/s"]]
+
+
+def normalize_duration(q, min_dur, max_dur, frame_rate):
+    # convert to seconds once, clip to [min_dur, max_dur], and map the
+    # log-duration linearly to [0, 1]
+    q = q / frame_rate
+    q = np.log(np.clip(q, a_min=min_dur, a_max=max_dur))
+    log_min_dur = np.log(min_dur)
+    log_max_dur = np.log(max_dur)
+    q = (q - log_min_dur) / (log_max_dur - log_min_dur)
+    return q
+
+
+def load_trial_data(
+    enroll_map_file,
+    ndx_file,
+    enroll_feats_file,
+    feats_file,
+    enroll_segments_file,
+    segments_file,
+    quality_measures,
+    min_dur,
+    max_dur,
+    frame_rate,
+    enroll_part_idx,
+    num_enroll_parts,
+    test_part_idx,
+    num_test_parts,
+):
+    test_feats_reader = DRF.create(feats_file)
+    if enroll_feats_file is not None and enroll_feats_file != feats_file:
+        enroll_feats_reader = DRF.create(enroll_feats_file)
+    else:
+        enroll_feats_reader = test_feats_reader
+
+    enroll_map = EnrollmentMap.load(enroll_map_file)
+    try:
+        ndx = TrialNdx.load(ndx_file)
+    except:
+        ndx = TrialKey.load(ndx_file).to_ndx()
+
+    if num_enroll_parts > 1 or num_test_parts > 1:
+        ndx = ndx.split(
+            enroll_part_idx, num_enroll_parts, test_part_idx, num_test_parts
+        )
+
+    enroll_map = enroll_map.filter(items=ndx.model_set)
+    x_e = enroll_feats_reader.read(enroll_map["segmentid"], squeeze=True)
+    x_t = test_feats_reader.read(ndx.seg_set, squeeze=True)
+
+    # quality measures may be in the segments file and/or the feature_set file,
+    # so we combine both if both are given
+    if segments_file is not None:
+        test_segments = SegmentSet.load(segments_file)
+        if enroll_segments_file is not None and segments_file != enroll_segments_file:
+            enroll_segments = SegmentSet.load(enroll_segments_file)
+        else:
+            enroll_segments = test_segments
+
+    test_feats_set = test_feats_reader.feature_set
+    enroll_feats_set = enroll_feats_reader.feature_set
+    if segments_file:
+        test_segments.add_columns(test_feats_set)
+        if enroll_feats_set != test_feats_set or enroll_segments != test_segments:
+            enroll_segments.add_columns(enroll_feats_set)
+
+    # now we retrieve the quality measures
+    # snorm qm will be calculated later
+    retrieve_qm = get_precomp_qm_names(quality_measures)
+    q_e = enroll_segments.loc[enroll_map["segmentid"], retrieve_qm]
+    q_t = test_segments.loc[ndx.seg_set, retrieve_qm]
+
+    # normalize durations
+    if "speech_duration" in retrieve_qm:
+        q_e["speech_duration"] = normalize_duration(
+            q_e["speech_duration"], min_dur, max_dur, 1
+        )
+        q_t["speech_duration"] = normalize_duration(
+            q_t["speech_duration"], min_dur, max_dur, 1
+        )
+
+    if "num_speech_frames" in retrieve_qm:
+        q_e["num_speech_frames"] = normalize_duration(
+            q_e["num_speech_frames"], min_dur, max_dur, frame_rate
+        )
+        q_t["num_speech_frames"] = normalize_duration(
+            q_t["num_speech_frames"], min_dur, max_dur, frame_rate
+        )
+
+    return enroll_map, ndx, x_e, x_t, q_e, q_t
+
+
+def load_cohort_data(segments_file, feats_file):
+
+    segments = SegmentSet.load(segments_file)
+    feats_reader = DRF.create(feats_file)
+    x = feats_reader.read(segments["id"], squeeze=True)
+    return segments, x
+
+
+def average_qm(q, model_set, ids):
+    # average the per-segment quality measures of each enrollment model
+    q_avg = average_vectors(q.values, ids)
+    q_avg = pd.DataFrame(q_avg, columns=q.columns)
+    q_avg["id"] = model_set
+    q_avg.set_index("id", drop=False, inplace=True)
+    return q_avg
+
+
+def get_score_filepath(
+    score_file,
+    score_name,
+    enroll_part_idx,
+    num_enroll_parts,
+    test_part_idx,
+    num_test_parts,
+):
+
+    score_file = Path(score_file)
+    new_suffix = ""
+    if score_name is not None:
+        new_suffix = f".{score_name}"
+
+    if num_enroll_parts > 1 or num_test_parts > 1:
+        new_suffix = f"{new_suffix}.{enroll_part_idx}.{test_part_idx}"
+
+    if new_suffix:
+        new_suffix = f"{new_suffix}{score_file.suffix}"
+        score_file = score_file.with_suffix(new_suffix)
+
+    return score_file
+
+
+def save_scores(ndx, scores, score_file, score_name, enroll_part_idx,
+                num_enroll_parts, test_part_idx, num_test_parts):
+    # assumed completion of an empty stub, mirroring save_empty_scores below
+    score_file = get_score_filepath(score_file, score_name, enroll_part_idx,
+                                    num_enroll_parts, test_part_idx,
+                                    num_test_parts)
+    scores = TrialScores(ndx.model_set, ndx.seg_set, scores, ndx.trial_mask)
+    scores.save(score_file)
+
+
+def save_empty_scores(ndx, score_file, score_name, enroll_part_idx,
+                      num_enroll_parts, test_part_idx, num_test_parts):
+    scores = np.zeros(ndx.trial_mask.shape, dtype="float32")
+    score_file = get_score_filepath(score_file, score_name, enroll_part_idx,
+                                    num_enroll_parts, test_part_idx,
+                                    num_test_parts)
+    scores = TrialScores(ndx.model_set, ndx.seg_set, scores, ndx.trial_mask)
+    scores.save(score_file)
+
+
+def segment_to_trial_qm(q_e, q_t):
+    q_trial = {}
+    for q_name in ["speech_duration", "num_speech_frames"]:
+        if q_name in q_e:
+            q_trial_name = f"max_{q_name}"
+            q_trial[q_trial_name] = np.maximum(
+                q_e[q_name].values[:, None], q_t[q_name].values[None, :]
+            )
+            q_trial_name = f"min_{q_name}"
+            q_trial[q_trial_name] = np.minimum(
+                q_e[q_name].values[:, None], q_t[q_name].values[None, :]
+            )
+
+    return q_trial
+
+
+def align_scores_to_ndx(enroll_set, ndx, scores, scores_norm, q_trial):
+    # sort scores rows to match the ndx model_set order
+    sort_idx = [np.nonzero(enroll_set == e)[0][0] for e in ndx.model_set]
+    scores = scores[sort_idx]
+    if scores_norm is not None:
+        scores_norm = scores_norm[sort_idx]
+    for qm in q_trial:
+        q_trial[qm] = q_trial[qm][sort_idx]
+
+    return scores, scores_norm, q_trial
+
+
+def make_qm_table(ndx, scores, scores_norm, q_trial):
+    if scores_norm is None:
+        scores = scores[ndx.trial_mask]
+    else:
+        scores = scores_norm[ndx.trial_mask]
+
+    for qm in q_trial:
+        q_trial[qm] = q_trial[qm][ndx.trial_mask]
+
+    I, J = np.nonzero(ndx.trial_mask)
+    modelid = ndx.model_set[I]
+    segmentid = ndx.seg_set[J]
+    unique_id = [f"{a}-{b}" for a, b in zip(modelid, segmentid)]
+
+    q_dict = {
+        "id": unique_id,
+        "modelid": modelid,
+        "segmentid": segmentid,
+        "scores": scores,
+    }
+    q_dict.update(q_trial)
+    df = pd.DataFrame(q_dict)
+    return InfoTable(df)
+
+
+def eval_backend(
+    enroll_map_file,
+    ndx_file,
+    enroll_feats_file,
+    feats_file,
+    enroll_segments_file,
+    segments_file,
+    preproc_file,
+    qmf_file,
+    quality_measures,
+    min_dur,
+    max_dur,
+    frame_rate,
+    cohort_segments_file,
+    cohort_feats_file,
+    cohort_nbest,
+    avg_cohort_by,
+    score_file,
+    enroll_part_idx,
+    num_enroll_parts,
+    test_part_idx,
+    num_test_parts,
+):
+
+    logging.info("loading data")
+    enroll_map, ndx, x_e, x_t, q_e, q_t = load_trial_data(
+        enroll_map_file,
+        ndx_file,
+        enroll_feats_file,
+        feats_file,
+        enroll_segments_file,
+        segments_file,
+        quality_measures,
+        min_dur,
+        max_dur,
+        frame_rate,
+        enroll_part_idx,
+        num_enroll_parts,
+        test_part_idx,
+        num_test_parts,
+    )
+
+    if not np.any(ndx.trial_mask):
+        # this part doesn't have any trials, save empty files
+        # (assumed completion of an empty branch)
+        save_empty_scores(ndx, score_file, None, enroll_part_idx,
+                          num_enroll_parts, test_part_idx, num_test_parts)
+        if cohort_segments_file is not None:
+            save_empty_scores(ndx, score_file, "snorm", enroll_part_idx,
+                              num_enroll_parts, test_part_idx, num_test_parts)
+        return
+
+    enroll_set, enroll_ids = np.unique(enroll_map["id"], return_inverse=True)
+    q_e = average_qm(q_e, enroll_set, enroll_ids)
+
+    t1 = time.time()
+    logging.info("computing score")
+    if preproc_file is not None:
+        preprocessor = TransformList.load(preproc_file)
+        x_e = preprocessor(x_e)
+        x_t = preprocessor(x_t)
+
+    scores = cosine_scoring(x_e, x_t, ids1=enroll_ids)
+    dt = time.time() - t1
+    num_trials = scores.shape[0] * scores.shape[1]
+    logging.info(
+        "scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.",
+        dt,
+        dt / num_trials * 1000,
+    )
+
+    q_trial = segment_to_trial_qm(q_e, q_t)
+    scores_norm = None
+    if cohort_segments_file is not None:
+        t1 = time.time()
+        cohort_segments, x_coh = load_cohort_data(
+            cohort_segments_file, cohort_feats_file
+        )
+        if preproc_file is not None:
+            x_coh = preprocessor(x_coh)
+
+        if avg_cohort_by is not None:
+            cohort_class = cohort_segments[avg_cohort_by]
+            _, cohort_ids = np.unique(cohort_class, return_inverse=True)
+        else:
+            cohort_ids = None
+
+        logging.info("computing enroll vs cohort")
+        scores_enr_coh = cosine_scoring(x_e, x_coh, ids2=cohort_ids)
+        logging.info("computing cohort vs test")
+        scores_coh_test = cosine_scoring(x_coh, x_t, ids1=cohort_ids)
+        snorm = AdaptSNorm(cohort_nbest)
+        scores_norm, mu_z, s_z, mu_t, s_t = snorm(
+            scores, scores_coh_test, scores_enr_coh, return_stats=True
+        )
+        if "snorm-mu" in quality_measures:
+            q_trial["max_snorm-mu"] = np.maximum(mu_z, mu_t)
+            q_trial["min_snorm-mu"] = np.minimum(mu_z, mu_t)
+        if "snorm-mu/s" in quality_measures:
+            mu_z = mu_z / s_z
+            mu_t = mu_t / s_t
+            q_trial["max_snorm-mu/s"] = np.maximum(mu_z, mu_t)
+            q_trial["min_snorm-mu/s"] = np.minimum(mu_z, mu_t)
+
+        dt = time.time() - t1
+        logging.info(
+            "s-norm elapsed time: %.2f s. elapsed time per trial: %.2f ms.",
+            dt,
+            dt / num_trials * 1000,
+        )
+
+    scores, scores_norm, q_trial = align_scores_to_ndx(
+        enroll_set, ndx, scores, scores_norm, q_trial
+    )
+    if qmf_file is None:
+        qm_table = make_qm_table(ndx, scores, scores_norm, q_trial)
+        qm_file = get_score_filepath(
+            score_file,
+            "qm",
+            enroll_part_idx,
+            num_enroll_parts,
+            test_part_idx,
+            num_test_parts,
+        )
+        qm_table.save(qm_file)
+        return
+
+    score_file_nonorm = get_score_filepath(
+        score_file,
+        None,
+        enroll_part_idx,
+        num_enroll_parts,
+        test_part_idx,
+        num_test_parts,
+    )
+    logging.info("saving scores to %s", score_file_nonorm)
+    scores = TrialScores(ndx.model_set, ndx.seg_set, scores, ndx.trial_mask)
+    scores.save(score_file_nonorm)
+
+    if scores_norm is not None:
+        score_file_snorm = get_score_filepath(
+            score_file,
+            "snorm",
+            enroll_part_idx,
+            num_enroll_parts,
+            test_part_idx,
+            num_test_parts,
+        )
+        logging.info("saving scores with AS-Norm to %s", score_file_snorm)
+        scores.scores = scores_norm
+        scores.save(score_file_snorm)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(
+        description="Eval cosine-scoring with optional AS-Norm and QMF"
+    )
+
+    parser.add_argument("--enroll-feats-file", default=None)
+    parser.add_argument("--feats-file", required=True)
+    parser.add_argument("--ndx-file", required=True)
+    parser.add_argument("--enroll-map-file", required=True)
+    parser.add_argument("--enroll-segments-file", default=None)
+    parser.add_argument("--segments-file", default=None)
+    parser.add_argument("--preproc-file", default=None)
+    parser.add_argument("--qmf-file", default=None)
+    parser.add_argument(
+        "--quality-measures",
+        default=["snorm-mu/s", "speech_duration"],
+        nargs="+",
+        choices=["snorm-mu/s", "snorm-mu", "speech_duration", "num_speech_frames"],
+    )
+    parser.add_argument(
+        "--min-dur", default=0.1, type=float, help="lower bound to clip durations"
+    )
+    parser.add_argument(
+        "--max-dur", default=30.0, type=float, help="upper bound to clip durations"
+    )
+    parser.add_argument(
+        "--frame-rate",
+        default=100,
+        type=float,
+        help="frames/sec when durations are expressed in frames",
+    )
+    parser.add_argument("--cohort-segments-file", default=None)
+    parser.add_argument("--cohort-feats-file", default=None)
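+    # cohort options: when --cohort-segments-file is given, eval_backend above
+    # runs adaptive S-Norm (AdaptSNorm), normalizing each trial against its
+    # nbest most competitive cohort scores per side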
parser.add_argument("--cohort-nbest", type=int, default=1000) + parser.add_argument( + "--avg-cohort-by", + default=None, + help="segments file column to average vectors from same class class", + ) + parser.add_argument("--score-file", required=True) + parser.add_argument( + "--enroll-part-idx", default=1, type=int, help="enroll part index" + ) + parser.add_argument( + "--num-enroll-parts", + default=1, + type=int, + help="""number of parts in which we divide the enroll + list to run evaluation in parallel""", + ) + parser.add_argument("--test-part-idx", default=1, type=int, help="test part index") + parser.add_argument( + "--num-test-parts", + default=1, + type=int, + help="""number of parts in which we divide the test list + to run evaluation in parallel""", + ) + + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + eval_backend(**namespace_to_dict(args)) diff --git a/hyperion/bin/eval_verification_metrics.py b/hyperion/bin/eval_verification_metrics.py new file mode 100755 index 00000000..83227558 --- /dev/null +++ b/hyperion/bin/eval_verification_metrics.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from pathlib import Path +import pandas as pd + +from hyperion.hyp_defs import config_logger +from hyperion.np.metrics import VerificationEvaluator as VE + +from jsonargparse import ( + ActionConfigFile, + ActionYesNo, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + + +def eval_verification_metrics( + key_files, + score_files, + key_names, + score_names, + p_tar, + c_miss, + c_fa, + sparse, + output_file, +): + + assert len(key_files) == len(key_names) + assert len(score_files) == len(score_names) + dfs = [] + for score_file, score_name in zip(score_files, score_names): + for key_file, key_name in zip(key_files, key_names): + logging.info("Evaluating %s - %s", score_name, key_name) + evaluator = VE( + key_file, + score_file, + p_tar, + c_miss, + c_fa, + key_name, + score_name, + sparse=sparse, + ) + df_ij = evaluator.compute_dcf_eer() + dfs.append(df_ij) + + df = pd.concat(dfs) + logging.info("saving results to %s", output_file) + output_file = Path(output_file) + output_file.parent.mkdir(exist_ok=True, parents=True) + sep = "\t" if output_file.suffix == ".tsv" else "," + df.to_csv(output_file, sep=sep, index=False, float_format="{:,.4f}".format) + + pd.options.display.float_format = "{:.4}".format + print(df.to_string(), flush=True) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Evaluate speaker verification metrics") + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--key-files", required=True, nargs="+") + parser.add_argument("--score-files", required=True, nargs="+") + parser.add_argument("--key-names", required=True, nargs="+") + parser.add_argument("--score-names", required=True, nargs="+") + parser.add_argument( + "--p-tar", + default=[0.05, 0.01, 0.005, 0.001], + nargs="+", + type=float, + help="target priors", + ) + parser.add_argument( + "--c-miss", default=None, nargs="+", type=float, help="cost of miss" + ) + parser.add_argument( + "--c-fa", default=None, nargs="+", type=float, help="cost of false alarm" + ) + parser.add_argument("--sparse", default=False, action=ActionYesNo) + 
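[Editor's note] For reference, the quantities this tool tabulates, the EER and the normalized minimum DCF for each target prior p_tar, can be computed from raw target/non-target score arrays roughly as below. This is a brute-force sketch (O(n^2) threshold sweep, EER taken at the closest crossing), not the VerificationEvaluator implementation.

import numpy as np

def eer_and_min_dcf(tar, non, p_tar=0.05, c_miss=1.0, c_fa=1.0):
    # sweep every observed score as a decision threshold
    thr = np.sort(np.concatenate([tar, non]))
    p_miss = np.array([np.mean(tar < t) for t in thr])
    p_fa = np.array([np.mean(non >= t) for t in thr])
    # EER: where miss and false-alarm rates cross
    eer = p_miss[np.argmin(np.abs(p_miss - p_fa))]
    # detection cost, normalized by the best trivial system
    dcf = c_miss * p_tar * p_miss + c_fa * (1 - p_tar) * p_fa
    min_dcf = dcf.min() / min(c_miss * p_tar, c_fa * (1 - p_tar))
    return eer, min_dcf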
parser.add_argument("--output-file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int, + ) + + args = parser.parse_args() + kwargs = namespace_to_dict(args) + config_logger(kwargs["verbose"]) + del kwargs["verbose"] + del kwargs["cfg"] + eval_verification_metrics(**kwargs) diff --git a/hyperion/bin/eval_xvec_logits_from_wav.py b/hyperion/bin/eval_xvec_logits_from_wav.py index 9efbd6dd..f60c7508 100755 --- a/hyperion/bin/eval_xvec_logits_from_wav.py +++ b/hyperion/bin/eval_xvec_logits_from_wav.py @@ -21,8 +21,12 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) def init_device(use_gpu): @@ -76,13 +80,15 @@ def augment(key0, x0, augmenter, aug_df, aug_id): def select_random_chunk(key, x, min_utt_length, max_utt_length, rng): - utt_length = rng.randint(low=min_utt_length, high=max_utt_length + 1) + utt_length = rng.integers(low=min_utt_length, high=max_utt_length + 1) if utt_length < x.shape[1]: - first_frame = rng.randint(low=0, high=x.shape[1] - utt_length) + first_frame = rng.integers(low=0, high=x.shape[1] - utt_length) x = x[:, first_frame : first_frame + utt_length] logging.info( - "extract-random-utt %s of length=%d first-frame=%d" - % (key, x.shape[1], first_frame) + "extract-random-utt %s of length=%d first-frame=%d", + key, + x.shape[1], + first_frame, ) return x @@ -105,7 +111,7 @@ def eval_xvec( **kwargs ): - rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) feat_extractor = init_feats(device, **kwargs) model = load_model(model_path, device) diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py index 6f7d269e..5eba1b99 100755 --- a/hyperion/bin/extract_wav2vec2xvectors.py +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -21,8 +21,12 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) resamplers = {} @@ -84,9 +88,11 @@ def augment(key0, x0, augmenter, aug_df, aug_id): def select_random_chunk(key, x, fs, min_utt_length, max_utt_length, rng): - utt_length = rng.randint(low=fs * min_utt_length, high=fs * max_utt_length + 1) + utt_length = rng.integers( + low=int(fs * min_utt_length), high=int(fs * max_utt_length + 1) + ) if utt_length < x.shape[1]: - first_frame = rng.randint(low=0, high=x.shape[1] - utt_length) + first_frame = rng.integers(low=0, high=x.shape[1] - utt_length) x = x[:, first_frame : first_frame + utt_length] logging.info( "extract-random-utt %s of length=%d first-frame=%d", @@ -98,7 +104,7 @@ def select_random_chunk(key, x, fs, min_utt_length, max_utt_length, rng): def extract_xvectors( - input_spec, + recordings_file, output_spec, vad_spec, write_speech_dur, @@ -117,7 +123,7 @@ def extract_xvectors( **kwargs, ): - rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) model 
= load_model(model_path, device) @@ -138,15 +144,12 @@ def extract_xvectors( logging.info("opening output stream: %s", output_spec) with DWF.create(output_spec) as writer: - logging.info(f"opening input stream: {input_spec} with args={ar_args}") - with AR(input_spec, **ar_args) as reader: + logging.info(f"opening input stream: {recordings_file} with args={ar_args}") + with AR(recordings_file, **ar_args) as reader: if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create( - vad_spec, - path_prefix=vad_path_prefix, - ) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) while not reader.eof(): t1 = time.time() @@ -160,9 +163,7 @@ def extract_xvectors( t2 = time.time() if fs != model.sample_frequency: resampler = get_resampler(fs, model.sample_frequency) - print(f"x01 {x0.shape} {np.max(x0)}") x0 = resampler(x0) - print(f"x01 {x0.shape} {np.max(x0)}") logging.info("processing utt %s", key0) for aug_id in range(num_augs): @@ -260,7 +261,7 @@ def extract_xvectors( ) parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--input", dest="input_spec", required=True) + parser.add_argument("--recordings-file", required=True) parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument("--write-speech-dur", default=None) parser.add_argument( @@ -278,7 +279,7 @@ def extract_xvectors( parser.add_argument("--model-path", required=True) parser.add_argument( "--hf-chunk-length", - type=int, + type=float, default=0, help=( "max. chunk length used in each forward pass " @@ -288,7 +289,7 @@ def extract_xvectors( ) parser.add_argument( "--xvec-chunk-length", - type=int, + type=float, default=0, help=( "max. chunk length used in each forward pass " @@ -314,18 +315,18 @@ def extract_xvectors( ) parser.add_argument( "--min-utt-length", - type=int, + type=float, default=5, help=("minimum utterance length in secs when using random utt length"), ) parser.add_argument( "--max-utt-length", - type=int, + type=float, default=120, help=("maximum utterance length in secs when using random utt length"), ) - parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument("--output-spec", required=True) parser.add_argument( "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" ) diff --git a/hyperion/bin/extract_wav2xvectors.py b/hyperion/bin/extract_wav2xvectors.py new file mode 100755 index 00000000..7b04fcc8 --- /dev/null +++ b/hyperion/bin/extract_wav2xvectors.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python +""" + Copyright 2019 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import os +import sys +import time + +import numpy as np +import pandas as pd +import torch +import torchaudio.transforms as tat +from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR +from hyperion.io import VADReaderFactory as VRF +from hyperion.np.augment import SpeechAugment +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.utils import open_device +from hyperion.utils import Utt2Info +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + +resamplers = {} + + +def get_resampler(source_fs, target_fs): + if source_fs in resamplers: + return resamplers[source_fs] + + resampler = tat.Resample( + int(source_fs), + int(target_fs), + 
lowpass_filter_width=64, + rolloff=0.9475937167399596, + resampling_method="kaiser_window", + beta=14.769656459379492, + ) + resampler_f = lambda x: resampler(torch.from_numpy(x)).numpy() + resamplers[source_fs] = resampler_f + return resampler_f + + +def init_device(use_gpu): + set_float_cpu("float32") + num_gpus = 1 if use_gpu else 0 + logging.info("initializing devices num_gpus=%d", num_gpus) + device = open_device(num_gpus=num_gpus) + return device + + +def load_model(model_path, device): + logging.info("loading model %s", model_path) + model = TML.load(model_path) + logging.info(f"xvector-model={model}") + model.to(device) + model.eval() + return model + + +def augment(key0, x0, augmenter, aug_df, aug_id): + if augmenter is None: + x = x0 + key = key0 + else: + x, aug_info = augmenter(x0) + key = "%s-aug-%02d" % (key0, aug_id) + aug_df_row = { + "key_aug": key, + "key_orig": key0, + "noise_type": aug_info["noise"]["noise_type"], + "snr": aug_info["noise"]["snr"], + "rir_type": aug_info["reverb"]["rir_type"], + "srr": aug_info["reverb"]["srr"], + "sdr": aug_info["sdr"], + } + + aug_df.append(pd.DataFrame(aug_df_row, index=[0])) + + return key, x + + +def select_random_chunk(key, x, fs, min_utt_length, max_utt_length, rng): + utt_length = rng.integers( + low=int(fs * min_utt_length), high=int(fs * max_utt_length + 1) + ) + if utt_length < x.shape[1]: + first_frame = rng.integers(low=0, high=x.shape[1] - utt_length) + x = x[:, first_frame : first_frame + utt_length] + logging.info( + "extract-random-utt %s of length=%d first-frame=%d", + key, + x.shape[1], + first_frame, + ) + return x + + +def extract_xvectors( + recordings_file, + output_spec, + vad_spec, + write_speech_dur, + vad_path_prefix, + model_path, + chunk_length, + embed_layer, + random_utt_length, + min_utt_length, + max_utt_length, + aug_cfg, + num_augs, + aug_info_path, + use_gpu, + **kwargs, +): + + rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) + device = init_device(use_gpu) + model = load_model(model_path, device) + + if write_speech_dur is not None: + keys = [] + info = [] + + if aug_cfg is not None: + augmenter = SpeechAugment.create(aug_cfg, rng=rng) + aug_df = [] + else: + augmenter = None + aug_df = None + num_augs = 1 + + metadata_columns = ["speech_duration"] + + ar_args = AR.filter_args(**kwargs) + logging.info("opening output stream: %s with args=%s", output_spec, str(ar_args)) + with DWF.create(output_spec, metadata_columns=metadata_columns) as writer: + + logging.info(f"opening input stream: {recordings_file} with args={ar_args}") + with AR(recordings_file, **ar_args) as reader: + + if vad_spec is not None: + logging.info("opening VAD stream: %s", vad_spec) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) + + while not reader.eof(): + t1 = time.time() + key, x0, fs = reader.read(1) + if len(key) == 0: + break + + x0 = x0[0] + key0 = key[0] + fs = fs[0] + t2 = time.time() + if fs != model.sample_frequency: + resampler = get_resampler(fs, model.sample_frequency) + x0 = resampler(x0) + + logging.info("processing utt %s", key0) + for aug_id in range(num_augs): + metadata = {} + t3 = time.time() + key, x = augment(key0, x0, augmenter, aug_df, aug_id) + t4 = time.time() + with torch.no_grad(): + x = torch.tensor( + x[None, :], dtype=torch.get_default_dtype() + ).to(device) + t5 = time.time() + tot_samples = x.shape[1] + if vad_spec is not None: + vad = v_reader.read(key0)[0] + vad = torch.tensor( + vad[None, None, :], dtype=torch.float + ).to(device) + vad = 
torch.nn.functional.interpolate( + vad, size=x.size(-1), mode="nearest" + ).bool()[0, 0] + x = x[:, vad] + + logging.info( + "utt %s detected %d/%d (%.2f %%) speech samples", + key, + x.shape[1], + tot_samples, + x.shape[1] / tot_samples * 100, + ) + + if random_utt_length: + x = select_random_chunk( + key, x, fs, min_utt_length, max_utt_length, rng + ) + + metadata["speech_duration"] = ( + x.shape[1] / model.sample_frequency + ) + + t6 = time.time() + if x.shape[1] == 0: + y = np.zeros((model.embed_dim,), dtype=float_cpu()) + else: + y = ( + model.extract_embed( + x, + chunk_length=chunk_length, + embed_layer=embed_layer, + ) + .cpu() + .numpy()[0] + ) + + t7 = time.time() + writer.write([key], [y], metadata=metadata) + if write_speech_dur is not None: + keys.append(key) + info.append(str(x.shape[1] / fs)) + + t8 = time.time() + read_time = t2 - t1 + tot_time = read_time + t8 - t3 + logging.info( + ( + "utt %s total-time=%.3f read-time=%.3f " + "aug-time=%.3f feat-time=%.3f " + "vad-time=%.3f embed-time=%.3f write-time=%.3f " + "rt-factor=%.2f" + ), + key, + tot_time, + read_time, + t4 - t3, + t5 - t4, + t6 - t5, + t7 - t6, + t8 - t7, + x.shape[1] / fs / tot_time, + ) + + if write_speech_dur is not None: + logging.info("writing speech duration in secs to %s", write_speech_dur) + u2sd = Utt2Info.create(keys, info) + u2sd.save(write_speech_dur) + + if aug_info_path is not None: + aug_df = pd.concat(aug_df, ignore_index=True) + aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="""Extracts x-vectors from waveform computing acoustic features on the fly""" + ) + + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--recordings-file", required=True) + parser.add_argument("--vad", dest="vad_spec", default=None) + parser.add_argument("--write-speech-dur", default=None) + parser.add_argument( + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad") + ) + + AR.add_class_args(parser) + + parser.add_argument("--aug-cfg", default=None) + parser.add_argument("--aug-info-path", default=None) + parser.add_argument( + "--num-augs", default=1, type=int, help="number of augmentations per utterance" + ) + + parser.add_argument("--model-path", required=True) + parser.add_argument( + "--chunk-length", + type=float, + default=0, + help=( + "max. 
chunk length used in each forward pass " + "of the x-vector encoder," + "if 0 the full utterance is used" + ), + ) + parser.add_argument( + "--embed-layer", + type=int, + default=None, + help=( + "classifier layer to get the embedding from, " + "if None, it uses layer set in training phase" + ), + ) + + parser.add_argument( + "--random-utt-length", + default=False, + action="store_true", + help="calculates x-vector from a random chunk", + ) + parser.add_argument( + "--min-utt-length", + type=float, + default=5, + help=("minimum utterance length in secs when using random utt length"), + ) + parser.add_argument( + "--max-utt-length", + type=float, + default=120, + help=("maximum utterance length in secs when using random utt length"), + ) + + parser.add_argument("--output-spec", required=True) + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + extract_xvectors(**namespace_to_dict(args)) diff --git a/hyperion/bin/extract_xvectors_from_feats.py b/hyperion/bin/extract_xvectors_from_feats.py index 13ad4277..b02db70c 100755 --- a/hyperion/bin/extract_xvectors_from_feats.py +++ b/hyperion/bin/extract_xvectors_from_feats.py @@ -19,8 +19,12 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) def init_device(use_gpu): @@ -50,13 +54,15 @@ def load_model(model_path, device): def select_random_chunk(key, x, min_utt_length, max_utt_length, rng): - utt_length = rng.randint(low=min_utt_length, high=max_utt_length + 1) + utt_length = rng.integers(low=min_utt_length, high=max_utt_length + 1) if utt_length < x.shape[1]: - first_frame = rng.randint(low=0, high=x.shape[1] - utt_length) + first_frame = rng.integers(low=0, high=x.shape[1] - utt_length) x = x[:, first_frame : first_frame + utt_length] logging.info( - "extract-random-utt %s of length=%d first-frame=%d" - % (key, x.shape[1], first_frame) + "extract-random-utt %s of length=%d first-frame=%d", + key, + x.shape[1], + first_frame, ) return x @@ -78,7 +84,7 @@ def extract_xvectors( ): logging.info("initializing") - rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) mvn = init_mvn(device, **kwargs) model = load_model(model_path, device) diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py index 577bbae7..6a8130d3 100755 --- a/hyperion/bin/extract_xvectors_from_wav.py +++ b/hyperion/bin/extract_xvectors_from_wav.py @@ -21,8 +21,12 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) def init_device(use_gpu): @@ -76,9 +80,9 @@ def augment(key0, x0, augmenter, aug_df, aug_id): def select_random_chunk(key, x, min_utt_length, max_utt_length, rng): - utt_length = 
rng.randint(low=min_utt_length, high=max_utt_length + 1) + utt_length = rng.integers(low=min_utt_length, high=max_utt_length + 1) if utt_length < x.shape[1]: - first_frame = rng.randint(low=0, high=x.shape[1] - utt_length) + first_frame = rng.integers(low=0, high=x.shape[1] - utt_length) x = x[:, first_frame : first_frame + utt_length] logging.info( "extract-random-utt %s of length=%d first-frame=%d", @@ -90,7 +94,7 @@ def select_random_chunk(key, x, min_utt_length, max_utt_length, rng): def extract_xvectors( - input_spec, + recordings_file, output_spec, vad_spec, write_num_frames_spec, @@ -108,7 +112,7 @@ def extract_xvectors( **kwargs ): - rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) feat_extractor = init_feats(device, **kwargs) model = load_model(model_path, device) @@ -130,9 +134,9 @@ def extract_xvectors( with DWF.create(output_spec) as writer: logging.info( - "opening input stream: {} with args={}".format(input_spec, ar_args) + "opening input stream: {} with args={}".format(recordings_file, ar_args) ) - with AR(input_spec, **ar_args) as reader: + with AR(recordings_file, **ar_args) as reader: if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) @@ -235,12 +239,12 @@ def extract_xvectors( parser = ArgumentParser( description=( - "Extracts x-vectors from waveform computing " "acoustic features on the fly" + "Extracts x-vectors from waveform computing acoustic features on the fly" ) ) parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--input", dest="input_spec", required=True) + parser.add_argument("--recordings-file", required=True) parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( "--write-num-frames", dest="write_num_frames_spec", default=None @@ -299,7 +303,7 @@ def extract_xvectors( help=("maximum utterance length when using random utt length"), ) - parser.add_argument("--output", dest="output_spec", required=True) + parser.add_argument("--output-spec", required=True) parser.add_argument( "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" ) diff --git a/hyperion/bin/extract_xvectors_slidwin_from_feats.py b/hyperion/bin/extract_xvectors_slidwin_from_feats.py index a54c4d64..bcec5133 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_feats.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_feats.py @@ -20,8 +20,12 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) def init_device(use_gpu): @@ -71,7 +75,7 @@ def extract_xvectors( ): logging.info("initializing") - rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) mvn = init_mvn(device, **kwargs) model = load_model(model_path, device) diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py index 8939ba91..f1a64e1b 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_wav.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py @@ -22,8 +22,12 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from 
hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) def init_device(use_gpu): @@ -96,7 +100,7 @@ def extract_xvectors( **kwargs ): - rng = np.random.RandomState(seed=1123581321 + kwargs["part_idx"]) + rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) feat_extractor = init_feats(device, **kwargs) model = load_model(model_path, device) diff --git a/hyperion/bin/finetune_wav2xvector.py b/hyperion/bin/finetune_wav2xvector.py new file mode 100755 index 00000000..b100b544 --- /dev/null +++ b/hyperion/bin/finetune_wav2xvector.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import multiprocessing +import os +import sys +import time +from pathlib import Path + +import torch +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch import TorchModelLoader as TML +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import SegSamplerFactory +from hyperion.torch.metrics import CategoricalAccuracy + +# from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import Wav2ResNet1dXVector as R1dXVec +from hyperion.torch.models import Wav2ResNetXVector as RXVec + +# from hyperion.torch.models import SpineNetXVector as SpineXVec +# from hyperion.torch.models import TDNNXVector as TDXVec +# from hyperion.torch.models import TransformerXVectorV1 as TFXVec +# from hyperion.torch.narchs import AudioFeatsMVN as AF +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + +xvec_dict = { + "resnet": RXVec, + "resnet1d": R1dXVec, + # "efficientnet": EXVec, + # "tdnn": TDXVec, + # "transformer": TFXVec, + # "spinenet": SpineXVec, +} + + +def init_data(partition, rank, num_gpus, **kwargs): + + kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**kwargs["dataset"]) + sampler_args = kwargs["sampler"] + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = SegSamplerFactory.create(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_xvector(num_classes, in_model_file, rank, xvec_class, **kwargs): + xvec_args = xvec_class.filter_finetune_args(**kwargs["model"]) + if rank == 0: + logging.info("xvector network ft args={}".format(xvec_args)) + xvec_args["xvector"]["num_classes"] = num_classes + model = TML.load(in_model_file) + model.change_config(**xvec_args) + if rank == 0: + 
logging.info("x-vector-model={}".format(model)) + return model + + +def init_hard_prototype_mining(model, train_loader, val_loader, rank): + try: + hard_prototype_mining = train_loader.batch_sampler.hard_prototype_mining + except: + hard_prototype_mining = False + + if not hard_prototype_mining: + return + + if rank == 0: + logging.info("setting hard prototypes") + + affinity_matrix = model.compute_prototype_affinity() + train_loader.batch_sampler.set_hard_prototypes(affinity_matrix) + + try: + hard_prototype_mining = val_loader.batch_sampler.hard_prototype_mining + except: + hard_prototype_mining = False + + if not hard_prototype_mining: + return + + val_loader.batch_sampler.set_hard_prototypes(affinity_matrix) + + +def train_xvec(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + model = init_xvector(list(train_loader.dataset.num_classes.values())[0], **kwargs) + init_hard_prototype_mining(model, train_loader, val_loader, rank) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, device=device, metrics=metrics, ddp=world_size > 1, **trn_args + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(xvec_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + + train_parser = ArgumentParser(prog="") + + AD.add_class_args(train_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + "data.train.dataset.class_files", "data.val.dataset.class_files" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + + xvec_class.add_finetune_args(parser, prefix="model") + parser.add_argument("--in-model-file", required=True) + Trainer.add_class_args( + parser, prefix="trainer", train_modes=xvec_class.valid_train_modes() + ) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Fine-tune x-vector model from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + for k, v in xvec_dict.items(): + 
parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + xvec_type = args.subcommand + args_sc = vars(args)[xvec_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.xvec_class = xvec_dict[xvec_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_xvec(gpu_id, args_sc) diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py index 209915c5..4336b7b9 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_classif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py @@ -24,8 +24,12 @@ from hyperion.torch.utils import open_device from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialNdx, Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) def read_utt_list(list_file, class2int_file, part_idx, num_parts): diff --git a/hyperion/bin/hyperion_dataset.py b/hyperion/bin/hyperion_dataset.py old mode 100644 new mode 100755 index c5a3f6b9..2e3a35ec --- a/hyperion/bin/hyperion_dataset.py +++ b/hyperion/bin/hyperion_dataset.py @@ -8,30 +8,40 @@ from typing import List, Optional, Union from hyperion.hyp_defs import config_logger -from hyperion.utils import (ClassInfo, Dataset, EnrollmentMap, FeatureSet, - InfoTable, PathLike, RecordingSet, SegmentSet) -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - -subcommands = ["add_features"] -# table_dict = { -# "segments": SegmentSet, -# "recordings": RecordingSet, -# "features": FeatureSet, -# "classes": ClassInfo, -# "enrollments": EnrollmentMap, -# "generic": InfoTable, -# } +from hyperion.utils import ( + ClassInfo, + Dataset, + EnrollmentMap, + FeatureSet, + InfoTable, + PathLike, + RecordingSet, + SegmentSet, +) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, + ActionYesNo, +) + +subcommand_list = [ + "add_features", + "set_recordings", + "make_from_recordings", + "remove_short_segments", + "rebuild_class_idx", + "remove_classes_few_segments", + "split_train_val", + "copy", + "add_cols_to_segments", +] def add_common_args(parser): parser.add_argument( - "-v", - "--verbose", - dest="verbose", - default=1, - choices=[0, 1, 2, 3], - type=int, + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int, ) @@ -45,6 +55,11 @@ def make_add_features_parser(): "--features-name", required=True, help="""name of the feature""" ) parser.add_argument("--features-file", required=True, help="""feature set file""") + parser.add_argument( + "--output-dataset", + default=None, + help="""output dataset dir, if None, we use the same as input""", + ) add_common_args(parser) return parser @@ -54,10 +69,353 @@ def add_features( dataset: PathLike, features_name: str, features_file: PathLike, + output_dataset: PathLike, ): + if output_dataset is None: + output_dataset = dataset + dataset = Dataset.load(dataset, lazy=True) dataset.add_features(features_name, features_file) - dataset.save(dataset) + dataset.save(output_dataset) + + +def make_set_recordings_parser(): + parser = 
ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--recordings-file", required=True, help="""recordings set file""" + ) + parser.add_argument( + "--output-dataset", + default=None, + help="""output dataset dir, if None, we use the same as input""", + ) + parser.add_argument( + "--remove-features", + default=None, + nargs="+", + help="""removes feature files from the dataset, + since they maybe obsolote after modifiying the recordings""", + ) + parser.add_argument( + "--update-seg-durs", + default=False, + action=ActionYesNo, + help="""updates the durations in the segment table""", + ) + + add_common_args(parser) + return parser + + +def set_recordings( + dataset: PathLike, + recordings_file: PathLike, + output_dataset: PathLike, + remove_features: List[str], + update_seg_durs: bool, +): + if output_dataset is None: + output_dataset = dataset + + dataset = Dataset.load(dataset, lazy=True) + dataset.set_recordings(recordings_file, update_seg_durs) + if remove_features is not None: + for features_name in remove_features: + dataset.remove_features(features_name) + + dataset.save(output_dataset) + + +def make_make_from_recordings_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--recordings-file", required=True, help="""recordings set file""" + ) + + add_common_args(parser) + return parser + + +def make_from_recordings( + dataset: PathLike, recordings_file: PathLike, +): + output_dataset = dataset + import pandas as pd + + rec_df = pd.read_csv(recordings_file) + seg_df = rec_df[["id"]] + segments = SegmentSet(seg_df) + dataset = Dataset(segments, recordings=recordings_file) + dataset.save(output_dataset) + + +def make_remove_short_segments_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--min-length", + required=True, + type=float, + help="""minimum required length of the segment""", + ) + + parser.add_argument( + "--length-name", + default="duration", + help="""name of the column indicating the length of the segment""", + ) + parser.add_argument( + "--output-dataset", + default=None, + help="""output dataset dir, if None, we use the same as input""", + ) + + add_common_args(parser) + return parser + + +def remove_short_segments( + dataset: PathLike, min_length: float, length_name: str, output_dataset: PathLike, +): + if output_dataset is None: + output_dataset = dataset + + dataset = Dataset.load(dataset, lazy=True) + dataset.remove_short_segments(min_length, length_name) + dataset.save(output_dataset) + + +def make_rebuild_class_idx_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--class-name", required=True, help="""name of the class type e.g.: speaker""" + ) + parser.add_argument( + "--output-dataset", + default=None, + help="""output dataset dir, if None, we use the same as input""", + ) + + add_common_args(parser) + return parser + + +def rebuild_class_idx( + dataset: PathLike, class_name: str, output_dataset: PathLike, +): + if output_dataset is None: + 
output_dataset = dataset + + dataset = Dataset.load(dataset, lazy=True) + dataset.rebuild_class_idx(class_name) + dataset.save(output_dataset) + + +def make_remove_classes_few_segments_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--class-name", required=True, help="""name of the class type e.g.: speaker""" + ) + parser.add_argument( + "--min-segs", default=1, type=int, help="""min. num. of segments/class""" + ) + parser.add_argument( + "--rebuild-idx", + default=False, + action=ActionYesNo, + help="""regenerate class indexes from 0 to new_num_classes-1""", + ) + parser.add_argument( + "--output-dataset", + default=None, + help="""output dataset dir, if None, we use the same as input""", + ) + + add_common_args(parser) + return parser + + +def remove_classes_few_segments( + dataset: PathLike, + class_name: str, + min_segs: int, + rebuild_idx: bool, + output_dataset: PathLike, +): + if output_dataset is None: + output_dataset = dataset + + dataset = Dataset.load(dataset, lazy=True) + dataset.remove_classes_few_segments(class_name, min_segs, rebuild_idx) + dataset.save(output_dataset) + + +def make_split_train_val_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""input dataset dir or .yaml file""" + ) + parser.add_argument( + "--val-prob", + default=0.05, + type=float, + help="""proportion of segments used for val""", + ) + parser.add_argument( + "--min-train-samples", + default=1, + type=int, + help="""min. number of training samples / class""", + ) + + parser.add_argument( + "--joint-classes", + default=None, + nargs="+", + help="""types of classes that need to have same classes in train and val""", + ) + parser.add_argument( + "--disjoint-classes", + default=None, + nargs="+", + help="""types of classes that need to have different classes in train and val""", + ) + parser.add_argument( + "--seed", default=11235813, type=int, help="""random seed""", + ) + + parser.add_argument( + "--train-dataset", required=True, help="""output train dataset dir""", + ) + parser.add_argument( + "--val-dataset", required=True, help="""output val dataset dir""", + ) + + add_common_args(parser) + return parser + + +def split_train_val( + dataset: PathLike, + val_prob: float, + joint_classes: List[str], + disjoint_classes: List[str], + min_train_samples: int, + seed: int, + train_dataset: PathLike, + val_dataset: PathLike, +): + dataset = Dataset.load(dataset, lazy=True) + train_ds, val_ds = dataset.split_train_val( + val_prob, joint_classes, disjoint_classes, min_train_samples, seed + ) + train_ds.save(train_dataset) + val_ds.save(val_dataset) + + num_total = len(dataset) + num_train = len(train_ds) + num_val = len(val_ds) + logging.info( + "train: %d (%.2f%%) segments, val: %d (%.2f%%) segments", + num_train, + num_train / num_total * 100, + num_val, + num_val / num_total * 100, + ) + + +def make_copy_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--output-dataset", + required=True, + help="""output dataset dir, if None, we use the same as input""", + ) + + add_common_args(parser) + return parser + + +def copy( + dataset: PathLike, output_dataset: PathLike, +): + dataset = 
Dataset.load(dataset, lazy=True) + dataset.save(output_dataset) + + +def make_add_cols_to_segments_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--right-table", required=True, help="table where the new data is" + ) + parser.add_argument( + "--columns", + required=True, + nargs="+", + help="""columns to copy to segments table""", + ) + parser.add_argument( + "--on", default=["id"], nargs="+", help="""columns to match both tables rows""", + ) + parser.add_argument( + "--right-on", + default=None, + nargs="+", + help="""columns to match both tables rows""", + ) + + parser.add_argument( + "--output-dataset", + default=None, + help="""output dataset dir, if None, we use the same as input""", + ) + + add_common_args(parser) + return parser + + +def add_cols_to_segments( + dataset: PathLike, + right_table: PathLike, + column_names: List[str], + on: List[str], + right_on: List[str], + output_dataset: PathLike, +): + if output_dataset is None: + output_dataset = dataset + + dataset = Dataset.load(dataset, lazy=True) + dataset.add_cols_to_segments(right_table, column_names, on, right_on) + dataset.save(output_dataset) if __name__ == "__main__": @@ -66,15 +424,15 @@ def add_features( parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() - for subcommand in subcommands: + for subcommand in subcommand_list: parser_func = f"make_{subcommand}_parser" subparser = globals()[parser_func]() - subcommands.add_subcommand(k, subparser) + subcommands.add_subcommand(subcommand, subparser) args = parser.parse_args() subcommand = args.subcommand kwargs = namespace_to_dict(args)[args.subcommand] config_logger(kwargs["verbose"]) del kwargs["verbose"] - + del kwargs["cfg"] globals()[subcommand](**kwargs) diff --git a/hyperion/bin/hyperion_tables.py b/hyperion/bin/hyperion_tables.py index 5a5f0b4f..7f61b35a 100755 --- a/hyperion/bin/hyperion_tables.py +++ b/hyperion/bin/hyperion_tables.py @@ -8,12 +8,23 @@ from typing import List, Optional, Union from hyperion.hyp_defs import config_logger -from hyperion.utils import (ClassInfo, EnrollmentMap, FeatureSet, InfoTable, - PathLike, RecordingSet, SegmentSet) -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) - -subcommands = ["cat"] +from hyperion.utils import ( + ClassInfo, + EnrollmentMap, + FeatureSet, + InfoTable, + PathLike, + RecordingSet, + SegmentSet, +) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + +subcommand_list = ["cat"] table_dict = { "segments": SegmentSet, "recordings": RecordingSet, @@ -73,11 +84,11 @@ def cat( table_type: str, input_files: Union[List[PathLike], None], output_file: PathLike, - num_table: int, + num_tables: int, base_idx: int = 1, ): - assert input_files is not None or num_jobs != 0 + assert input_files is not None or num_tables != 0 output_file = Path(output_file) if input_files is None: ext = output_file.suffix @@ -103,15 +114,15 @@ def cat( parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() - for subcommand in subcommands: + for subcommand in subcommand_list: parser_func = f"make_{subcommand}_parser" subparser = globals()[parser_func]() - subcommands.add_subcommand(k, subparser) + subcommands.add_subcommand(subcommand, subparser) args = parser.parse_args() subcommand = args.subcommand 
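[Editor's note] hyperion_dataset.py and hyperion_tables.py now share the subcommand pattern fixed above: iterate an explicit subcommand_list, build each parser through globals() lookup of make_<name>_parser, and dispatch to the function of the same name. Reduced to its essentials, assuming a single hypothetical "cat" verb:

from jsonargparse import ArgumentParser, namespace_to_dict

def make_cat_parser():
    p = ArgumentParser()
    p.add_argument("--output-file", required=True)
    return p

def cat(output_file):
    print("cat ->", output_file)

if __name__ == "__main__":
    parser = ArgumentParser()
    subcommands = parser.add_subcommands()
    for name in ["cat"]:  # one entry per tool verb
        subcommands.add_subcommand(name, globals()[f"make_{name}_parser"]())
    args = parser.parse_args()
    kwargs = namespace_to_dict(args)[args.subcommand]
    globals()[args.subcommand](**kwargs)  # route to the same-named function

This is why the old loop "for subcommand in subcommands" was a bug: it iterated the jsonargparse action object instead of the list of verb names.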
kwargs = namespace_to_dict(args)[args.subcommand] config_logger(kwargs["verbose"]) del kwargs["verbose"] - + del kwargs["cfg"] globals()[subcommand](**kwargs) diff --git a/hyperion/bin/make_babble_noise_audio_files.py b/hyperion/bin/make_babble_noise_audio_files.py index 4a356037..68e5b22b 100755 --- a/hyperion/bin/make_babble_noise_audio_files.py +++ b/hyperion/bin/make_babble_noise_audio_files.py @@ -15,12 +15,15 @@ from hyperion.io import RandomAccessAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) -from scipy import ndimage, signal +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) -def make_noise(xs): +def make_noise(xs, max_value): lens = np.array([x.shape[0] for x in xs]) max_len = np.max(lens) @@ -28,73 +31,78 @@ def make_noise(xs): for i in range(len(xs)): xs[i] = np.tile(xs[i], int(num_tiles[i]))[:max_len] + xs[0] -= xs[0].mean() for i in range(1, len(xs)): xs[0] += xs[i] - xs[i].mean() + max_x = np.max(np.abs(xs[0])) + if max_x > max_value: + xs[0] *= max_value / max_x + return xs[0] def make_babble_noise_audio_files( - input_path, + recordings_file, output_path, - output_script, - write_time_durs_spec, + output_recordings_file, + write_time_durs, min_spks=3, max_spks=7, num_reuses=5, random_seed=112358, - **kwargs + **kwargs, ): input_args = AR.filter_args(**kwargs) output_args = Writer.filter_args(**kwargs) - logging.info("input_args={}".format(input_args)) - logging.info("output_args={}".format(output_args)) + logging.info(f"input_args={input_args}") + logging.info(f"output_args={output_args}") - rng = np.random.RandomState(seed=random_seed) + rng = np.random.default_rng(seed=random_seed) - if write_time_durs_spec is not None: + if write_time_durs is not None: okeys = [] info = [] count = 0 t1 = time.time() - with AR(input_path, **input_args) as reader: + with AR(recordings_file, **input_args) as reader, Writer( + output_path, output_recordings_file, **output_args + ) as writer: keys = reader.keys - with Writer(output_path, output_script, **output_args) as writer: - - for iters in range(num_reuses): - keys = rng.permutation(keys) - - cur_spks = min_spks + for iters in range(num_reuses): + keys = rng.permutation(keys) + + cur_spks = min_spks + utt_list = [] + for utt_idx in range(len(keys)): + if len(utt_list) < cur_spks: + utt_list.append(keys[utt_idx]) + continue + + x, fs = reader.read(utt_list) + fs = fs[0] + y = make_noise(x, reader.wav_scale) + babble_id = "babble-%05d" % (count) + logging.info("writing file %s", babble_id) + writer.write([babble_id], [y], [fs]) + if write_time_durs is not None: + okeys.append(babble_id) + info.append(y.shape[0] / fs) + + count += 1 utt_list = [] - for utt_idx in range(len(keys)): - if len(utt_list) < cur_spks: - utt_list.append(keys[utt_idx]) - continue - - x, fs = reader.read(utt_list) - fs = fs[0] - y = make_noise(x) - babble_id = "babble-%05d" % (count) - logging.info("writing file % s" % (babble_id)) - writer.write([babble_id], [y], [fs]) - if write_time_durs_spec is not None: - okeys.append(babble_id) - info.append(y.shape[0] / fs) - - count += 1 - utt_list = [] - cur_spks += 1 - if cur_spks > max_spks: - cur_spks = min_spks - - if write_time_durs_spec is not None: - logging.info("writing time durations to %s" % (write_time_durs_spec)) + cur_spks += 1 + if cur_spks > max_spks: + cur_spks = min_spks + + if write_time_durs is not 
None: + logging.info("writing time durations to %s", write_time_durs) u2td = Utt2Info.create(okeys, info) - u2td.save(write_time_durs_spec) + u2td.save(write_time_durs) - logging.info("finished making babble files, elapsed-time=%f" % (time.time() - t1)) + logging.info("finished making babble files, elapsed-time=%f", time.time() - t1) if __name__ == "__main__": @@ -102,10 +110,10 @@ def make_babble_noise_audio_files( parser = ArgumentParser(description="Creates babble noise by adding speech files") parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--input", dest="input_path", required=True) + parser.add_argument("--recordings-file", required=True) parser.add_argument("--output-path", required=True) - parser.add_argument("--output-script", required=True) - parser.add_argument("--write-time-durs", dest="write_time_durs_spec", default=None) + parser.add_argument("--output-recordings-file", required=True) + parser.add_argument("--write-time-durs", default=None) AR.add_class_args(parser) Writer.add_class_args(parser) diff --git a/hyperion/bin/make_wav2xvector.py b/hyperion/bin/make_wav2xvector.py new file mode 100755 index 00000000..b5972d1b --- /dev/null +++ b/hyperion/bin/make_wav2xvector.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +""" + Copyright 2023 Jesus Villalba (Johns Hopkins University) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import os +import sys +import time + +import numpy as np +import pandas as pd +import torch +from hyperion.hyp_defs import config_logger + +# from hyperion.torch import TorchModelLoader as TML +from hyperion.torch import TorchModel + +# from hyperion.torch.models import SpineNetXVector as SpineXVec +# from hyperion.torch.models import TDNNXVector as TDXVec +# from hyperion.torch.models import TransformerXVectorV1 as TFXVec +# from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import ResNet1dXVector as R1dXVec +from hyperion.torch.models import ResNetXVector as RXVec +from hyperion.torch.models import Wav2ResNet1dXVector as W2R1dXVec +from hyperion.torch.models import Wav2ResNetXVector as W2RXVec +from hyperion.torch.narchs import AudioFeatsMVN as AF +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + + +def init_feats(feats): + feat_args = AF.filter_args(**feats) + logging.info(f"feat args={feat_args}") + logging.info("initializing feature extractor") + feat_extractor = AF(trans=True, **feat_args) + logging.info(f"feat-extractor={feat_extractor}") + return feat_extractor + + +def load_model(model_path): + logging.info("loading model %s", model_path) + model = TorchModel.auto_load(model_path) + logging.info(f"xvector-model={model}") + return model + + +def make_wav2xvector(feats, xvector_path, output_path): + + feats = init_feats(feats) + xvector_model = load_model(xvector_path) + if isinstance(xvector_model, RXVec): + model = W2RXVec(feats, xvector_model) + elif isinstance(xvector_model, R1dXVec): + model = W2R1dXVec(feats, xvector_model) + else: + TypeError( + "Conversion of xvector class=%s not available", xvector_model.__class__ + ) + + logging.info("saving model of class %s to %s", model.__class__, output_path) + model.save(output_path) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="""Combines the feature extractor config with XVector model + to produce a Wav2XVector model with integrated feature extraction""" + ) + + parser.add_argument("--cfg", action=ActionConfigFile) 
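[Editor's note] The isinstance chain in make_wav2xvector above grows with every x-vector class that gains a Wav2* counterpart. An equivalent table-driven sketch (hypothetical helper, using the same class aliases imported above) keeps the unsupported-class error path in one place:

wav2_map = {RXVec: W2RXVec, R1dXVec: W2R1dXVec}

def to_wav2xvector(feats, xvector_model):
    # pick the Wav2* wrapper registered for this x-vector class, if any
    for cls, wav2_cls in wav2_map.items():
        if isinstance(xvector_model, cls):
            return wav2_cls(feats, xvector_model)
    raise TypeError(
        f"Conversion of xvector class={xvector_model.__class__} not available"
    )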
+ AF.add_class_args(parser, prefix="feats") + parser.add_argument("--xvector-path", required=True) + parser.add_argument("--output-path", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + config_logger(args.verbose) + del args.verbose + del args.cfg + logging.debug(args) + + make_wav2xvector(**namespace_to_dict(args)) diff --git a/hyperion/bin/merge_scores.py b/hyperion/bin/merge_scores.py new file mode 100755 index 00000000..6a275f5c --- /dev/null +++ b/hyperion/bin/merge_scores.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from pathlib import Path + +from hyperion.hyp_defs import config_logger + +from hyperion.utils import TrialScores +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + + +def merge_scores(input_files, output_file, num_enroll_parts, num_test_parts, base_idx): + + output_file = Path(output_file) + output_file.parent.mkdir(exist_ok=True, parents=True) + + ext = output_file.suffix + + if input_files is None: + input_file_base = output_file.with_suffix("") + input_files = [] + for i in range(num_enroll_parts): + idx_i = base_idx + i + for j in range(num_test_parts): + idx_j = base_idx + j + input_file_i = input_file_base.with_suffix(f".{idx_i}.{idx_j}{ext}") + input_files.append(input_file_i) + + if ext == ".h5": + # if files are h5 we need to load everything in RAM + score_list = [] + for score_file in input_files: + scores = TrialScores.load(score_file) + score_list.append(scores) + + scores = TrialScores.merge(score_list) + scores.save(output_file) + else: + has_header = ext in [".csv", ".tsv"] + write_header = True + with open(output_file, "w", encoding="utf-8") as f_out: + for score_file in input_files: + with open(score_file) as f_in: + for i, line in enumerate(f_in): + if i == 0 and has_header and not write_header: + continue + f_out.write(line) + write_header = False + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Tool to manipulates the Hyperion data tables") + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--input-files", default=None, nargs="+", help="optional list of input files" + ) + parser.add_argument( + "--output-file", + required=True, + help="""output file, if input-files is None, input files names are derived from it""", + ) + parser.add_argument( + "--num-enroll-parts", + default=1, + type=int, + help="""number of parts we divided the enrollment set""", + ) + parser.add_argument( + "--num-test-parts", + default=1, + type=int, + help="""number of parts we divided the test set""", + ) + + parser.add_argument( + "--base-idx", + default=1, + type=int, + help="""index of the first job, typically 0 or 1""", + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int, + ) + + args = parser.parse_args() + kwargs = namespace_to_dict(args) + config_logger(kwargs["verbose"]) + del kwargs["verbose"] + del kwargs["cfg"] + merge_scores(**kwargs) diff --git a/hyperion/bin/pack_wav_rirs.py b/hyperion/bin/pack_wav_rirs.py index 78ac59c1..b2a1bc2b 100755 --- a/hyperion/bin/pack_wav_rirs.py +++ b/hyperion/bin/pack_wav_rirs.py @@ -13,8 +13,12 @@ from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import 
SequentialAudioReader as AR -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) def pack_wav_rirs(input_path, output_spec, **kwargs): @@ -32,12 +36,15 @@ def pack_wav_rirs(input_path, output_spec, **kwargs): h[h < 1e-3] = 0 h = np.trim_zeros(h) logging.info( - "Packing rir %s h_max=%f h_delay=%d h-length=%d" - % (key, h_max, h_delay, len(h)) + "Packing rir %s h_max=%f h_delay=%d h-length=%d", + key, + h_max, + h_delay, + len(h), ) writer.write([key], [h]) - logging.info("Packed RIRS elapsed-time=%.f" % (time.time() - t1)) + logging.info("Packed RIRS elapsed-time=%.f", time.time() - t1) if __name__ == "__main__": diff --git a/hyperion/bin/plot_embedding_tsne_per_class.py b/hyperion/bin/plot_embedding_tsne_per_class.py index 6af0202c..14da4d07 100755 --- a/hyperion/bin/plot_embedding_tsne_per_class.py +++ b/hyperion/bin/plot_embedding_tsne_per_class.py @@ -18,9 +18,14 @@ from hyperion.np.clustering import AHC from hyperion.np.transforms import PCA, LNorm, SklTSNE from hyperion.utils import SegmentSet -from hyperion.utils.math import cosine_scoring -from jsonargparse import (ActionConfigFile, ActionParser, ActionYesNo, - ArgumentParser, namespace_to_dict) +from hyperion.utils.math_funcs import cosine_scoring +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ActionYesNo, + ArgumentParser, + namespace_to_dict, +) matplotlib.use("Agg") colors = ["b", "g", "r", "c", "m", "y", "k"] diff --git a/hyperion/bin/prepare_data.py b/hyperion/bin/prepare_data.py index e90ad0f7..f6723c7d 100755 --- a/hyperion/bin/prepare_data.py +++ b/hyperion/bin/prepare_data.py @@ -8,8 +8,12 @@ from hyperion.data_prep import DataPrep from hyperion.hyp_defs import config_logger -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) def make_parser(data_prep_class): @@ -33,6 +37,5 @@ def make_parser(data_prep_class): config_logger(1) data_prep_class = DataPrep.registry[args.subcommand] args = namespace_to_dict(args)[args.subcommand] - data_prep = data_prep_class(**args) data_prep.prepare() diff --git a/hyperion/bin/preprocess_audio_files.py b/hyperion/bin/preprocess_audio_files.py index e8adfd16..bda9a503 100755 --- a/hyperion/bin/preprocess_audio_files.py +++ b/hyperion/bin/preprocess_audio_files.py @@ -15,13 +15,26 @@ from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) from scipy import ndimage, signal +def resample_vad(vad, length): + step = (len(vad) - 1) / length + assert step < 1 + idx = step * np.arange(length, dtype=float) + idx = np.round(idx).astype(int) + return vad[idx] + + def process_vad(vad, length, fs, dilation, erosion): - vad = signal.resample(vad, length) > 0.5 + # vad = signal.resample(vad, length) > 0.5 + vad = resample_vad(vad, length) if dilation > 0: iters = int(dilation * fs) vad = ndimage.binary_dilation(vad, iterations=iters) @@ -34,9 +47,9 @@ def process_vad(vad, length, fs, dilation, erosion): def process_audio_files( - input_path, + recordings_file, output_path, - output_script, + output_recordings_file, 
write_time_durs_spec, vad_spec, vad_path_prefix, @@ -44,86 +57,92 @@ def process_audio_files( vad_dilation=0, vad_erosion=0, remove_dc_offset=False, - **kwargs + **kwargs, ): input_args = AR.filter_args(**kwargs) output_args = Writer.filter_args(**kwargs) - logging.info("input_args={}".format(input_args)) - logging.info("output_args={}".format(output_args)) + logging.info(f"input_args={input_args}") + logging.info(f"output_args={output_args}") if write_time_durs_spec is not None: keys = [] info = [] - with AR(input_path, **input_args) as reader: - with Writer(output_path, output_script, **output_args) as writer: + with AR(recordings_file, **input_args) as reader, Writer( + output_path, output_recordings_file, **output_args + ) as writer: - if vad_spec is not None: - logging.info("opening VAD stream: %s" % (vad_spec)) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) + if vad_spec is not None: + logging.info("opening VAD stream: %s", vad_spec) + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) - t1 = time.time() - for data in reader: - key, x, fs = data - logging.info("Processing audio %s" % (key)) - t2 = time.time() - - tot_samples = x.shape[0] - if vad_spec is not None: - num_vad_frames = int(round(tot_samples * vad_fs / fs)) - vad = v_reader.read(key, num_frames=num_vad_frames)[0].astype( - "bool", copy=False - ) - logging.info("vad=%d/%d" % (np.sum(vad == 1), len(vad))) - vad = process_vad(vad, tot_samples, fs, vad_dilation, vad_erosion) - logging.info("vad=%d/%d" % (np.sum(vad == 1), len(vad))) - x = x[vad] - - logging.info( - "utt %s detected %f/%f secs (%.2f %%) speech " - % ( - key[0], - x.shape[0] / fs, - tot_samples / fs, - x.shape[0] / tot_samples * 100, - ) - ) + t1 = time.time() + for data in reader: + key, x, fs = data + logging.info("Processing audio %s", key) + t2 = time.time() - if x.shape[0] > 0: - if remove_dc_offset: - x -= np.mean(x) - - writer.write([key], [x], [fs]) - if write_time_durs_spec is not None: - keys.append(key) - info.append(x.shape[0] / fs) - - xmax = np.max(x) - xmin = np.min(x) - else: - xmax = 0 - xmin = 0 - - t3 = time.time() - dt2 = (t2 - t1) * 1000 - dt3 = (t3 - t1) * 1000 - time_dur = len(x) / fs - rtf = (time_dur * 1000) / dt3 - logging.info( - ( - "Packed audio %s length=%0.3f secs " - "elapsed-time=%.2f ms. " - "read-time=%.2f ms. write-time=%.2f ms. " - "real-time-factor=%.2f" - "x-range=[%f-%f]" - ) - % (key, time_dur, dt3, dt2, dt3 - dt2, rtf, xmin, xmax) + tot_samples = x.shape[0] + if vad_spec is not None: + num_vad_frames = int(round(tot_samples * vad_fs / fs)) + vad = v_reader.read(key, num_frames=num_vad_frames)[0].astype( + "bool", copy=False ) - t1 = time.time() + logging.info("vad=%d/%d", np.sum(vad == 1), len(vad)) + vad = process_vad(vad, tot_samples, fs, vad_dilation, vad_erosion) + logging.info("vad=%d/%d", np.sum(vad == 1), len(vad)) + x = x[vad] + + logging.info( + "utt %s detected %f/%f secs (%.2f %%) speech ", + key[0], + x.shape[0] / fs, + tot_samples / fs, + x.shape[0] / tot_samples * 100, + ) + + if x.shape[0] > 0: + if remove_dc_offset: + x -= np.mean(x) + + writer.write([key], [x], [fs]) + if write_time_durs_spec is not None: + keys.append(key) + info.append(x.shape[0] / fs) + + xmax = np.max(x) + xmin = np.min(x) + else: + xmax = 0 + xmin = 0 + + t3 = time.time() + dt2 = (t2 - t1) * 1000 + dt3 = (t3 - t1) * 1000 + time_dur = len(x) / fs + rtf = (time_dur * 1000) / dt3 + logging.info( + ( + "Packed audio %s length=%0.3f secs " + "elapsed-time=%.2f ms. " + "read-time=%.2f ms. write-time=%.2f ms. 
" + "real-time-factor=%.2f " + "x-range=[%f - %f]" + ), + key, + time_dur, + dt3, + dt2, + dt3 - dt2, + rtf, + xmin, + xmax, + ) + t1 = time.time() if write_time_durs_spec is not None: - logging.info("writing time durations to %s" % (write_time_durs_spec)) + logging.info("writing time durations to %s", write_time_durs_spec) u2td = Utt2Info.create(keys, info) u2td.save(write_time_durs_spec) @@ -135,9 +154,9 @@ def process_audio_files( ) parser.add_argument("--cfg", action=ActionConfigFile) - parser.add_argument("--input", dest="input_path", required=True) + parser.add_argument("--recordings-file", required=True) parser.add_argument("--output-path", required=True) - parser.add_argument("--output-script", required=True) + parser.add_argument("--output-recordings-file", required=True) parser.add_argument("--write-time-durs", dest="write_time_durs_spec", default=None) parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index 8e1653b1..f132a35c 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -5,6 +5,7 @@ """ import logging import multiprocessing + # import sys import os import time @@ -17,13 +18,19 @@ from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy -from hyperion.torch.models import (HFHubert2ResNet1dXVector, - HFWav2Vec2ResNet1dXVector, - HFWavLM2ResNet1dXVector) +from hyperion.torch.models import ( + HFHubert2ResNet1dXVector, + HFWav2Vec2ResNet1dXVector, + HFWavLM2ResNet1dXVector, +) from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) model_dict = { "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, @@ -95,7 +102,7 @@ def train_model(gpu_id, args): trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: - logging.info("trainer args={}".format(trn_args)) + logging.info(f"trainer args={trn_args}") metrics = {"acc": CategoricalAccuracy()} trainer = Trainer( model, device=device, metrics=metrics, ddp=world_size > 1, **trn_args, diff --git a/hyperion/bin/train_wav2xvector.py b/hyperion/bin/train_wav2xvector.py new file mode 100755 index 00000000..ddf292b8 --- /dev/null +++ b/hyperion/bin/train_wav2xvector.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import multiprocessing +import os +from pathlib import Path + +import torch +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.data import AudioDataset as AD +from hyperion.torch.data import SegSamplerFactory +from hyperion.torch.metrics import CategoricalAccuracy + +# from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import Wav2ResNet1dXVector as R1dXVec +from hyperion.torch.models import Wav2ResNetXVector as RXVec + +# from hyperion.torch.models import SpineNetXVector as SpineXVec +# from hyperion.torch.models import TDNNXVector as TDXVec +# from hyperion.torch.models import TransformerXVectorV1 as TFXVec +from hyperion.torch.trainers import XVectorTrainer as Trainer +from hyperion.torch.utils import ddp 
+from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + +xvec_dict = { + "resnet": RXVec, + "resnet1d": R1dXVec, + # "efficientnet": EXVec, + # "tdnn": TDXVec, + # "transformer": TFXVec, + # "spinenet": SpineXVec, +} + + +def init_data(partition, rank, num_gpus, **kwargs): + + kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**kwargs["dataset"]) + sampler_args = kwargs["sampler"] + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = SegSamplerFactory.create(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_xvector(num_classes, rank, xvec_class, **kwargs): + xvec_args = xvec_class.filter_args(**kwargs["model"]) + if rank == 0: + logging.info("xvector network args={}".format(xvec_args)) + xvec_args["xvector"]["num_classes"] = num_classes + model = xvec_class(**xvec_args) + if rank == 0: + logging.info("x-vector-model={}".format(model)) + return model + + +def train_xvec(gpu_id, args): + + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + + model = init_xvector(list(train_loader.dataset.num_classes.values())[0], **kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + model, device=device, metrics=metrics, ddp=world_size > 1, **trn_args, + ) + trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(xvec_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + + train_parser = ArgumentParser(prog="") + + AD.add_class_args(train_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset", skip={}) + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + 
"data.train.dataset.class_files", "data.val.dataset.class_files" + ) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + + xvec_class.add_class_args(parser, prefix="model") + Trainer.add_class_args( + parser, prefix="trainer", train_modes=xvec_class.valid_train_modes() + ) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train Wav2XVector from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + for k, v in xvec_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + xvec_type = args.subcommand + args_sc = vars(args)[xvec_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except: + pass + + args_sc.xvec_class = xvec_dict[xvec_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_xvec(gpu_id, args_sc) diff --git a/hyperion/data_prep/__init__.py b/hyperion/data_prep/__init__.py index e978e219..9d885718 100644 --- a/hyperion/data_prep/__init__.py +++ b/hyperion/data_prep/__init__.py @@ -4,6 +4,8 @@ """ from .data_prep import DataPrep +from .musan import MusanDataPrep +from .rirs import RIRSDataPrep from .voxceleb2 import VoxCeleb2DataPrep from .voxceleb1 import VoxCeleb1DataPrep from .voxsrc22 import VoxSRC22DataPrep diff --git a/hyperion/data_prep/data_prep.py b/hyperion/data_prep/data_prep.py index d9828674..0f654676 100644 --- a/hyperion/data_prep/data_prep.py +++ b/hyperion/data_prep/data_prep.py @@ -67,7 +67,8 @@ def _get_recording_duration(recordings, i, n): def get_recording_duration(self, recording_set): import itertools - from ..utils import SCPList + + # from ..utils import SCPList #don't remember why I put this here futures = [] logging.info("submitting threats...") diff --git a/hyperion/data_prep/musan.py b/hyperion/data_prep/musan.py new file mode 100644 index 00000000..abf7a46c --- /dev/null +++ b/hyperion/data_prep/musan.py @@ -0,0 +1,107 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import glob +import re +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path + +import numpy as np +import pandas as pd +from jsonargparse import ActionYesNo +from tqdm import tqdm + +from ..utils import Dataset, RecordingSet, SegmentSet +from ..utils.misc import PathLike, urlretrieve_progress +from .data_prep import DataPrep + + +class MusanDataPrep(DataPrep): + """Class for preparing Musan database into tables + + Attributes: + corpus_dir: input data directory + subset: subset of the data noise, music, speech + output_dir: output data directory + target_sample_freq: target sampling frequency to convert the audios to. 
+ """ + + def __init__( + self, + corpus_dir: PathLike, + subset: str, + output_dir: PathLike, + target_sample_freq: int, + num_threads: int = 10, + **kwargs, + ): + super().__init__(corpus_dir, output_dir, False, target_sample_freq, num_threads) + self.subset = subset + + @staticmethod + def dataset_name(): + return "musan" + + @staticmethod + def add_class_args(parser): + DataPrep.add_class_args(parser) + parser.add_argument( + "--subset", + choices=["noise", "music", "speech"], + help="""musan subset in [noise, music, speech]""", + required=True, + ) + + def prepare(self): + logging.info( + "Peparing Musan %s corpus_dir:%s -> data_dir:%s", + self.subset, + self.corpus_dir, + self.output_dir, + ) + rec_dir = self.corpus_dir / self.subset + logging.info("searching audio files in %s", str(rec_dir)) + rec_files = list(rec_dir.glob("**/*.wav")) + if not rec_files: + # symlinks? try glob + rec_files = [ + Path(f) for f in glob.iglob(f"{rec_dir}/**/*.wav", recursive=True) + ] + + assert len(rec_files) > 0, "recording files not found" + + rec_ids = [f.with_suffix("").name for f in rec_files] + storage_paths = [str(f) for f in rec_files] + logging.info("making RecordingSet") + recs = pd.DataFrame({"id": rec_ids, "storage_path": storage_paths}) + recs = RecordingSet(recs) + recs.sort() + + logging.info("getting recording durations") + self.get_recording_duration(recs) + if self.target_sample_freq: + recs["target_sample_freq"] = self.target_sample_freq + + logging.info("making SegmentsSet") + segments = pd.DataFrame( + { + "id": rec_ids, + "duration": recs.loc[rec_ids, "duration"].values, + "noise_type": self.subset, + } + ) + segments = SegmentSet(segments) + segments.sort() + logging.info("making dataset") + dataset = Dataset( + segments, + recordings=recs, + ) + logging.info("saving dataset at %s", self.output_dir) + dataset.save(self.output_dir) + logging.info( + "datasets containts %d segments", + len(segments), + ) diff --git a/hyperion/data_prep/rirs.py b/hyperion/data_prep/rirs.py new file mode 100644 index 00000000..066819a8 --- /dev/null +++ b/hyperion/data_prep/rirs.py @@ -0,0 +1,103 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import glob +import re +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path + +import numpy as np +import pandas as pd +from jsonargparse import ActionYesNo +from tqdm import tqdm + +from ..utils import Dataset, RecordingSet, SegmentSet +from ..utils.misc import PathLike, urlretrieve_progress +from .data_prep import DataPrep + + +class RIRSDataPrep(DataPrep): + """Class for preparing Musan database into tables + + Attributes: + corpus_dir: input data directory + output_dir: output data directory + target_sample_freq: target sampling frequency to convert the audios to. 
+ """ + + def __init__( + self, + corpus_dir: PathLike, + output_dir: PathLike, + target_sample_freq: int, + num_threads: int = 10, + **kwargs, + ): + super().__init__(corpus_dir, output_dir, False, target_sample_freq, num_threads) + + @staticmethod + def dataset_name(): + return "rirs" + + @staticmethod + def add_class_args(parser): + DataPrep.add_class_args(parser) + + def prepare(self): + logging.info( + "Peparing RIRS corpus_dir:%s -> data_dir:%s", + self.corpus_dir, + self.output_dir, + ) + rec_dir = self.corpus_dir + rirs_file = self.corpus_dir / "rir_list" + if rirs_file.exists(): + rirs_table = pd.read_csv( + rirs_file, + sep=" ", + header=None, + names=["dummy1", "rir_id", "dummy2", "room_id", "rec_files"], + ) + rec_files = [Path(f) for f in rirs_table["rec_files"].values] + room_ids = rirs_table["room_id"].values + else: + logging.info("searching audio files in %s", str(rec_dir)) + rec_files = list(rec_dir.glob("**/*.wav")) + room_ids = None + if not rec_files: + # symlinks? try glob + rec_files = [ + Path(f) for f in glob.iglob(f"{rec_dir}/**/*.wav", recursive=True) + ] + + assert len(rec_files) > 0, "recording files not found" + + rec_ids = [f.with_suffix("").name for f in rec_files] + storage_paths = [str(f) for f in rec_files] + logging.info("making RecordingSet") + recs = pd.DataFrame({"id": rec_ids, "storage_path": storage_paths}) + recs = RecordingSet(recs) + recs.sort() + + logging.info("getting recording durations") + self.get_recording_duration(recs) + if self.target_sample_freq: + recs["target_sample_freq"] = self.target_sample_freq + + logging.info("making SegmentsSet") + segments = pd.DataFrame( + {"id": rec_ids, "duration": recs.loc[rec_ids, "duration"].values,} + ) + if room_ids is not None: + segments["room_id"] = room_ids + segments = SegmentSet(segments) + segments.sort() + logging.info("making dataset") + dataset = Dataset(segments, recordings=recs,) + logging.info("saving dataset at %s", self.output_dir) + dataset.save(self.output_dir) + logging.info( + "datasets containts %d segments", len(segments), + ) diff --git a/hyperion/data_prep/voxceleb1.py b/hyperion/data_prep/voxceleb1.py index b3958605..025fad37 100644 --- a/hyperion/data_prep/voxceleb1.py +++ b/hyperion/data_prep/voxceleb1.py @@ -233,17 +233,19 @@ def prepare(self): Path(f) for f in glob.iglob(f"{rec_dir}/**/*.wav", recursive=True) ] + assert len(rec_files) > 0, "recording files not found" + speakers = [f.parents[1].name for f in rec_files] video_ids = [f.parent.name for f in rec_files] if self.cat_videos: + rec_ids = [f"{s}-{v}" for s, v in zip(speakers, video_ids)] lists_cat_dir = self.output_dir / "lists_cat" lists_cat_dir.mkdir(exist_ok=True, parents=True) - uniq_video_ids, uniq_video_idx, video_idx = np.unique( - video_ids, return_index=True, return_inverse=True + rec_ids, uniq_rec_idx, rec_idx = np.unique( + rec_ids, return_index=True, return_inverse=True ) - rec_ids = uniq_video_ids - speakers = [speakers[i] for i in uniq_video_idx] - rec_ids = [f"{s}-{v}" for s, v in zip(speakers, uniq_video_ids)] + speakers = [speakers[i] for i in uniq_rec_idx] + video_ids = [video_ids[i] for i in uniq_rec_idx] file_paths = [] futures = [] @@ -256,15 +258,13 @@ def prepare(self): lists_cat_dir, rec_id, rec_files, - video_idx, + rec_idx, i, ) futures.append(future) logging.info("waiting threats...") file_paths = [f.result() for f in tqdm(futures)] - video_ids = uniq_video_ids - else: file_names = [f.with_suffix("").name for f in rec_files] if self.use_kaldi_ids: @@ -331,7 +331,7 @@ def prepare(self): 
dataset = Dataset( segments, classes={"speaker": speakers, "language_est": languages}, - recordings={"recordings": recs}, + recordings=recs, enrollments=enrollments, trials=trials, sparse_trials=False, diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py index 29ad3e44..969f2228 100644 --- a/hyperion/data_prep/voxceleb2.py +++ b/hyperion/data_prep/voxceleb2.py @@ -148,24 +148,27 @@ def prepare(self): df_lang = self._get_langs_est() rec_dir = self.corpus_dir / self.subset logging.info("searching audio files in %s", str(rec_dir)) - rec_files = list(rec_dir.glob("**/*.m4a")) + rec_files1 = list(rec_dir.glob("**/*.m4a")) + rec_files = [Path(f) for f in glob.iglob(f"{rec_dir}/**/*.m4a", recursive=True)] if not rec_files: # symlinks? try glob rec_files = [ - Path(f) for f in glob.iglob(f"{rec_dir}/**/*.wav", recursive=True) + Path(f) for f in glob.iglob(f"{rec_dir}/**/*.m4a", recursive=True) ] + assert len(rec_files) > 0, "recording files not found" + speakers = [f.parents[1].name for f in rec_files] video_ids = [f.parent.name for f in rec_files] if self.cat_videos: + rec_ids = [f"{s}-{v}" for s, v in zip(speakers, video_ids)] lists_cat_dir = self.output_dir / "lists_cat" lists_cat_dir.mkdir(exist_ok=True, parents=True) - uniq_video_ids, uniq_video_idx, video_idx = np.unique( - video_ids, return_index=True, return_inverse=True + rec_ids, uniq_rec_idx, rec_idx = np.unique( + rec_ids, return_index=True, return_inverse=True ) - rec_ids = uniq_video_ids - speakers = [speakers[i] for i in uniq_video_idx] - rec_ids = [f"{s}-{v}" for s, v in zip(speakers, uniq_video_ids)] + speakers = [speakers[i] for i in uniq_rec_idx] + video_ids = [video_ids[i] for i in uniq_rec_idx] file_paths = [] futures = [] @@ -178,15 +181,13 @@ def prepare(self): lists_cat_dir, rec_id, rec_files, - video_idx, + rec_idx, i, ) futures.append(future) logging.info("waiting threats...") file_paths = [f.result() for f in tqdm(futures)] - video_ids = uniq_video_ids - else: file_names = [f.with_suffix("").name for f in rec_files] if self.use_kaldi_ids: @@ -252,7 +253,7 @@ def prepare(self): dataset = Dataset( segments, {"speaker": speakers, "language_est": languages}, - {"recordings": recs}, + recs, ) logging.info("saving dataset at %s", self.output_dir) dataset.save(self.output_dir) diff --git a/hyperion/data_prep/voxsrc22.py b/hyperion/data_prep/voxsrc22.py index 79369149..f81f6eaf 100644 --- a/hyperion/data_prep/voxsrc22.py +++ b/hyperion/data_prep/voxsrc22.py @@ -127,6 +127,9 @@ def prepare_track12_dev(self): rec_ids = vox22_segmentid + vox1_segmentid rec_files = vox22_rec_files + vox1_rec_files + assert len(vox22_rec_files) > 0, "vox22 recording files not found" + assert len(vox1_rec_files) > 0, "vox1 recording files not found" + recs = pd.DataFrame({"id": rec_ids, "storage_path": rec_files}) recs = RecordingSet(recs) recs.sort() @@ -148,7 +151,7 @@ def prepare_track12_dev(self): logging.info("making dataset") dataset = Dataset( segments, - recordings={"recordings": recs}, + recordings=recs, enrollments=enrollments, trials=trials, sparse_trials=False, @@ -160,50 +163,6 @@ def prepare_track12_dev(self): len(segments), ) - # wav_file = voxsrc22_corpus_dir / file_id - # wav_file = vox1_corpus_dir / "wav" / file_id - # logging.info("searching audio files in %s", self.vox1_corpus_dir) - # vox1_rec_files = list(self.vox1_corpus_dir.glob("**/*.wav")) - # if not vox1_rec_files: - # # symlinks? 
try glob - # vox1_rec_files = [ - # Path(f) for f in glob.iglob(f"{self.vox1_corpus_dir}/**/*.wav", recursive=True) - # ] - - # vox1_rec_ids = [ f.parent.parent.name / f.parent.name / f.name for f in vox1_rec_files] - # rec_files = - - # rec_files = list(self.corpus_dir.glob("**/*.wav")) - # if not rec_files: - # # symlinks? try glob - # rec_files = [ - # Path(f) for f in glob.iglob(f"{self.corpus_dir}/**/*.wav", recursive=True) - # ] - - # u2s_file = output_dir / "utt2spk" - # logging.info("creating utt2spk file %s", u2s_file) - # file_ids = np.unique(np.concatenate((df_trials["enroll"], df_trials["test"]))) - # with open(u2s_file, "w") as f: - # for file_id in file_ids: - # f.write("%s %s\n" % (file_id, file_id)) - - # s2u_file = output_dir / "spk2utt" - # logging.info("creating spk2utt file %s", s2u_file) - # with open(s2u_file, "w") as f: - # for file_id in file_ids: - # f.write("%s %s\n" % (file_id, file_id)) - - # wav_file = output_dir / "wav.scp" - # logging.info("creating wav.scp file %s", wav_file) - # with open(wav_file, "w") as f: - # for file_id in file_ids: - # if "VoxSRC2022_dev" in file_id: - # wav_file = voxsrc22_corpus_dir / file_id - # else: - # wav_file = vox1_corpus_dir / "wav" / file_id - - # f.write("%s %s\n" % (file_id, wav_file)) - def prepare_track12_test(self): logging.info( "Preparing VoxSRC22 %s corpus:%s -> %s", diff --git a/hyperion/helpers/trial_data_reader.py b/hyperion/helpers/trial_data_reader.py index 4f33770b..85904eb2 100644 --- a/hyperion/helpers/trial_data_reader.py +++ b/hyperion/helpers/trial_data_reader.py @@ -16,7 +16,7 @@ from ..utils.utt2info import Utt2Info -class TrialDataReader(object): +class TrialDataReader: """ Loads Ndx, enroll file and x-vectors to evaluate PLDA. """ diff --git a/hyperion/helpers/vector_class_reader.py b/hyperion/helpers/vector_class_reader.py index c4c531ad..a9993768 100644 --- a/hyperion/helpers/vector_class_reader.py +++ b/hyperion/helpers/vector_class_reader.py @@ -49,7 +49,7 @@ def __init__( v[0]: int(v[1]) for v in [line.rstrip().split() for line in f] } - self.rng = np.random.RandomState(vcr_seed) + self.rng = np.random.default_rng(vcr_seed) self.csplit_max_spc = csplit_max_spc self.csplit_min_spc = csplit_min_spc self.csplit_mode = csplit_mode diff --git a/hyperion/io/ark_data_reader.py b/hyperion/io/ark_data_reader.py index 6cf22d5f..eaf76d49 100644 --- a/hyperion/io/ark_data_reader.py +++ b/hyperion/io/ark_data_reader.py @@ -223,8 +223,8 @@ def read( self._eof = True break - row_offset_i = row_offset[i] if row_offset_is_list else row_offset - num_rows_i = num_rows[i] if num_rows_is_list else num_rows + row_offset_i = row_offset[count] if row_offset_is_list else row_offset + num_rows_i = num_rows[count] if num_rows_is_list else num_rows binary = init_kaldi_input_stream(self.f) data_i = KaldiMatrix.read( @@ -269,7 +269,7 @@ def __init__( self, file_path: PathLike, path_prefix: Optional[PathLike] = None, **kwargs ): super().__init__(file_path, permissive=False, **kwargs) - self.feature_set = FeatureSet.load(self.file_path, sep=scp_sep) + self.feature_set = FeatureSet.load(self.file_path) if self.num_parts > 1: self.feature_set = self.feature_set.split(self.part_idx, self.num_parts) diff --git a/hyperion/io/audio_reader.py b/hyperion/io/audio_reader.py index 6c152cc5..a1adaef0 100644 --- a/hyperion/io/audio_reader.py +++ b/hyperion/io/audio_reader.py @@ -55,7 +55,7 @@ def __init__( self, recordings: Union[RecordingSet, PathLike], segments: Union[SegmentSet, PathLike, None] = None, - wav_scale: float = 2 ** 15 - 1, 
+ wav_scale: float = 1.0, ): if not isinstance(recordings, RecordingSet): recordings = RecordingSet.load(recordings) @@ -255,7 +255,7 @@ def __init__( self, recordings: Union[RecordingSet, PathLike], segments: Union[SegmentSet, PathLike, None] = None, - wav_scale: float = 2 ** 15 - 1, + wav_scale: float = 1.0, part_idx: int = 1, num_parts: int = 1, ): @@ -373,7 +373,8 @@ def add_class_args(parser, prefix: Optional[str] = None): parser.add_argument( "--wav-scale", - default=2 ** 15 - 1, + default=1.0, + # default=2 ** 15 - 1, type=float, help=("multiplicative factor for waveform"), ) @@ -399,8 +400,7 @@ def add_class_args(parser, prefix: Optional[str] = None): if prefix is not None: outer_parser.add_argument( - "--" + prefix, - action=ActionParser(parser=parser), + "--" + prefix, action=ActionParser(parser=parser), ) add_argparse_args = add_class_args @@ -411,7 +411,7 @@ def __init__( self, recordings: Union[RecordingSet, PathLike], segments: Union[SegmentSet, PathLike, None] = None, - wav_scale: float = 2 ** 15 - 1, + wav_scale: float = 1.0, ): super().__init__(recordings, segments, wav_scale) @@ -524,14 +524,14 @@ def add_class_args(parser, prefix: Optional[str] = None): parser.add_argument( "--wav-scale", - default=2 ** 15 - 1, + default=1.0, + # default=2 ** 15 - 1, type=float, help=("multiplicative factor for waveform"), ) if prefix is not None: outer_parser.add_argument( - "--" + prefix, - action=ActionParser(parser=parser), + "--" + prefix, action=ActionParser(parser=parser), ) add_argparse_args = add_class_args diff --git a/hyperion/io/audio_writer.py b/hyperion/io/audio_writer.py index e416c209..ca0dde9f 100644 --- a/hyperion/io/audio_writer.py +++ b/hyperion/io/audio_writer.py @@ -27,12 +27,33 @@ "DOUBLE": "float64", "MS_ADPCM": "int16", "ULAW": "int16", - "PCM_U8": "uint8", - "PCM_S8": "int8", + "PCM_S8": "int16", "VORBIS": "float32", "GSM610": "int16", "G721_32": "int16", - "PCM_24": "int24", + "PCM_24": "int32", +} + +scale_32 = 2 ** 31 - 1 +scale_24 = 2 ** 23 - 1 +scale_16 = 2 ** 15 - 1 +scale_8 = 2 ** 7 - 1 + + +subtype_to_scale = { + "PCM_32": scale_32, + "ALAW": scale_16, + "IMA_ADPCM": scale_16, + "FLOAT": 1, + "PCM_16": scale_16, + "DOUBLE": 1, + "MS_ADPCM": scale_16, + "ULAW": scale_16, + "PCM_S8": scale_8, + "VORBIS": 1, + "GSM610": scale_16, + "G721_32": scale_16, + "PCM_24": scale_24, } @@ -45,6 +66,7 @@ class AudioWriter(object): audio_format: audio file format audio_subtype: subtype of audio in [PCM_16, PCM_32, FLOAT, DOUBLE, ...], if None, it uses soundfile defaults (recommended) + wav_scale: scale of the input waveform """ def __init__( @@ -53,6 +75,7 @@ def __init__( script_path: Optional[PathLike] = None, audio_format: str = "wav", audio_subtype: Optional[str] = None, + wav_scale: float = 1.0, ): self.output_path = Path(output_path) self.script_path = Path(script_path) if script_path is not None else None @@ -63,9 +86,15 @@ def __init__( if audio_subtype is None: self.subtype = sf.default_subtype(self.audio_format) else: - self.subtype = audio_subtype + self.subtype = audio_subtype.upper() assert sf.check_format(self.audio_format, self.subtype) + self._dtype = subtype_to_npdtype[self.subtype] + + self.wav_scale = wav_scale + # we multiply the audio for this number before saving it. 
+ self._output_wav_scale = subtype_to_scale[self.subtype] / wav_scale + self.script_is_scp = False self.script_sep = None self.f_script = None @@ -78,7 +107,7 @@ def __init__( self.f_script = open(self.script_path, "w") else: self.script_sep = "," if script_ext == ".csv" else "\t" - self.f_script = open(self.script_path, "w", "utf-8") + self.f_script = open(self.script_path, "w", encoding="utf-8") row = self.script_sep.join( ["id", "storage_path", "duration", "sample_freq"] ) @@ -123,8 +152,7 @@ def write( data = [data] fs_is_list = isinstance(fs, (list, np.ndarray)) - assert self.subtype in subtype_to_npdtype - dtype = subtype_to_npdtype[self.subtype] + output_files = [] for i, key_i in enumerate(keys): assert is_token(key_i), "Token %s not valid" % key_i @@ -135,7 +163,7 @@ def write( self.audio_format, ) fs_i = int(fs[i]) if fs_is_list else fs - data_i = data[i].astype(dtype, copy=False) + data_i = (self._output_wav_scale * data[i]).astype(self._dtype, copy=False) sf.write(output_file, data_i, fs_i, subtype=self.subtype) output_files.append(output_file) @@ -156,14 +184,11 @@ def write( @staticmethod def filter_args(**kwargs): valid_args = ( - "output_fs", - "output_wav_scale", - "output_audio_format", - "output_audio_subtype", - ) - return dict( - (re.sub("output_", "", k), kwargs[k]) for k in valid_args if k in kwargs + "wav_scale", + "audio_format", + "audio_subtype", ) + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod def add_class_args(parser, prefix=None): @@ -171,23 +196,27 @@ def add_class_args(parser, prefix=None): outer_parser = parser parser = ArgumentParser(prog="") - # parser.add_argument(p1+'output-wav-scale', default=1, type=float, - # help=('scale to divide the waveform before writing')) - parser.add_argument( - "--output-audio-format", + "--audio-format", default="flac", choices=["flac", "ogg", "wav"], help=("ouput audio format"), ) parser.add_argument( - "--output-audio-subtype", + "--audio-subtype", default=None, - choices=["pcm_16", "pcm_24", "float", "double", "vorbis"], + choices=["pcm_16", "pcm_24", "pcm_32", "float", "double", "vorbis"], help=("coding format for audio file"), ) + try: + parser.add_argument( + "--wav-scale", default="1.0", help=("input waveform scale wrt 1"), + ) + except: + pass + if prefix is not None: outer_parser.add_argument( "--" + prefix, action=ActionParser(parser=parser), diff --git a/hyperion/io/hyp_data_reader.py b/hyperion/io/hyp_data_reader.py index 575c3087..63d463fb 100644 --- a/hyperion/io/hyp_data_reader.py +++ b/hyperion/io/hyp_data_reader.py @@ -76,9 +76,8 @@ def read_random_slice(self, key, num_samples, rng, field=""): dataset = key + field assert dataset in self.f, "Dataset %s not found" % dataset num_rows = self.f[dataset].shape[0] - # print('hola',num_rows,num_samples,num_rows-num_samples) - # index = rng.random_integers(low=0, high=num_rows-num_samples, size=1)[0] - index = rng.randint(low=0, high=num_rows - num_samples + 1) + + index = rng.integers(low=0, high=num_rows - num_samples + 1) X = self.f[dataset][index : index + num_samples] return X, index diff --git a/hyperion/io/packed_audio_reader.py b/hyperion/io/packed_audio_reader.py index 17f78bc2..fb17cb18 100644 --- a/hyperion/io/packed_audio_reader.py +++ b/hyperion/io/packed_audio_reader.py @@ -378,7 +378,8 @@ def add_class_args(parser, prefix=None): parser.add_argument( p1 + "wav-scale", - default=2 ** 15 - 1, + default=1.0, + # default=2 ** 15 - 1, type=float, help=("multiplicative factor for waveform"), ) @@ -633,7 +634,8 @@ def 
add_class_args(parser, prefix=None): parser.add_argument( p1 + "wav-scale", - default=2 ** 15, + default=1.0, + # default=2 ** 15, type=float, help=("multiplicative factor for waveform"), ) diff --git a/hyperion/io/rw_specifiers.py b/hyperion/io/rw_specifiers.py index 93123247..60e01ef1 100644 --- a/hyperion/io/rw_specifiers.py +++ b/hyperion/io/rw_specifiers.py @@ -387,11 +387,11 @@ def create(cls, rspecifier): if archive.suffix == ".csv": df = pd.read_csv(archive, nrows=2) storage_path = df["storage_path"].values[0] - if re.match(r".*\.h5$", scp_f2) is not None: + if re.match(r".*\.h5$", storage_path) is not None: archive_type = ArchiveType.H5 - elif re.match(r".*\.ark$", scp_f2) is not None: + elif re.match(r".*\.ark$", storage_path) is not None: archive_type = ArchiveType.ARK - elif re.match(r".*[cvg]$", scp_f2) is not None: + elif re.match(r".*[cvg]$", storage_path) is not None: archive_type = ArchiveType.AUDIO else: raise ValueError(f"Unknown format for {storage_path}") diff --git a/hyperion/np/augment/noise_augment.py b/hyperion/np/augment/noise_augment.py index 799db930..1cc1a0be 100644 --- a/hyperion/np/augment/noise_augment.py +++ b/hyperion/np/augment/noise_augment.py @@ -26,7 +26,7 @@ class SingleNoiseAugment(object): min_snr: mininimum SNR(dB) to sample from. max_snr: maximum SNR(dB) to sample from. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). """ def __init__( @@ -46,7 +46,7 @@ def __init__( self.cache = None self.lock = multiprocessing.Lock() if rng is None: - self.rng = np.random.RandomState(seed=random_seed) + self.rng = np.random.default_rng(seed=random_seed) else: self.rng = deepcopy(rng) @@ -96,7 +96,7 @@ def forward(self, x): while noise is None or noise.shape[0] < num_samples: with self.lock: - noise_idx = self.rng.randint(len(self.noise_keys)) + noise_idx = self.rng.integers(len(self.noise_keys)) key = self.noise_keys[noise_idx] noise_k, fs_k = self.r.read([key]) noise_k = noise_k[0] @@ -112,12 +112,22 @@ def forward(self, x): with self.lock: self.cache = noise_k[need_samples:] + num_zeros = np.sum(noise == 0) with self.lock: + # add dither for noises files with many 0s. + if num_zeros > len(noise) // 3: + noise += 0.0001 * self.rng.standard_normal( + noise.shape, dtype=noise.dtype + ) + target_snr = self.rng.uniform(self.min_snr, self.max_snr) + scale = self._compute_noise_scale(x, noise, target_snr) info = {"noise_type": self.noise_type, "snr": target_snr} - return x + scale * noise, info + y = x + scale * noise + + return y, info def __call__(self, x): return self.forward(x) @@ -136,7 +146,7 @@ class NoiseAugment(object): is proportional to how often we want to sample a given noise type. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). """ def __init__(self, noise_prob, noise_types, random_seed=112358, rng=None): @@ -166,7 +176,7 @@ def __init__(self, noise_prob, noise_types, random_seed=112358, rng=None): self.lock = multiprocessing.Lock() if rng is None: - self.rng = np.random.RandomState(seed=random_seed) + self.rng = np.random.default_rng(seed=random_seed) else: self.rng = deepcopy(rng) @@ -177,7 +187,7 @@ def create(cls, cfg, random_seed=112358, rng=None): Args: cfg: YAML file path or dictionary with noise options. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). 
Returns: NoiseAugment object @@ -208,7 +218,7 @@ def forward(self, x): # decide whether to add noise or not with self.lock: - p = self.rng.random_sample() + p = self.rng.random() if p > self.noise_prob: # we don't add noise diff --git a/hyperion/np/augment/reverb_augment.py b/hyperion/np/augment/reverb_augment.py index cf4cc6cb..0b1f3596 100644 --- a/hyperion/np/augment/reverb_augment.py +++ b/hyperion/np/augment/reverb_augment.py @@ -39,7 +39,7 @@ class SingleReverbAugment(object): its first sample. preload_rirs: if True all RIRS are loaded into RAM. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). """ def __init__( @@ -80,7 +80,7 @@ def __init__( self.lock = multiprocessing.Lock() if rng is None: - self.rng = np.random.RandomState(seed=random_seed) + self.rng = np.random.default_rng(seed=random_seed) else: self.rng = deepcopy(rng) @@ -129,7 +129,7 @@ def forward(self, x): num_samples = x.shape[0] with self.lock: - rir_idx = self.rng.randint(len(self.rir_keys)) + rir_idx = self.rng.integers(len(self.rir_keys)) if self.preload_rirs: h = self.rirs[rir_idx] @@ -155,6 +155,7 @@ def forward(self, x): "h_max": h_max, "h_delay": h_delay, } + return y, info def __call__(self, x): @@ -176,7 +177,7 @@ class ReverbAugment(object): max_reverb_context: number of samples required as left context for the convolution operation. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). """ def __init__( @@ -210,7 +211,7 @@ def __init__( self.lock = multiprocessing.Lock() if rng is None: - self.rng = np.random.RandomState(seed=random_seed) + self.rng = np.random.default_rng(seed=random_seed) else: self.rng = deepcopy(rng) @@ -221,7 +222,7 @@ def create(cls, cfg, random_seed=112358, rng=None): Args: cfg: YAML file path or dictionary with reverb options. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). Returns: ReverbAugment object. @@ -267,7 +268,7 @@ def forward(self, x): # decide whether to add reverb or not with self.lock: - p = self.rng.random_sample() + p = self.rng.random() if p > self.reverb_prob: # we don't add reverb diff --git a/hyperion/np/augment/speech_augment.py b/hyperion/np/augment/speech_augment.py index 0b1233f1..c27ca321 100644 --- a/hyperion/np/augment/speech_augment.py +++ b/hyperion/np/augment/speech_augment.py @@ -37,7 +37,7 @@ def create(cls, cfg, random_seed=112358, rng=None): Args: cfg: YAML file path or dictionary with noise options. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). Returns: SpeechAugment object. diff --git a/hyperion/np/augment/speed_augment.py b/hyperion/np/augment/speed_augment.py index 18a15651..a648190d 100644 --- a/hyperion/np/augment/speed_augment.py +++ b/hyperion/np/augment/speed_augment.py @@ -22,7 +22,7 @@ class SpeedAugment(object): keep_length: applies padding or cropping to keep the lenght of the signal. random_seed: random seed for random number generator. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). """ def __init__( @@ -34,14 +34,16 @@ def __init__( rng=None, ): logging.info( - "init speed augment with prob={}, speed_ratios={}, keep_length={}". 
- format(speed_prob, speed_ratios, keep_length)) + "init speed augment with prob={}, speed_ratios={}, keep_length={}".format( + speed_prob, speed_ratios, keep_length + ) + ) self.speed_prob = speed_prob self.speed_ratios = speed_ratios self.keep_length = keep_length if rng is None: - self.rng = np.random.RandomState(seed=random_seed) + self.rng = np.random.default_rng(seed=random_seed) else: self.rng = deepcopy(rng) @@ -52,7 +54,7 @@ def create(cls, cfg, random_seed=112358, rng=None): Args: cfg: YAML file path or dictionary with noise options. rng: Random number generator returned by - np.random.RandomState (optional). + np.random.default_rng (optional). Returns: NoiseAugment object. @@ -84,7 +86,7 @@ def forward(self, x): """ # decide whether to add noise or not - p = self.rng.random_sample() + p = self.rng.random() if p > self.speed_prob: # we don't add speed perturbation info = {"speed_ratio": 1} @@ -98,14 +100,12 @@ def forward(self, x): # print(f"1 r={r} {x.shape} {y.shape}", flush=True) if self.keep_length: if r > 1: - dither = np.max(x) / 2**15 # we add some dither in the padding - pad_y = dither * np.ones( - (x.shape[-1] - y.shape[-1], ), dtype=y.dtype) + dither = np.max(x) / 2 ** 15 # we add some dither in the padding + pad_y = dither * np.ones((x.shape[-1] - y.shape[-1],), dtype=y.dtype) y = np.concatenate((y, pad_y), axis=-1) elif r < 1: - y = y[:x.shape[-1]] + y = y[: x.shape[-1]] - # print(f"2 r={r} {x.shape} {y.shape}", flush=True) return y, info def __call__(self, x): diff --git a/hyperion/np/classifiers/binary_logistic_regression.py b/hyperion/np/classifiers/binary_logistic_regression.py index 82a84529..e77115cd 100644 --- a/hyperion/np/classifiers/binary_logistic_regression.py +++ b/hyperion/np/classifiers/binary_logistic_regression.py @@ -29,7 +29,7 @@ class BinaryLogisticRegression(LogisticRegression): In this case, x becomes [x, bias_scaling], i.e. a “synthetic” feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) bias_scaling has to be increased. priors: prior prob for having a positive sample. - random_state: RandomState instance or None, optional, default: None + random_state: default_rng instance or None, optional, default: None Used when solver == ‘sag’ or ‘liblinear’. solver: {‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}, default: ‘liblinear’ Algorithm to use in the optimization problem. diff --git a/hyperion/np/classifiers/greedy_fusion.py b/hyperion/np/classifiers/greedy_fusion.py index 842b850e..f03a05a0 100644 --- a/hyperion/np/classifiers/greedy_fusion.py +++ b/hyperion/np/classifiers/greedy_fusion.py @@ -42,8 +42,8 @@ class GreedyFusionBinaryLR(NPModel): In this case, x becomes [x, bias_scaling], i.e. a “synthetic” feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) bias_scaling has to be increased. priors: prior prob for having a positive sample. 
- random_state: int, RandomState instance or None, optional, default: None - The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; . Used when solver == ‘sag’ or ‘liblinear’. + random_state: int, default_rng instance or None, optional, default: None + The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; If default_rng instance, random_state is the random number generator; . Used when solver == ‘sag’ or ‘liblinear’. solver: {‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}, default: ‘liblinear’ Algorithm to use in the optimization problem. For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and diff --git a/hyperion/np/classifiers/linear_gbe.py b/hyperion/np/classifiers/linear_gbe.py index a6b8c7cc..f551af14 100644 --- a/hyperion/np/classifiers/linear_gbe.py +++ b/hyperion/np/classifiers/linear_gbe.py @@ -10,7 +10,7 @@ from scipy.special import gammaln from ...hyp_defs import float_cpu -from ...utils.math import int2onehot, invert_pdmat, logdet_pdmat, softmax +from ...utils.math_funcs import int2onehot, invert_pdmat, logdet_pdmat, softmax from ..np_model import NPModel @@ -426,7 +426,8 @@ def add_class_args(parser, prefix=None): parser.add_argument("--name", default="lgbe", help="model name") if prefix is not None: outer_parser.add_argument( - "--" + prefix, action=ActionParser(parser=parser), + "--" + prefix, + action=ActionParser(parser=parser), ) @staticmethod @@ -468,7 +469,8 @@ def add_eval_args(parser, prefix=None): ) if prefix is not None: outer_parser.add_argument( - "--" + prefix, action=ActionParser(parser=parser), + "--" + prefix, + action=ActionParser(parser=parser), ) add_argparse_args = add_class_args diff --git a/hyperion/np/classifiers/linear_gbe_up.py b/hyperion/np/classifiers/linear_gbe_up.py index 8566aeab..37ac9656 100644 --- a/hyperion/np/classifiers/linear_gbe_up.py +++ b/hyperion/np/classifiers/linear_gbe_up.py @@ -9,8 +9,13 @@ from scipy.special import gammaln from ...hyp_defs import float_cpu -from ...utils.math import (fullcov_varfloor, int2onehot, invert_pdmat, - logdet_pdmat, softmax) +from ...utils.math_funcs import ( + fullcov_varfloor, + int2onehot, + invert_pdmat, + logdet_pdmat, + softmax, +) from ..np_model import NPModel from .linear_gbe import LinearGBE diff --git a/hyperion/np/classifiers/linear_svmc.py b/hyperion/np/classifiers/linear_svmc.py index 5d743a46..6a977df9 100644 --- a/hyperion/np/classifiers/linear_svmc.py +++ b/hyperion/np/classifiers/linear_svmc.py @@ -10,7 +10,7 @@ from sklearn.svm import LinearSVC as SVC from ...hyp_defs import float_cpu -from ...utils.math import softmax +from ...utils.math_funcs import softmax from ..np_model import NPModel @@ -41,7 +41,7 @@ class LinearSVMC(NPModel): The “balanced” mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y)). - random_state: RandomState instance or None, optional, default: None + random_state: default_rng instance or None, optional, default: None max_iter: int, default: 100 Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge. @@ -61,7 +61,7 @@ class LinearSVMC(NPModel): penalty and dual will be ignored. 
verbose: int, default: 0 balance_class_weight: if True and class_weight is None, it makes class_weight="balanced". - lr_seed: seed form RandomState, used when random_state is None. + lr_seed: seed form default_rng, used when random_state is None. labels: list of class labels """ @@ -93,7 +93,7 @@ def __init__( class_weight = "balanced" if random_state is None: - random_state = np.random.RandomState(seed=lr_seed) + random_state = np.random.default_rng(seed=lr_seed) self.use_bias = use_bias self.bias_scaling = bias_scaling diff --git a/hyperion/np/classifiers/logistic_regression.py b/hyperion/np/classifiers/logistic_regression.py index 8e3d7e2e..4c4c0cfc 100644 --- a/hyperion/np/classifiers/logistic_regression.py +++ b/hyperion/np/classifiers/logistic_regression.py @@ -9,7 +9,7 @@ from sklearn.linear_model import LogisticRegression as LR from ...hyp_defs import float_cpu -from ...utils.math import softmax +from ...utils.math_funcs import softmax from ..np_model import NPModel @@ -36,7 +36,7 @@ class LogisticRegression(NPModel): Weights associated with classes in the form {class_label: weight}. If not given, all classes are supposed to have weight one. The “balanced” mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y)). Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. - random_state: RandomState instance or None, optional, default: None + random_state: default_rng instance or None, optional, default: None Used when solver == ‘sag’ or ‘liblinear’. solver: {‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}, default: ‘liblinear’ Algorithm to use in the optimization problem. 
@@ -93,7 +93,7 @@ def __init__( super().__init__(**kwargs) if random_state is None: - random_state = np.random.RandomState(seed=lr_seed) + random_state = np.random.default_rng(seed=lr_seed) if bias_scaling is None: if use_bias and solver == "liblinear": diff --git a/hyperion/np/classifiers/q_scoring_homo_gbe.py b/hyperion/np/classifiers/q_scoring_homo_gbe.py index 9e54e0f4..3345dd72 100644 --- a/hyperion/np/classifiers/q_scoring_homo_gbe.py +++ b/hyperion/np/classifiers/q_scoring_homo_gbe.py @@ -9,7 +9,7 @@ from scipy.special import gammaln from ...hyp_defs import float_cpu -from ...utils.math import int2onehot, invert_pdmat, logdet_pdmat, softmax +from ...utils.math_funcs import int2onehot, invert_pdmat, logdet_pdmat, softmax from ..np_model import NPModel diff --git a/hyperion/np/classifiers/svmc.py b/hyperion/np/classifiers/svmc.py index 6b54034b..ac5211ef 100644 --- a/hyperion/np/classifiers/svmc.py +++ b/hyperion/np/classifiers/svmc.py @@ -12,7 +12,7 @@ from sklearn.svm import SVC from ...hyp_defs import float_cpu -from ...utils.math import softmax +from ...utils.math_funcs import softmax from ...utils.misc import filter_func_args from ..np_model import NPModel @@ -49,7 +49,7 @@ def __init__( class_weight = "balanced" if random_state is None: - random_state = np.random.RandomState(seed=lr_seed) + random_state = np.random.default_rng(seed=lr_seed) self.C = C self.kernel = kernel diff --git a/hyperion/np/feats/energy_vad.py b/hyperion/np/feats/energy_vad.py index 5b9eb751..1d578c68 100644 --- a/hyperion/np/feats/energy_vad.py +++ b/hyperion/np/feats/energy_vad.py @@ -5,6 +5,7 @@ import logging import numpy as np +from jsonargparse import ActionParser, ArgumentParser from scipy.signal import lfilter from ...hyp_defs import float_cpu @@ -19,7 +20,7 @@ class EnergyVAD(object): sample_frequency: Waveform data sample frequency (must match the waveform file, if specified there) (default = 16000) frame_length: Frame length in milliseconds (default = 25) frame_shift: Frame shift in milliseconds (default = 10) - dither: Dithering constant (0.0 means no dither) (default = 1) + dither: Dithering constant (0.0 means no dither) (default = 2^(-15)) snip_edges: If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (default = True) vad_energy_mean_scale: If this is set to s, to get the actual threshold we let m be the mean log-energy of the file, and use s*m + vad-energy-threshold (float, default = 0.5) vad_energy_threshold: Constant term in energy threshold for MFCC0 for VAD (also see --vad-energy-mean-scale) (float, default = 5) @@ -32,7 +33,7 @@ def __init__( sample_frequency=16000, frame_length=25, frame_shift=10, - dither=1, + dither=1 / 2 ** 15, snip_edges=True, vad_energy_mean_scale=0.5, vad_energy_threshold=5, @@ -97,7 +98,7 @@ def compute(self, x, return_loge=False): # add dither if self.dither > 0: - n = self.dither * np.random.RandomState(seed=len(x)).randn( + n = self.dither * np.random.default_rng(seed=len(x)).randn( len(x) ).astype(float_cpu(), copy=False) x = x + n @@ -174,14 +175,12 @@ def add_class_args(parser, prefix=None): parser: Arguments parser prefix: Options prefix. """ - - if prefix is None: - p1 = "--" - else: - p1 = "--" + prefix + "." 
+ if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") parser.add_argument( - p1 + "sample-frequency", + "--sample-frequency", default=16000, type=int, help=( @@ -191,24 +190,21 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - p1 + "frame-length", - type=int, - default=25, - help="Frame length in milliseconds", + "--frame-length", type=int, default=25, help="Frame length in milliseconds", ) parser.add_argument( - p1 + "frame-shift", type=int, default=10, help="Frame shift in milliseconds" + "--frame-shift", type=int, default=10, help="Frame shift in milliseconds" ) parser.add_argument( - p1 + "dither", + "--dither", type=float, - default=1, + default=1 / 2 ** 15, help="Dithering constant (0.0 means no dither)", ) parser.add_argument( - p1 + "snip-edges", + "--snip-edges", default=True, type=str2bool, help=( @@ -221,7 +217,7 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - p1 + "vad-energy-mean-scale", + "--vad-energy-mean-scale", type=float, default=0.5, help=( @@ -231,13 +227,13 @@ def add_class_args(parser, prefix=None): ), ) parser.add_argument( - p1 + "vad-energy-threshold", + "--vad-energy-threshold", type=float, default=5, help="Constant term in energy threshold for MFCC0 for VAD", ) parser.add_argument( - p1 + "vad-frames-context", + "--vad-frames-context", type=int, default=0, help=( @@ -246,7 +242,7 @@ def add_class_args(parser, prefix=None): ), ) parser.add_argument( - p1 + "vad-proportion-threshold", + "--vad-proportion-threshold", type=float, default=0.6, help=( @@ -254,5 +250,7 @@ def add_class_args(parser, prefix=None): "the window that need to have more energy than the threshold" ), ) + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) add_argparse_args = add_class_args diff --git a/hyperion/np/feats/mfcc.py b/hyperion/np/feats/mfcc.py index cd98840d..b56728b8 100644 --- a/hyperion/np/feats/mfcc.py +++ b/hyperion/np/feats/mfcc.py @@ -6,6 +6,7 @@ from enum import Enum import numpy as np +from jsonargparse import ActionParser, ArgumentParser from scipy.fftpack import dct from scipy.signal import lfilter @@ -72,7 +73,7 @@ class MFCC(object): preemphasis_coeff: Coefficient for use in signal preemphasis (default = 0.97) window_type: Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"blackmann") (default = 'povey') use_fft2: If true, it uses |X(f)|^2, if false, it uses |X(f)|, (default = True) - dither: Dithering constant (0.0 means no dither) (default = 1) + dither: Dithering constant (0.0 means no dither) (default = 1/2**15) fb_type: Filter-bank type: mel_kaldi, mel_etsi, mel_librosa, mel_librosa_htk, linear (default = 'mel_kaldi') low_freq: Low cutoff frequency for mel bins (default = 20) high_freq: High cutoff frequency for mel bins (if < 0, offset from Nyquist) (default = 0) @@ -98,7 +99,7 @@ def __init__( preemphasis_coeff=0.97, window_type="povey", use_fft2=True, - dither=1, + dither=1 / 2 ** 15, fb_type="mel_kaldi", low_freq=20, high_freq=0, @@ -256,7 +257,7 @@ def compute(self, x, return_fft=False, return_spec=False, return_logfb=False): # add dither if self.dither > 0: - n = self.dither * np.random.RandomState(seed=len(x)).randn( + n = self.dither * np.random.default_rng(seed=len(x)).randn( len(x) ).astype(float_cpu(), copy=False) x = x + n @@ -400,14 +401,12 @@ def add_class_args(parser, prefix=None): parser: Arguments parser prefix: Options prefix. """ - - if prefix is None: - p1 = "--" - else: - p1 = "--" + prefix + "." 
+ if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") parser.add_argument( - p1 + "sample-frequency", + "--sample-frequency", default=16000, type=int, help="Waveform data sample frequency " @@ -415,27 +414,22 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - p1 + "frame-length", - type=int, - default=25, - help="Frame length in milliseconds", - ) - parser.add_argument( - p1 + "frame-shift", type=int, default=10, help="Frame shift in milliseconds" + "--frame-length", type=int, default=25, help="Frame length in milliseconds", ) parser.add_argument( - p1 + "fft-length", type=int, default=512, help="Length of FFT" + "--frame-shift", type=int, default=10, help="Frame shift in milliseconds" ) + parser.add_argument("--fft-length", type=int, default=512, help="Length of FFT") parser.add_argument( - p1 + "remove-dc-offset", + "--remove-dc-offset", default=True, type=str2bool, help="Subtract mean from waveform on each frame", ) parser.add_argument( - p1 + "preemphasis-coeff", + "--preemphasis-coeff", type=float, default=0.97, help="Coefficient for use in signal preemphasis", @@ -444,30 +438,30 @@ def add_class_args(parser, prefix=None): FWF.add_class_args(parser, prefix) parser.add_argument( - p1 + "use-fft2", + "--use-fft2", default=True, type=str2bool, help="If true, it uses |X(f)|^2, if false, it uses |X(f)|", ) parser.add_argument( - p1 + "dither", + "--dither", type=float, - default=1, + default=1 / 2 ** 15, help="Dithering constant (0.0 means no dither)", ) FBF.add_class_args(parser, prefix) parser.add_argument( - p1 + "num-ceps", + "--num-ceps", type=int, default=13, help="Number of cepstra in MFCC computation (including C0)", ) parser.add_argument( - p1 + "snip-edges", + "--snip-edges", default=True, type=str2bool, help=( @@ -480,34 +474,34 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - p1 + "energy-floor", + "--energy-floor", type=float, default=0, help="Floor on energy (absolute, not relative) in MFCC computation", ) parser.add_argument( - p1 + "raw-energy", + "--raw-energy", default=True, type=str2bool, help="If true, compute energy before preemphasis and windowing", ) parser.add_argument( - p1 + "use-energy", + "--use-energy", default=True, type=str2bool, help="Use energy (not C0) in MFCC computation", ) parser.add_argument( - p1 + "cepstral-lifter", + "--cepstral-lifter", type=float, default=22, help="Constant that controls scaling of MFCCs", ) parser.add_argument( - p1 + "input-step", + "--input-step", default="wave", choices=["wave", "fft", "spec", "log_spec", "logfb"], help=( @@ -516,7 +510,7 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - p1 + "output-step", + "--output-step", default="mfcc", choices=["fft", "spec", "log_spec", "logfb", "mfcc"], help=( @@ -524,4 +518,7 @@ def add_class_args(parser, prefix=None): ), ) + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + add_argparse_args = add_class_args diff --git a/hyperion/np/metrics/__init__.py b/hyperion/np/metrics/__init__.py index 36afdbf5..d45daba5 100644 --- a/hyperion/np/metrics/__init__.py +++ b/hyperion/np/metrics/__init__.py @@ -5,7 +5,10 @@ from .acc import compute_accuracy from .confusion_matrix import * -from .dcf import (compute_act_dcf, compute_dcf, compute_min_dcf, - fast_eval_dcf_eer) +from .dcf import compute_act_dcf, compute_dcf, compute_min_dcf, fast_eval_dcf_eer from .eer import compute_eer, compute_prbep from .utils import effective_prior +from 
.verification_evaluator import ( + VerificationEvaluator, + VerificationAdvAttackEvaluator, +) diff --git a/hyperion/np/metrics/cllr.py b/hyperion/np/metrics/cllr.py index ec816286..cd97a97c 100644 --- a/hyperion/np/metrics/cllr.py +++ b/hyperion/np/metrics/cllr.py @@ -5,7 +5,7 @@ import numpy as np -from ..utils.math import neglogsigmoid +from ..utils.math_funcs import neglogsigmoid from .utils import opt_loglr diff --git a/hyperion/np/metrics/utils.py b/hyperion/np/metrics/utils.py index 0715d809..e638fd1b 100644 --- a/hyperion/np/metrics/utils.py +++ b/hyperion/np/metrics/utils.py @@ -8,7 +8,7 @@ import numpy as np from ...hyp_defs import float_cpu -from ...utils.math import logsumexp, softmax +from ...utils.math_funcs import logsumexp, softmax def effective_prior(p_tar, c_miss, c_fa): diff --git a/hyperion/np/metrics/verification_evaluator.py b/hyperion/np/metrics/verification_evaluator.py index 2adf15cf..e35e7cf7 100644 --- a/hyperion/np/metrics/verification_evaluator.py +++ b/hyperion/np/metrics/verification_evaluator.py @@ -2,8 +2,6 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - - import copy import logging import re @@ -18,13 +16,13 @@ import matplotlib.pyplot as plt from ...hyp_defs import float_cpu -from ...utils import TrialKey, TrialScores +from ...utils import TrialKey, TrialScores, SparseTrialKey, SparseTrialScores from ...utils.trial_stats import TrialStats from .dcf import fast_eval_dcf_eer from .utils import effective_prior -class VerificationEvaluator(object): +class VerificationEvaluator: """Class computes performance metrics for verification problems. Same metrics can be obtained from fast_eval_dcf_eer functions @@ -34,21 +32,40 @@ class VerificationEvaluator(object): p_tar: target prior float or list/nparray sorted in ascending order c_miss: cost of miss c_fa: cost of false alarm - + key_name: name describing the key + score_name: name describing the score + sparse: use sparse versions of TrialScores and Keys """ - def __init__(self, key, scores, p_tar, c_miss=None, c_fa=None): - + def __init__( + self, + key, + scores, + p_tar, + c_miss=None, + c_fa=None, + key_name=None, + score_name=None, + sparse=False, + ): if isinstance(key, str): - logging.info("Load key: %s" % key) - key = TrialKey.load(key) + logging.info("Load key: %s", key) + if sparse: + key = SparseTrialKey.load(key) + else: + key = TrialKey.load(key) if isinstance(scores, str): - logging.info("Load scores: %s" % scores) - scores = TrialScores.load(scores) + logging.info("Load scores: %s", scores) + if sparse: + scores = SparseTrialScores.load(scores) + else: + scores = TrialScores.load(scores) self.key = key self.scores = scores.align_with_ndx(key) + self.key_name = key_name + self.score_name = score_name # compute effective prior is c_miss and c_fa are given if isinstance(p_tar, float): @@ -56,13 +73,16 @@ def __init__(self, key, scores, p_tar, c_miss=None, c_fa=None): p_tar = np.asarray(p_tar) if c_miss is not None and c_fa is not None: + assert len(c_miss) == len(p_tar) + assert len(c_fa) == len(p_tar) c_miss = np.asarray(c_miss) c_fa = np.asarray(c_fa) p_tar = effective_prior(p_tar, c_miss, c_fa) + self._p_tar_sort = np.argsort(p_tar) self.p_tar = p_tar - def compute_dcf_eer(self, return_df=False): + def compute_dcf_eer(self, return_df=True): """ Computes DCF/EER @@ -74,24 +94,38 @@ def compute_dcf_eer(self, return_df=False): """ logging.info("separating tar/non") tar, non = self.scores.get_tar_non(self.key) + ntar = 
len(tar)
+        nnon = len(non)
         logging.info("computing EER/DCF")
-        min_dcf, act_dcf, eer, _ = fast_eval_dcf_eer(tar, non, self.p_tar)
+        min_dcf, act_dcf, eer, _ = fast_eval_dcf_eer(
+            tar, non, self.p_tar[self._p_tar_sort]
+        )
+        min_dcf[self._p_tar_sort] = min_dcf.copy()
+        act_dcf[self._p_tar_sort] = act_dcf.copy()

         if not return_df:
-            return min_dcf, act_dcf, eer
+            return min_dcf, act_dcf, eer, ntar, nnon

         if len(self.p_tar) == 1:
             eer = [eer]
             min_dcf = [min_dcf]
             act_dcf = [act_dcf]

-        df = pd.DataFrame({"eer": eer})
-
+        df = pd.DataFrame(
+            {
+                "scores": [self.score_name],
+                "key": [self.key_name],
+                "eer": np.atleast_1d(eer),
+                "eer(%)": 100 * np.atleast_1d(eer),
+            }
+        )
         for i in range(len(min_dcf)):
             pi = self.p_tar[i]
             df["min-dcf-%.3f" % (pi)] = min_dcf[i]
             df["act-dcf-%.3f" % (pi)] = act_dcf[i]

+        df["num_targets"] = ntar
+        df["num_nontargets"] = nnon
         return df

@@ -116,9 +150,7 @@ class VerificationAdvAttackEvaluator(VerificationEvaluator):
     def __init__(
         self, key, scores, attack_scores, attack_stats, p_tar, c_miss=None, c_fa=None
     ):
-        super(VerificationAdvAttackEvaluator, self).__init__(
-            key, scores, p_tar, c_miss, c_fa
-        )
+        super().__init__(key, scores, p_tar, c_miss, c_fa)
         if not isinstance(attack_scores, list):
             attack_scores = [attack_scores]
         if not isinstance(attack_stats, list):
@@ -133,7 +165,7 @@ def __init__(
         if isinstance(attack_scores[0], str):
             l = []
             for file_path in attack_scores:
-                logging.info("Load attack scores: %s" % file_path)
+                logging.info("Load attack scores: %s", file_path)
                 scores = TrialScores.load(file_path)
                 l.append(scores)
             attack_scores = l
@@ -151,7 +183,7 @@ def __init__(
         if isinstance(attack_stats[0], str):
             l = []
             for file_path in attack_stats:
-                logging.info("Load attack stats: %s" % file_path)
+                logging.info("Load attack stats: %s", file_path)
                 scores = TrialStats.load(file_path)
                 l.append(scores)
             attack_stats = l
@@ -216,7 +248,7 @@ def compute_dcf_eer_vs_stats(
         stat_bins,
         attacked_trials="all",
         higher_better=False,
-        return_df=False,
+        return_df=True,
     ):
         """
         Computes DCF/EER versus SNR/Linf/etc curves
@@ -307,7 +339,7 @@ def find_best_attacks(
         threshold=None,
         prior_idx=0,
         higher_better=False,
-        return_df=False,
+        return_df=True,
     ):
         """
         Find the best attacks from the point of view of some of the stats.
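# The RandomState -> default_rng migration that runs through the pdfs below is
# not a mechanical rename: numpy's Generator API renames several methods, e.g.
# randn() becomes standard_normal(), and the two APIs use different underlying
# bit generators, so identical seeds do not reproduce identical samples across
# them. A minimal sketch, assuming only numpy:
#
#     import numpy as np
#
#     legacy = np.random.RandomState(seed=1024)
#     a = legacy.randn(16)                     # legacy API
#
#     rng = np.random.default_rng(seed=1024)
#     b = rng.standard_normal(16)              # Generator API; a != b despite same seed
#     c = rng.multivariate_normal(np.zeros(2), np.eye(2), size=(4,))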
E.g., diff --git a/hyperion/np/pdfs/core/normal.py b/hyperion/np/pdfs/core/normal.py index b8f8bb54..67872315 100644 --- a/hyperion/np/pdfs/core/normal.py +++ b/hyperion/np/pdfs/core/normal.py @@ -7,11 +7,20 @@ import scipy.linalg as la from ....hyp_defs import float_cpu -from ....utils.math import (fullcov_varfloor, invert_pdmat, invert_trimat, - logdet_pdmat, symmat2vec, vec2symmat) -from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D) +from ....utils.math_funcs import ( + fullcov_varfloor, + invert_pdmat, + invert_trimat, + logdet_pdmat, + symmat2vec, + vec2symmat, +) +from ....utils.plotting import ( + plot_gaussian_1D, + plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D, +) from .exp_family import ExpFamily @@ -213,7 +222,7 @@ def sample(self, num_samples, rng=None, seed=1024): assert self.is_init if rng is None: - rng = np.random.RandomState(seed) + rng = np.random.default_rng(seed) return rng.multivariate_normal(self.mu, self.Sigma, size=(num_samples,)).astype( float_cpu() ) diff --git a/hyperion/np/pdfs/core/normal_diag_cov.py b/hyperion/np/pdfs/core/normal_diag_cov.py index c9986f4c..23535112 100644 --- a/hyperion/np/pdfs/core/normal_diag_cov.py +++ b/hyperion/np/pdfs/core/normal_diag_cov.py @@ -7,9 +7,12 @@ from scipy.special import erf from ....hyp_defs import float_cpu -from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D) +from ....utils.plotting import ( + plot_gaussian_1D, + plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D, +) from .exp_family import ExpFamily @@ -183,7 +186,7 @@ def sample(self, num_samples, rng=None, seed=1024): """ assert self.is_init if rng is None: - rng = np.random.RandomState(seed) + rng = np.random.default_rng(seed) x = rng.normal(size=(num_samples, self.x_dim)).astype(float_cpu()) return self.mu + 1.0 / self.cholLambda * x diff --git a/hyperion/np/pdfs/hmm/hmm.py b/hyperion/np/pdfs/hmm/hmm.py index 80232e36..92d9c371 100644 --- a/hyperion/np/pdfs/hmm/hmm.py +++ b/hyperion/np/pdfs/hmm/hmm.py @@ -6,7 +6,7 @@ import numpy as np from ....hyp_defs import float_cpu -from ....utils.math import logsumexp, softmax +from ....utils.math_funcs import logsumexp, softmax from ..core import PDF @@ -232,7 +232,7 @@ def viterbi_decode(self, x, nbest=1): def sample(self, num_seqs, num_steps, rng=None, seed=1024): if rng is None: - rng = np.random.RandomState(seed) + rng = np.random.default_rng(seed) x = np.zeros((num_seqs, num_steps, self.num_states), dtype=float_cpu()) x[:, 0, :] = rng.multinomial(1, self.pi, size=(num_seqs,)) diff --git a/hyperion/np/pdfs/jfa/jfa_total.py b/hyperion/np/pdfs/jfa/jfa_total.py index 041431fb..6e2b79e3 100644 --- a/hyperion/np/pdfs/jfa/jfa_total.py +++ b/hyperion/np/pdfs/jfa/jfa_total.py @@ -7,8 +7,13 @@ from scipy import linalg as la from ....hyp_defs import float_cpu -from ....utils.math import (invert_pdmat, invert_trimat, logdet_pdmat, - symmat2vec, vec2symmat) +from ....utils.math_funcs import ( + invert_pdmat, + invert_trimat, + logdet_pdmat, + symmat2vec, + vec2symmat, +) from ..core.pdf import PDF diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py index 5560882c..2186522e 100644 --- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py +++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py @@ -7,7 +7,7 @@ import numpy as np from ....hyp_defs import float_cpu -from 
....utils.math import logsumexp, softmax +from ....utils.math_funcs import logsumexp, softmax from ....utils.queues import GeneratorQueue from ..core import PDF diff --git a/hyperion/np/pdfs/mixtures/gmm.py b/hyperion/np/pdfs/mixtures/gmm.py index ca197142..7b080dae 100644 --- a/hyperion/np/pdfs/mixtures/gmm.py +++ b/hyperion/np/pdfs/mixtures/gmm.py @@ -8,12 +8,22 @@ from scipy.special import erf from ....hyp_defs import float_cpu -from ....utils.math import (fullcov_varfloor, invert_pdmat, invert_trimat, - logdet_pdmat, logsumexp, softmax, symmat2vec, - vec2symmat) -from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D) +from ....utils.math_funcs import ( + fullcov_varfloor, + invert_pdmat, + invert_trimat, + logdet_pdmat, + logsumexp, + softmax, + symmat2vec, + vec2symmat, +) +from ....utils.plotting import ( + plot_gaussian_1D, + plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D, +) from ...clustering import KMeans from ..core import Normal from .exp_family_mixture import ExpFamilyMixture @@ -292,7 +302,7 @@ def sample(self, num_samples, rng=None, seed=1024, r=None): Generated samples with shape (num_samples, x_dim). """ if rng is None: - rng = np.random.RandomState(seed) + rng = np.random.default_rng(seed) if r is None: r = rng.multinomial(1, self.pi, size=(num_samples,)) diff --git a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py index 90141573..7589243e 100644 --- a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py +++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py @@ -8,10 +8,13 @@ from scipy.special import erf from ....hyp_defs import float_cpu -from ....utils.math import logsumexp, softmax -from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D) +from ....utils.math_funcs import logsumexp, softmax +from ....utils.plotting import ( + plot_gaussian_1D, + plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D, +) from ...clustering import KMeans from .exp_family_mixture import ExpFamilyMixture @@ -262,7 +265,7 @@ def sample(self, num_samples=1, rng=None, seed=1024, r=None): Generated samples with shape (num_samples, x_dim). """ if rng is None: - rng = np.random.RandomState(seed) + rng = np.random.default_rng(seed) if r is None: r = rng.multinomial(1, self.pi, size=(num_samples,)) diff --git a/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py index 4dc8f46e..6ef7c891 100644 --- a/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py +++ b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py @@ -7,10 +7,13 @@ from scipy.special import erf from ....hyp_defs import float_cpu -from ....utils.math import logsumexp, softmax -from ....utils.plotting import (plot_gaussian_1D, plot_gaussian_3D, - plot_gaussian_ellipsoid_2D, - plot_gaussian_ellipsoid_3D) +from ....utils.math_funcs import logsumexp, softmax +from ....utils.plotting import ( + plot_gaussian_1D, + plot_gaussian_3D, + plot_gaussian_ellipsoid_2D, + plot_gaussian_ellipsoid_3D, +) from ...clustering import KMeans from .gmm_diag_cov import GMMDiagCov @@ -193,7 +196,7 @@ def sample(self, num_samples=1, rng=None, seed=1024, r=None): Generated samples with shape (num_samples, x_dim). 
""" if rng is None: - rng = np.random.RandomState(seed) + rng = np.random.default_rng(seed) if r is None: r = rng.multinomial(1, self.pi, size=(num_samples,)) diff --git a/hyperion/np/pdfs/plda/frplda.py b/hyperion/np/pdfs/plda/frplda.py index 183725a7..af8c5d8b 100644 --- a/hyperion/np/pdfs/plda/frplda.py +++ b/hyperion/np/pdfs/plda/frplda.py @@ -7,7 +7,7 @@ from scipy import linalg as sla from ....hyp_defs import float_cpu -from ....utils.math import invert_pdmat, invert_trimat, logdet_pdmat +from ....utils.math_funcs import invert_pdmat, invert_trimat, logdet_pdmat from .plda_base import PLDABase @@ -465,7 +465,7 @@ def sample( assert self.is_init if rng is None: - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) Sb = invert_pdmat(self.B, return_inv=True)[-1] chol_Sb = sla.cholesky(Sb, lower=False) diff --git a/hyperion/np/pdfs/plda/plda.py b/hyperion/np/pdfs/plda/plda.py index fd2eb9a9..76299970 100644 --- a/hyperion/np/pdfs/plda/plda.py +++ b/hyperion/np/pdfs/plda/plda.py @@ -7,7 +7,7 @@ from scipy import linalg as sla from ....hyp_defs import float_cpu -from ....utils.math import invert_pdmat, invert_trimat, logdet_pdmat +from ....utils.math_funcs import invert_pdmat, invert_trimat, logdet_pdmat from .plda_base import PLDABase @@ -674,7 +674,7 @@ def sample(self, num_classes, num_samples_per_class, rng=None, seed=1024): Generated samples with shape (num_samples, x_dim). """ if rng is None: - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) x_dim = self.mu.shape[0] diff --git a/hyperion/np/pdfs/plda/splda.py b/hyperion/np/pdfs/plda/splda.py index f9322d26..5d397183 100644 --- a/hyperion/np/pdfs/plda/splda.py +++ b/hyperion/np/pdfs/plda/splda.py @@ -6,7 +6,7 @@ from scipy import linalg as sla from ....hyp_defs import float_cpu -from ....utils.math import invert_pdmat, invert_trimat, logdet_pdmat +from ....utils.math_funcs import invert_pdmat, invert_trimat, logdet_pdmat from .plda_base import PLDABase @@ -502,7 +502,7 @@ def sample(self, num_classes, num_samples_per_class, rng=None, seed=1024): Generated samples with shape (num_samples, x_dim). """ if rng is None: - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) Sw = invert_pdmat(self.W, return_inv=True)[-1] chol_Sw = sla.cholesky(Sw, lower=False) diff --git a/hyperion/np/transforms/skl_tsne.py b/hyperion/np/transforms/skl_tsne.py index 3f60c4be..ebabc6ec 100644 --- a/hyperion/np/transforms/skl_tsne.py +++ b/hyperion/np/transforms/skl_tsne.py @@ -23,7 +23,7 @@ class SklTSNE(NPModel): metric: the metric to use when calculating distance between instances in ['cosine', 'euclidean', 'l1', 'l2', 'precomputed'] or callable function. init: initialization method in ['random', 'pca'] or embedding matrix of shape (num_samples, num_comp) verbose: verbosity level. 
-      rng: RandomState instance
+      rng: numpy random Generator instance
       rng_seed: seed for random number generator
       method: gradient calculation method in ['barnes_hut', 'exact']
       angle: angle theta in Barnes-Hut TSNE
@@ -53,7 +53,7 @@ def __init__(
         super().__init__(**kwargs)
         self.rng_seed = rng_seed
         if rng is None:
-            rng = np.random.RandomState(seed=rng_seed)
+            rng = np.random.default_rng(seed=rng_seed)

         self._tsne = TSNE(
             n_components=tsne_dim,
diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py
index fa675fdb..f91d7d96 100644
--- a/hyperion/torch/data/audio_dataset.py
+++ b/hyperion/torch/data/audio_dataset.py
@@ -304,6 +304,7 @@ def __getitem__(self, segment):
         x, fs = self._read_audio(seg_id, start, duration)
         x, fs = self._resample(x, fs)
         data = {"seg_id": seg_id, "sample_freq": fs}
+
         if self.augmenters:
             # augmentations
             if duration == 0:
@@ -324,6 +325,17 @@ def __getitem__(self, segment):
         seg_info = self._get_segment_info(seg_id)
         data.update(seg_info)

+        if np.any(~np.isfinite(data["x"])):
+            logging.warning(
+                "non-finite values in segment %s: x=(%f, %f, %f) x-aug=(%f, %f, %f)",
+                seg_id,
+                x.max(),
+                x.min(),
+                x.mean(),
+                data["x"].max(),
+                data["x"].min(),
+                data["x"].mean(),
+            )
         return data

     @staticmethod
diff --git a/hyperion/torch/layers/audio_feats_factory.py b/hyperion/torch/layers/audio_feats_factory.py
index a8398dac..6d0b4df4 100644
--- a/hyperion/torch/layers/audio_feats_factory.py
+++ b/hyperion/torch/layers/audio_feats_factory.py
@@ -315,7 +315,7 @@ def add_class_args(parser, prefix=None):
         parser.add_argument(
             "--dither",
             type=float,
-            default=1,
+            default=1.0 / 2 ** 15,
             help="Dithering constant (0.0 means no dither)",
         )
diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py
index 06838ddd..29b6cdaa 100644
--- a/hyperion/torch/models/__init__.py
+++ b/hyperion/torch/models/__init__.py
@@ -7,11 +7,19 @@
 from .transducer import RNNRNNTransducer, RNNTransducer
 from .vae.vae import VAE
 from .vae.vq_vae import VQVAE
-from .wav2transducer import (HFWav2Vec2ConformerV1RNNTransducer,
-                             HFWav2Vec2RNNRNNTransducer,
-                             HFWav2Vec2RNNTransducer, HFWav2Vec2Transducer)
-from .wav2xvectors import (HFHubert2ResNet1dXVector, HFWav2Vec2ResNet1dXVector,
-                           HFWavLM2ResNet1dXVector)
+from .wav2transducer import (
+    HFWav2Vec2ConformerV1RNNTransducer,
+    HFWav2Vec2RNNRNNTransducer,
+    HFWav2Vec2RNNTransducer,
+    HFWav2Vec2Transducer,
+)
+from .wav2xvectors import (
+    HFHubert2ResNet1dXVector,
+    HFWav2Vec2ResNet1dXVector,
+    HFWavLM2ResNet1dXVector,
+    Wav2ResNetXVector,
+    Wav2ResNet1dXVector,
+)
 from .xvectors.efficient_net_xvector import EfficientNetXVector
 from .xvectors.resnet1d_xvector import ResNet1dXVector
 from .xvectors.resnet_xvector import ResNetXVector
diff --git a/hyperion/torch/models/plda/splda.py b/hyperion/torch/models/plda/splda.py
index 2272793e..3a0f1dee 100644
--- a/hyperion/torch/models/plda/splda.py
+++ b/hyperion/torch/models/plda/splda.py
@@ -8,7 +8,7 @@
 import torch
 import torch.nn as nn

-from ...utils.math import invert_trimat
+from ...utils.math_funcs import invert_trimat
 from .plda_base import PLDABase
diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py
index c2bcdf99..24ab5bbb 100644
--- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py
+++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py
@@ -224,7 +224,7 @@ def extract_embed(
     ):

         if vad_samples is not None:
-            x, x_lengths = remove_silence(x, x_lengths)
+            x, x_lengths = remove_silence(x, vad_samples, x_lengths)

         feats, _, feat_lengths = self.forward_feats(
             x, x_lengths,
chunk_length=hf_chunk_length, detach_chunks=detach_chunks @@ -301,7 +301,7 @@ def set_train_mode(self, mode): logging.info("train mode set to %s", mode) - if "nograd" in mode: + if "nograd" in mode or mode == "ft-embed-affine": logging.info("using torch.no_grad for hf_feats") self._hf_context = torch.no_grad() else: diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py index 0d9f1bc4..0e4faded 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py @@ -52,3 +52,21 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = ResNet1dXVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ResNet1dXVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py index 1f7283a0..11d643af 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py @@ -52,3 +52,21 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = ResNetXVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ResNetXVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index 4c21f478..4bbc0c4c 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import contextlib import logging from jsonargparse import ActionParser, ArgumentParser @@ -35,6 +36,23 @@ def __init__(self, feats, xvector): self.feats = feats self.xvector = xvector + self._feats_context = contextlib.nullcontext() + + @property + def sample_frequency(self): + return self.feats.sample_frequency + + def compute_prototype_affinity(self): + return self.xvector.compute_prototype_affinity() + + def update_loss_margin(self, epoch): + """Updates the value of the margin in AAM/AM-softmax losses + given the epoch number + + Args: + epoch: epoch which is about to start + """ + self.xvector.update_loss_margin(epoch) def rebuild_output_layer( self, @@ -58,8 +76,9 @@ def rebuild_output_layer( num_subcenters=num_subcenters, ) - def compute_prototype_affinity(self): - return self.xvector.compute_prototype_affinity() + def change_config(self, xvector): + logging.info("changing 
wav2xvector config") + self.xvector.change_config(**xvector) def forward( self, @@ -73,15 +92,28 @@ def forward( return_output=True, ): - if vad_samples is not None: - x, x_lengths = remove_silence(x, x_lengths) - feats, feat_lengths = self.feats(x, x_lengths) - if vad_feats is not None: - feats, feat_lengths = remove_silence(feats, feat_lengths) - - # feat_lengths = torch.div(x_lengths * feats.size(-1), x.size(-1)) - return self.xvector(feats, feat_lengths, y, enc_layers, classif_layers, - return_output) + with self._feats_context: + if vad_samples is not None: + x, x_lengths = remove_silence(x, vad_samples, x_lengths) + + feats, feat_lengths = self.feats(x, x_lengths) + if vad_feats is not None: + feats, feat_lengths = remove_silence(feats, vad_feats, feat_lengths) + + n = torch.sum(~torch.isfinite(feats)) + if n > 0: + print( + "feats", + n, + torch.sum(torch.isnan(feats)), + torch.sum(torch.any(torch.isnan(x), dim=-1)), + x.dtype, + feats.dtype, + flush=True, + ) + return self.xvector( + feats, feat_lengths, y, enc_layers, classif_layers, return_output + ) def extract_embed( self, @@ -94,18 +126,54 @@ def extract_embed( detach_chunks=False, ): - if vad_samples is not None: - x, x_lengths = remove_silence(x, x_lengths) - feats, feat_lengths = self.feats(x, x_lengths) - if vad_feats is not None: - feats, feat_lengths = remove_silence(feats, feat_lengths) + with self._feats_context: + if vad_samples is not None: + x, x_lengths = remove_silence(x, vad_samples, x_lengths) - feats = feats.transpose(1, 2) - return self.xvector.extract_embed(feats, feat_lengths, chunk_length, - embed_layer, detach_chunks) + feats, feat_lengths = self.feats(x, x_lengths) + if vad_feats is not None: + feats, feat_lengths = remove_silence(feats, vad_feats, feat_lengths) + + chunk_length = int(chunk_length * feats.shape[1] / x.shape[-1]) + + return self.xvector.extract_embed( + feats, feat_lengths, chunk_length, embed_layer, detach_chunks + ) def set_train_mode(self, mode): - self.xvector.set_train_mode(mode) + if mode == self._train_mode: + return + + if mode == "full-feats-grad": + self._feats_context = contextlib.nullcontext() + xvector_mode = "full" + else: + logging.info("using torch.no_grad for feats") + self._feats_context = torch.no_grad() + + self.xvector.set_train_mode(xvector_mode) + self._train_mode = mode + + def _train(self, train_mode: str): + + self.feats.train() + if train_mode in ["frozen"]: + super()._train(train_mode) + elif train_mode in ["full-feats-grad", "full"]: + self.xvector._train("full") + elif train_mode == "ft-embed-affine": + self.xvector._train("ft-embed_affine") + else: + raise ValueError(f"invalid train_mode={train_mode}") + + @staticmethod + def valid_train_modes(): + return [ + "full", + "frozen", + "ft-embed-affine", + "full-feats-grad", + ] def get_config(self): feat_cfg = self.feats.get_config() @@ -119,7 +187,7 @@ def get_config(self): return dict(list(base_config.items()) + list(config.items())) @staticmethod - def filter_args(*kwargs): + def filter_args(**kwargs): """Filters Wav2XVector class arguments from arguments dictionary. 
Args: @@ -150,5 +218,4 @@ def add_class_args(parser, prefix=None): AudioFeatsMVN.add_class_args(parser, prefix="feats") if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/audio_feats_mvn.py b/hyperion/torch/narchs/audio_feats_mvn.py index a9ad224e..440c22b6 100644 --- a/hyperion/torch/narchs/audio_feats_mvn.py +++ b/hyperion/torch/narchs/audio_feats_mvn.py @@ -50,6 +50,10 @@ def __init__( self.trans = trans self.aug_after_mvn = aug_after_mvn + @property + def sample_frequency(self): + return self.audio_feats.fs + @property def fs(self): return self.audio_feats.fs diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index 0cb887ca..e7020e1d 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -2,11 +2,11 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import os from collections import OrderedDict as ODict from copy import deepcopy from enum import Enum from typing import Optional +from pathlib import Path import torch import torch.nn as nn @@ -110,13 +110,11 @@ def valid_train_modes(): return ["full", "frozen"] def save(self, file_path): - file_dir = os.path.dirname(file_path) - if not (os.path.isdir(file_dir)): - os.makedirs(file_dir, exist_ok=True) - - config = self.get_config() + file_path = Path(file_path) + file_path.parent.mkdir(parents=True, exist_ok=True) torch.save( - {"model_cfg": self.get_config(), "model_state_dict": self.state_dict()} + {"model_cfg": self.get_config(), "model_state_dict": self.state_dict()}, + file_path, ) @staticmethod @@ -176,7 +174,7 @@ def _fix_cfg_compatibility(class_obj, cfg): Fixed configuration dictionary. 
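# A sketch of the save/auto_load round trip above: save() now creates the parent
# directory with pathlib and stores {"model_cfg", "model_state_dict"} in a single
# checkpoint, and auto_load() rebuilds the model through TorchModel.registry
# keyed by class name. The file path below is hypothetical:
#
#     model.save("exp/xvector_nnet/model_ep0070.pth")
#     model2 = TorchModel.auto_load("exp/xvector_nnet/model_ep0070.pth")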
""" # for compatibility with older x-vector models - XVector = torch_model_registry["xvector"] + XVector = TorchModel.registry["XVector"] if issubclass(class_obj, XVector): # We renamed AM-softmax scale parameer s to cos_scale if "s" in cfg: @@ -195,8 +193,9 @@ def auto_load(file_path, extra_objs={}, map_location=None): cfg = model_data["model_cfg"] class_name = cfg["class_name"] del cfg["class_name"] - if class_name in torch_model_registry: - class_obj = torch_model_registry[class_name] + print(TorchModel.registry) + if class_name in TorchModel.registry: + class_obj = TorchModel.registry[class_name] elif class_name in extra_objs: class_obj = extra_objs[class_name] else: diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py index fe72339f..4d4dd55a 100644 --- a/hyperion/utils/class_info.py +++ b/hyperion/utils/class_info.py @@ -100,3 +100,19 @@ def cat(cls, tables): ) df["class_idx"].drop(columns=["class_idx"], inplace=True) return cls(df) + + def filter( + self, + predicate=None, + items=None, + iindex=None, + columns=None, + by="id", + keep=True, + rebuild_idx=False, + ): + new_class_info = super().filter(predicate, items, iindex, columns, by, keep) + if rebuild_idx: + new_class_info.add_class_idx() + + return new_class_info diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py index d1d969fb..dd446576 100644 --- a/hyperion/utils/dataset.py +++ b/hyperion/utils/dataset.py @@ -4,13 +4,14 @@ """ import logging from pathlib import Path -from typing import Dict, Optional, Union +from typing import List, Dict, Optional, Union from copy import deepcopy import math import numpy as np import pandas as pd import yaml +from .info_table import InfoTable from .class_info import ClassInfo from .feature_set import FeatureSet from .misc import PathLike @@ -30,7 +31,7 @@ class Dataset: Attributes: segments: SegmentSet object or path to it. 
classes: Dictionary of ClassInfo objects or paths to then - recordings: Dictionary of RecordingSet objects or paths to then + recordings: RecordingSet object or paths to then features: Dictionary of FeatureSet objects or paths to then enrollments: Dictionary of EnrollmentMap objects or paths to then trials: Dictionary of TrialKey/TrialNdx/SparseTrialKey objects @@ -45,7 +46,7 @@ def __init__( self, segments: Union[SegmentSet, PathLike], classes: Optional[Dict[str, Union[ClassInfo, PathLike]]] = None, - recordings: Optional[Dict[str, Union[RecordingSet, PathLike]]] = None, + recordings: Optional[Union[RecordingSet, PathLike]] = None, features: Optional[Dict[str, Union[FeatureSet, PathLike]]] = None, enrollments: Optional[Dict[str, Union[EnrollmentMap, PathLike]]] = None, trials: Optional[ @@ -65,24 +66,65 @@ def __init__( self._classes, self._classes_paths = self._parse_dict_args(classes, ClassInfo) - self._recordings, self._recordings_paths = self._parse_dict_args( - recordings, RecordingSet - ) + if isinstance(recordings, RecordingSet): + self._recordings = recordings + self._recordings_path = None + else: + assert isinstance(recordings, (str, Path)) + self._recordings = None + self._recordings_path = Path(recordings) + + # self._recordings, self._recordings_paths = self._parse_dict_args( + # recordings, RecordingSet + # ) self._features, self._features_paths = self._parse_dict_args( features, FeatureSet ) self._enrollments, self._enrollments_paths = self._parse_dict_args( - enrollments, - EnrollmentMap, + enrollments, EnrollmentMap, ) self._trials, self._trials_paths = self._parse_dict_args( - trials, - (TrialKey, TrialNdx, SparseTrialKey), + trials, (TrialKey, TrialNdx, SparseTrialKey), ) self.sparse_trials = sparse_trials self.table_sep = table_sep + self._files_to_delete = [] + + def get_dataset_files(self): + file_paths = [] + for file_path in [self._segments_path, self._recordings_path]: + if file_path is not None: + file_paths.append(file_path) + + for path_dict in [ + self._features_paths, + self._enrollments_paths, + self._trials_paths, + ]: + if path_dict is None: + continue + for k, v in path_dict.items(): + file_paths.append(v) + + return file_paths + + def _delete_files(self, dataset_dir): + if not self._files_to_delete: + return + + dataset_files = self.get_dataset_files() + for file_path in self._files_to_delete: + file_path = Path(file_path) + # if the file has been added again we don't delete + if file_path in dataset_files: + continue + + # if we are saving the dataset to another location + # we don't delete the one in the original + if file_path.parent == dataset_dir and file_path.is_file(): + file_path.unlink() def _parse_dict_args(self, data, types): if data is None: @@ -109,17 +151,38 @@ def segments(self, keep_loaded: bool = True): return self._segments - def recordings_value(self, key: str, keep_loaded: bool = True): - if self._recordings[key] is None: - assert self._recordings_paths[key] is not None - recordings = RecordingSet.load( - self._recordings_paths[key], sep=self.table_sep - ) + def __len__(self): + return len(self.segments()) + + def recordings(self, keep_loaded: bool = True): + if self._recordings is None: + assert self._recordings_path is not None + recordings = RecordingSet.load(self._recordings_path, sep=self.table_sep) if keep_loaded: - self._recordings[key] = recordings + self._recordings = recordings return recordings - return self._recordings[key] + return self._recordings + + # def recordings_value(self, key: str, keep_loaded: bool = True): + # 
if self._recordings[key] is None:
+    #         assert self._recordings_paths[key] is not None
+    #         recordings = RecordingSet.load(
+    #             self._recordings_paths[key], sep=self.table_sep
+    #         )
+    #         if keep_loaded:
+    #             self._recordings[key] = recordings
+    #         return recordings
+
+    #     return self._recordings[key]
+
+    def features_keys(self):
+        if self._features is not None:
+            return self._features.keys()
+        elif self._features_paths is not None:
+            return self._features_paths.keys()
+        else:
+            return {}

     def features_value(self, key: str, keep_loaded: bool = True):
         if self._features[key] is None:
@@ -131,6 +194,14 @@ def features_value(self, key: str, keep_loaded: bool = True):

         return self._features[key]

+    def classes_keys(self):
+        if self._classes is not None:
+            return self._classes.keys()
+        elif self._classes_paths is not None:
+            return self._classes_paths.keys()
+        else:
+            return {}
+
     def classes_value(self, key: str, keep_loaded: bool = True):
         if self._classes[key] is None:
             assert self._classes_paths[key] is not None
@@ -170,12 +241,12 @@ def trials_value(self, key: str, keep_loaded: bool = True):

         return self._trials[key]

-    def recordings(self, keep_loaded: bool = True):
-        if self._recordings is None:
-            yield from ()
-        else:
-            for key in self._recordings.keys():
-                yield key, self.recordings_value(key, keep_loaded)
+    # def recordings(self, keep_loaded: bool = True):
+    #     if self._recordings is None:
+    #         yield from ()
+    #     else:
+    #         for key in self._recordings.keys():
+    #             yield key, self.recordings_value(key, keep_loaded)

     def features(self, keep_loaded: bool = True):
         if self._features is None:
@@ -299,7 +370,6 @@ def save_changed(
         dataset_path: PathLike,
         update_paths: bool = True,
         table_sep: Optional[str] = None,
-        force_save_all: bool = False,
     ):
         """Saves the tables that changed or that are not yet in the output directory.
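# A sketch of the reworked Dataset API above: table accessors are lazy (the
# first call loads from the stored path and caches when keep_loaded=True),
# remove_* methods only queue paths in _files_to_delete, and _delete_files()
# unlinks them when the dataset is saved. Paths and keys below are hypothetical:
#
#     dataset = Dataset.load("data/train/dataset.yaml")
#     recs = dataset.recordings()               # loaded once, then cached
#     segs = dataset.segments(keep_loaded=True)
#     dataset.remove_trials("trials_o")         # queued only, nothing unlinked yet
#     dataset.save_changed("data/train")        # tables saved, stale files deleted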
@@ -330,24 +400,36 @@ def save_changed( if update_paths: self._segments_path = file_path - if self._recordings is not None: - file_names = {} - for k in self._recordings.keys(): - file_name = k + table_ext - file_names[k] = file_name - file_path = dataset_dir / file_name - if ( - self._recordings[k] is not None - or file_path != self._recordings_paths[k] - or not file_path.exists() - ): - v = self.recordings_value(k, keep_loaded=False) - v.save(file_path, sep=table_sep) - if update_paths: - self._recordings_paths[k] = file_path - - if file_names: - dataset["recordings"] = file_names + file_name = f"recordings{table_ext}" + dataset["recordings"] = file_name + file_path = dataset_dir / file_name + if ( + self._recordings is not None + or file_path != self._recordings_path + or not file_path.exists() + ): + self.recordings(keep_loaded=False).save(file_path, sep=table_sep) + if update_paths: + self._recordings_path = file_path + + # if self._recordings is not None: + # file_names = {} + # for k in self._recordings.keys(): + # file_name = k + table_ext + # file_names[k] = file_name + # file_path = dataset_dir / file_name + # if ( + # self._recordings[k] is not None + # or file_path != self._recordings_paths[k] + # or not file_path.exists() + # ): + # v = self.recordings_value(k, keep_loaded=False) + # v.save(file_path, sep=table_sep) + # if update_paths: + # self._recordings_paths[k] = file_path + + # if file_names: + # dataset["recordings"] = file_names if self._features is not None: file_names = {} @@ -428,6 +510,8 @@ def save_changed( with open(dataset_file, "w") as f: yaml.dump(dataset, f) + self._delete_files(dataset_dir) + def save_all( self, dataset_path: PathLike, @@ -457,17 +541,24 @@ def save_all( if update_paths: self._segments_path = file_path - file_names = {} - for k, v in self.recordings(keep_loaded=False): - file_name = k + table_ext - file_names[k] = file_name - file_path = dataset_dir / file_name - v.save(file_path, sep=table_sep) - if update_paths: - self._recordings_paths[k] = file_path + file_name = f"recordings{table_ext}" + dataset["recordings"] = file_name + file_path = dataset_dir / file_name + self.recordings(keep_loaded=False).save(file_path, sep=table_sep) + if update_paths: + self._recordings_path = file_path - if file_names: - dataset["recordings"] = file_names + # file_names = {} + # for k, v in self.recordings(keep_loaded=False): + # file_name = k + table_ext + # file_names[k] = file_name + # file_path = dataset_dir / file_name + # v.save(file_path, sep=table_sep) + # if update_paths: + # self._recordings_paths[k] = file_path + + # if file_names: + # dataset["recordings"] = file_names file_names = {} for k, v in self.features(keep_loaded=False): @@ -520,10 +611,13 @@ def save_all( with open(dataset_file, "w") as f: yaml.dump(dataset, f) + self._delete_files(dataset_dir) + def update_from_disk(self): self.segments() - for k, v in self.recordings(): - pass + self.recordings() + # for k, v in self.recordings(): + # pass for k, v in self.features(): pass @@ -568,9 +662,10 @@ def load( classes[k] = Dataset.resolve_file_path(dataset_dir, v) if "recordings" in dataset: - recordings = {} - for k, v in dataset["recordings"].items(): - recordings[k] = Dataset.resolve_file_path(dataset_dir, v) + recordings = Dataset.resolve_file_path(dataset_dir, dataset["recordings"]) + # recordings = {} + # for k, v in dataset["recordings"].items(): + # recordings[k] = Dataset.resolve_file_path(dataset_dir, v) if "features" in dataset: features = {} @@ -615,32 +710,42 @@ def 
add_features(self, features_name: str, features: Union[PathLike, FeatureSet]
         else:
             raise ValueError()

-    def add_recordings(
-        self,
-        recordings_name: str,
-        recordings: Union[PathLike, RecordingSet],
+    def set_segments(
+        self, segments: Union[PathLike, SegmentSet], update_seg_durs: bool = False,
     ):
-        if self._recordings is None:
-            self._recordings = {}
-            self._recordings_paths = {}
+        if isinstance(segments, (str, Path)):
+            self._segments = None
+            self._segments_path = segments
+        elif isinstance(segments, SegmentSet):
+            self._segments = segments
+            self._segments_path = None
+        else:
+            raise ValueError()

-        if isinstance(features, (str, Path)):
-            self._recordings[features_name] = None
-            self._recordings_paths[recordings_name] = recordings
+    def set_recordings(
+        self, recordings: Union[PathLike, RecordingSet], update_seg_durs: bool = False,
+    ):
+        if isinstance(recordings, (str, Path)):
+            self._recordings = None
+            self._recordings_path = Path(recordings)
         elif isinstance(recordings, RecordingSet):
-            self._recordings[recordings_name] = recordings
-            self._recordings_paths[recordings_name] = None
+            self._recordings = recordings
+            self._recordings_path = None
         else:
             raise ValueError()

+        if update_seg_durs:
+            rec_ids = self.segments(keep_loaded=True).recordings()
+            self.segments()["duration"] = self.recordings().loc[rec_ids, "duration"]
+
     def add_classes(self, classes_name: str, classes: Union[PathLike, ClassInfo]):
         if self._classes is None:
             self._classes = {}
             self._classes_paths = {}

         if isinstance(classes, (str, Path)):
-            self._classes[features_name] = None
-            self._classes_paths[classes_name] = classes
+            self._classes[classes_name] = None
+            self._classes_paths[classes_name] = Path(classes)
         elif isinstance(classes, ClassInfo):
             self._classes[classes_name] = classes
             self._classes_paths[classes_name] = None
@@ -648,9 +753,7 @@ def add_classes(self, classes_name: str, classes: Union[PathLike, ClassInfo]):
             raise ValueError()

     def add_enrollments(
-        self,
-        enrollments_name: str,
-        enrollments: Union[PathLike, EnrollmentMap],
+        self, enrollments_name: str, enrollments: Union[PathLike, EnrollmentMap],
     ):
         if self._enrollments is None:
             self._enrollments = {}
@@ -658,7 +761,7 @@ def add_enrollments(

         if isinstance(enrollments, (str, Path)):
             self._enrollments[enrollments_name] = None
-            self._enrollments_paths[enrollments_name] = enrollments
+            self._enrollments_paths[enrollments_name] = Path(enrollments)
         elif isinstance(enrollments, EnrollmentMap):
             self._enrollments[enrollments_name] = enrollments
             self._enrollments_paths[enrollments_name] = None
@@ -675,8 +778,8 @@ def add_trials(
             self._trials_paths = {}

         if isinstance(trials, (str, Path)):
-            self._trials[features_name] = None
-            self._trials_paths[trials_name] = trials
+            self._trials[trials_name] = None
+            self._trials_paths[trials_name] = Path(trials)
         elif isinstance(trials, (TrialKey, TrialNdx, SparseTrialKey)):
             self._trials[trials_name] = trials
             self._trials_paths[trials_name] = None
@@ -685,85 +788,104 @@
     def remove_features(self, features_name: str):
         if self._features_paths[features_name] is not None:
-            file_path = Path(self._features_paths[features_name])
-            if file_path.is_file():
-                file_path.unlink()
+            self._files_to_delete.append(self._features_paths[features_name])

         del self._features[features_name]
         del self._features_paths[features_name]

-    def remove_recordings(
-        self,
-        recordings_name: str,
-    ):
-        if self._recordingsr_paths[recordings_name] is not None:
-            file_path = Path(self._recordings_paths[recordings_name])
-            if file_path.is_file():
-                file_path.unlink()
+    def remove_recordings(self,):
+        if self._recordings_path is not None:
+            self._files_to_delete.append(self._recordings_path)

-        del self._recordings[recordings_name]
-        del self._recordings_paths[recordings_name]
+        self._recordings = None
+        self._recordings_path = None
+
+    # def remove_recordings(
+    #     self,
+    #     recordings_name: str,
+    # ):
+    #     if self._recordingsr_paths[recordings_name] is not None:
+    #         file_path = Path(self._recordings_paths[recordings_name])
+    #         if file_path.is_file():
+    #             file_path.unlink()
+
+    #     del self._recordings[recordings_name]
+    #     del self._recordings_paths[recordings_name]

     def remove_classes(self, classes_name: str):
         if self._classes_paths[classes_name] is not None:
-            file_path = Path(self._classes_paths[classes_name])
-            if file_path.is_file():
-                file_path.unlink()
+            self._files_to_delete.append(self._classes_paths[classes_name])

         del self._classes[classes_name]
         del self._classes_paths[classes_name]

     def remove_enrollments(
-        self,
-        enrollments_name: str,
+        self, enrollments_name: str,
     ):
         if self._enrollments_paths[enrollments_name] is not None:
-            file_path = Path(self._enrollments_paths[enrollments_name])
-            if file_path.is_file():
-                file_path.unlink()
+            self._files_to_delete.append(self._enrollments_paths[enrollments_name])

         del self._enrollments[enrollments_name]
         del self._enrollments_paths[enrollments_name]

     def remove_trials(
-        self,
-        trials_name: str,
+        self, trials_name: str,
     ):
         if self._trials_paths[trials_name] is not None:
-            file_path = Path(self._trials_paths[trials_name])
-            if file_path.is_file():
-                file_path.unlink()
+            self._files_to_delete.append(self._trials_paths[trials_name])

         del self._trials[trials_name]
         del self._trials_paths[trials_name]

-    def set_segments(self, segments: Union[PathLike, SegmentSet]):
-        if isinstance(segments, SegmentSet):
-            self._segments = segments
-        else:
-            self._segments_path = segments
+    def add_cols_to_segments(
+        self,
+        right_table: Union[InfoTable, pd.DataFrame, PathLike],
+        column_names: Union[None, str, List[str], np.ndarray] = None,
+        on: Union[str, List[str], np.ndarray] = "id",
+        right_on: Union[None, str, List[str], np.ndarray] = None,
+    ):
+        if isinstance(right_table, (str, Path)):
+            file_path = Path(right_table)
+            if file_path.is_file():
+                right_table = InfoTable.load(file_path)
+            else:
+                if right_table == "recordings":
+                    right_table = self.recordings()
+                elif right_table in self.features_keys():
+                    right_table = self.features_value(right_table)
+                elif right_table in self.classes_keys():
+                    right_table = self.classes_value(right_table)
+                else:
+                    raise ValueError(f"{right_table} not found")
+
+        segments = self.segments(keep_loaded=True)
+        segments.add_columns(right_table, column_names, on=on, right_on=right_on)

-    def clean(self):
-        rec_ids = self.segments().recording_ids()
-        for k, table in self.recordings():
-            table = table.loc[table["id"].isin(rec_ids)].copy()
-            self._recordings[k] = RecordingSet(table)
+    def clean(self, rebuild_class_idx=False):
+        rec_ids = self.segments().recordings()
+        # for k, table in self.recordings():
+        #     # table = table.loc[table["id"].isin(rec_ids)].copy()
+        #     # self._recordings[k] = RecordingSet(table)
+        self._recordings = self.recordings().filter(lambda df: df["id"].isin(rec_ids))

         ids = self.segments()["id"].values
         for k, table in self.features():
-            table = table.loc[table["id"].isin(ids)].copy()
-            self._features[k] = FeatureSet(table)
+            self._features[k] = table.filter(lambda df: df["id"].isin(ids))
+            # table = table.loc[table["id"].isin(ids)].copy()
+            # self._features[k] =
FeatureSet(table) for k, table in self.classes(): class_ids = self.segments()[k].unique() - table = table[table["id"].isin(class_ids)].copy() - self._classes[k] = ClassInfo(table) + self._classes[k] = table.filter(lambda df: df["id"].isin(class_ids)) + # table = table[table["id"].isin(class_ids)].copy() + # self._classes[k] = ClassInfo(table) remove_keys = [] for k, table in self.enrollments(): - table = table.loc[table["segmentid"].isin(ids)].copy() + # table = table.loc[table["segmentid"].isin(ids)].copy() + table = table.filter(lambda df: df["segmentid"].isin(ids)) if len(table) > 0: - self._enrollments[k] = EnrollmentMap(table) + self._enrollments[k] = table else: remove_keys.append(k) @@ -790,7 +912,7 @@ def _split_into_trials_and_cohort( seed: int, ): # select test speakers - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) spks = segments["speaker"].unique() trial_spks = rng.choice(spks, size=(num_trial_speakers,), replace=False) @@ -859,20 +981,14 @@ def split_into_trials_and_cohort( segments_male = SegmentSet(segments[segments["gender"] == "m"]) segments_female = SegmentSet(segments[segments["gender"] == "f"]) trials_male, enroll_male, cohort_male = self._split_into_trials_and_cohort( - segments_male, - num_tar_trials, - num_trial_speakers, - seed, + segments_male, num_tar_trials, num_trial_speakers, seed, ) ( trials_female, enroll_female, cohort_female, ) = self._split_into_trials_and_cohort( - segments_female, - num_tar_trials, - num_trial_speakers, - seed, + segments_female, num_tar_trials, num_trial_speakers, seed, ) trials = TrialKey.merge([trials_male, trials_female]) enroll = EnrollmentMap.cat([enroll_male, enroll_female]) @@ -880,10 +996,7 @@ def split_into_trials_and_cohort( else: segments = self.segments() trials, enroll, cohort = self._split_into_trials_and_cohort( - segments, - num_tar_trials, - num_trial_speakers, - seed, + segments, num_tar_trials, num_trial_speakers, seed, ) dataset_trials = self.clone() @@ -899,3 +1012,176 @@ def split_into_trials_and_cohort( dataset_cohort.clean() return dataset_trials, dataset_cohort + + def remove_short_segments(self, min_length: float, length_name: str = "duration"): + segments = self.segments() + self._segments = segments.filter(lambda df: df[length_name] >= min_length) + self.clean() + + def remove_classes_few_segments( + self, class_name: str, min_segs: int, rebuild_idx: bool = False, + ): + segments = self.segments() + classes, counts = np.unique(segments[class_name], return_counts=True) + keep_classes = classes[counts >= min_segs] + self._segments = segments.filter(lambda df: df[class_name].isin(keep_classes)) + self.clean() + if rebuild_idx: + class_info = self.classes_value(class_name) + class_info.add_class_idx() + + def rebuild_class_idx(self, class_name: str): + class_info = self.classes_value(class_name) + class_info.add_class_idx() + + def _segments_split(self, val_prob: float, rng: np.random.Generator): + segments = self.segments() + p = rng.permutation(len(segments)) + num_train = int(round((1 - val_prob) * len(p))) + + train_idx = p[:num_train] + train_segs = segments.filter(iindex=train_idx) + train_segs.sort() + + val_idx = p[num_train:] + val_segs = segments.filter(iindex=val_idx) + val_segs.sort() + + return train_segs, val_segs + + def _segments_split_joint_classes( + self, + val_prob: float, + joint_classes: List[str], + min_train_samples: int, + rng: np.random.Generator, + ): + segments = self.segments() + classes = segments[joint_classes].apply("-".join, axis=1) + u_classes, 
class_ids = np.unique(classes, return_inverse=True) + train_mask = np.zeros(len(segments), dtype=bool) + kk = 0 + for c_id in range(len(u_classes)): + idx = (class_ids == c_id).nonzero()[0] + count = len(idx) + p = rng.permutation(count) + num_train = max( + int(round((1 - val_prob) * count)), min(min_train_samples, count) + ) + kk += count - num_train + train_idx = idx[p[:num_train]] + train_mask[train_idx] = True + + train_idx = train_mask.nonzero()[0] + train_segs = segments.filter(iindex=train_idx) + train_segs.sort() + + val_segs = segments.filter(iindex=train_idx, keep=False) + val_segs.sort() + + return train_segs, val_segs + + def _segments_split_disjoint_classes( + self, val_prob: float, disjoint_classes: List[str], rng: np.random.Generator, + ): + segments = self.segments() + classes = segments[disjoint_classes].apply("-".join, axis=1) + u_classes, class_ids = np.unique(classes, return_inverse=True) + p = rng.permutation(len(u_classes)) + class_ids = p[class_ids] + num_train = int(round((1 - val_prob) * len(segments))) + train_mask = np.zeros(len(segments), dtype=bool) + count_acc = 0 + for c_id in range(len(u_classes)): + idx = (class_ids == c_id).nonzero()[0] + train_mask[idx] = True + count = len(idx) + count_acc += count + if count_acc >= num_train: + break + + train_idx = train_mask.nonzero()[0] + train_segs = segments.filter(iindex=train_idx) + train_segs.sort() + + val_segs = segments.filter(iindex=train_idx, keep=False) + val_segs.sort() + + return train_segs, val_segs + + def _segments_split_joint_and_disjoint_classes( + self, + val_prob: float, + joint_classes: List[str], + disjoint_clases: List[str], + min_train_samples: int, + rng: np.random.Generator, + ): + raise NotImplementedError("I'll implement this when I need it") + segments = self.segments() + j_classes = segments[joint_classes].apply("-".join, axis=1) + ju_classes, j_class_ids = np.unique(j_classes, return_inverse=True) + d_classes = segments[disjoint_classes].apply("-".join, axis=1) + du_classes, d_class_ids = np.unique(d_classes, return_inverse=True) + d_p = rng.permutation(len(du_classes)) + d_class_ids = d_p[d_class_ids] + d_sort_idx = np.argsort(d_class_ids) + d_sort_j_class_ids = j_class_ids[d_sort_idx] + + train_d_classes = set() + for c_id in range(len(ju_classes)): + idx = (j_sort_class_ids == c_id).nonzero()[0] + count = len(idx) + num_train = max( + int(round((1 - val_prob) * count)), min(min_train_samples, count) + ) + sel_d_class_ids = set(d_sort_idx[:num_train]) + train_d_classes = train_d_classes.union(sel_d_class_ids) + + train_mask = np.zeros(len(segments), dtype=bool) + for c_id in train_d_classes: + mask = d_class_ids == c_id + train_mask[mask] = True + + train_idx = train_mask.nonzero()[0] + train_segs = segments.filter(iindex=train_idx) + train_segs.sort() + + val_segs = segments.filter(iindex=train_idx, keep=False) + val_segs.sort() + + return train_segs, val_segs + + def split_train_val( + self, + val_prob: float, + joint_classes: Optional[List[str]] = None, + disjoint_classes: Optional[List[str]] = None, + min_train_samples: int = 1, + seed: int = 11235813, + ): + rng = np.random.default_rng(seed) + if joint_classes is None and disjoint_classes is None: + train_segs, val_segs = self._segments_split(val_prob, rng) + elif joint_classes is not None and disjoint_classes is None: + train_segs, val_segs = self._segments_split_joint_classes( + val_prob, joint_classes, min_train_samples, rng, + ) + elif joint_classes is None and disjoint_classes is not None: + train_segs, val_segs = 
self._segments_split_disjoint_classes( + val_prob, disjoint_classes, rng, + ) + else: + train_segs, val_segs = self._segments_split_joint_and_disjoint_classes( + val_prob, joint_classes, disjoint_classes, min_train_samples, rng, + ) + + train_ds = self.clone() + train_ds.set_segments(train_segs) + train_ds.clean() + + val_ds = self.clone() + val_ds.set_segments(val_segs) + val_ds.clean() + + return train_ds, val_ds diff --git a/hyperion/utils/fold_list.py b/hyperion/utils/fold_list.py index f22263cf..80b818d6 100644 --- a/hyperion/utils/fold_list.py +++ b/hyperion/utils/fold_list.py @@ -176,7 +176,7 @@ def create( FoldList object. """ if shuffle: - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) if group_by_key is None: group_by_key = segment_key diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 45eab05f..57f3faf2 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -8,6 +8,7 @@ from collections import OrderedDict from copy import deepcopy from pathlib import Path +from typing import Optional, Union, List import numpy as np import pandas as pd @@ -192,14 +193,41 @@ def cat(cls, tables): ].is_unique, """there are duplicated ids in the tables we are concatenating""" return cls(df) - def filter(self, items=None, iindex=None, columns=None, by="id", keep=True): + def filter( + self, predicate=None, items=None, iindex=None, columns=None, by="id", keep=True + ): + """Filters the table and produce a new table with the elements to keep + + Args: + predicate: callable function that defines the filtering criterion e.g.: + lambda df: df["duration"] > 1.0. + items: filters the table based in column value with pandas command: + df.loc[items, by], used only if predicate is None + iindex: filters the table based on integer index with pandas command: + df.iloc[iiindex], used if predicate and items are None + columns: columns to keep of remove. + by: column id to use with itmes criterion + keep: if True, the criterion is used to keep rows, if False it is used + to remove rows + + Returns + InfoTable of the same class as the input. + """ assert ( - items is None or iindex is None - ), "items and iindex cannot be not None at the same time" + predicate is not None + or items is not None + or iindex is not None + or columns is not None + ), "predicate, items, iindex and columns cannot be not None at the same time" df = self.df + if predicate is not None: + mask = predicate(self.df) + if not keep: - if items is not None: + if predicate is not None: + mask = np.logical_not(mask) + elif items is not None: items = np.setdiff1d(df[by], items) elif iindex is not None: iindex = np.setdiff1d(np.arange(len(df)), iindex) @@ -207,7 +235,12 @@ def filter(self, items=None, iindex=None, columns=None, by="id", keep=True): if columns is not None: columns = np.setdiff1d(df.columns, columns) - if items is not None: + if predicate is not None: + if columns is None: + df = df.loc[mask] + else: + df = df.loc[mask, columns] + elif items is not None: if by != "id": missing = [False if v in df[by] else True for v in items] if any(missing): @@ -225,7 +258,7 @@ def filter(self, items=None, iindex=None, columns=None, by="id", keep=True): if columns is not None: df = df[columns] - return self.__class__(df) + return self.__class__(df.copy()) def __eq__(self, other): """Equal operator""" @@ -255,7 +288,7 @@ def shuffle(self, seed=1024, rng=None): Index used to shuffle the list. 
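# A sketch of the extended InfoTable.filter above: a callable predicate builds a
# boolean mask from the underlying dataframe, and keep=False inverts the
# selection; the column names below are hypothetical:
#
#     long_segs = segments.filter(lambda df: df["duration"] >= 2.0)
#     short_segs = segments.filter(lambda df: df["duration"] >= 2.0, keep=False)
#     two_cols = segments.filter(columns=["id", "duration"])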
""" if rng is None: - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) index = np.arange(len(self.df)) rng.shuffle(index) self.df = self.df.iloc[index] @@ -279,14 +312,33 @@ def get_loc(self, keys): loc = self.df.index.get_loc(keys) if isinstance(loc, int): return loc - elif isinstance(loc, np.ndarray) and loc.dtype == np.bool: + + if isinstance(loc, np.ndarray) and loc.dtype == np.bool: return np.nonzero(loc)[0] - else: - return list(range(loc.start, loc.stop, loc.step)) + + return list(range(loc.start, loc.stop, loc.step)) def get_col_idx(self, keys): return self.df.columns.get_loc(keys) + def add_columns( + self, + right_table, + column_names: Union[None, str, List[str], np.ndarray] = None, + on: Union[str, List[str], np.ndarray] = "id", + right_on: Union[None, str, List[str], np.ndarray] = None, + ): + if isinstance(right_table, InfoTable): + right_table = right_table.df + + if column_names is not None: + right_table = right_table[column_names] + + if right_on is None: + right_on = on + + self.df = self.df.merge(right_table, how="left", left_on=on, right_on=right_on) + # def __len__(self): # """Returns the number of elements in the list.""" diff --git a/hyperion/utils/math.py b/hyperion/utils/math_funcs.py similarity index 93% rename from hyperion/utils/math.py rename to hyperion/utils/math_funcs.py index 84596f7d..5ee510b9 100644 --- a/hyperion/utils/math.py +++ b/hyperion/utils/math_funcs.py @@ -346,10 +346,26 @@ def int2onehot(class_ids, num_classes=None): return p -def cosine_scoring(x1, x2): +def average_vectors(x, ids): + assert x.shape[0] == len(ids) + num_ids = np.max(ids) + 1 + x_avg = np.zeros((num_ids, x.shape[1]), dtype=x.dtype) + for i in range(num_ids): + mask = ids == i + x_avg[i] = np.mean(x[mask], axis=0) - l2_1 = np.sqrt(np.sum(x1 ** 2, axis=-1, keepdims=True)) - l2_2 = np.sqrt(np.sum(x2 ** 2, axis=-1, keepdims=True)) + return x_avg + + +def cosine_scoring(x1, x2, ids1=None, ids2=None): + if ids1 is not None: + x1 = average_vectors(x1, ids1) + + if ids2 is not None: + x2 = average_vectors(x2, ids2) + + l2_1 = np.sqrt(np.sum(x1 ** 2, axis=-1, keepdims=True) + 1e-10) + l2_2 = np.sqrt(np.sum(x2 ** 2, axis=-1, keepdims=True) + 1e-10) x1 = x1 / l2_1 x2 = x2 / l2_2 diff --git a/hyperion/utils/plotting.py b/hyperion/utils/plotting.py index 2341beb4..ec617975 100644 --- a/hyperion/utils/plotting.py +++ b/hyperion/utils/plotting.py @@ -4,6 +4,7 @@ """ import matplotlib + # matplotlib.use('Agg') import matplotlib.pyplot as plt import numpy as np @@ -11,7 +12,7 @@ import scipy.stats as stats from mpl_toolkits.mplot3d import Axes3D as plt3d -from .math import invert_pdmat +from .math_funcs import invert_pdmat def plot_gaussian_1D(mu, C, num_sigmas=3, num_pts=100, weight=1, **kwargs): diff --git a/hyperion/utils/scp_list.py b/hyperion/utils/scp_list.py index 5abf76f2..070e4f53 100644 --- a/hyperion/utils/scp_list.py +++ b/hyperion/utils/scp_list.py @@ -384,7 +384,7 @@ def shuffle(self, seed=1024, rng=None): Index used to shuffle the list. 
""" if rng is None: - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) index = np.arange(len(self.key)) rng.shuffle(index) diff --git a/hyperion/utils/segment_set.py b/hyperion/utils/segment_set.py index 6aef5bb2..a99b4e1e 100644 --- a/hyperion/utils/segment_set.py +++ b/hyperion/utils/segment_set.py @@ -13,42 +13,48 @@ class SegmentSet(InfoTable): def __init__(self, df): super().__init__(df) - if "start" in df and "recording_id" not in df: - df["recording_id"] = df["id"] + if "start" in df and "recordings" not in df: + df["recordings"] = df["id"] - if "start" not in df and "recording_id" in df: + if "start" not in df and "recordings" in df: df["start"] = 0.0 @property def has_time_marks(self): - return ( - "recording_id" in self.df and "start" in self.df and "duration" in self.df - ) + return "recordings" in self.df and "start" in self.df and "duration" in self.df @property def has_recording_ids(self): - return "recording_id" in self.df + return "recordings" in self.df - def recording_ids(self, ids=None): + @property + def has_recordings(self): + return "recordings" in self.df + + def recordings(self, ids=None): if ids is None: - if "recording_id" in self.df: - return self.df["recording_id"] + if "recordings" in self.df: + return self.df["recordings"] else: return self.df["id"] - if "recording_id" in self.df: - return self.df.loc[ids, "recording_id"] + if "recordings" in self.df: + return self.df.loc[ids, "recordings"] return ids - def recording_time_marks(self, ids): - if "recording" in self.df: - rec_col = "recording_id" - else: - rec_col = "id" + def recording_ids(self, ids=None): + return self.recordings(ids) + + def recording_time_marks(self, ids, recordings_name: str = "recordings"): + if recordings_name == "recordings": + if "recordings" in self.df: + recordings_name = "recordings" + else: + recordings_name = "id" assert "duration" in self.df if "start" not in self.df: self.df["start"] = 0.0 - return self.df.loc[ids, [rec_col, "start", "duration"]] + return self.df.loc[ids, [recordings_name, "start", "duration"]] diff --git a/hyperion/utils/sparse_trial_key.py b/hyperion/utils/sparse_trial_key.py index 1bc321a7..62fcd446 100644 --- a/hyperion/utils/sparse_trial_key.py +++ b/hyperion/utils/sparse_trial_key.py @@ -145,7 +145,7 @@ def load_table(cls, file_path, sep=None): file_path: File to read the list. Returns: - TrialKey object. + SparseTrialKey object. 
""" file_path = Path(file_path) ext = file_path.suffix @@ -156,19 +156,15 @@ def load_table(cls, file_path, sep=None): models = df["modelid"].values segments = df["segmentid"].values is_tar = (df["targettype"] == "target").values - model_set, _, model_idx = np.unique( - models, return_index=True, return_inverse=True - ) - seg_set, _, seg_idx = np.unique( - segments, return_index=True, return_inverse=True - ) + model_set, model_idx = np.unique(models, return_inverse=True) + seg_set, seg_idx = np.unique(segments, return_inverse=True) tar = sparse.lil_matrix((len(model_set), len(seg_set)), dtype="bool") non = sparse.lil_matrix((len(model_set), len(seg_set)), dtype="bool") - for item in zip(model_idx, seg_idx, is_tar): - if item[2]: - tar[item[0], item[1]] = True + for i, j, target_type in zip(model_idx, seg_idx, is_tar): + if target_type: + tar[i, j] = True else: - non[item[0], item[1]] = True + non[i, j] = True return cls(model_set, seg_set, tar.tocsr(), non.tocsr()) @classmethod diff --git a/hyperion/utils/sparse_trial_scores.py b/hyperion/utils/sparse_trial_scores.py index 7ed9a1d1..760bd1f1 100644 --- a/hyperion/utils/sparse_trial_scores.py +++ b/hyperion/utils/sparse_trial_scores.py @@ -3,12 +3,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - import copy import logging -import os.path as path +from pathlib import Path import numpy as np +import pandas as pd import scipy.sparse as sparse from ..hyp_defs import float_cpu @@ -18,9 +18,6 @@ from .trial_ndx import TrialNdx from .trial_scores import TrialScores -# import h5py - - class SparseTrialScores(TrialScores): @@ -55,6 +52,26 @@ def save_txt(self, file_path): % (self.model_set[r], self.seg_set[c], self.scores[r, c]) ) + def save_table(self, file_path, sep=None): + """Saves object to pandas tabnle file. + + Args: + file_path: File to write the list. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + self.score_mask.eliminate_zeros() + score_mask = self.score_mask.tocoo() + with open(file_path, "w", encoding="utf-8") as f: + f.write(f"modelid{sep}segmentid{sep}LLR\n") + for i, j in zip(score_mask.row, score_mask.col): + f.write( + f"{self.model_set[i]}{sep}{self.seg_set[j]}{sep}{self.scores[i,j]}\n" + ) + @classmethod def load_h5(cls, file_path): raise NotImplementedError() @@ -90,6 +107,35 @@ def load_txt(cls, file_path): scores[item[0], item[1]] = item[2] return cls(model_set, seg_set, scores.tocsr(), score_mask.tocsr()) + @classmethod + def load_table(cls, file_path, sep=None): + """Loads object from pandas table file + + Args: + file_path: File to read the list. + + Returns: + TrialScores object. 
+ """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + df = pd.read_csv(file_path, sep=sep) + models = df["modelid"].values + segments = df["segmentid"].values + score_list = df["LLR"].values + model_set, model_idx = np.unique(models, return_inverse=True) + seg_set, seg_idx = np.unique(segments, return_inverse=True) + scores = sparse.lil_matrix((len(model_set), len(seg_set)), dtype=float_cpu()) + score_mask = sparse.lil_matrix(scores.shape, dtype="bool") + for i, j, score in zip(model_idx, seg_idx, score_list): + score_mask[i, j] = True + scores[i, j] = score + + return cls(model_set, seg_set, scores.tocsr(), score_mask.tocsr()) + @classmethod def merge(cls, scr_list): raise NotImplementedError() @@ -160,9 +206,9 @@ def filter(self, model_set, seg_set, keep=True, raise_missing=True): if not (np.all(f_mod) and np.all(f_seg)): for i in (f_mod == 0).nonzero()[0]: - logging.info("model %s not found" % model_set[i]) + logging.info("model %s not found", model_set[i]) for i in (f_seg == 0).nonzero()[0]: - logging.info("segment %s not found" % seg_set[i]) + logging.info("segment %s not found", seg_set[i]) if raise_missing: raise Exception("some scores were not computed") @@ -172,18 +218,36 @@ def filter(self, model_set, seg_set, keep=True, raise_missing=True): scores = self.scores.tocoo() new_data = scores.data new_row = scores.row.copy() + # for i, r in enumerate(mod_idx): + # if f_mod[i] and i != r: + # idx = scores.row == r + # new_row[idx] = i + + # new_col = scores.col.copy() + # for j, c in enumerate(seg_idx): + # if f_seg[j] and j != c: + # idx = scores.col == c + # new_col[idx] = j + + # idx = np.logical_and(new_row < num_mod, new_col < num_seg) + # if not np.all(idx): + # new_data = new_data[idx] + # new_row = new_row[idx] + # new_col = new_col[idx] + + new_row = -1 * np.ones_like(scores.row) for i, r in enumerate(mod_idx): - if f_mod[i] and i != r: + if f_mod[i]: idx = scores.row == r new_row[idx] = i - new_col = scores.col.copy() + new_col = -1 * np.ones_like(scores.col) for j, c in enumerate(seg_idx): - if f_seg[j] and j != c: + if f_seg[j]: idx = scores.col == c new_col[idx] = j - idx = np.logical_and(new_row < num_mod, new_col < num_seg) + idx = np.logical_and(new_row != -1, new_col != -1) if not np.all(idx): new_data = new_data[idx] new_row = new_row[idx] @@ -193,19 +257,37 @@ def filter(self, model_set, seg_set, keep=True, raise_missing=True): score_mask = self.score_mask.tocoo() new_data = score_mask.data - new_row = score_mask.row.copy() + # new_row = score_mask.row.copy() + # for i, r in enumerate(mod_idx): + # if f_mod[i] and i != r: + # idx = score_mask.row == r + # new_row[idx] = i + + # new_col = score_mask.col.copy() + # for j, c in enumerate(seg_idx): + # if f_seg[j] and j != c: + # idx = score_mask.col == c + # new_col[idx] = j + + # idx = np.logical_and(new_row < num_mod, new_col < num_seg) + # if not np.all(idx): + # new_data = new_data[idx] + # new_row = new_row[idx] + # new_col = new_col[idx] + + new_row = -1 * np.ones_like(score_mask.row) for i, r in enumerate(mod_idx): - if f_mod[i] and i != r: + if f_mod[i]: idx = score_mask.row == r new_row[idx] = i - new_col = score_mask.col.copy() + new_col = -1 * np.ones_like(score_mask.col) for j, c in enumerate(seg_idx): - if f_seg[j] and j != c: + if f_seg[j]: idx = score_mask.col == c new_col[idx] = j - idx = np.logical_and(new_row < num_mod, new_col < num_seg) + idx = np.logical_and(new_row != -1, new_col != -1) if not np.all(idx): new_data = new_data[idx] 
new_row = new_row[idx] @@ -249,7 +331,7 @@ def align_with_ndx(self, ndx, raise_missing=True): if not scr.score_mask[r, c]: missing_scores = True logging.info( - "missing-scores for %s %s" % (scr.model_set[r], scr.seg_set[c]) + "missing-scores for %s %s", scr.model_set[r], scr.seg_set[c] ) if missing_scores and raise_missing: @@ -291,7 +373,7 @@ def set_valid_scores(self, scores, ndx=None): self.scores = scr.scores self.score_mat = scr.score_mat - self.scores[self.score_mask]=scores + self.scores[self.score_mask] = scores @classmethod def from_trial_scores(cls, scr): @@ -302,6 +384,12 @@ def from_trial_scores(cls, scr): score_mask.eliminate_zeros() return cls(scr.model_set, scr.seg_set, scores, score_mask) + def to_trial_scores(self): + scores = self.scores.toarray("C") + score_mask = self.score_mask.toarray("C") + # scores[~score_mask] = 0.0 + return TrialScores(self.model_set, self.seg_set, scores, score_mask) + def set_missing_to_value(self, ndx, val): """Aligns the scores with a TrialNdx and sets the trials with missing scores to the same value. diff --git a/hyperion/utils/train_val_eval_list.py b/hyperion/utils/train_val_eval_list.py index fd17e240..cbccf093 100644 --- a/hyperion/utils/train_val_eval_list.py +++ b/hyperion/utils/train_val_eval_list.py @@ -207,7 +207,7 @@ def create( part_names = ["train", "eval"] if shuffle: - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) if group_by_key is None: group_by_key = segment_key diff --git a/hyperion/utils/trial_key.py b/hyperion/utils/trial_key.py index 4a99461b..5d8019b6 100644 --- a/hyperion/utils/trial_key.py +++ b/hyperion/utils/trial_key.py @@ -11,7 +11,8 @@ import numpy as np import pandas as pd -from .list_utils import * +# from .list_utils import * +from .list_utils import sort, intersect, ismember, split_list, list2ndarray from .trial_ndx import TrialNdx @@ -178,7 +179,8 @@ def load(cls, file_path, sep=None): Returns: TrialKey object. """ - _, file_ext = path.splitext(file_path) + file_path = Path(file_path) + file_ext = file_path.suffix if file_ext in (".h5", ".hdf5"): return cls.load_h5(file_path) elif file_ext in ("", ".txt"): @@ -268,7 +270,7 @@ def load_txt(cls, file_path): @classmethod def load_table(cls, file_path, sep=None): - """Loads object from txt file + """Loads object from pandas table file Args: file_path: File to read the list. 
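This patch repeatedly simplifies np.unique calls by dropping return_index (the SparseTrialKey hunk above, and the TrialKey hunk that follows); a quick standalone check of why return_inverse alone is enough to index the trial matrix:

    import numpy as np

    models = np.array(["spk2", "spk1", "spk2"])
    model_set, model_idx = np.unique(models, return_inverse=True)
    # model_set -> ['spk1' 'spk2'], model_idx -> [1 0 1]
    assert np.all(model_set[model_idx] == models)  # inverse maps each trial to its row
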
@@ -285,12 +287,8 @@ def load_table(cls, file_path, sep=None): models = df["modelid"].values segments = df["segmentid"].values is_tar = (df["targettype"] == "target").values - model_set, _, model_idx = np.unique( - models, return_index=True, return_inverse=True - ) - seg_set, _, seg_idx = np.unique( - segments, return_index=True, return_inverse=True - ) + model_set, model_idx = np.unique(models, return_inverse=True) + seg_set, seg_idx = np.unique(segments, return_inverse=True) tar = np.zeros((len(model_set), len(seg_set)), dtype="bool") non = np.zeros((len(model_set), len(seg_set)), dtype="bool") for i, j, target_type in zip(model_idx, seg_idx, is_tar): diff --git a/hyperion/utils/trial_ndx.py b/hyperion/utils/trial_ndx.py index e26d19e2..b7b873df 100644 --- a/hyperion/utils/trial_ndx.py +++ b/hyperion/utils/trial_ndx.py @@ -4,12 +4,14 @@ """ import copy -import os.path as path +from pathlib import Path import h5py import numpy as np +import pandas as pd -from .list_utils import * +# from .list_utils import * +from .list_utils import sort, intersect, ismember, split_list, list2ndarray class TrialNdx(object): @@ -46,17 +48,20 @@ def sort(self): self.seg_set, s_idx = sort(self.seg_set, return_index=True) self.trial_mask = self.trial_mask[np.ix_(m_idx, s_idx)] - def save(self, file_path): + def save(self, file_path, sep=None): """Saves object to txt/h5 file. Args: file_path: File to write the list. """ - file_base, file_ext = path.splitext(file_path) - if file_ext == ".h5" or file_ext == ".hdf5": + file_path = Path(file_path) + file_ext = file_path.suffix + if file_ext in [".h5", ".hdf5"]: self.save_h5(file_path) - else: + elif file_ext in [".txt", ""]: self.save_txt(file_path) + else: + self.save_table(file_path, sep=sep) def save_h5(self, file_path): """Saves object to h5 file. @@ -71,15 +76,6 @@ def save_h5(self, file_path): f.create_dataset("ID/column_ids", data=seg_set) f.create_dataset("trial_mask", data=self.trial_mask.astype("uint8")) - # model_set = self.model_set.astype('S') - # f.create_dataset('ID/row_ids', self.model_set.shape, dtype=model_set.dtype) - # f['ID/row_ids'] = model_set - # seg_set = self.seg_set.astype('S') - # f.create_dataset('ID/column_ids', self.seg_set.shape, dtype=seg_set.dtype) - # f['ID/column_ids'] = seg_set - # f.create_dataset('trial_mask', self.trial_mask.shape, dtype='uint8') - # f['trial_mask'] = self.trial_mask.astype('uint8') - def save_txt(self, file_path): """Saves object to txt file. @@ -91,8 +87,25 @@ def save_txt(self, file_path): for item in zip(idx[0], idx[1]): f.write("%s %s\n" % (self.model_set[item[1]], self.seg_set[item[0]])) + def save_table(self, file_path, sep=None): + """Saves object to pandas table file. + + Args: + file_path: File to write the list. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + with open(file_path, "w", encoding="utf-8") as f: + f.write(f"modelid{sep}segmentid\n") + I, J = self.trial_mask.nonzero() + for i, j in zip(I, J): + f.write(f"{self.model_set[i]}{sep}{self.seg_set[j]}\n") + @classmethod - def load(cls, file_path): + def load(cls, file_path, sep=None): """Loads object from txt/h5 file Args: @@ -101,11 +114,14 @@ def load(cls, file_path): Returns: TrialNdx object. 
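A sketch of the suffix-based dispatch that save()/load() now share across TrialNdx, TrialKey and TrialScores (the helper name is illustrative): .h5/.hdf5 go to the binary format, bare names and .txt to the legacy text format, and anything else to the table format, with the separator inferred from the extension:

    from pathlib import Path

    def infer_format(file_path, sep=None):
        ext = Path(file_path).suffix
        if ext in (".h5", ".hdf5"):
            return "h5", None
        if ext in ("", ".txt"):
            return "txt", None
        if sep is None:
            sep = "\t" if ".tsv" in ext else ","  # same rule as the save_table hunks
        return "table", sep

    assert infer_format("trials.tsv") == ("table", "\t")
    assert infer_format("trials.h5") == ("h5", None)
    assert infer_format("trials") == ("txt", None)
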
""" - file_base, file_ext = path.splitext(file_path) - if file_ext == ".h5" or file_ext == ".hdf5": + file_path = Path(file_path) + file_ext = file_path.suffix + if file_ext in (".h5", ".hdf5"): return cls.load_h5(file_path) - else: + elif file_ext in ("", ".txt"): return cls.load_txt(file_path) + else: + return cls.load_table(file_path, sep) @classmethod def load_h5(cls, file_path): @@ -148,6 +164,36 @@ def load_txt(cls, file_path): trial_mask[item[0], item[1]] = True return cls(model_set, seg_set, trial_mask) + @classmethod + def load_table(cls, file_path, sep=None): + """Loads object from pandas table file + + Args: + file_path: File to read the list. + + Returns: + TrialNdx object. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + df = pd.read_csv(file_path, sep=sep) + models = df["modelid"].values + segments = df["segmentid"].values + model_set, _, model_idx = np.unique( + models, return_index=True, return_inverse=True + ) + seg_set, _, seg_idx = np.unique( + segments, return_index=True, return_inverse=True + ) + trial_mask = np.zeros((len(model_set), len(seg_set)), dtype="bool") + for i, j in zip(model_idx, seg_idx): + trial_mask[i, j] = True + + return cls(model_set, seg_set, trial_mask) + @classmethod def merge(cls, ndx_list): """Merges several index objects. diff --git a/hyperion/utils/trial_scores.py b/hyperion/utils/trial_scores.py index a486647d..9e7fcd5d 100644 --- a/hyperion/utils/trial_scores.py +++ b/hyperion/utils/trial_scores.py @@ -3,16 +3,18 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - import copy import logging -import os.path as path +from pathlib import Path import h5py import numpy as np +import pandas as pd from ..hyp_defs import float_cpu -from .list_utils import * + +# from .list_utils import * +from .list_utils import sort, intersect, ismember, split_list, list2ndarray from .trial_key import TrialKey from .trial_ndx import TrialNdx @@ -56,17 +58,20 @@ def sort(self): self.scores = self.scores[ix] self.score_mask = self.score_mask[ix] - def save(self, file_path): + def save(self, file_path, sep=None): """Saves object to txt/h5 file. Args: file_path: File to write the list. """ - file_base, file_ext = path.splitext(file_path) - if file_ext == ".h5" or file_ext == ".hdf5": + file_path = Path(file_path) + file_ext = file_path.suffix + if file_ext in [".h5", ".hdf5"]: self.save_h5(file_path) - else: + elif file_ext in ["", ".txt"]: self.save_txt(file_path) + else: + self.save_table(file_path, sep=sep) def save_h5(self, file_path): """Saves object to h5 file. @@ -100,8 +105,27 @@ def save_txt(self, file_path): ) ) + def save_table(self, file_path, sep=None): + """Saves object to pandas tabnle file. + + Args: + file_path: File to write the list. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + with open(file_path, "w", encoding="utf-8") as f: + f.write(f"modelid{sep}segmentid{sep}LLR\n") + I, J = self.score_mask.nonzero() + for i, j in zip(I, J): + f.write( + f"{self.model_set[i]}{sep}{self.seg_set[j]}{sep}{self.scores[i,j]}\n" + ) + @classmethod - def load(cls, file_path): + def load(cls, file_path, sep=None): """Loads object from txt/h5 file Args: @@ -110,11 +134,14 @@ def load(cls, file_path): Returns: TrialScores object. 
""" - file_base, file_ext = path.splitext(file_path) - if file_ext == ".h5" or file_ext == ".hdf5": + file_path = Path(file_path) + file_ext = file_path.suffix + if file_ext in (".h5", ".hdf5"): return cls.load_h5(file_path) - else: + elif file_ext in ("", ".txt"): return cls.load_txt(file_path) + else: + return cls.load_table(file_path, sep) @classmethod def load_h5(cls, file_path): @@ -163,6 +190,35 @@ def load_txt(cls, file_path): scores[item[0], item[1]] = item[2] return cls(model_set, seg_set, scores, score_mask) + @classmethod + def load_table(cls, file_path, sep=None): + """Loads object from pandas table file + + Args: + file_path: File to read the list. + + Returns: + TrialScores object. + """ + file_path = Path(file_path) + ext = file_path.suffix + if sep is None: + sep = "\t" if ".tsv" in ext else "," + + df = pd.read_csv(file_path, sep=sep) + models = df["modelid"].values + segments = df["segmentid"].values + score_list = df["LLR"].values + model_set, model_idx = np.unique(models, return_inverse=True) + seg_set, seg_idx = np.unique(segments, return_inverse=True) + score_mask = np.zeros((len(model_set), len(seg_set)), dtype="bool") + scores = np.zeros((len(model_set), len(seg_set)), dtype=float_cpu()) + for i, j, score in zip(model_idx, seg_idx, score_list): + score_mask[i, j] = True + scores[i, j] = score + + return cls(model_set, seg_set, scores, score_mask) + @classmethod def merge(cls, scr_list): """Merges several score objects. @@ -235,7 +291,7 @@ def filter(self, model_set, seg_set, keep=True, raise_missing=True): Filtered TrialScores object. """ - if not (keep): + if not keep: model_set = np.setdiff1d(self.model_set, model_set) seg_set = np.setdiff1d(self.model_set, seg_set) @@ -244,15 +300,15 @@ def filter(self, model_set, seg_set, keep=True, raise_missing=True): if np.all(f_mod) and np.all(f_seg): model_set = self.model_set[mod_idx] - set_set = self.seg_set[seg_idx] + seg_set = self.seg_set[seg_idx] ix = np.ix_(mod_idx, seg_idx) scores = self.scores[ix] score_mask = self.score_mask[ix] else: for i in (f_mod == 0).nonzero()[0]: - logging.info("model %s not found" % model_set[i]) + logging.info("model %s not found", model_set[i]) for i in (f_seg == 0).nonzero()[0]: - logging.info("segment %s not found" % seg_set[i]) + logging.info("segment %s not found", seg_set[i]) if raise_missing: raise Exception("some scores were not computed") diff --git a/hyperion/utils/utt2info.py b/hyperion/utils/utt2info.py index edf2c23a..c1c429f2 100644 --- a/hyperion/utils/utt2info.py +++ b/hyperion/utils/utt2info.py @@ -261,7 +261,7 @@ def shuffle(self, seed=1024, rng=None): Index used to shuffle the list. 
""" if rng is None: - rng = np.random.RandomState(seed=seed) + rng = np.random.default_rng(seed=seed) index = np.arange(len(self.key)) rng.shuffle(index) self.utt_info = self.utt_info.iloc[index] From 89c6e2016b391818c35ab91644bbd091db4f9986 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 8 Sep 2023 11:24:03 -0400 Subject: [PATCH 106/154] finished vox v1.2 except plda --- egs/voxceleb/v1.2/run_007_eval_be.sh | 321 ++++++++++++++++++ .../eval_cosine_scoring_backend_with_qmf.py | 253 +++++++++++--- hyperion/bin/merge_scores.py | 19 +- hyperion/bin/train_qmf.py | 135 ++++++++ .../np/classifiers/logistic_regression.py | 3 +- hyperion/torch/utils/misc.py | 4 +- hyperion/utils/trial_scores.py | 138 +++++++- 7 files changed, 800 insertions(+), 73 deletions(-) create mode 100755 egs/voxceleb/v1.2/run_007_eval_be.sh create mode 100755 hyperion/bin/train_qmf.py diff --git a/egs/voxceleb/v1.2/run_007_eval_be.sh b/egs/voxceleb/v1.2/run_007_eval_be.sh new file mode 100755 index 00000000..9084d35b --- /dev/null +++ b/egs/voxceleb/v1.2/run_007_eval_be.sh @@ -0,0 +1,321 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=2 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + +plda_label=${plda_type}y${plda_y_dim}_v1 +be_name=lda${lda_dim}_${plda_label}_${plda_data} + +xvector_dir=exp/xvectors/$nnet_name +be_dir=exp/be/$nnet_name/$be_name +score_dir=exp/scores/$nnet_name +score_plda_dir=$score_dir/${be_name}/plda +score_cosine_dir=$score_dir/cosine +score_cosine_snorm_dir=$score_dir/cosine_snorm +score_cosine_qmf_dir=$score_dir/cosine_qmf + +if [ $stage -le 3 ];then + + echo "Eval Voxceleb 1 with Cosine scoring" + num_parts=8 + for((i=1;i<=$num_parts;i++)); + do + for((j=1;j<=$num_parts;j++)); + do + $train_cmd $score_cosine_dir/log/voxceleb1_${i}_${j}.log \ + hyp_utils/conda_env.sh \ + eval_cosine_scoring_backend.py \ + --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \ + --ndx-file data/voxceleb1_test/trials.csv \ + --enroll-map-file data/voxceleb1_test/enrollment.csv \ + --score-file $score_cosine_dir/voxceleb1_scores.csv \ + --enroll-part-idx $i --num-enroll-parts $num_parts \ + --test-part-idx $j --num-test-parts $num_parts & + done + done + wait + merge_scores.py --output-file $score_cosine_dir/voxceleb1_scores.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts + + $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ + eval_verification_metrics.py \ + --score-files $score_cosine_dir/voxceleb1_scores.csv \ + --key-files data/voxceleb1_test/trials_{o,e,h}.csv \ + --score-names voxceleb1 \ + --key-names O E H \ + --sparse \ + --output-file $score_cosine_dir/voxceleb1_results.csv + + cat $score_cosine_dir/voxceleb1_results.csv +fi + +if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then + echo "Eval voxsrc2 with Cosine scoring" + $train_cmd $score_cosine_dir/log/voxsrc22_dev.log \ + hyp_utils/conda_env.sh \ + eval_cosine_scoring_backend.py \ + 
--feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \ + --ndx-file data/voxsrc22_dev/trials.csv \ + --enroll-map-file data/voxsrc22_dev/enrollment.csv \ + --score-file $score_cosine_dir/voxsrc22_dev_scores.csv + + # $train_cmd $score_cosine_dir/log/voxsrc22_eval.log \ + # hyp_utils/conda_env.sh \ + # eval_cosine_scoring_backend.py \ + # --feats-file csv:$xvector_dir/voxsrc22_eval/xvector.csv \ + # --ndx-file data/voxsrc22_eval/trials.csv \ + # --enroll-map-file data/voxsrc22_eval/enrollment.csv \ + # --score-file $score_cosine_dir/voxsrc22_eval_scores.csv + + $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxsrc22_dev.log \ + eval_verification_metrics.py \ + --score-files $score_cosine_dir/voxsrc22_dev_scores.csv \ + --key-files data/voxsrc22_dev/trials.csv \ + --score-names voxsrc22_dev \ + --key-names all \ + --output-file $score_cosine_dir/voxsrc22_dev_results.csv + + cat $score_cosine_dir/voxsrc22_dev_results.csv + +fi + +if [ "$do_snorm" == "true" ];then + if [ $stage -le 5 ];then + echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm" + num_parts=16 + for((i=1;i<=$num_parts;i++)); + do + for((j=1;j<=$num_parts;j++)); + do + $train_cmd --mem 22G $score_cosine_snorm_dir/log/voxceleb1_${i}_${j}.log \ + hyp_utils/conda_env.sh \ + eval_cosine_scoring_backend.py \ + --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \ + --ndx-file data/voxceleb1_test/trials.csv \ + --enroll-map-file data/voxceleb1_test/enrollment.csv \ + --score-file $score_cosine_snorm_dir/voxceleb1_scores.csv \ + --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \ + --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --cohort-nbest 1000 --avg-cohort-by speaker \ + --enroll-part-idx $i --num-enroll-parts $num_parts \ + --test-part-idx $j --num-test-parts $num_parts & + done + sleep 5s + done + wait + merge_scores.py --output-file $score_cosine_snorm_dir/voxceleb1_scores.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts + + $train_cmd --mem 12G --num-threads 6 $score_cosine_snorm_dir/log/score_voxceleb1.log \ + eval_verification_metrics.py \ + --score-files $score_cosine_snorm_dir/voxceleb1_scores.csv \ + --key-files data/voxceleb1_test/trials_{o,e,h}.csv \ + --score-names voxceleb1 \ + --key-names O E H \ + --sparse \ + --output-file $score_cosine_snorm_dir/voxceleb1_results.csv + + cat $score_cosine_snorm_dir/voxceleb1_results.csv + fi + + if [ $stage -le 6 ] && [ "$do_voxsrc22" == "true" ];then + echo "Eval voxsrc2 with Cosine scoring + AS-Norm" + num_parts=16 + for((i=1;i<=$num_parts;i++)); + do + for((j=1;j<=$num_parts;j++)); + do + $train_cmd $score_cosine_snorm_dir/log/voxsrc22_dev_${i}_${j}.log \ + hyp_utils/conda_env.sh \ + eval_cosine_scoring_backend.py \ + --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \ + --ndx-file data/voxsrc22_dev/trials.csv \ + --enroll-map-file data/voxsrc22_dev/enrollment.csv \ + --score-file $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \ + --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \ + --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --cohort-nbest 1000 --avg-cohort-by speaker \ + --enroll-part-idx $i --num-enroll-parts $num_parts \ + --test-part-idx $j --num-test-parts $num_parts & + sleep 5s + done + sleep 10s + done + wait + merge_scores.py --output-file $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts + + $train_cmd --mem 12G --num-threads 6 
$score_cosine_snorm_dir/log/score_voxsrc22_dev.log \ + eval_verification_metrics.py \ + --score-files $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \ + --key-files data/voxsrc22_dev/trials.csv \ + --score-names voxsrc22_dev \ + --key-names all \ + --output-file $score_cosine_snorm_dir/voxsrc22_dev_results.csv + + cat $score_cosine_snorm_dir/voxsrc22_dev_results.csv + + fi + +fi + +if [ "$do_qmf" == "true" ];then + if [ $stage -le 7 ];then + echo "Train QMF in Vox2" + echo "...Calculating quality measures for Vox2" + num_parts=8 + for((i=1;i<=$num_parts;i++)); + do + for((j=1;j<=$num_parts;j++)); + do + $train_cmd $score_cosine_qmf_dir/log/voxceleb2_trials_${i}_${j}.log \ + hyp_utils/conda_env.sh \ + eval_cosine_scoring_backend_with_qmf.py \ + --feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --ndx-file data/voxceleb2cat_train_trials/trials.csv \ + --enroll-map-file data/voxceleb2cat_train_trials/enrollments.csv \ + --score-file $score_cosine_qmf_dir/voxceleb2_scores.csv \ + --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \ + --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --cohort-nbest 1000 --avg-cohort-by speaker \ + --enroll-part-idx $i --num-enroll-parts $num_parts \ + --test-part-idx $j --num-test-parts $num_parts & + done + sleep 5s + done + wait + merge_scores.py --output-file $score_cosine_qmf_dir/voxceleb2_scores.snorm.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts + + train_qmf.py --score-file $score_cosine_qmf_dir/voxceleb2_scores.snorm.csv \ + --key-file data/voxceleb2cat_train_trials/trials.csv \ + --model-file $score_cosine_qmf_dir/qmf.h5 + + fi + + if [ $stage -le 8 ];then + echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm + QMF" + num_parts=16 + for((i=1;i<=$num_parts;i++)); + do + for((j=1;j<=$num_parts;j++)); + do + $train_cmd --mem 22G $score_cosine_qmf_dir/log/voxceleb1_${i}_${j}.log \ + hyp_utils/conda_env.sh \ + eval_cosine_scoring_backend_with_qmf.py \ + --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \ + --ndx-file data/voxceleb1_test/trials.csv \ + --enroll-map-file data/voxceleb1_test/enrollment.csv \ + --score-file $score_cosine_qmf_dir/voxceleb1_scores.csv \ + --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \ + --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --cohort-nbest 1000 --avg-cohort-by speaker \ + --qmf-file $score_cosine_qmf_dir/qmf.h5 \ + --enroll-part-idx $i --num-enroll-parts $num_parts \ + --test-part-idx $j --num-test-parts $num_parts & + done + sleep 5s + done + wait + for suffix in "" .snorm .snorm.qmf + do + ( + merge_scores.py --output-file $score_cosine_qmf_dir/voxceleb1_scores$suffix.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts + + $train_cmd --mem 12G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1$suffix.log \ + eval_verification_metrics.py \ + --score-files $score_cosine_qmf_dir/voxceleb1_scores$suffix.csv \ + --key-files data/voxceleb1_test/trials_{o,e,h}.csv \ + --score-names voxceleb1 \ + --key-names O E H \ + --sparse \ + --output-file $score_cosine_qmf_dir/voxceleb1_results$suffix.csv + + echo "$score_cosine_qmf_dir/voxceleb1_results$suffix.csv:" + cat $score_cosine_qmf_dir/voxceleb1_results$suffix.csv + ) & + done + wait + fi + + if [ $stage -le 9 ] && [ "$do_voxsrc22" == "true" ];then + echo "Eval voxsrc2 with Cosine scoring + QMF" + num_parts=16 + for((i=1;i<=$num_parts;i++)); + do + for((j=1;j<=$num_parts;j++)); + do + $train_cmd 
$score_cosine_qmf_dir/log/voxsrc22_dev_${i}_${j}.log \ + hyp_utils/conda_env.sh \ + eval_cosine_scoring_backend_with_qmf.py \ + --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \ + --ndx-file data/voxsrc22_dev/trials.csv \ + --enroll-map-file data/voxsrc22_dev/enrollment.csv \ + --score-file $score_cosine_qmf_dir/voxsrc22_dev_scores.csv \ + --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \ + --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --cohort-nbest 1000 --avg-cohort-by speaker \ + --qmf-file $score_cosine_qmf_dir/qmf.h5 \ + --enroll-part-idx $i --num-enroll-parts $num_parts \ + --test-part-idx $j --num-test-parts $num_parts & + sleep 5s + done + sleep 10s + done + wait + for suffix in "" .snorm .snorm.qmf + do + ( + merge_scores.py --output-file $score_cosine_qmf_dir/voxsrc22_dev_scores$suffix.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts + + $train_cmd --mem 12G --num-threads 6 $score_cosine_qmf_dir/log/score_voxsrc22_dev$suffix.log \ + eval_verification_metrics.py \ + --score-files $score_cosine_qmf_dir/voxsrc22_dev_scores$suffix.csv \ + --key-files data/voxsrc22_dev/trials.csv \ + --score-names voxsrc22_dev \ + --key-names all \ + --output-file $score_cosine_qmf_dir/voxsrc22_dev_results$suffix.csv + + echo "$score_cosine_qmf_dir/voxsrc22_dev_results$suffix.csv:" + cat $score_cosine_qmf_dir/voxsrc22_dev_results$suffix.csv + ) & + done + wait + fi + +fi + diff --git a/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py b/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py index f567dd81..0333669f 100755 --- a/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py +++ b/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py @@ -30,6 +30,7 @@ from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.np.transforms import TransformList from hyperion.np.score_norm import AdaptSNorm +from hyperion.np.classifiers import BinaryLogisticRegression as LR def get_precomp_qm_names(quality_measures): @@ -38,7 +39,6 @@ def get_precomp_qm_names(quality_measures): def normalize_duration(q, min_dur, max_dur, frame_rate): - q = q / frame_rate q = np.log(np.clip(q / frame_rate, a_min=min_dur, a_max=max_dur)) log_min_dur = np.log(min_dur) @@ -99,6 +99,9 @@ def load_trial_data( test_segments.add_columns(test_feats_set) if enroll_feats_set != test_feats_set or enroll_segments != test_segments: enroll_segments.add_columns(enroll_feats_set) + else: + test_segments = test_feats_set + enroll_segments = enroll_feats_set # now we retrive the quality measures q_e = [] @@ -132,7 +135,6 @@ def load_trial_data( def load_cohort_data(segments_file, feats_file): - segments = SegmentSet.load(segments_file) feats_reader = DRF.create(feats_file) x = feats_reader.read(segments["id"], squeeze=True) @@ -160,16 +162,13 @@ def get_score_filepath( test_part_idx, num_test_parts, ): - score_file = Path(score_file) new_suffix = "" if score_name is not None: new_suffix = f".{score_name}" if num_enroll_parts > 1 or num_test_parts > 1: - new_suffix = ( - f"{new_suffix}.{enroll_part_idx}.{test_part_idx}{score_file.suffix}" - ) + new_suffix = f"{new_suffix}.{enroll_part_idx}.{test_part_idx}" if new_suffix: new_suffix = f"{new_suffix}{score_file.suffix}" @@ -177,25 +176,58 @@ def get_score_filepath( return score_file -def save_scores(ndx, scores, score_file, score_name, enroll_part_idx, + +def save_scores( + ndx, + scores, + score_file, + score_name, + q_measures, + enroll_part_idx, num_enroll_parts, test_part_idx, - num_test_parts): + num_test_parts, 
+): + score_file = get_score_filepath( + score_file, + score_name, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + ) + logging.info("saving scores with to %s", score_file) + scores = TrialScores( + ndx.model_set, ndx.seg_set, scores, ndx.trial_mask, q_measures=q_measures + ) + scores.save(score_file) + -def save_empty_scores(ndx, score_file, score_name, enroll_part_idx, +def save_empty_scores( + ndx, + score_file, + score_name, + q_measures, + enroll_part_idx, num_enroll_parts, test_part_idx, - num_test_parts): + num_test_parts, +): scores = np.zeros(ndx.trial_mask.shape, dtype="float32") - score_file = get_score_filepath(score_file, score_name,enroll_part_idx, - num_enroll_parts, - test_part_idx, - num_test_parts) - - scores = TrialScores(ndx.model_set, ndx.seg_set, scores, ndx.trial_mask) - scores.save(score_file) - + if q_measures is not None: + q_measures = {k: scores for k in q_measures} + save_scores( + ndx, + scores, + score_file, + score_name, + q_measures, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + ) def segment_to_trial_qm(q_e, q_t): @@ -226,31 +258,29 @@ def align_scores_to_ndx(enroll_set, ndx, scores, scores_norm, q_trial): return scores, scores_norm, q_trial -def make_qm_table(ndx, scores, scores_norm, q_trial): - if scores_norm is None: - scores = scores[ndx.trial_mask] - else: - scores = scores_norm[ndx.trial_mask] - - for qm in q_trial: - q_trial[qm] = q_trial[qm][ndx.trial_mask] +# def make_qm_table(ndx, scores, scores_norm, q_trial): +# if scores_norm is None: +# scores = scores[ndx.trial_mask] +# else: +# scores = scores_norm[ndx.trial_mask] - I, J = np.nonzero(ndx.trial_mask) - modelid = ndx.model_set[I] - segmentid = ndx.seg_set[J] - unique_id = [f"{a}-{b}" for a, b in zip(modelid, segmentid)] - - q_dict = { - "id": unique_id, - "modelid": modelid, - "segmentid": segmentid, - "scores": scores, - } - q_dict.update(q_trial) - df = pd.DataFrame(q_dict) - return InfoTable(df) +# for qm in q_trial: +# q_trial[qm] = q_trial[qm][ndx.trial_mask] +# I, J = np.nonzero(ndx.trial_mask) +# modelid = ndx.model_set[I] +# segmentid = ndx.seg_set[J] +# unique_id = [f"{a}-{b}" for a, b in zip(modelid, segmentid)] +# q_dict = { +# "id": unique_id, +# "modelid": modelid, +# "segmentid": segmentid, +# "scores": scores, +# } +# q_dict.update(q_trial) +# df = pd.DataFrame(q_dict) +# return InfoTable(df) def eval_backend( @@ -276,7 +306,6 @@ def eval_backend( test_part_idx, num_test_parts, ): - logging.info("loading data") enroll_map, ndx, x_e, x_t, q_e, q_t = load_trial_data( enroll_map_file, @@ -297,8 +326,43 @@ def eval_backend( if not np.any(ndx.trial_mask): # this part doesn't have any trials, save empty files - - + if qmf_file is not None: + quality_measures = None + save_empty_scores( + ndx, + score_file, + "snorm.qmf" if cohort_segments_file is not None else "qmf", + quality_measures, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + ) + + save_empty_scores( + ndx, + score_file, + None, + quality_measures, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + ) + + if cohort_segments_file is not None: + save_empty_scores( + ndx, + score_file, + "snorm", + quality_measures, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + ) + return + enroll_set, enroll_ids = np.unique(enroll_map["id"], return_inverse=True) q_e = average_qm(q_e, enroll_set, enroll_ids) @@ -362,46 +426,123 @@ def eval_backend( enroll_set, ndx, scores, scores_norm, q_trial ) if qmf_file 
is None: - qm_table = make_qm_table(ndx, scores, scores_norm, q_trial) - qm_file = get_score_filepath( + save_scores( + ndx, + scores, score_file, - "qm", + None, + q_trial, enroll_part_idx, num_enroll_parts, test_part_idx, num_test_parts, ) - qm_table.save(qm_file) + + if scores_norm is not None: + save_scores( + ndx, + scores_norm, + score_file, + "snorm", + q_trial, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + ) + # qm_table = make_qm_table(ndx, scores, scores_norm, q_trial) + # qm_file = get_score_filepath( + # score_file, + # "qm", + # enroll_part_idx, + # num_enroll_parts, + # test_part_idx, + # num_test_parts, + # ) + # qm_table.save(qm_file) return - score_file_nonorm = get_score_filepath( + save_scores( + ndx, + scores, score_file, None, + None, enroll_part_idx, num_enroll_parts, test_part_idx, num_test_parts, ) - logging.info("saving scores to %s", score_file_nonorm) - scores = TrialScores(ndx.model_set, ndx.seg_set, scores, ndx.trial_mask) - scores.save(score_file_nonorm) if scores_norm is not None: - score_file_snorm = get_score_filepath( + save_scores( + ndx, + scores_norm, score_file, "snorm", + None, enroll_part_idx, num_enroll_parts, test_part_idx, num_test_parts, ) - logging.info("saving scores with AS-Norm to %s", score_file_snorm) - scores.scores = scores_norm - scores.save(score_file_snorm) + logging.info("applying qmf") + if scores_norm is None: + score_name = "qmf" + scores_fus = [scores.ravel()] + else: + score_name = "snorm.qmf" + scores_fus = [scores_norm.ravel()] + + q_names = list(q_trial.keys()) + q_names.sort() + for q_name in q_names: + scores_fus.append(q_trial[q_name].ravel()) + + scores_fus = np.vstack(scores_fus).T + lr = LR.load(qmf_file) + scores_fus = lr.predict(scores_fus) + scores_fus = np.reshape(scores_fus, (ndx.num_models, ndx.num_tests)) + save_scores( + ndx, + scores_fus, + score_file, + score_name, + None, + enroll_part_idx, + num_enroll_parts, + test_part_idx, + num_test_parts, + ) -if __name__ == "__main__": + # score_file_nonorm = get_score_filepath( + # score_file, + # None, + # enroll_part_idx, + # num_enroll_parts, + # test_part_idx, + # num_test_parts, + # ) + # logging.info("saving scores to %s", score_file_nonorm) + # scores = TrialScores(ndx.model_set, ndx.seg_set, scores, ndx.trial_mask) + # scores.save(score_file_nonorm) + + # if scores_norm is not None: + # score_file_snorm = get_score_filepath( + # score_file, + # "snorm", + # enroll_part_idx, + # num_enroll_parts, + # test_part_idx, + # num_test_parts, + # ) + # logging.info("saving scores with AS-Norm to %s", score_file_snorm) + # scores.scores = scores_norm + # scores.save(score_file_snorm) + +if __name__ == "__main__": parser = ArgumentParser( description="Eval cosine-scoring with optional AS-Norm and QMF" ) diff --git a/hyperion/bin/merge_scores.py b/hyperion/bin/merge_scores.py index 6a275f5c..cb8524b7 100755 --- a/hyperion/bin/merge_scores.py +++ b/hyperion/bin/merge_scores.py @@ -18,14 +18,19 @@ def merge_scores(input_files, output_file, num_enroll_parts, num_test_parts, base_idx): - output_file = Path(output_file) output_file.parent.mkdir(exist_ok=True, parents=True) ext = output_file.suffix if input_files is None: - input_file_base = output_file.with_suffix("") + if ext in [".h5", ".csv", ".tsv"]: + input_file_base = output_file + else: + input_file_base = output_file.parent / (output_file.name + ".txt") + ext = "" + + logging.info("merging %s* -> %s", input_file_base.with_suffix(""), output_file) input_files = [] for i in 
range(num_enroll_parts): idx_i = base_idx + i @@ -33,6 +38,8 @@ def merge_scores(input_files, output_file, num_enroll_parts, num_test_parts, bas idx_j = base_idx + j input_file_i = input_file_base.with_suffix(f".{idx_i}.{idx_j}{ext}") input_files.append(input_file_i) + else: + logging.info("merging %s -> %s", " + ".join(input_files), output_file) if ext == ".h5": # if files are h5 we need to load everything in RAM @@ -57,7 +64,6 @@ def merge_scores(input_files, output_file, num_enroll_parts, num_test_parts, bas if __name__ == "__main__": - parser = ArgumentParser(description="Tool to manipulates the Hyperion data tables") parser.add_argument("--cfg", action=ActionConfigFile) parser.add_argument( @@ -88,7 +94,12 @@ def merge_scores(input_files, output_file, num_enroll_parts, num_test_parts, bas help="""index of the first job, typically 0 or 1""", ) parser.add_argument( - "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int, + "-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int, ) args = parser.parse_args() diff --git a/hyperion/bin/train_qmf.py b/hyperion/bin/train_qmf.py new file mode 100755 index 00000000..a97e8a5f --- /dev/null +++ b/hyperion/bin/train_qmf.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + + Trains calibration for SRE18 tel condition +""" + +import sys +import os +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, +) +import time +import logging +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import float_cpu, config_logger +from hyperion.utils.trial_scores import TrialScores +from hyperion.utils.trial_key import TrialKey +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.np.classifiers import BinaryLogisticRegression as LR + + +def print_q_stats(scr, q_names): + for k in q_names: + q_vec = scr.q_measures[k][scr.score_mask] + s = f"{k} stats mean={np.mean(q_vec)} min={np.min(q_vec)} max={np.max(q_vec)} median={np.median(q_vec)}" + logging.info(s) + + +def train_qmf( + score_file, key_file, model_file, prior, lambda_reg, quality_measures, verbose +): + logging.info("load key: %s", key_file) + key = TrialKey.load(key_file) + logging.info("load scores: %s", score_file) + scr = TrialScores.load(score_file) + tar, non = scr.get_tar_non(key) + ntar = len(tar) + nnon = len(non) + + if quality_measures is None: + quality_measures = list(scr.q_measures.keys()) + quality_measures.sort() + + print_q_stats(scr, quality_measures) + q_tar, q_non = scr.get_tar_non_q_measures(key, quality_measures) + + min_dcf, p_miss, p_fa = compute_min_dcf(tar, non, prior) + n_miss = p_miss * ntar + n_fa = p_fa * nnon + logging.info( + "min_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f", + min_dcf, + p_miss * 100, + p_fa * 100, + n_miss, + n_fa, + ) + + logging.info("train calibration") + # tar = np.vstack((tar, maxnf_tar, minnf_tar, maxcohmu_tar, mincohmu_tar)).T + # non = np.vstack((non, maxnf_non, minnf_non, maxcohmu_non, mincohmu_non)).T + tar = np.hstack((tar[:, None], q_tar)) + non = np.hstack((non[:, None], q_non)) + + x = np.vstack((tar, non)) + y = np.concatenate( + (np.ones((ntar,), dtype="int32"), np.zeros((nnon,), dtype="int32")) + ) + lr = LR( + prior=prior, + lambda_reg=lambda_reg, + bias_scaling=1, + solver="liblinear", + verbose=verbose, + ) + lr.fit(x, y) + logging.info(f"A={lr.A} 
b={lr.b}") + logging.info("save calibration at %s", model_file) + lr.save(model_file) + + logging.info("calibrate scores") + tar_cal = lr.predict(tar) + non_cal = lr.predict(non) + act_dcf, p_miss, p_fa = compute_act_dcf(tar_cal, non_cal, prior) + n_miss = p_miss * ntar + n_fa = p_fa * nnon + logging.info( + "act_dcf: %.3f p_miss: %.2f p_fa: %.2f n_miss: %.1f n_fa: %.1f", + act_dcf, + p_miss * 100, + p_fa * 100, + n_miss, + n_fa, + ) + + score_file = Path(score_file) + output_file = score_file.with_suffix(f".qmf{score_file.suffix}") + scr_out = TrialScores(key.model_set, key.seg_set) + scr_out.scores[key.tar] = tar_cal + scr_out.scores[key.non] = non_cal + scr_out.score_mask = np.logical_or(key.tar, key.non) + scr_out.save(output_file) + + +if __name__ == "__main__": + parser = ArgumentParser(description="Trains QMF calibration") + + parser.add_argument("--score-file", required=True) + parser.add_argument("--key-file", required=True) + parser.add_argument("--model-file", required=True) + parser.add_argument("--prior", type=float, default=0.01) + parser.add_argument("--lambda-reg", type=float, default=1e-5) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + parser.add_argument( + "--quality-measures", + default=None, + nargs="+", + choices=["snorm-mu/s", "snorm-mu", "speech_duration", "num_speech_frames"], + ) + + args = parser.parse_args() + config_logger(args.verbose) + logging.debug(args) + + train_qmf(**namespace_to_dict(args)) diff --git a/hyperion/np/classifiers/logistic_regression.py b/hyperion/np/classifiers/logistic_regression.py index 4c4c0cfc..03d9fd13 100644 --- a/hyperion/np/classifiers/logistic_regression.py +++ b/hyperion/np/classifiers/logistic_regression.py @@ -93,7 +93,8 @@ def __init__( super().__init__(**kwargs) if random_state is None: - random_state = np.random.default_rng(seed=lr_seed) + # random_state = np.random.default_rng(seed=lr_seed) + random_state = np.random.RandomState(seed=lr_seed) if bias_scaling is None: if use_bias and solver == "liblinear": diff --git a/hyperion/torch/utils/misc.py b/hyperion/torch/utils/misc.py index b2a3810f..46c09080 100644 --- a/hyperion/torch/utils/misc.py +++ b/hyperion/torch/utils/misc.py @@ -4,8 +4,8 @@ """ import torch -import torch.cuda.amp as amp import torch.nn as nn +import torch.cuda.amp as amp def l2_norm(x, dim=1, axis=None): @@ -104,3 +104,5 @@ def get_selfsim_tarnon(y, return_mask=False): mask = torch.triu(torch.ones_like(y_bin, dtype=torch.bool), diagonal=1) return y_bin, mask + + diff --git a/hyperion/utils/trial_scores.py b/hyperion/utils/trial_scores.py index 9e7fcd5d..4a5e59da 100644 --- a/hyperion/utils/trial_scores.py +++ b/hyperion/utils/trial_scores.py @@ -14,7 +14,7 @@ from ..hyp_defs import float_cpu # from .list_utils import * -from .list_utils import sort, intersect, ismember, split_list, list2ndarray +from .list_utils import intersect, ismember, list2ndarray, sort, split_list from .trial_key import TrialKey from .trial_ndx import TrialNdx @@ -28,13 +28,22 @@ class TrialScores(object): seg_set: List of test segment names. scores: Matrix with the scores (num_models x num_segments). score_mask: Boolean matrix with the trials with valid scores to True (num_models x num_segments). 
+ q_measures: optional dictionary of quality measure matrices """ - def __init__(self, model_set=None, seg_set=None, scores=None, score_mask=None): + def __init__( + self, + model_set=None, + seg_set=None, + scores=None, + score_mask=None, + q_measures=None, + ): self.model_set = model_set self.seg_set = seg_set self.scores = scores self.score_mask = score_mask + self.q_measures = q_measures if (model_set is not None) and (seg_set is not None): self.validate() @@ -57,6 +66,9 @@ def sort(self): ix = np.ix_(m_idx, s_idx) self.scores = self.scores[ix] self.score_mask = self.score_mask[ix] + if self.q_measures is not None: + for k in self.q_measures.keys(): + self.q_measures[k] = self.q_measures[k][ix] def save(self, file_path, sep=None): """Saves object to txt/h5 file. @@ -86,6 +98,10 @@ def save_h5(self, file_path): f.create_dataset("ID/column_ids", data=seg_set) f.create_dataset("scores", data=self.scores) f.create_dataset("score_mask", data=self.score_mask.astype("uint8")) + if self.q_measures is not None: + q_grp = f.create_group("q_measures") + for k, v in self.q_measures.items(): + q_grp.create_dataset(k, data=v) def save_txt(self, file_path): """Saves object to txt file. @@ -105,6 +121,9 @@ def save_txt(self, file_path): ) ) + if self.q_measures is not None: + logging.warning("q_measures cannot be saved to txt file") + def save_table(self, file_path, sep=None): """Saves object to pandas table file. @@ -116,12 +135,20 @@ def save_table(self, file_path, sep=None): if sep is None: sep = "\t" if ".tsv" in ext else "," + q_str = "" + if self.q_measures is not None: + q_str = sep + sep.join(self.q_measures.keys()) + with open(file_path, "w", encoding="utf-8") as f: - f.write(f"modelid{sep}segmentid{sep}LLR\n") + f.write(f"modelid{sep}segmentid{sep}LLR{q_str}\n") I, J = self.score_mask.nonzero() for i, j in zip(I, J): + if self.q_measures is not None: + q_str = sep + sep.join( + [str(v[i, j]) for k, v in self.q_measures.items()] + ) f.write( - f"{self.model_set[i]}{sep}{self.seg_set[j]}{sep}{self.scores[i,j]}\n" + f"{self.model_set[i]}{sep}{self.seg_set[j]}{sep}{self.scores[i,j]}{q_str}\n" ) @classmethod @@ -158,7 +185,12 @@ def load_h5(cls, file_path): seg_set = [t.decode("utf-8") for t in f["ID/column_ids"]] scores = np.asarray(f["scores"], dtype=float_cpu()) score_mask = np.asarray(f["score_mask"], dtype="bool") - return cls(model_set, seg_set, scores, score_mask) + if "q_measures" in f: + q_grp = f["q_measures"] + q_measures = {k: np.asarray(q_grp[k]) for k in q_grp} + else: + q_measures = None + return cls(model_set, seg_set, scores, score_mask, q_measures) @classmethod def load_txt(cls, file_path): @@ -217,7 +249,21 @@ def load_table(cls, file_path, sep=None): score_mask[i, j] = True scores[i, j] = score - return cls(model_set, seg_set, scores, score_mask) + if len(df.columns) > 3: + q_names = df.columns[3:] + q_vals = df.iloc[:, 3:].values + q_measures = {} + for q_name in q_names: + q_measures[q_name] = np.zeros(scores.shape, dtype=float_cpu()) + + for i, j, q_row in zip(model_idx, seg_idx, q_vals): + for col, q_name in enumerate(q_names): + q_measures[q_name][i, j] = q_row[col] + + else: + q_measures = None + + return cls(model_set, seg_set, scores, score_mask, q_measures) @classmethod def merge(cls, scr_list): @@ -234,6 +280,7 @@ def merge(cls, scr_list): seg_set = scr_list[0].seg_set scores = scr_list[0].scores score_mask = scr_list[0].score_mask + q_measures = scr_list[0].q_measures for i in range(1, num_scr): scr_i = scr_list[i] new_model_set = np.union1d(model_set, scr_i.model_set) @@ 
-252,6 +299,10 @@ def merge(cls, scr_list): scores_1[ix_a] = scores[ix_b] score_mask_1 = np.zeros(shape, dtype="bool") score_mask_1[ix_a] = score_mask[ix_b] + if q_measures is not None: + q_measures_1 = {k: np.zeros(shape) for k in q_measures.keys()} + for k in q_measures.keys(): + q_measures_1[k][ix_a] = q_measures[k][ix_b] trial_mask_2 = np.zeros( (len(new_model_set), len(new_seg_set)), dtype="bool" @@ -268,14 +319,21 @@ def merge(cls, scr_list): scores_2[ix_a] = scr_i.scores[ix_b] score_mask_2 = np.zeros(shape, dtype="bool") score_mask_2[ix_a] = scr_i.score_mask[ix_b] + if q_measures is not None: + q_measures_2 = {k: np.zeros(shape) for k in q_measures.keys()} + for k in q_measures.keys(): + q_measures_2[k][ix_a] = scr_i.q_measures[k][ix_b] model_set = new_model_set seg_set = new_seg_set scores = scores_1 + scores_2 assert not (np.any(np.logical_and(score_mask_1, score_mask_2))) score_mask = np.logical_or(score_mask_1, score_mask_2) + if q_measures is not None: + for k in q_measures.keys(): + q_measures[k] = q_measures_1[k] + q_measures_2[k] - return cls(model_set, seg_set, scores, score_mask) + return cls(model_set, seg_set, scores, score_mask, q_measures) def filter(self, model_set, seg_set, keep=True, raise_missing=True): """Removes elements from TrialScores object. @@ -297,13 +355,17 @@ def filter(self, model_set, seg_set, keep=True, raise_missing=True): f_mod, mod_idx = ismember(model_set, self.model_set) f_seg, seg_idx = ismember(seg_set, self.seg_set) - + q_measures = None if np.all(f_mod) and np.all(f_seg): model_set = self.model_set[mod_idx] seg_set = self.seg_set[seg_idx] ix = np.ix_(mod_idx, seg_idx) scores = self.scores[ix] score_mask = self.score_mask[ix] + if self.q_measures is not None: + q_measures = {} + for k in self.q_measures.keys(): + q_measures[k] = self.q_measures[k][ix] else: for i in (f_mod == 0).nonzero()[0]: logging.info("model %s not found", model_set[i]) @@ -318,8 +380,13 @@ def filter(self, model_set, seg_set, keep=True, raise_missing=True): ix2 = np.ix_(mod_idx[f_mod], seg_idx[f_seg]) scores[ix1] = self.scores[ix2] score_mask[ix1] = self.score_mask[ix2] + if self.q_measures is not None: + q_measures = {} + for k in self.q_measures.keys(): + q_measures[k] = np.zeros(scores.shape, dtype=float_cpu()) + q_measures[k][ix1] = self.q_measures[k][ix2] - return TrialScores(model_set, seg_set, scores, score_mask) + return TrialScores(model_set, seg_set, scores, score_mask, q_measures) def split(self, model_idx, num_model_parts, seg_idx, num_seg_parts): """Splits the TrialScores into num_model_parts x num_seg_parts and returns part @@ -340,7 +407,13 @@ def split(self, model_idx, num_model_parts, seg_idx, num_seg_parts): ix = np.ix_(model_idx1, seg_idx1) scores = self.scores[ix] score_mask = self.score_mask[ix] - return TrialScores(model_set, seg_set, scores, score_mask) + q_measures = None + if self.q_measures is not None: + q_measures = {} + for k in self.q_measures.keys(): + q_measures[k] = self.q_measures[k][ix] + + return TrialScores(model_set, seg_set, scores, score_mask, q_measures) def validate(self): """Validates the attributes of the TrialScores object.""" @@ -362,6 +435,10 @@ def validate(self): else: assert self.score_mask.shape == (len(self.model_set), len(self.seg_set)) + if self.q_measures is not None: + for k in self.q_measures.keys(): + assert self.q_measures[k].shape == self.scores.shape + def align_with_ndx(self, ndx, raise_missing=True): """Aligns scores, model_set and seg_set with TrialNdx or TrialKey. 
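The q_measures plumbing above is what lets train_qmf.py stack each trial's LLR with its quality measures and learn a linear fusion; a hedged sketch of that idea on synthetic scores, with scikit-learn standing in for hyperion's prior-weighted BinaryLogisticRegression:

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.default_rng(0)
    # columns: [LLR, speech_duration]; synthetic target and non-target trials
    tar = np.column_stack([rng.normal(3.0, 1.0, 500), rng.uniform(2, 30, 500)])
    non = np.column_stack([rng.normal(0.0, 1.0, 5000), rng.uniform(2, 30, 5000)])
    x = np.vstack([tar, non])
    y = np.concatenate([np.ones(len(tar), dtype=int), np.zeros(len(non), dtype=int)])

    lr = LogisticRegression(solver="liblinear").fit(x, y)
    # calibrated score = A @ [llr, qm] + b, the A and b that train_qmf.py logs
    cal = x @ lr.coef_.ravel() + lr.intercept_
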
@@ -412,6 +489,34 @@ def get_tar_non(self, key): non = scr.scores[non_mask] return tar, non + def get_tar_non_q_measures(self, key, q_names=None, return_dict=False): + """Returns target and non target scores. + + Args: + key: TrialKey object. + q_names: names of quality measures to return, if None it will return all + + Returns: + Numpy array with target scores. + Numpy array with non-target scores. + """ + scr = self.align_with_ndx(key) + tar_mask = np.logical_and(scr.score_mask, key.tar) + if q_names is None: + q_names = self.q_measures.keys() + tar = {} + for k in q_names: + tar[k] = self.q_measures[k][tar_mask] + non_mask = np.logical_and(scr.score_mask, key.non) + non = {} + for k in q_names: + non[k] = self.q_measures[k][non_mask] + + if not return_dict: + tar = np.vstack(tuple(tar[k] for k in q_names)).T + non = np.vstack(tuple(non[k] for k in q_names)).T + return tar, non + def set_missing_to_value(self, ndx, val): """Aligns the scores with a TrialNdx and sets the trials with missing scores to the same value. @@ -450,6 +555,18 @@ def __eq__(self, other): eq = eq and np.all(self.seg_set == other.seg_set) eq = eq and np.all(np.isclose(self.scores, other.scores, atol=1e-5)) eq = eq and np.all(self.score_mask == other.score_mask) + if self.q_measures is not None: + eq = eq and other.q_measures is not None + if eq: + eq = self.q_measures.keys() == other.q_measures.keys() + if eq: + for k in self.q_measures.keys(): + eq = eq and np.all( + np.isclose( + self.q_measures[k], other.q_measures[k], atol=1e-5 + ) + ) + return eq def __ne__(self, other): @@ -463,7 +580,6 @@ def __cmp__(self, other): return 1 def test(key_file="core-core_det5_key.h5"): - key = TrialKey.load(key_file) mask = np.logical_or(key.tar, key.non) From 44f085a86b8c6e9206431cdfbb4f26954dfb4672 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Sun, 10 Sep 2023 11:16:43 -0400 Subject: [PATCH 107/154] introduce entry points --- README.md | 4 +- egs/voxceleb/v1.2/conf/reverb_noise_aug.yaml | 34 +++ egs/voxceleb/v1.2/run_001_prepare_data.sh | 26 +- egs/voxceleb/v1.2/run_002_compute_evad.sh | 16 +- .../v1.2/run_003_prepare_noises_rirs.sh | 102 +++---- .../v1.2/run_004_prepare_xvec_train_data.sh | 46 +-- egs/voxceleb/v1.2/run_005_train_xvector.sh | 4 +- egs/voxceleb/v1.2/run_006_extract_xvectors.sh | 16 +- egs/voxceleb/v1.2/run_007_eval_be.sh | 80 ++--- hyperion/bin/__init__.py | 0 hyperion/bin/adv_finetune_xvector_from_wav.py | 18 +- hyperion/bin/apply_mvn_select_frames.py | 36 ++- hyperion/bin/audio_to_duration.py | 17 +- hyperion/bin/compute_energy_vad.py | 17 +- hyperion/bin/compute_mfcc_feats.py | 21 +- hyperion/bin/copy_feats.py | 7 +- hyperion/bin/decode_wav2transducer.py | 20 +- hyperion/bin/decode_wav2vec2rnn_transducer.py | 92 +++--- hyperion/bin/eval_cosine_scoring_backend.py | 27 +- .../eval_cosine_scoring_backend_with_qmf.py | 38 +-- hyperion/bin/eval_verification_metrics.py | 25 +- ...l_xvec_cosine_scoring_from_adv_test_wav.py | 23 +- ...osine_scoring_from_adv_test_wav_wavegan.py | 26 +- ...l_xvec_cosine_scoring_from_art_test_wav.py | 27 +- .../eval_xvec_cosine_scoring_from_test_wav.py | 22 +- ...sine_scoring_from_transfer_adv_test_wav.py | 20 +- ...sine_scoring_from_transfer_art_test_wav.py | 27 +- hyperion/bin/eval_xvec_logits_from_wav.py | 28 +- hyperion/bin/extract_wav2vec2xvectors.py | 28 +- hyperion/bin/extract_wav2xvectors.py | 23 +- hyperion/bin/extract_xvectors_from_feats.py | 22 +- hyperion/bin/extract_xvectors_from_wav.py | 23 +- .../extract_xvectors_slidwin_from_feats.py | 34 ++- 
.../bin/extract_xvectors_slidwin_from_wav.py | 36 ++- hyperion/bin/finetune_wav2vec2transducer.py | 52 ++-- hyperion/bin/finetune_wav2vec2xvector.py | 26 +- hyperion/bin/finetune_wav2xvector.py | 22 +- .../bin/finetune_xvector_dfr_from_feats.py | 17 +- hyperion/bin/finetune_xvector_dfr_from_wav.py | 20 +- hyperion/bin/finetune_xvector_from_feats.py | 16 +- hyperion/bin/finetune_xvector_from_wav.py | 18 +- .../generate_adv_attacks_xvector_classif.py | 31 +- .../bin/generate_adv_attacks_xvector_verif.py | 18 +- hyperion/bin/hyperion_dataset.py | 62 ++-- hyperion/bin/hyperion_tables.py | 21 +- hyperion/bin/make_babble_noise_audio_files.py | 20 +- hyperion/bin/make_wav2xvector.py | 21 +- hyperion/bin/merge_scores.py | 12 +- hyperion/bin/pack_wav_rirs.py | 15 +- hyperion/bin/plot_embedding_tsne.py | 17 +- hyperion/bin/plot_embedding_tsne_per_class.py | 23 +- hyperion/bin/prepare_data.py | 11 +- hyperion/bin/preprocess_audio_files.py | 20 +- .../split_dataset_into_trials_and_cohort.py | 11 +- hyperion/bin/train_qmf.py | 28 +- hyperion/bin/train_wav2rnn_transducer.py | 77 ++--- hyperion/bin/train_wav2vec2rnn_transducer.py | 90 +++--- hyperion/bin/train_wav2vec2transducer.py | 79 ++--- hyperion/bin/train_wav2vec2xvector.py | 28 +- hyperion/bin/train_wav2xvector.py | 28 +- hyperion/bin/train_xvector_from_feats.py | 18 +- hyperion/bin/train_xvector_from_wav.py | 18 +- hyperion/io/__init__.py | 8 +- .../np/pdfs/mixtures/exp_family_mixture.py | 165 ---------- .../torch/lr_schedulers/red_lr_on_plateau.py | 6 +- hyperion/utils/queues.py | 287 ------------------ setup.py | 33 +- 67 files changed, 1110 insertions(+), 1193 deletions(-) create mode 100644 egs/voxceleb/v1.2/conf/reverb_noise_aug.yaml create mode 100644 hyperion/bin/__init__.py delete mode 100644 hyperion/utils/queues.py diff --git a/README.md b/README.md index 7132a031..4838157b 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,11 @@ The full API is described in the documentation page [https://hyperion-ml.readthe We use anaconda or miniconda, though you should be able to make it work in other python distributions To start, you should create a new enviroment and install PyTorch>=1.9, (older versions are not supported any longer) e.g.: ``` -conda create --name ${your_env} python=3.8 +conda create --name ${your_env} python=3.11 conda activate ${your_env} conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 cudatoolkit=10.2 -c pytorch +conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia ``` -In next Hyperion versions, we will upgrade to Pytorch>=1.9 and drop compatibility with older PyTorch versions. 
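[Annotation, not part of the patch] The setup.py hunk that actually registers the new commands is not shown in this excerpt; below is a minimal sketch of how such console entry points are typically declared with setuptools, assuming each bin script exposes a main() function as the refactors further down show. The command names are taken from the recipe changes in this patch and the module paths from the hyperion/bin layout in the diffstat, but the real stanza may differ.

    # setup.py (sketch)
    from setuptools import find_packages, setup

    setup(
        name="hyperion-ml",
        packages=find_packages(),
        entry_points={
            "console_scripts": [
                # command = module:function
                "hyperion-prepare-data = hyperion.bin.prepare_data:main",
                "hyperion-compute-energy-vad = hyperion.bin.compute_energy_vad:main",
                "hyperion-dataset = hyperion.bin.hyperion_dataset:main",
                "hyperion-tables = hyperion.bin.hyperion_tables:main",
                "hyperion-merge-scores = hyperion.bin.merge_scores:main",
            ],
        },
    )

After pip install (or pip install -e . for development), these commands are on the PATH, which is why the recipe scripts can invoke hyperion-compute-energy-vad and friends instead of the old *.py script names.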
### Installing Hyperion diff --git a/egs/voxceleb/v1.2/conf/reverb_noise_aug.yaml b/egs/voxceleb/v1.2/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..86f55073 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/reverb_noise_aug.yaml @@ -0,0 +1,34 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: csv:data/rirs_smallroom/rirs.csv + rir_norm: max + mediumroom: + weight: 1 + rir_path: csv:data/rirs_mediumroom/rirs.csv + rir_norm: max + realroom: + weight: 1 + rir_path: csv:data/rirs_real/rirs.csv + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/recordings.csv + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/recordings.csv + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/recordings.csv + min_snr: 3 + max_snr: 18 diff --git a/egs/voxceleb/v1.2/run_001_prepare_data.sh b/egs/voxceleb/v1.2/run_001_prepare_data.sh index aef70e96..563d3c2d 100755 --- a/egs/voxceleb/v1.2/run_001_prepare_data.sh +++ b/egs/voxceleb/v1.2/run_001_prepare_data.sh @@ -16,31 +16,31 @@ config_file=default_config.sh if [ $stage -le 1 ];then # Prepare the VoxCeleb2 dataset for training. - prepare_data.py voxceleb2 --subset dev --corpus-dir $voxceleb2_root \ - --cat-videos --use-kaldi-ids \ - --output-dir data/voxceleb2cat_train + hyperion-prepare-data voxceleb2 --subset dev --corpus-dir $voxceleb2_root \ + --cat-videos --use-kaldi-ids \ + --output-dir data/voxceleb2cat_train fi if [ $stage -le 2 ];then # prepare voxceleb1 for test - prepare_data.py voxceleb1 --task test --corpus-dir $voxceleb1_root \ - --use-kaldi-ids \ - --output-dir data/voxceleb1_test + hyperion-prepare-data voxceleb1 --task test --corpus-dir $voxceleb1_root \ + --use-kaldi-ids \ + --output-dir data/voxceleb1_test fi if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then - prepare_data.py voxsrc22 --subset dev --corpus-dir $voxsrc22_root \ - --vox1-corpus-dir $voxceleb1_root \ - --output-dir data/voxsrc22_dev + hyperion-prepare-data voxsrc22 --subset dev --corpus-dir $voxsrc22_root \ + --vox1-corpus-dir $voxceleb1_root \ + --output-dir data/voxsrc22_dev fi # if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then -# prepare_data.py voxsrc22 --subset test --corpus-dir $voxsrc22_root \ -# --vox1-corpus-dir $voxceleb1_root \ -# --output-dir data/voxsrc22_test + # hyperion-prepare-data voxsrc22 --subset test --corpus-dir $voxsrc22_root \ + # --vox1-corpus-dir $voxceleb1_root \ + # --output-dir data/voxsrc22_test # fi if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then # split vox2 into 2 parts, for cohort and qmf training - split_dataset_into_trials_and_cohort.py --data-dir data/voxceleb2cat_train + hyperion-split-dataset-into-trials-and-cohort --data-dir data/voxceleb2cat_train fi diff --git a/egs/voxceleb/v1.2/run_002_compute_evad.sh b/egs/voxceleb/v1.2/run_002_compute_evad.sh index e7593df2..acccace3 100755 --- a/egs/voxceleb/v1.2/run_002_compute_evad.sh +++ b/egs/voxceleb/v1.2/run_002_compute_evad.sh @@ -48,18 +48,18 @@ if [ $stage -le 2 ];then echo "compute vad for $name" $train_cmd JOB=1:$nj $vad_dir/$name/log/vad.JOB.log \ hyp_utils/conda_env.sh \ - compute_energy_vad.py --cfg $vad_config \ + hyperion-compute-energy-vad --cfg $vad_config \ --recordings-file data/$name/recordings.csv \ --output-spec ark,csv:$vad_dir/$name/vad.JOB.ark,$vad_dir/$name/vad.JOB.csv \ --part-idx JOB --num-parts $nj || exit 1 - hyperion_tables.py cat \ - 
--table-type features \ - --output-file $vad_dir/$name/vad.csv --num-tables $nj - hyperion_dataset.py add_features \ - --dataset data/$name \ - --features-name vad \ - --features-file $vad_dir/$name/vad.csv + hyperion-tables cat \ + --table-type features \ + --output-file $vad_dir/$name/vad.csv --num-tables $nj + hyperion-dataset add_features \ + --dataset data/$name \ + --features-name vad \ + --features-file $vad_dir/$name/vad.csv done fi diff --git a/egs/voxceleb/v1.2/run_003_prepare_noises_rirs.sh b/egs/voxceleb/v1.2/run_003_prepare_noises_rirs.sh index aed1dae4..73c7ed82 100755 --- a/egs/voxceleb/v1.2/run_003_prepare_noises_rirs.sh +++ b/egs/voxceleb/v1.2/run_003_prepare_noises_rirs.sh @@ -18,10 +18,10 @@ config_file=default_config.sh if [ $stage -le 1 ]; then for name in noise music speech do - prepare_data.py musan \ - --corpus-dir $musan_root \ - --subset $name \ - --output-dir data/musan_$name + hyperion-prepare-data musan \ + --corpus-dir $musan_root \ + --subset $name \ + --output-dir data/musan_$name done fi @@ -37,66 +37,66 @@ if [ $stage -le 2 ]; then output_dir=exp/proc_audio/$name $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${name}.JOB.log \ hyp_utils/conda_env.sh \ - preprocess_audio_files.py \ + hyperion-preprocess-audio-files \ --audio-format flac \ --part-idx JOB --num-parts $nj \ --recordings-file $input_data_dir/recordings.csv \ --output-path $output_dir \ --output-recordings-file $output_dir/recordings.JOB.csv - - hyperion_tables.py cat \ - --table-type recordings \ - --output-file $output_dir/recordings.csv --num-tables $nj - hyperion_dataset.py set_recordings \ - --dataset $input_data_dir \ - --recordings-file $output_dir/recordings.csv \ - --output-dataset $output_data_dir - + + hyperion-tables cat \ + --table-type recordings \ + --output-file $output_dir/recordings.csv --num-tables $nj + hyperion-dataset set_recordings \ + --dataset $input_data_dir \ + --recordings-file $output_dir/recordings.csv \ + --output-dataset $output_data_dir + done fi if [ $stage -le 3 ]; then - # Create Babble noise from MUSAN speech files - for name in musan_speech - do - input_data_dir=data/$name - output_data_dir=data/${name}_babble - output_dir=exp/proc_audio/${name}_babble - $train_cmd $output_dir/log/make_babble_noise_${name}.log \ - hyp_utils/conda_env.sh \ - make_babble_noise_audio_files.py \ - --audio-format flac \ - --min-spks 3 --max-spks 10 --num-reuses 5 \ - --recordings-file $input_data_dir/recordings.csv \ - --output-path $output_dir \ - --output-recordings-file $output_data_dir/recordings.csv - hyperion_dataset.py make_from_recordings \ - --dataset $output_data_dir \ - --recordings-file $output_data_dir/recordings.csv - done + # Create Babble noise from MUSAN speech files + for name in musan_speech + do + input_data_dir=data/$name + output_data_dir=data/${name}_babble + output_dir=exp/proc_audio/${name}_babble + $train_cmd $output_dir/log/make_babble_noise_${name}.log \ + hyp_utils/conda_env.sh \ + hyperion-make-babble-noise-audio-files \ + --audio-format flac \ + --min-spks 3 --max-spks 10 --num-reuses 5 \ + --recordings-file $input_data_dir/recordings.csv \ + --output-path $output_dir \ + --output-recordings-file $output_data_dir/recordings.csv + hyperion-dataset make_from_recordings \ + --dataset $output_data_dir \ + --recordings-file $output_data_dir/recordings.csv + done fi if [ $stage -le 4 ]; then - if [ ! 
-d "RIRS_NOISES" ]; then - # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises - wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip - unzip rirs_noises.zip - fi - prepare_data.py rirs --corpus-dir RIRS_NOISES/simulated_rirs/smallroom --output-dir data/rirs_smallroom - prepare_data.py rirs --corpus-dir RIRS_NOISES/simulated_rirs/mediumroom --output-dir data/rirs_mediumroom - prepare_data.py rirs --corpus-dir RIRS_NOISES/real_rirs_isotropic_noises --output-dir data/rirs_real - for rirs in rirs_smallroom rirs_mediumroom rirs_real - do - output_dir=exp/rirs/$rirs - data_dir=data/$rirs - $train_cmd $output_dir/log/pack_rirs_${name}.log \ - hyp_utils/conda_env.sh \ - pack_wav_rirs.py ${args} --input $data_dir/recordings.csv \ - --output h5,csv:$output_dir/rirs.h5,$output_dir/rirs.csv || exit 1; - hyperion_dataset.py add_features --dataset $data_dir \ - --features-name rirs --features-file $output_dir/rirs.csv + if [ ! -d "RIRS_NOISES" ]; then + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/simulated_rirs/smallroom --output-dir data/rirs_smallroom + hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/simulated_rirs/mediumroom --output-dir data/rirs_mediumroom + hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/real_rirs_isotropic_noises --output-dir data/rirs_real + for rirs in rirs_smallroom rirs_mediumroom rirs_real + do + output_dir=exp/rirs/$rirs + data_dir=data/$rirs + $train_cmd $output_dir/log/pack_rirs_${name}.log \ + hyp_utils/conda_env.sh \ + hyperion-pack-wav-rirs ${args} --input $data_dir/recordings.csv \ + --output h5,csv:$output_dir/rirs.h5,$output_dir/rirs.csv || exit 1; + hyperion-dataset add_features --dataset $data_dir \ + --features-name rirs --features-file $output_dir/rirs.csv - done + done fi diff --git a/egs/voxceleb/v1.2/run_004_prepare_xvec_train_data.sh b/egs/voxceleb/v1.2/run_004_prepare_xvec_train_data.sh index 7649ff22..4e0c5b19 100755 --- a/egs/voxceleb/v1.2/run_004_prepare_xvec_train_data.sh +++ b/egs/voxceleb/v1.2/run_004_prepare_xvec_train_data.sh @@ -35,42 +35,42 @@ if [ $stage -le 2 ];then $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${nnet_data}.JOB.log \ hyp_utils/conda_env.sh \ - preprocess_audio_files.py \ + hyperion-preprocess-audio-files \ --audio-format flac --remove-dc-offset $vad_args \ --part-idx JOB --num-parts $nj \ --recordings-file data/$nnet_data/recordings.csv \ --output-path $output_dir \ --output-recordings-file $output_dir/recordings.JOB.csv - hyperion_tables.py cat \ - --table-type recordings \ - --output-file $output_dir/recordings.csv --num-tables $nj + hyperion-tables cat \ + --table-type recordings \ + --output-file $output_dir/recordings.csv --num-tables $nj - hyperion_dataset.py set_recordings $update_durs \ - --dataset data/$nnet_data \ - --recordings-file $output_dir/recordings.csv \ - --output-dataset data/${nnet_data}_proc_audio \ - --remove-features vad + hyperion-dataset set_recordings $update_durs \ + --dataset data/$nnet_data \ + --recordings-file $output_dir/recordings.csv \ + --output-dataset data/${nnet_data}_proc_audio \ + --remove-features vad fi if [ $stage -le 3 ];then - hyperion_dataset.py remove_short_segments \ - --dataset data/${nnet_data}_proc_audio \ - --output-dataset data/${nnet_data}_filtered \ - 
--length-name duration --min-length 2.0 + hyperion-dataset remove_short_segments \ + --dataset data/${nnet_data}_proc_audio \ + --output-dataset data/${nnet_data}_filtered \ + --length-name duration --min-length 2.0 - hyperion_dataset.py remove_classes_few_segments \ - --dataset data/${nnet_data}_filtered \ - --class-name speaker --min-segs 4 + hyperion-dataset remove_classes_few_segments \ + --dataset data/${nnet_data}_filtered \ + --class-name speaker --min-segs 4 fi if [ $stage -le 4 ];then - hyperion_dataset.py split_train_val \ - --dataset data/${nnet_data}_filtered \ - --val-prob 0.03 \ - --joint-classes speaker --min-train-samples 1 \ - --seed 1123581321 \ - --train-dataset data/${nnet_data}_xvector_train \ - --val-dataset data/${nnet_data}_xvector_val + hyperion-dataset split_train_val \ + --dataset data/${nnet_data}_filtered \ + --val-prob 0.03 \ + --joint-classes speaker --min-train-samples 1 \ + --seed 1123581321 \ + --train-dataset data/${nnet_data}_xvector_train \ + --val-dataset data/${nnet_data}_xvector_val fi diff --git a/egs/voxceleb/v1.2/run_005_train_xvector.sh b/egs/voxceleb/v1.2/run_005_train_xvector.sh index d2f31ea1..2479d565 100755 --- a/egs/voxceleb/v1.2/run_005_train_xvector.sh +++ b/egs/voxceleb/v1.2/run_005_train_xvector.sh @@ -44,7 +44,7 @@ if [ $stage -le 1 ]; then $cuda_cmd \ --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - train_wav2xvector.py $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + hyperion-train-wav2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ --data.train.dataset.segments-file $train_data_dir/segments.csv \ --data.train.dataset.class-files $train_data_dir/speaker.csv \ @@ -65,7 +65,7 @@ if [ $stage -le 2 ]; then $cuda_cmd \ --gpu $ngpu $nnet_s2_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - finetune_wav2xvector.py $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + hyperion-finetune-wav2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ --data.train.dataset.segments-file $train_data_dir/segments.csv \ --data.train.dataset.class-files $train_data_dir/speaker.csv \ diff --git a/egs/voxceleb/v1.2/run_006_extract_xvectors.sh b/egs/voxceleb/v1.2/run_006_extract_xvectors.sh index 09b8c8e9..0dc58048 100755 --- a/egs/voxceleb/v1.2/run_006_extract_xvectors.sh +++ b/egs/voxceleb/v1.2/run_006_extract_xvectors.sh @@ -58,15 +58,15 @@ if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qm echo "Extracting x-vectors for $name" $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - extract_wav2xvectors.py ${xvec_args} ${vad_args} \ + hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ --part-idx JOB --num-parts $nj \ --recordings-file data/$name/recordings.csv \ --random-utt-length --min-utt-length 2 --max-utt-length 30 \ --model-path $nnet \ --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv - hyperion_tables.py cat \ - --table-type features \ - --output-file $output_dir/xvector.csv --num-tables $nj + hyperion-tables cat \ + --table-type features \ + --output-file $output_dir/xvector.csv --num-tables $nj done fi @@ -88,14 +88,14 @@ if [ $stage -le 2 ]; then echo "Extracting x-vectors for $name" $xvec_cmd JOB=1:$nj 
$output_dir/log/extract_xvectors.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - extract_wav2xvectors.py ${xvec_args} ${vad_args} \ + hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ --part-idx JOB --num-parts $nj \ --recordings-file data/$name/recordings.csv \ --model-path $nnet \ --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv - hyperion_tables.py cat \ - --table-type features \ - --output-file $output_dir/xvector.csv --num-tables $nj + hyperion-tables cat \ + --table-type features \ + --output-file $output_dir/xvector.csv --num-tables $nj done fi diff --git a/egs/voxceleb/v1.2/run_007_eval_be.sh b/egs/voxceleb/v1.2/run_007_eval_be.sh index 9084d35b..53621488 100755 --- a/egs/voxceleb/v1.2/run_007_eval_be.sh +++ b/egs/voxceleb/v1.2/run_007_eval_be.sh @@ -56,7 +56,7 @@ if [ $stage -le 3 ];then do $train_cmd $score_cosine_dir/log/voxceleb1_${i}_${j}.log \ hyp_utils/conda_env.sh \ - eval_cosine_scoring_backend.py \ + hyperion-eval-cosine-scoring-backend \ --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \ --ndx-file data/voxceleb1_test/trials.csv \ --enroll-map-file data/voxceleb1_test/enrollment.csv \ @@ -66,11 +66,11 @@ if [ $stage -le 3 ];then done done wait - merge_scores.py --output-file $score_cosine_dir/voxceleb1_scores.csv \ - --num-enroll-parts $num_parts --num-test-parts $num_parts + hyperion-merge-scores --output-file $score_cosine_dir/voxceleb1_scores.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ - eval_verification_metrics.py \ + hyperion-eval-verification-metrics \ --score-files $score_cosine_dir/voxceleb1_scores.csv \ --key-files data/voxceleb1_test/trials_{o,e,h}.csv \ --score-names voxceleb1 \ @@ -85,22 +85,22 @@ if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then echo "Eval voxsrc2 with Cosine scoring" $train_cmd $score_cosine_dir/log/voxsrc22_dev.log \ hyp_utils/conda_env.sh \ - eval_cosine_scoring_backend.py \ + hyperion-eval-cosine-scoring-backend \ --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \ --ndx-file data/voxsrc22_dev/trials.csv \ --enroll-map-file data/voxsrc22_dev/enrollment.csv \ --score-file $score_cosine_dir/voxsrc22_dev_scores.csv # $train_cmd $score_cosine_dir/log/voxsrc22_eval.log \ - # hyp_utils/conda_env.sh \ - # eval_cosine_scoring_backend.py \ - # --feats-file csv:$xvector_dir/voxsrc22_eval/xvector.csv \ - # --ndx-file data/voxsrc22_eval/trials.csv \ - # --enroll-map-file data/voxsrc22_eval/enrollment.csv \ - # --score-file $score_cosine_dir/voxsrc22_eval_scores.csv + # hyp_utils/conda_env.sh \ + # hyperion-eval-cosine-scoring-backend \ + # --feats-file csv:$xvector_dir/voxsrc22_eval/xvector.csv \ + # --ndx-file data/voxsrc22_eval/trials.csv \ + # --enroll-map-file data/voxsrc22_eval/enrollment.csv \ + # --score-file $score_cosine_dir/voxsrc22_eval_scores.csv $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxsrc22_dev.log \ - eval_verification_metrics.py \ + hyperion-eval-verification-metrics \ --score-files $score_cosine_dir/voxsrc22_dev_scores.csv \ --key-files data/voxsrc22_dev/trials.csv \ --score-names voxsrc22_dev \ @@ -121,7 +121,7 @@ if [ "$do_snorm" == "true" ];then do $train_cmd --mem 22G $score_cosine_snorm_dir/log/voxceleb1_${i}_${j}.log \ hyp_utils/conda_env.sh \ - eval_cosine_scoring_backend.py \ + hyperion-eval-cosine-scoring-backend \ --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \ --ndx-file data/voxceleb1_test/trials.csv \ 
--enroll-map-file data/voxceleb1_test/enrollment.csv \ @@ -135,11 +135,11 @@ if [ "$do_snorm" == "true" ];then sleep 5s done wait - merge_scores.py --output-file $score_cosine_snorm_dir/voxceleb1_scores.csv \ - --num-enroll-parts $num_parts --num-test-parts $num_parts + hyperion-merge-scores --output-file $score_cosine_snorm_dir/voxceleb1_scores.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts $train_cmd --mem 12G --num-threads 6 $score_cosine_snorm_dir/log/score_voxceleb1.log \ - eval_verification_metrics.py \ + hyperion-eval-verification-metrics \ --score-files $score_cosine_snorm_dir/voxceleb1_scores.csv \ --key-files data/voxceleb1_test/trials_{o,e,h}.csv \ --score-names voxceleb1 \ @@ -159,7 +159,7 @@ if [ "$do_snorm" == "true" ];then do $train_cmd $score_cosine_snorm_dir/log/voxsrc22_dev_${i}_${j}.log \ hyp_utils/conda_env.sh \ - eval_cosine_scoring_backend.py \ + hyperion-eval-cosine-scoring-backend \ --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \ --ndx-file data/voxsrc22_dev/trials.csv \ --enroll-map-file data/voxsrc22_dev/enrollment.csv \ @@ -174,16 +174,16 @@ if [ "$do_snorm" == "true" ];then sleep 10s done wait - merge_scores.py --output-file $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \ - --num-enroll-parts $num_parts --num-test-parts $num_parts + hyperion-merge-scores --output-file $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts $train_cmd --mem 12G --num-threads 6 $score_cosine_snorm_dir/log/score_voxsrc22_dev.log \ - eval_verification_metrics.py \ - --score-files $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \ - --key-files data/voxsrc22_dev/trials.csv \ - --score-names voxsrc22_dev \ - --key-names all \ - --output-file $score_cosine_snorm_dir/voxsrc22_dev_results.csv + hyperion-eval-verification-metrics \ + --score-files $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \ + --key-files data/voxsrc22_dev/trials.csv \ + --score-names voxsrc22_dev \ + --key-names all \ + --output-file $score_cosine_snorm_dir/voxsrc22_dev_results.csv cat $score_cosine_snorm_dir/voxsrc22_dev_results.csv @@ -202,7 +202,7 @@ if [ "$do_qmf" == "true" ];then do $train_cmd $score_cosine_qmf_dir/log/voxceleb2_trials_${i}_${j}.log \ hyp_utils/conda_env.sh \ - eval_cosine_scoring_backend_with_qmf.py \ + hyperion-eval-cosine-scoring-backend-with-qmf \ --feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ --ndx-file data/voxceleb2cat_train_trials/trials.csv \ --enroll-map-file data/voxceleb2cat_train_trials/enrollments.csv \ @@ -216,13 +216,13 @@ if [ "$do_qmf" == "true" ];then sleep 5s done wait - merge_scores.py --output-file $score_cosine_qmf_dir/voxceleb2_scores.snorm.csv \ - --num-enroll-parts $num_parts --num-test-parts $num_parts + hyperion-merge-scores --output-file $score_cosine_qmf_dir/voxceleb2_scores.snorm.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts - train_qmf.py --score-file $score_cosine_qmf_dir/voxceleb2_scores.snorm.csv \ - --key-file data/voxceleb2cat_train_trials/trials.csv \ - --model-file $score_cosine_qmf_dir/qmf.h5 - + hyperion-train-qmf --score-file $score_cosine_qmf_dir/voxceleb2_scores.snorm.csv \ + --key-file data/voxceleb2cat_train_trials/trials.csv \ + --model-file $score_cosine_qmf_dir/qmf.h5 + fi if [ $stage -le 8 ];then @@ -234,7 +234,7 @@ if [ "$do_qmf" == "true" ];then do $train_cmd --mem 22G $score_cosine_qmf_dir/log/voxceleb1_${i}_${j}.log \ hyp_utils/conda_env.sh \ - eval_cosine_scoring_backend_with_qmf.py \ + 
hyperion-eval-cosine-scoring-backend-with-qmf \ --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \ --ndx-file data/voxceleb1_test/trials.csv \ --enroll-map-file data/voxceleb1_test/enrollment.csv \ @@ -252,11 +252,11 @@ if [ "$do_qmf" == "true" ];then for suffix in "" .snorm .snorm.qmf do ( - merge_scores.py --output-file $score_cosine_qmf_dir/voxceleb1_scores$suffix.csv \ - --num-enroll-parts $num_parts --num-test-parts $num_parts + hyperion-merge-scores --output-file $score_cosine_qmf_dir/voxceleb1_scores$suffix.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts $train_cmd --mem 12G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1$suffix.log \ - eval_verification_metrics.py \ + hyperion-eval-verification-metrics \ --score-files $score_cosine_qmf_dir/voxceleb1_scores$suffix.csv \ --key-files data/voxceleb1_test/trials_{o,e,h}.csv \ --score-names voxceleb1 \ @@ -280,7 +280,7 @@ if [ "$do_qmf" == "true" ];then do $train_cmd $score_cosine_qmf_dir/log/voxsrc22_dev_${i}_${j}.log \ hyp_utils/conda_env.sh \ - eval_cosine_scoring_backend_with_qmf.py \ + hyperion-eval-cosine-scoring-backend-with-qmf \ --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \ --ndx-file data/voxsrc22_dev/trials.csv \ --enroll-map-file data/voxsrc22_dev/enrollment.csv \ @@ -299,11 +299,11 @@ if [ "$do_qmf" == "true" ];then for suffix in "" .snorm .snorm.qmf do ( - merge_scores.py --output-file $score_cosine_qmf_dir/voxsrc22_dev_scores$suffix.csv \ - --num-enroll-parts $num_parts --num-test-parts $num_parts + hyperion-merge-scores --output-file $score_cosine_qmf_dir/voxsrc22_dev_scores$suffix.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts $train_cmd --mem 12G --num-threads 6 $score_cosine_qmf_dir/log/score_voxsrc22_dev$suffix.log \ - eval_verification_metrics.py \ + hyperion-eval-verification-metrics \ --score-files $score_cosine_qmf_dir/voxsrc22_dev_scores$suffix.csv \ --key-files data/voxsrc22_dev/trials.csv \ --score-names voxsrc22_dev \ diff --git a/hyperion/bin/__init__.py b/hyperion/bin/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hyperion/bin/adv_finetune_xvector_from_wav.py b/hyperion/bin/adv_finetune_xvector_from_wav.py index f45b84a0..ea3d3b80 100755 --- a/hyperion/bin/adv_finetune_xvector_from_wav.py +++ b/hyperion/bin/adv_finetune_xvector_from_wav.py @@ -13,6 +13,13 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML from hyperion.torch.adv_attacks import AttackFactory @@ -29,8 +36,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorAdvTrainerFromWav as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) xvec_dict = { "resnet": RXVec, @@ -43,7 +48,6 @@ def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) sampler_args = kwargs["sampler"] @@ -138,7 +142,6 @@ def init_attack(feat_extractor, model, wav_scale, **kwargs): def train_xvec(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -231,8 +234,7 @@ def make_parser(xvec_class): return parser -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="""Fine-tune x-vector model from audio files 
with adversarial training""" @@ -266,6 +268,10 @@ def make_parser(xvec_class): train_xvec(gpu_id, args_sc) +if __name__ == "__main__": + main() + + # def init_data( # audio_path, # train_list, diff --git a/hyperion/bin/apply_mvn_select_frames.py b/hyperion/bin/apply_mvn_select_frames.py index bdf53786..f8299edc 100755 --- a/hyperion/bin/apply_mvn_select_frames.py +++ b/hyperion/bin/apply_mvn_select_frames.py @@ -10,6 +10,13 @@ import time import numpy as np +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import RandomAccessDataReaderFactory as RDRF @@ -18,8 +25,6 @@ from hyperion.np.feats import MeanVarianceNorm as MVN from hyperion.utils import Utt2Info from hyperion.utils.kaldi_matrix import compression_methods -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def process_feats( @@ -35,7 +40,6 @@ def process_feats( compression_method, **kwargs ): - logging.info("initializing") mvn_args = MVN.filter_args(**kwargs) mvn = MVN(**mvn_args) @@ -49,16 +53,23 @@ def process_feats( logging.info("opening output stream: %s" % (output_spec)) with DWF.create( - output_spec, compress=compress, compression_method=compression_method, + output_spec, + compress=compress, + compression_method=compression_method, ) as writer: - logging.info("opening input stream: %s" % (output_spec)) with DRF.create( - input_spec, path_prefix=path_prefix, part_idx=part_idx, num_parts=num_parts, + input_spec, + path_prefix=path_prefix, + part_idx=part_idx, + num_parts=num_parts, ) as reader: if vad_spec is not None: logging.info("opening VAD stream: %s" % (vad_spec)) - v_reader = RDRF.create(vad_spec, path_prefix=vad_path_prefix,) + v_reader = RDRF.create( + vad_spec, + path_prefix=vad_path_prefix, + ) while not reader.eof(): key, data = reader.read(1) @@ -91,8 +102,7 @@ def process_feats( u2nf.save(write_num_frames_spec) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Apply CMVN and remove silence") parser.add_argument("--input", dest="input_spec", required=True) @@ -105,7 +115,9 @@ def process_feats( "--path-prefix", dest="path_prefix", default=None, help=("scp file_path prefix") ) parser.add_argument( - "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), + "--vad-path-prefix", + default=None, + help=("scp file_path prefix for vad"), ) parser.add_argument( "--part-idx", @@ -150,3 +162,7 @@ def process_feats( logging.debug(args) process_feats(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/audio_to_duration.py b/hyperion/bin/audio_to_duration.py index ac8852a4..8ef6b5c1 100755 --- a/hyperion/bin/audio_to_duration.py +++ b/hyperion/bin/audio_to_duration.py @@ -9,15 +9,19 @@ import time import numpy as np +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger from hyperion.io import SequentialAudioReader as AR from hyperion.utils import SegmentSet -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def audio_to_duration(audio_file, output_file, **kwargs): - input_args = AR.filter_args(**kwargs) logging.info(f"input_args={input_args}") @@ -36,8 +40,7 @@ def audio_to_duration(audio_file, output_file, **kwargs): seg_set.save(output_file) -if __name__ == "__main__": - +def main(): 
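    # [Annotation, not part of the patch] The same refactor is applied to every
    # script in hyperion/bin: the body of the old `if __name__ == "__main__":`
    # guard moves into main(), and the guard is reduced to a bare main() call,
    # so that a setup.py console_scripts entry (e.g. the hypothetical
    # `hyperion-audio-to-duration = hyperion.bin.audio_to_duration:main`)
    # has a named function to target.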
parser = ArgumentParser(description="Writes audio file durations to table") parser.add_argument("--cfg", action=ActionConfigFile) @@ -59,3 +62,7 @@ def audio_to_duration(audio_file, output_file, **kwargs): logging.debug(args) audio_to_duration(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/compute_energy_vad.py b/hyperion/bin/compute_energy_vad.py index 9d50388c..fe0b1d8e 100755 --- a/hyperion/bin/compute_energy_vad.py +++ b/hyperion/bin/compute_energy_vad.py @@ -9,10 +9,6 @@ import time import numpy as np -from hyperion.hyp_defs import config_logger -from hyperion.io import DataWriterFactory as DWF -from hyperion.io import SequentialAudioReader as AR -from hyperion.np.feats import EnergyVAD from jsonargparse import ( ActionConfigFile, ActionParser, @@ -20,9 +16,13 @@ namespace_to_dict, ) +from hyperion.hyp_defs import config_logger +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR +from hyperion.np.feats import EnergyVAD -def compute_vad(recordings_file, output_spec, write_num_frames, **kwargs): +def compute_vad(recordings_file, output_spec, write_num_frames, **kwargs): vad_args = EnergyVAD.filter_args(**kwargs) vad = EnergyVAD(**vad_args) @@ -78,8 +78,7 @@ def compute_vad(recordings_file, output_spec, write_num_frames, **kwargs): f_num_frames.close() -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Compute Kaldi Energy VAD") parser.add_argument("--cfg", action=ActionConfigFile) @@ -105,3 +104,7 @@ def compute_vad(recordings_file, output_spec, write_num_frames, **kwargs): logging.debug(args) compute_vad(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/compute_mfcc_feats.py b/hyperion/bin/compute_mfcc_feats.py index 442e4141..f42f260d 100755 --- a/hyperion/bin/compute_mfcc_feats.py +++ b/hyperion/bin/compute_mfcc_feats.py @@ -9,20 +9,24 @@ import time import numpy as np +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.io import SequentialDataReaderFactory as DRF from hyperion.io import compression_methods from hyperion.np.feats import MFCC -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def compute_mfcc_feats( input_path, output_path, compress, compression_method, write_num_frames, **kwargs ): - mfcc_args = MFCC.filter_args(**kwargs) mfcc = MFCC(**mfcc_args) @@ -34,7 +38,9 @@ def compute_mfcc_feats( reader = DRF.create(input_path, **input_args) writer = DWF.create( - output_path, compress=compress, compression_method=compression_method, + output_path, + compress=compress, + compression_method=compression_method, ) if write_num_frames is not None: @@ -68,8 +74,7 @@ def compute_mfcc_feats( f_num_frames.close() -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Compute MFCC features") parser.add_argument("--cfg", action=ActionConfigFile) @@ -109,3 +114,7 @@ def compute_mfcc_feats( logging.debug(args) compute_mfcc_feats(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/copy_feats.py b/hyperion/bin/copy_feats.py index 4549caec..4ffc1a58 100755 --- a/hyperion/bin/copy_feats.py +++ b/hyperion/bin/copy_feats.py @@ -12,11 +12,12 @@ import time import numpy as np + from hyperion.hyp_defs import 
config_logger from hyperion.io import CopyFeats as CF -if __name__ == "__main__": +def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, fromfile_prefix_chars="@", @@ -37,3 +38,7 @@ logging.debug(args) CF(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/decode_wav2transducer.py b/hyperion/bin/decode_wav2transducer.py index 972b247c..bcf9e05c 100755 --- a/hyperion/bin/decode_wav2transducer.py +++ b/hyperion/bin/decode_wav2transducer.py @@ -15,18 +15,22 @@ import sentencepiece as spm import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.np.augment import SpeechAugment from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.models.wav2transducer.beam_search import (beam_search, - greedy_search) +from hyperion.torch.models.wav2transducer.beam_search import beam_search, greedy_search from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_device(use_gpu): @@ -118,7 +122,6 @@ def decode_one_batch( def decode_transducer( input_spec, output_spec, model_path, bpe_model, use_gpu, **kwargs ): - device = init_device(use_gpu) model = load_model(model_path, device) @@ -202,8 +205,7 @@ def decode_transducer( ) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Extracts x-vectors from waveform computing " "acoustic features on the fly" @@ -235,3 +237,7 @@ def decode_transducer( logging.debug(args) decode_transducer(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/decode_wav2vec2rnn_transducer.py b/hyperion/bin/decode_wav2vec2rnn_transducer.py index 4fdc3140..33aea8c3 100755 --- a/hyperion/bin/decode_wav2vec2rnn_transducer.py +++ b/hyperion/bin/decode_wav2vec2rnn_transducer.py @@ -15,19 +15,23 @@ import sentencepiece as spm import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR from hyperion.np.augment import SpeechAugment from hyperion.torch import TorchModelLoader as TML from hyperion.torch.models import HFWav2Vec2RNNTransducer -from hyperion.torch.models.wav2transducer.beam_search import (beam_search, - greedy_search) +from hyperion.torch.models.wav2transducer.beam_search import beam_search, greedy_search from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_device(use_gpu): @@ -48,10 +52,11 @@ def load_model(model_path, device): def decode_one_batch( - model: nn.Module, - sp: spm.SentencePieceProcessor, - x: torch.Tensor, - decoding_method="beam_search") -> Dict[str, List[List[str]]]: + model: nn.Module, + sp: spm.SentencePieceProcessor, + x: torch.Tensor, + decoding_method="beam_search", +) -> Dict[str, List[List[str]]]: """Decode one batch and 
return the result in a dict. The dict has the following format: - key: It indicates the setting used for decoding. For example, @@ -77,7 +82,7 @@ def decode_one_batch( the returned dict. """ device = model.device - feature = x #batch["inputs"] + feature = x # batch["inputs"] assert x.shape[0] == 1 assert feature.ndim == 2 @@ -87,7 +92,8 @@ def decode_one_batch( feature_lens = torch.Tensor([x.shape[1]]).int() encoder_out, hid_feats, encoder_out_lens = model.forward_feats( - x=feature, x_lengths=feature_lens) + x=feature, x_lengths=feature_lens + ) hyps = [] batch_size = encoder_out.size(0) @@ -114,9 +120,16 @@ def decode_one_batch( return hyps[0] -def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, - infer_args, use_gpu, **kwargs): - +def decode_transducer( + input_spec, + output_spec, + scp_sep, + model_path, + bpe_model, + infer_args, + use_gpu, + **kwargs, +): device = init_device(use_gpu) model = load_model(model_path, device) @@ -142,8 +155,9 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, t2 = time.time() logging.info("processing utt %s", key) with torch.no_grad(): - x = torch.tensor( - x[None, :], dtype=torch.get_default_dtype()).to(device) + x = torch.tensor(x[None, :], dtype=torch.get_default_dtype()).to( + device + ) tot_frames = x.shape[1] logging.info( @@ -157,10 +171,10 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, if x.shape[1] == 0: y = [""] else: - #y = decode_one_batch(model=model, sp=sp, x=x) - x_lengths = torch.tensor((x.shape[1], ), - dtype=torch.long, - device=device) + # y = decode_one_batch(model=model, sp=sp, x=x) + x_lengths = torch.tensor( + (x.shape[1],), dtype=torch.long, device=device + ) y = model.infer(x, x_lengths, **infer_args) y = sp.decode(y[0]) @@ -172,10 +186,12 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, tot_time = t4 - t1 infer_time = t3 - t2 logging.info( - ("utt %s total-time=%.3f read-time=%.3f " - "infer-time=%.3f " - "write-time=%.3f " - "infer-rt-factor=%.2f tot-rt-factor=%.2f"), + ( + "utt %s total-time=%.3f read-time=%.3f " + "infer-time=%.3f " + "write-time=%.3f " + "infer-rt-factor=%.2f tot-rt-factor=%.2f" + ), key, tot_time, t2 - t1, @@ -186,16 +202,14 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, ) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( - description=("ASR decoding for RNN-T with Wav2vec features")) + description=("ASR decoding for RNN-T with Wav2vec features") + ) parser.add_argument("--cfg", action=ActionConfigFile) parser.add_argument("--input", dest="input_spec", required=True) - parser.add_argument("--scp-sep", - default=" ", - help=("scp file field separator")) + parser.add_argument("--scp-sep", default=" ", help=("scp file field separator")) AR.add_class_args(parser) parser.add_argument("--model-path", required=True) @@ -203,16 +217,12 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, HFWav2Vec2RNNTransducer.add_infer_args(parser, "infer-args") parser.add_argument("--output", dest="output_spec", required=True) - parser.add_argument("--use-gpu", - default=False, - action="store_true", - help="extract xvectors in gpu") - parser.add_argument("-v", - "--verbose", - dest="verbose", - default=1, - choices=[0, 1, 2, 3], - type=int) + parser.add_argument( + "--use-gpu", default=False, action="store_true", help="extract xvectors in gpu" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, 
choices=[0, 1, 2, 3], type=int + ) args = parser.parse_args() config_logger(args.verbose) @@ -220,3 +230,7 @@ def decode_transducer(input_spec, output_spec, scp_sep, model_path, bpe_model, logging.debug(args) decode_transducer(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_cosine_scoring_backend.py b/hyperion/bin/eval_cosine_scoring_backend.py index 1a740024..835cae0b 100755 --- a/hyperion/bin/eval_cosine_scoring_backend.py +++ b/hyperion/bin/eval_cosine_scoring_backend.py @@ -4,24 +4,24 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging +import time +from pathlib import Path + +import numpy as np from jsonargparse import ( - ArgumentParser, ActionConfigFile, ActionParser, + ArgumentParser, namespace_to_dict, ) -import time -import logging -from pathlib import Path - -import numpy as np from hyperion.hyp_defs import config_logger -from hyperion.utils import TrialNdx, TrialKey, TrialScores, EnrollmentMap, SegmentSet -from hyperion.utils.math_funcs import cosine_scoring from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.np.transforms import TransformList from hyperion.np.score_norm import AdaptSNorm +from hyperion.np.transforms import TransformList +from hyperion.utils import EnrollmentMap, SegmentSet, TrialKey, TrialNdx, TrialScores +from hyperion.utils.math_funcs import cosine_scoring def load_trial_data( @@ -58,7 +58,6 @@ def load_trial_data( def load_cohort_data(segments_file, feats_file): - segments = SegmentSet.load(segments_file) feats_reader = DRF.create(feats_file) x = feats_reader.read(segments["id"], squeeze=True) @@ -81,7 +80,6 @@ def eval_backend( cohort_nbest, avg_cohort_by, ): - logging.info("loading data") enroll_map, ndx, x_e, x_t = load_trial_data( enroll_map_file, @@ -151,8 +149,7 @@ def eval_backend( scores.save(score_file) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Eval cosine-scoring with optional AS-Norm") parser.add_argument("--enroll-feats-file", default=None) @@ -198,3 +195,7 @@ def eval_backend( logging.debug(args) eval_backend(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py b/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py index 0333669f..4fecf2f3 100755 --- a/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py +++ b/hyperion/bin/eval_cosine_scoring_backend_with_qmf.py @@ -4,33 +4,33 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ( - ArgumentParser, - ActionConfigFile, - ActionParser, - namespace_to_dict, -) -import time import logging +import time from pathlib import Path import numpy as np import pandas as pd +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) from hyperion.hyp_defs import config_logger +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.classifiers import BinaryLogisticRegression as LR +from hyperion.np.score_norm import AdaptSNorm +from hyperion.np.transforms import TransformList from hyperion.utils import ( - TrialNdx, - TrialKey, - TrialScores, EnrollmentMap, - SegmentSet, InfoTable, + SegmentSet, + TrialKey, + TrialNdx, + TrialScores, ) -from hyperion.utils.math_funcs import cosine_scoring, average_vectors -from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.np.transforms import TransformList -from hyperion.np.score_norm import AdaptSNorm -from 
hyperion.np.classifiers import BinaryLogisticRegression as LR +from hyperion.utils.math_funcs import average_vectors, cosine_scoring def get_precomp_qm_names(quality_measures): @@ -542,7 +542,7 @@ def eval_backend( # scores.save(score_file_snorm) -if __name__ == "__main__": +def main(): parser = ArgumentParser( description="Eval cosine-scoring with optional AS-Norm and QMF" ) @@ -611,3 +611,7 @@ def eval_backend( logging.debug(args) eval_backend(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_verification_metrics.py b/hyperion/bin/eval_verification_metrics.py index 83227558..98fd37e2 100755 --- a/hyperion/bin/eval_verification_metrics.py +++ b/hyperion/bin/eval_verification_metrics.py @@ -5,19 +5,19 @@ """ import logging from pathlib import Path -import pandas as pd - -from hyperion.hyp_defs import config_logger -from hyperion.np.metrics import VerificationEvaluator as VE +import pandas as pd from jsonargparse import ( ActionConfigFile, - ActionYesNo, ActionParser, + ActionYesNo, ArgumentParser, namespace_to_dict, ) +from hyperion.hyp_defs import config_logger +from hyperion.np.metrics import VerificationEvaluator as VE + def eval_verification_metrics( key_files, @@ -30,7 +30,6 @@ def eval_verification_metrics( sparse, output_file, ): - assert len(key_files) == len(key_names) assert len(score_files) == len(score_names) dfs = [] @@ -61,8 +60,7 @@ def eval_verification_metrics( print(df.to_string(), flush=True) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Evaluate speaker verification metrics") parser.add_argument("--cfg", action=ActionConfigFile) parser.add_argument("--key-files", required=True, nargs="+") @@ -85,7 +83,12 @@ def eval_verification_metrics( parser.add_argument("--sparse", default=False, action=ActionYesNo) parser.add_argument("--output-file", required=True) parser.add_argument( - "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int, + "-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int, ) args = parser.parse_args() @@ -94,3 +97,7 @@ def eval_verification_metrics( del kwargs["verbose"] del kwargs["cfg"] eval_verification_metrics(**kwargs) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py index 7c9d4104..1baad913 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav.py @@ -12,6 +12,13 @@ import pandas as pd import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -26,8 +33,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) class MyModel(nn.Module): @@ -44,7 +49,6 @@ def __init__( self.sigma = sigma def forward(self, s_t): - if self.sigma > 0: s_t = s_t + self.sigma * torch.randn_like(s_t) @@ -107,7 +111,6 @@ def load_calibrator(cal_file, threshold): def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): - r = DRF.create(v_file) 
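    # [Annotation, not part of the patch] DRF is RandomAccessDataReaderFactory
    # (see the imports above), so r is a random-access reader over the x-vector
    # archive in v_file; the rest of read_data presumably fetches enrollment and
    # test vectors from it by utterance key.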
enroll = Utt2Info.load(enroll_file) key = TrialKey.load(key_file) @@ -143,7 +146,6 @@ def eval_cosine_scoring( num_seg_parts, **kwargs ): - device = init_device(use_gpu) feat_extractor = init_feats(**kwargs) xvector_model = load_model(model_path) @@ -319,8 +321,7 @@ def eval_cosine_scoring( attack_stats.to_csv(stats_file) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="Eval cosine-scoring given enroll x-vector and test wave" ) @@ -336,7 +337,9 @@ def eval_cosine_scoring( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), + "--vad-path-prefix", + default=None, + help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) @@ -415,3 +418,7 @@ def eval_cosine_scoring( logging.debug(args) eval_cosine_scoring(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py index fb0d402c..3e4e9229 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_adv_test_wav_wavegan.py @@ -7,6 +7,7 @@ import os import sys import time + # [Added Sonal May21] from pathlib import Path @@ -14,6 +15,13 @@ import pandas as pd import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -29,8 +37,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) torch.backends.cudnn.enabled = False @@ -45,7 +51,7 @@ def __init__( sigma=0, smoothing_after_wavegan=None, wave_gan_defender=None, - wav_scale=2 ** 15 - 1, + wav_scale=2**15 - 1, ): super().__init__() self.feat_extractor = feat_extractor @@ -61,7 +67,6 @@ def __init__( self.apply_wavegan = False if wave_gan_defender is None else True def forward(self, s_t): - # Pre-proceessing defense, wavegan + smoothing [Added Sonal May21] s_t = s_t / self.wav_scale if self.smoothing_after_wavegan: @@ -149,7 +154,6 @@ def load_calibrator(cal_file, threshold): def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): - r = DRF.create(v_file) enroll = Utt2Info.load(enroll_file) key = TrialKey.load(key_file) @@ -188,7 +192,6 @@ def eval_cosine_scoring_wavegan( wave_gan_model_ckpt, **kwargs ): - device = init_device(use_gpu) feat_extractor = init_feats(**kwargs) @@ -374,8 +377,7 @@ def eval_cosine_scoring_wavegan( attack_stats.to_csv(stats_file) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="Eval cosine-scoring given enroll x-vector and test wave" ) @@ -391,7 +393,9 @@ def eval_cosine_scoring_wavegan( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), + "--vad-path-prefix", + default=None, + help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) @@ -488,3 +492,7 @@ def eval_cosine_scoring_wavegan( logging.debug(args) 
eval_cosine_scoring_wavegan(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py index 2d5baa17..781cdbdf 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_art_test_wav.py @@ -15,6 +15,13 @@ import torch.nn as nn from art.classifiers import PyTorchClassifier from art.estimators.classification import PyTorchClassifier +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -22,16 +29,15 @@ from hyperion.io import VADReaderFactory as VRF from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.adv_attacks.art_attack_factory import \ - ARTAttackFactory as AttackFactory +from hyperion.torch.adv_attacks.art_attack_factory import ( + ARTAttackFactory as AttackFactory, +) from hyperion.torch.layers import LinBinCalibrator as Calibrator from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_device(use_gpu): @@ -69,7 +75,6 @@ def load_calibrator(cal_file): def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): - r = DRF.create(v_file) enroll = Utt2Info.load(enroll_file) key = TrialKey.load(key_file) @@ -156,7 +161,6 @@ def eval_cosine_scoring( num_seg_parts, **kwargs ): - device_type = "gpu" if use_gpu else "cpu" device = init_device(use_gpu) feat_extractor = init_feats(**kwargs) @@ -343,8 +347,7 @@ def eval_cosine_scoring( attack_stats.to_csv(stats_file) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Eval cosine-scoring given enroll x-vector " @@ -363,7 +366,9 @@ def eval_cosine_scoring( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), + "--vad-path-prefix", + default=None, + help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) @@ -431,3 +436,7 @@ def eval_cosine_scoring( logging.debug(args) eval_cosine_scoring(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py index 76af5d75..2ebb7e3d 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_test_wav.py @@ -12,6 +12,13 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import RandomAccessAudioReader as AR from hyperion.io import RandomAccessDataReaderFactory as DRF @@ -24,8 +31,6 @@ from hyperion.torch.utils.misc import l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from 
hyperion.utils.list_utils import ismember -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_device(use_gpu): @@ -66,7 +71,6 @@ def load_calibrator(cal_file, device): def read_data(v_file, ndx_file, enroll_file, seg_part_idx, num_seg_parts): - r = DRF.create(v_file) enroll = Utt2Info.load(enroll_file) try: @@ -104,7 +108,6 @@ def eval_cosine_scoring( num_seg_parts, **kwargs ): - device = init_device(use_gpu) feat_extractor = init_feats(device, **kwargs) model = load_model(model_path, device) @@ -199,8 +202,7 @@ def eval_cosine_scoring( s.save_txt(score_file) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="Eval cosine-scoring given enroll x-vector and test wave" ) @@ -216,7 +218,9 @@ def eval_cosine_scoring( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), + "--vad-path-prefix", + default=None, + help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) @@ -266,3 +270,7 @@ def eval_cosine_scoring( logging.debug(args) eval_cosine_scoring(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py index f33402a1..a6f8efa4 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py @@ -12,6 +12,13 @@ import pandas as pd import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -26,8 +33,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) class MyModel(nn.Module): @@ -104,7 +109,6 @@ def load_calibrator(cal_file, threshold): def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): - r = DRF.create(v_file) enroll = Utt2Info.load(enroll_file) key = TrialKey.load(key_file) @@ -146,7 +150,6 @@ def eval_cosine_scoring( num_seg_parts, **kwargs ): - device = init_device(use_gpu) # load victim model feat_extractor = init_feats(**kwargs["feats"]) @@ -204,7 +207,7 @@ def eval_cosine_scoring( if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix + v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) scores = np.zeros((key.num_models, key.num_tests), dtype="float32") attack_stats = pd.DataFrame( @@ -337,8 +340,7 @@ def eval_cosine_scoring( attack_stats.to_csv(stats_file) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Eval cosine-scoring given enroll x-vector and " @@ -435,3 +437,7 @@ def eval_cosine_scoring( logging.debug(args) eval_cosine_scoring(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py index f94dc497..7b8bc245 100755 --- 
a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_art_test_wav.py @@ -15,6 +15,13 @@ import torch.nn as nn from art.classifiers import PyTorchClassifier from art.estimators.classification import PyTorchClassifier +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -22,16 +29,15 @@ from hyperion.io import VADReaderFactory as VRF from hyperion.np.classifiers import BinaryLogisticRegression as LR from hyperion.torch import TorchModelLoader as TML -from hyperion.torch.adv_attacks.art_attack_factory import \ - ARTAttackFactory as AttackFactory +from hyperion.torch.adv_attacks.art_attack_factory import ( + ARTAttackFactory as AttackFactory, +) from hyperion.torch.layers import LinBinCalibrator as Calibrator from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) class MyModel(nn.Module): @@ -113,7 +119,6 @@ def load_calibrator(cal_file): def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): - r = DRF.create(v_file) enroll = Utt2Info.load(enroll_file) key = TrialKey.load(key_file) @@ -155,7 +160,6 @@ def eval_cosine_scoring( num_seg_parts, **kwargs ): - device_type = "gpu" if use_gpu else "cpu" device = init_device(use_gpu) # load victim model @@ -361,8 +365,7 @@ def eval_cosine_scoring( attack_stats.to_csv(stats_file) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Eval cosine-scoring given enroll x-vector and " @@ -384,7 +387,9 @@ def eval_cosine_scoring( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), + "--vad-path-prefix", + default=None, + help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) @@ -456,3 +461,7 @@ def eval_cosine_scoring( logging.debug(args) eval_cosine_scoring(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/eval_xvec_logits_from_wav.py b/hyperion/bin/eval_xvec_logits_from_wav.py index f60c7508..b2e6a665 100755 --- a/hyperion/bin/eval_xvec_logits_from_wav.py +++ b/hyperion/bin/eval_xvec_logits_from_wav.py @@ -12,6 +12,13 @@ import numpy as np import pandas as pd import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR @@ -21,12 +28,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) def init_device(use_gpu): @@ -110,7 +111,6 @@ def eval_xvec( use_gpu, **kwargs ): - rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) feat_extractor = init_feats(device, 
**kwargs) @@ -131,15 +131,16 @@ def eval_xvec( ar_args = AR.filter_args(**kwargs) logging.info("opening output stream: %s", output_spec) with DWF.create(output_spec) as writer: - logging.info( "opening input stream: {} with args={}".format(input_spec, ar_args) ) with AR(input_spec, **ar_args) as reader: - if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) + v_reader = VRF.create( + vad_spec, + path_prefix=vad_path_prefix, + ) while not reader.eof(): t1 = time.time() @@ -224,8 +225,7 @@ def eval_xvec( aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Evaluates x-vectors logits from waveform computing " @@ -299,3 +299,7 @@ def eval_xvec( logging.debug(args) eval_xvec(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py index 5eba1b99..f2df9581 100755 --- a/hyperion/bin/extract_wav2vec2xvectors.py +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -13,6 +13,13 @@ import pandas as pd import torch import torchaudio.transforms as tat +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR @@ -21,12 +28,6 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) resamplers = {} @@ -122,7 +123,6 @@ def extract_xvectors( use_gpu, **kwargs, ): - rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) model = load_model(model_path, device) @@ -143,13 +143,14 @@ def extract_xvectors( ar_args["wav_scale"] = 1.0 logging.info("opening output stream: %s", output_spec) with DWF.create(output_spec) as writer: - logging.info(f"opening input stream: {recordings_file} with args={ar_args}") with AR(recordings_file, **ar_args) as reader: - if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) + v_reader = VRF.create( + vad_spec, + path_prefix=vad_path_prefix, + ) while not reader.eof(): t1 = time.time() @@ -252,8 +253,7 @@ def extract_xvectors( aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Extracts x-vectors from waveform computing " "acoustic features on the fly" @@ -340,3 +340,7 @@ def extract_xvectors( logging.debug(args) extract_xvectors(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/extract_wav2xvectors.py b/hyperion/bin/extract_wav2xvectors.py index 7b04fcc8..763df3fc 100755 --- a/hyperion/bin/extract_wav2xvectors.py +++ b/hyperion/bin/extract_wav2xvectors.py @@ -13,6 +13,13 @@ import pandas as pd import torch import torchaudio.transforms as tat +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR @@ -21,12 +28,6 @@ from hyperion.torch import 
TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) resamplers = {} @@ -121,7 +122,6 @@ def extract_xvectors( use_gpu, **kwargs, ): - rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) model = load_model(model_path, device) @@ -143,10 +143,8 @@ def extract_xvectors( ar_args = AR.filter_args(**kwargs) logging.info("opening output stream: %s with args=%s", output_spec, str(ar_args)) with DWF.create(output_spec, metadata_columns=metadata_columns) as writer: - logging.info(f"opening input stream: {recordings_file} with args={ar_args}") with AR(recordings_file, **ar_args) as reader: - if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) @@ -255,8 +253,7 @@ def extract_xvectors( aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="""Extracts x-vectors from waveform computing acoustic features on the fly""" ) @@ -331,3 +328,7 @@ def extract_xvectors( logging.debug(args) extract_xvectors(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/extract_xvectors_from_feats.py b/hyperion/bin/extract_xvectors_from_feats.py index b02db70c..e70225c2 100755 --- a/hyperion/bin/extract_xvectors_from_feats.py +++ b/hyperion/bin/extract_xvectors_from_feats.py @@ -11,6 +11,13 @@ import numpy as np import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF @@ -19,12 +26,6 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) def init_device(use_gpu): @@ -82,7 +83,6 @@ def extract_xvectors( use_gpu, **kwargs ): - logging.info("initializing") rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) @@ -96,7 +96,6 @@ def extract_xvectors( dr_args = DRF.filter_args(**kwargs) logging.info("opening output stream: %s" % (output_spec)) with DWF.create(output_spec) as writer: - logging.info("opening input stream: %s" % (input_spec)) with DRF.create(input_spec, **dr_args) as reader: if vad_spec is not None: @@ -174,8 +173,7 @@ def extract_xvectors( u2nf.save(write_num_frames_spec) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Extracts x-vectors from features") parser.add_argument("--cfg", action=ActionConfigFile) @@ -244,3 +242,7 @@ def extract_xvectors( logging.debug(args) extract_xvectors(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/extract_xvectors_from_wav.py b/hyperion/bin/extract_xvectors_from_wav.py index 6a8130d3..71a24bd4 100755 --- a/hyperion/bin/extract_xvectors_from_wav.py +++ b/hyperion/bin/extract_xvectors_from_wav.py @@ -12,6 +12,13 @@ import numpy as np import pandas as pd import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from 
hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR @@ -21,12 +28,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) def init_device(use_gpu): @@ -111,7 +112,6 @@ def extract_xvectors( use_gpu, **kwargs ): - rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) feat_extractor = init_feats(device, **kwargs) @@ -132,12 +132,10 @@ def extract_xvectors( ar_args = AR.filter_args(**kwargs) logging.info("opening output stream: %s", output_spec) with DWF.create(output_spec) as writer: - logging.info( "opening input stream: {} with args={}".format(recordings_file, ar_args) ) with AR(recordings_file, **ar_args) as reader: - if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) @@ -235,8 +233,7 @@ def extract_xvectors( aug_df.to_csv(aug_info_path, index=False, na_rep="n/a") -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Extracts x-vectors from waveform computing acoustic features on the fly" @@ -317,3 +314,7 @@ def extract_xvectors( logging.debug(args) extract_xvectors(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/extract_xvectors_slidwin_from_feats.py b/hyperion/bin/extract_xvectors_slidwin_from_feats.py index bcec5133..a1186ed2 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_feats.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_feats.py @@ -12,6 +12,13 @@ import numpy as np import torch import yaml +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialDataReaderFactory as DRF @@ -20,12 +27,6 @@ from hyperion.torch import TorchModelLoader as TML from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) def init_device(use_gpu): @@ -73,7 +74,6 @@ def extract_xvectors( use_gpu, **kwargs ): - logging.info("initializing") rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) @@ -86,7 +86,6 @@ def extract_xvectors( dr_args = DRF.filter_args(**kwargs) logging.info("opening output stream: %s" % (output_spec)) with DWF.create(output_spec) as writer: - logging.info("opening input stream: %s" % (output_spec)) with DRF.create(input_spec, **dr_args) as reader: if vad_spec is not None: @@ -118,7 +117,13 @@ def extract_xvectors( t4 = time.time() if x.shape[0] == 0: - y = np.zeros((1, model.embed_dim,), dtype=float_cpu(),) + y = np.zeros( + ( + 1, + model.embed_dim, + ), + dtype=float_cpu(), + ) else: xx = torch.tensor(x.T[None, :], dtype=torch.get_default_dtype()) with torch.no_grad(): @@ -195,8 +200,7 @@ def extract_xvectors( yaml.dump(params, f) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Extract x-vectors over a sliding window") parser.add_argument("--cfg", action=ActionConfigFile) @@ -208,7 +212,9 @@ def extract_xvectors( ) parser.add_argument("--slidwin-params-path", default=None) parser.add_argument( - "--vad-path-prefix", default=None, help=("scp 
file_path prefix for vad"), + "--vad-path-prefix", + default=None, + help=("scp file_path prefix for vad"), ) MVN.add_class_args(parser, prefix="mvn") @@ -298,3 +304,7 @@ def extract_xvectors( logging.debug(args) extract_xvectors(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/extract_xvectors_slidwin_from_wav.py b/hyperion/bin/extract_xvectors_slidwin_from_wav.py index f1a64e1b..f973b566 100755 --- a/hyperion/bin/extract_xvectors_slidwin_from_wav.py +++ b/hyperion/bin/extract_xvectors_slidwin_from_wav.py @@ -13,6 +13,13 @@ import pandas as pd import torch import yaml +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import DataWriterFactory as DWF from hyperion.io import SequentialAudioReader as AR @@ -22,12 +29,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) def init_device(use_gpu): @@ -99,7 +100,6 @@ def extract_xvectors( use_gpu, **kwargs ): - rng = np.random.default_rng(seed=1123581321 + kwargs["part_idx"]) device = init_device(use_gpu) feat_extractor = init_feats(device, **kwargs) @@ -124,15 +124,16 @@ def extract_xvectors( ar_args = AR.filter_args(**kwargs) logging.info("opening output stream: %s", output_spec) with DWF.create(output_spec) as writer: - logging.info( "opening input stream: {} with args={}".format(input_spec, ar_args) ) with AR(input_spec, **ar_args) as reader: - if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) - v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix,) + v_reader = VRF.create( + vad_spec, + path_prefix=vad_path_prefix, + ) while not reader.eof(): t1 = time.time() @@ -172,7 +173,13 @@ def extract_xvectors( t6 = time.time() if x.shape[1] == 0: - y = np.zeros((1, model.embed_dim,), dtype=float_cpu(),) + y = np.zeros( + ( + 1, + model.embed_dim, + ), + dtype=float_cpu(), + ) else: x = x.transpose(1, 2).contiguous() y = ( @@ -255,8 +262,7 @@ def extract_xvectors( yaml.dump(params, f) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Extract x-vectors over a sliding window" @@ -347,3 +353,7 @@ def extract_xvectors( logging.debug(args) extract_xvectors(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/finetune_wav2vec2transducer.py b/hyperion/bin/finetune_wav2vec2transducer.py index 6f17f800..138f18f7 100755 --- a/hyperion/bin/finetune_wav2vec2transducer.py +++ b/hyperion/bin/finetune_wav2vec2transducer.py @@ -14,6 +14,14 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) +from torch.nn.utils.rnn import pad_sequence + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import AudioDataset as AD @@ -22,9 +30,6 @@ from hyperion.torch.models import HFWav2Vec2Transducer from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) -from torch.nn.utils.rnn import pad_sequence model_dict = { "hf_wav2vec2transducer": HFWav2Vec2Transducer, @@ -43,8 +48,7 
@@ def transducer_collate(batch): audio = pad_sequence(audio) audio_length = torch.as_tensor(audio_length) target = k2.RaggedTensor(target) - return torch.transpose(audio,0,1), audio_length, target - + return torch.transpose(audio, 0, 1), audio_length, target def init_data(partition, rank, num_gpus, **kwargs): @@ -73,7 +77,9 @@ def init_data(partition, rank, num_gpus, **kwargs): largs = ( {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} ) - data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate) + data_loader = torch.utils.data.DataLoader( + dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate + ) return data_loader @@ -89,11 +95,7 @@ def init_model(in_model_file, rank, model_class, **kwargs): return model - - - def train_model(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -119,7 +121,7 @@ def train_model(gpu_id, args): trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: logging.info("trainer args={}".format(trn_args)) - metrics = {} + metrics = {} trainer = Trainer( model, device=device, @@ -135,7 +137,7 @@ def train_model(gpu_id, args): def make_parser(model_class): parser = ArgumentParser() - + parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") AD.add_class_args(train_parser, prefix="dataset", skip={}) @@ -161,27 +163,23 @@ def make_parser(model_class): data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) - parser.add_argument( "--data.train.dataset.text_file", - type=str, + type=str, ) - - parser.add_argument("--data.val.dataset.text_file", type=str) - + + parser.add_argument("--data.val.dataset.text_file", type=str) + parser.add_argument( "--data.train.dataset.bpe_model", - type=str, + type=str, ) parser.link_arguments( "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" ) - parser.link_arguments( - "data.train.dataset.bpe_model", "data.val.dataset.bpe_model" - ) - + parser.link_arguments("data.train.dataset.bpe_model", "data.val.dataset.bpe_model") parser.add_argument("--in-model-file", required=True) model_class.add_finetune_args(parser, prefix="model") @@ -198,8 +196,10 @@ def make_parser(model_class): return parser -if __name__ == "__main__": - parser = ArgumentParser(description="Fine-tune Wav2Vec2Transducer model from audio files") +def main(): + parser = ArgumentParser( + description="Fine-tune Wav2Vec2Transducer model from audio files" + ) parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() @@ -228,3 +228,7 @@ def make_parser(model_class): # torch docs recommend using forkserver # multiprocessing.set_start_method("forkserver") train_model(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/finetune_wav2vec2xvector.py b/hyperion/bin/finetune_wav2vec2xvector.py index fc3c7084..7020e32f 100755 --- a/hyperion/bin/finetune_wav2vec2xvector.py +++ b/hyperion/bin/finetune_wav2vec2xvector.py @@ -13,18 +13,25 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics 
import CategoricalAccuracy -from hyperion.torch.models import (HFHubert2ResNet1dXVector, - HFWav2Vec2ResNet1dXVector, - HFWavLM2ResNet1dXVector) +from hyperion.torch.models import ( + HFHubert2ResNet1dXVector, + HFWav2Vec2ResNet1dXVector, + HFWavLM2ResNet1dXVector, +) from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) model_dict = { "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, @@ -34,7 +41,6 @@ def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) sampler_args = kwargs["sampler"] @@ -99,7 +105,6 @@ def init_hard_prototype_mining(model, train_loader, val_loader, rank): def train_model(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -182,8 +187,7 @@ def make_parser(model_class): return parser -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="Finetunes Wav2Vec2XVector model from audio files" ) @@ -215,3 +219,7 @@ def make_parser(model_class): # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") train_model(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/finetune_wav2xvector.py b/hyperion/bin/finetune_wav2xvector.py index b100b544..97356c01 100755 --- a/hyperion/bin/finetune_wav2xvector.py +++ b/hyperion/bin/finetune_wav2xvector.py @@ -11,6 +11,13 @@ from pathlib import Path import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import AudioDataset as AD @@ -27,12 +34,6 @@ # from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) xvec_dict = { "resnet": RXVec, @@ -45,7 +46,6 @@ def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) sampler_args = kwargs["sampler"] @@ -115,7 +115,6 @@ def init_hard_prototype_mining(model, train_loader, val_loader, rank): def train_xvec(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -196,8 +195,7 @@ def make_parser(xvec_class): return parser -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Fine-tune x-vector model from audio files") parser.add_argument("--cfg", action=ActionConfigFile) @@ -226,3 +224,7 @@ def make_parser(xvec_class): # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") train_xvec(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/finetune_xvector_dfr_from_feats.py b/hyperion/bin/finetune_xvector_dfr_from_feats.py index 17cafb85..140cc3a2 100755 --- a/hyperion/bin/finetune_xvector_dfr_from_feats.py +++ b/hyperion/bin/finetune_xvector_dfr_from_feats.py @@ -14,6 +14,13 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import 
ClassWeightedSeqSampler as Sampler @@ -22,8 +29,6 @@ from hyperion.torch.models import XVector as XVec from hyperion.torch.trainers import XVectorTrainerDeepFeatReg as Trainer from hyperion.torch.utils import ddp, open_device -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): @@ -60,7 +65,6 @@ def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **k def init_xvector( num_classes, in_model_path, prior_model_path, rank, train_mode, **kwargs ): - xvec_args = XVec.filter_finetune_args(**kwargs) if rank == 0: logging.info("xvector network ft args={}".format(xvec_args)) @@ -194,8 +198,7 @@ def train_xvec(gpu_id, args): # trainer.fit(train_loader, test_loader) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="Fine-tune x-vector model with deep feature loss regularization" ) @@ -278,3 +281,7 @@ def train_xvec(gpu_id, args): # del args.seed # train_xvec(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/finetune_xvector_dfr_from_wav.py b/hyperion/bin/finetune_xvector_dfr_from_wav.py index f7832a47..9d745e67 100755 --- a/hyperion/bin/finetune_xvector_dfr_from_wav.py +++ b/hyperion/bin/finetune_xvector_dfr_from_wav.py @@ -8,10 +8,18 @@ import os import sys import time +from pathlib import Path import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import AudioDataset as AD @@ -21,8 +29,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainerDeepFeatRegFromWav as Trainer from hyperion.torch.utils import ddp, open_device -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_data( @@ -36,7 +42,6 @@ def init_data( rank, **kwargs ): - ad_args = AD.filter_args(**kwargs) sampler_args = Sampler.filter_args(**kwargs) if rank == 0: @@ -82,7 +87,6 @@ def init_feats(rank, **kwargs): def init_xvector( num_classes, in_model_path, prior_model_path, rank, train_mode, **kwargs ): - xvec_args = XVec.filter_finetune_args(**kwargs) if rank == 0: logging.info("xvector network ft args={}".format(xvec_args)) @@ -103,7 +107,6 @@ def init_xvector( def train_xvec(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -231,8 +234,7 @@ def train_xvec(gpu_id, args): # trainer.fit(train_loader, test_loader) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Fine-tune x-vector model with deep feature loss " @@ -327,3 +329,7 @@ def train_xvec(gpu_id, args): # del args.seed # train_xvec(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/finetune_xvector_from_feats.py b/hyperion/bin/finetune_xvector_from_feats.py index ac9c2d0b..01e0c778 100755 --- a/hyperion/bin/finetune_xvector_from_feats.py +++ b/hyperion/bin/finetune_xvector_from_feats.py @@ -12,6 +12,13 @@ import numpy as np import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import ClassWeightedSeqSampler as Sampler @@ -20,8 +27,6 @@ 
from hyperion.torch.models import XVector as XVec from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp, open_device -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) def init_data(data_rspec, train_list, val_list, num_workers, num_gpus, rank, **kwargs): @@ -161,8 +166,7 @@ def train_xvec(gpu_id, args): # trainer.fit(train_loader, test_loader) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Fine-tune x-vector model") parser.add_argument("--cfg", action=ActionConfigFile) @@ -230,3 +234,7 @@ def train_xvec(gpu_id, args): # del args.seed # train_xvec(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/finetune_xvector_from_wav.py b/hyperion/bin/finetune_xvector_from_wav.py index 1c7cbe58..2c884d0b 100755 --- a/hyperion/bin/finetune_xvector_from_wav.py +++ b/hyperion/bin/finetune_xvector_from_wav.py @@ -11,6 +11,13 @@ from pathlib import Path import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch import TorchModelLoader as TML from hyperion.torch.data import AudioDataset as AD @@ -25,8 +32,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) xvec_dict = { "resnet": RXVec, @@ -39,7 +44,6 @@ def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) sampler_args = kwargs["sampler"] @@ -120,7 +124,6 @@ def init_hard_prototype_mining(model, train_loader, val_loader, rank): def train_xvec(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -208,8 +211,7 @@ def make_parser(xvec_class): return parser -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Fine-tune x-vector model from audio files") parser.add_argument("--cfg", action=ActionConfigFile) @@ -238,3 +240,7 @@ def make_parser(xvec_class): # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") train_xvec(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py index 4336b7b9..00452695 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_classif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py @@ -14,6 +14,13 @@ import torch import torch.nn as nn import yaml +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -24,12 +31,6 @@ from hyperion.torch.utils import open_device from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialNdx, Utt2Info -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) def read_utt_list(list_file, class2int_file, part_idx, num_parts): @@ -156,14 +157,13 @@ def generate_attacks( num_parts, **kwargs ): - device = init_device(use_gpu) model = init_model(model_path, **kwargs) model.to(device) logging.info("opening 
audio read stream: %s" % (wav_file)) audio_args = AR.filter_args(**kwargs) - audio_reader = AR(wav_file ** audio_args) + audio_reader = AR(wav_file**audio_args) wav_scale = audio_reader.wav_scale logging.info("opening audio write stream: %s" % (output_wav_dir)) @@ -207,7 +207,7 @@ def generate_attacks( s = torch.as_tensor(s[None, :], dtype=torch.get_default_dtype()).to(device) target = torch.as_tensor([class_id], dtype=torch.long).to(device) if vad_spec is not None: - vad = v_reader.read([key.seg_set[j]])[0] + vad = v_reader.read([key])[0] tot_frames = len(vad) speech_frames = np.sum(vad) vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( @@ -217,7 +217,7 @@ def generate_attacks( logging.info( "utt %s detected %d/%d (%.2f %%) speech frames" % ( - key.seg_set[j], + key, speech_frames, tot_frames, speech_frames / tot_frames * 100, @@ -315,8 +315,7 @@ def generate_attacks( yaml.dump(attacks_info, f, sort_keys=True) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="Generate Attacks for speaker classification with x-vectors" ) @@ -332,7 +331,9 @@ def generate_attacks( parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), + "--vad-path-prefix", + default=None, + help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) @@ -413,3 +414,7 @@ def generate_attacks( logging.debug(args) generate_attacks(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/generate_adv_attacks_xvector_verif.py b/hyperion/bin/generate_adv_attacks_xvector_verif.py index 363e3afc..ab7d907b 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_verif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_verif.py @@ -14,6 +14,13 @@ import torch import torch.nn as nn import yaml +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, float_cpu, set_float_cpu from hyperion.io import AudioWriter as AW from hyperion.io import RandomAccessAudioReader as AR @@ -28,8 +35,6 @@ from hyperion.torch.utils.misc import compute_stats_adv_attack, l2_norm from hyperion.utils import TrialKey, TrialNdx, TrialScores, Utt2Info from hyperion.utils.list_utils import ismember -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) class MyModel(nn.Module): @@ -73,7 +78,6 @@ def forward(self, s_t): def read_data(v_file, key_file, enroll_file, seg_part_idx, num_seg_parts): - r = DRF.create(v_file) enroll = Utt2Info.load(enroll_file) key = TrialKey.load(key_file) @@ -173,7 +177,6 @@ def generate_attacks( random_seed, **kwargs ): - device = init_device(use_gpu) model = init_model(model_path, embed_layer, cal_file, threshold, **kwargs) model.to(device) @@ -346,8 +349,7 @@ def generate_attacks( yaml.dump(attacks_info, f, sort_keys=True) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="Generate Attacks for speaker verification with x-vectors+cos+calibration" ) @@ -442,3 +444,7 @@ def generate_attacks( logging.debug(args) generate_attacks(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/hyperion_dataset.py b/hyperion/bin/hyperion_dataset.py index 2e3a35ec..17fff2ba 100755 --- a/hyperion/bin/hyperion_dataset.py +++ b/hyperion/bin/hyperion_dataset.py @@ -7,6 +7,14 @@ from pathlib import Path from typing import List, 
Optional, Union +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ActionYesNo, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger from hyperion.utils import ( ClassInfo, @@ -18,13 +26,6 @@ RecordingSet, SegmentSet, ) -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, - ActionYesNo, -) subcommand_list = [ "add_features", @@ -41,7 +42,12 @@ def add_common_args(parser): parser.add_argument( - "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int, + "-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int, ) @@ -145,7 +151,8 @@ def make_make_from_recordings_parser(): def make_from_recordings( - dataset: PathLike, recordings_file: PathLike, + dataset: PathLike, + recordings_file: PathLike, ): output_dataset = dataset import pandas as pd @@ -186,7 +193,10 @@ def make_remove_short_segments_parser(): def remove_short_segments( - dataset: PathLike, min_length: float, length_name: str, output_dataset: PathLike, + dataset: PathLike, + min_length: float, + length_name: str, + output_dataset: PathLike, ): if output_dataset is None: output_dataset = dataset @@ -216,7 +226,9 @@ def make_rebuild_class_idx_parser(): def rebuild_class_idx( - dataset: PathLike, class_name: str, output_dataset: PathLike, + dataset: PathLike, + class_name: str, + output_dataset: PathLike, ): if output_dataset is None: output_dataset = dataset @@ -301,14 +313,21 @@ def make_split_train_val_parser(): help="""types of classes that need to have different classes in train and val""", ) parser.add_argument( - "--seed", default=11235813, type=int, help="""random seed""", + "--seed", + default=11235813, + type=int, + help="""random seed""", ) parser.add_argument( - "--train-dataset", required=True, help="""output train dataset dir""", + "--train-dataset", + required=True, + help="""output train dataset dir""", ) parser.add_argument( - "--val-dataset", required=True, help="""output val dataset dir""", + "--val-dataset", + required=True, + help="""output val dataset dir""", ) add_common_args(parser) @@ -361,7 +380,8 @@ def make_copy_parser(): def copy( - dataset: PathLike, output_dataset: PathLike, + dataset: PathLike, + output_dataset: PathLike, ): dataset = Dataset.load(dataset, lazy=True) dataset.save(output_dataset) @@ -383,7 +403,10 @@ def make_add_cols_to_segments_parser(): help="""columns to copy to segments table""", ) parser.add_argument( - "--on", default=["id"], nargs="+", help="""columns to match both tables rows""", + "--on", + default=["id"], + nargs="+", + help="""columns to match both tables rows""", ) parser.add_argument( "--right-on", @@ -418,8 +441,7 @@ def add_cols_to_segments( dataset.save(output_dataset) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Tool to manipulates the Hyperion dataset") parser.add_argument("--cfg", action=ActionConfigFile) @@ -436,3 +458,7 @@ def add_cols_to_segments( del kwargs["verbose"] del kwargs["cfg"] globals()[subcommand](**kwargs) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/hyperion_tables.py b/hyperion/bin/hyperion_tables.py index 7f61b35a..59472d83 100755 --- a/hyperion/bin/hyperion_tables.py +++ b/hyperion/bin/hyperion_tables.py @@ -7,6 +7,13 @@ from pathlib import Path from typing import List, Optional, Union +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger from 
hyperion.utils import ( ClassInfo, @@ -17,12 +24,6 @@ RecordingSet, SegmentSet, ) -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) subcommand_list = ["cat"] table_dict = { @@ -87,7 +88,6 @@ def cat( num_tables: int, base_idx: int = 1, ): - assert input_files is not None or num_tables != 0 output_file = Path(output_file) if input_files is None: @@ -108,8 +108,7 @@ def cat( output_table.save(output_file) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Tool to manipulates the Hyperion data tables") parser.add_argument("--cfg", action=ActionConfigFile) @@ -126,3 +125,7 @@ def cat( del kwargs["verbose"] del kwargs["cfg"] globals()[subcommand](**kwargs) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/make_babble_noise_audio_files.py b/hyperion/bin/make_babble_noise_audio_files.py index 68e5b22b..43d6ab91 100755 --- a/hyperion/bin/make_babble_noise_audio_files.py +++ b/hyperion/bin/make_babble_noise_audio_files.py @@ -10,11 +10,6 @@ import time import numpy as np -from hyperion.hyp_defs import config_logger -from hyperion.io import AudioWriter as Writer -from hyperion.io import RandomAccessAudioReader as AR -from hyperion.io import VADReaderFactory as VRF -from hyperion.utils import Utt2Info from jsonargparse import ( ActionConfigFile, ActionParser, @@ -22,9 +17,14 @@ namespace_to_dict, ) +from hyperion.hyp_defs import config_logger +from hyperion.io import AudioWriter as Writer +from hyperion.io import RandomAccessAudioReader as AR +from hyperion.io import VADReaderFactory as VRF +from hyperion.utils import Utt2Info + def make_noise(xs, max_value): - lens = np.array([x.shape[0] for x in xs]) max_len = np.max(lens) num_tiles = np.ceil(max_len / lens) @@ -53,7 +53,6 @@ def make_babble_noise_audio_files( random_seed=112358, **kwargs, ): - input_args = AR.filter_args(**kwargs) output_args = Writer.filter_args(**kwargs) logging.info(f"input_args={input_args}") @@ -105,8 +104,7 @@ def make_babble_noise_audio_files( logging.info("finished making babble files, elapsed-time=%f", time.time() - t1) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Creates babble noise by adding speech files") parser.add_argument("--cfg", action=ActionConfigFile) @@ -137,3 +135,7 @@ def make_babble_noise_audio_files( logging.debug(args) make_babble_noise_audio_files(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/make_wav2xvector.py b/hyperion/bin/make_wav2xvector.py index b5972d1b..b3a1a2d5 100755 --- a/hyperion/bin/make_wav2xvector.py +++ b/hyperion/bin/make_wav2xvector.py @@ -12,6 +12,13 @@ import numpy as np import pandas as pd import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger # from hyperion.torch import TorchModelLoader as TML @@ -26,12 +33,6 @@ from hyperion.torch.models import Wav2ResNet1dXVector as W2R1dXVec from hyperion.torch.models import Wav2ResNetXVector as W2RXVec from hyperion.torch.narchs import AudioFeatsMVN as AF -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) def init_feats(feats): @@ -51,7 +52,6 @@ def load_model(model_path): def make_wav2xvector(feats, xvector_path, output_path): - feats = init_feats(feats) xvector_model = load_model(xvector_path) if isinstance(xvector_model, RXVec): @@ -67,8 +67,7 @@ def make_wav2xvector(feats, xvector_path, 
output_path): model.save(output_path) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="""Combines the feature extractor config with XVector model to produce a Wav2XVector model with integrated feature extraction""" @@ -89,3 +88,7 @@ def make_wav2xvector(feats, xvector_path, output_path): logging.debug(args) make_wav2xvector(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/merge_scores.py b/hyperion/bin/merge_scores.py index cb8524b7..72ab6010 100755 --- a/hyperion/bin/merge_scores.py +++ b/hyperion/bin/merge_scores.py @@ -6,9 +6,6 @@ import logging from pathlib import Path -from hyperion.hyp_defs import config_logger - -from hyperion.utils import TrialScores from jsonargparse import ( ActionConfigFile, ActionParser, @@ -16,6 +13,9 @@ namespace_to_dict, ) +from hyperion.hyp_defs import config_logger +from hyperion.utils import TrialScores + def merge_scores(input_files, output_file, num_enroll_parts, num_test_parts, base_idx): output_file = Path(output_file) @@ -63,7 +63,7 @@ def merge_scores(input_files, output_file, num_enroll_parts, num_test_parts, bas write_header = False -if __name__ == "__main__": +def main(): parser = ArgumentParser(description="Tool to manipulates the Hyperion data tables") parser.add_argument("--cfg", action=ActionConfigFile) parser.add_argument( @@ -108,3 +108,7 @@ def merge_scores(input_files, output_file, num_enroll_parts, num_test_parts, bas del kwargs["verbose"] del kwargs["cfg"] merge_scores(**kwargs) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/pack_wav_rirs.py b/hyperion/bin/pack_wav_rirs.py index b2a1bc2b..bf88d674 100755 --- a/hyperion/bin/pack_wav_rirs.py +++ b/hyperion/bin/pack_wav_rirs.py @@ -10,9 +10,6 @@ import time import numpy as np -from hyperion.hyp_defs import config_logger -from hyperion.io import DataWriterFactory as DWF -from hyperion.io import SequentialAudioReader as AR from jsonargparse import ( ActionConfigFile, ActionParser, @@ -20,9 +17,12 @@ namespace_to_dict, ) +from hyperion.hyp_defs import config_logger +from hyperion.io import DataWriterFactory as DWF +from hyperion.io import SequentialAudioReader as AR -def pack_wav_rirs(input_path, output_spec, **kwargs): +def pack_wav_rirs(input_path, output_spec, **kwargs): writer = DWF.create(output_spec, compress=False) t1 = time.time() with AR(input_path, wav_scale=1) as reader: @@ -47,8 +47,7 @@ def pack_wav_rirs(input_path, output_spec, **kwargs): logging.info("Packed RIRS elapsed-time=%.f", time.time() - t1) -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Packs RIRs in wave format to h5/ark files") parser.add_argument("--cfg", action=ActionConfigFile) @@ -69,3 +68,7 @@ def pack_wav_rirs(input_path, output_spec, **kwargs): logging.debug(args) pack_wav_rirs(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/plot_embedding_tsne.py b/hyperion/bin/plot_embedding_tsne.py index e2157e3e..60d7ac5c 100755 --- a/hyperion/bin/plot_embedding_tsne.py +++ b/hyperion/bin/plot_embedding_tsne.py @@ -13,12 +13,18 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ActionYesNo, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.np.transforms import PCA, LNorm, SklTSNE from hyperion.utils import SegmentSet -from jsonargparse import 
(ActionConfigFile, ActionParser, ActionYesNo, - ArgumentParser, namespace_to_dict) matplotlib.use("Agg") colors = ["b", "g", "r", "c", "m", "y", "k"] @@ -40,7 +46,6 @@ def plot_embedding_tsne( output_dir, **kwargs, ): - output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) logging.info("loading data") @@ -126,8 +131,7 @@ def plot_embedding_tsne( # plt.clf() -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Projects embeddings using TSNE") parser.add_argument("--train-v-file", required=True) @@ -162,6 +166,9 @@ def plot_embedding_tsne( plot_embedding_tsne(**namespace_to_dict(args)) +if __name__ == "__main__": + main() + # #!/usr/bin/env python # """ # Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) diff --git a/hyperion/bin/plot_embedding_tsne_per_class.py b/hyperion/bin/plot_embedding_tsne_per_class.py index 14da4d07..08e4ef70 100755 --- a/hyperion/bin/plot_embedding_tsne_per_class.py +++ b/hyperion/bin/plot_embedding_tsne_per_class.py @@ -13,12 +13,6 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from hyperion.hyp_defs import config_logger -from hyperion.io import RandomAccessDataReaderFactory as DRF -from hyperion.np.clustering import AHC -from hyperion.np.transforms import PCA, LNorm, SklTSNE -from hyperion.utils import SegmentSet -from hyperion.utils.math_funcs import cosine_scoring from jsonargparse import ( ActionConfigFile, ActionParser, @@ -27,6 +21,13 @@ namespace_to_dict, ) +from hyperion.hyp_defs import config_logger +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.clustering import AHC +from hyperion.np.transforms import PCA, LNorm, SklTSNE +from hyperion.utils import SegmentSet +from hyperion.utils.math_funcs import cosine_scoring + matplotlib.use("Agg") colors = ["b", "g", "r", "c", "m", "y", "k"] markers = ["x", "o", "+", "*", "s", "h", "D", "^", "v", "p", "8"] @@ -50,7 +51,6 @@ def plot_embedding_tsne( output_dir, **kwargs, ): - output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) logging.info("loading data") @@ -92,7 +92,7 @@ def plot_embedding_tsne( if do_ahc: if cluster_tsne: # in the low dim space, we cannot use cosine scoring - x2 = np.sum(x_tsne ** 2, axis=1)[:, None] + x2 = np.sum(x_tsne**2, axis=1)[:, None] d2 = x2 - 2 * np.dot(x_tsne, x_tsne.T) + x2.T d2 = np.clip(d2, a_min=0, a_max=None) scores = -np.sqrt(d2) @@ -140,8 +140,7 @@ def plot_embedding_tsne( train_segs.save(output_dir / "segments.csv") -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description=( "Projects embeddings using TSNE, " @@ -194,3 +193,7 @@ def plot_embedding_tsne( logging.debug(args) plot_embedding_tsne(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/prepare_data.py b/hyperion/bin/prepare_data.py index f6723c7d..dd1bde27 100755 --- a/hyperion/bin/prepare_data.py +++ b/hyperion/bin/prepare_data.py @@ -6,8 +6,6 @@ import logging from pathlib import Path -from hyperion.data_prep import DataPrep -from hyperion.hyp_defs import config_logger from jsonargparse import ( ActionConfigFile, ActionParser, @@ -15,6 +13,9 @@ namespace_to_dict, ) +from hyperion.data_prep import DataPrep +from hyperion.hyp_defs import config_logger + def make_parser(data_prep_class): parser = ArgumentParser() @@ -22,7 +23,7 @@ def make_parser(data_prep_class): return parser -if __name__ == "__main__": +def main(): parser = ArgumentParser( description="""Prepares a dataset into relational database tables""" ) @@ 
-39,3 +40,7 @@ def make_parser(data_prep_class): args = namespace_to_dict(args)[args.subcommand] data_prep = data_prep_class(**args) data_prep.prepare() + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/preprocess_audio_files.py b/hyperion/bin/preprocess_audio_files.py index bda9a503..5e98a477 100755 --- a/hyperion/bin/preprocess_audio_files.py +++ b/hyperion/bin/preprocess_audio_files.py @@ -10,11 +10,6 @@ import time import numpy as np -from hyperion.hyp_defs import config_logger -from hyperion.io import AudioWriter as Writer -from hyperion.io import SequentialAudioReader as AR -from hyperion.io import VADReaderFactory as VRF -from hyperion.utils import Utt2Info from jsonargparse import ( ActionConfigFile, ActionParser, @@ -23,6 +18,12 @@ ) from scipy import ndimage, signal +from hyperion.hyp_defs import config_logger +from hyperion.io import AudioWriter as Writer +from hyperion.io import SequentialAudioReader as AR +from hyperion.io import VADReaderFactory as VRF +from hyperion.utils import Utt2Info + def resample_vad(vad, length): step = (len(vad) - 1) / length @@ -59,7 +60,6 @@ def process_audio_files( remove_dc_offset=False, **kwargs, ): - input_args = AR.filter_args(**kwargs) output_args = Writer.filter_args(**kwargs) logging.info(f"input_args={input_args}") @@ -72,7 +72,6 @@ def process_audio_files( with AR(recordings_file, **input_args) as reader, Writer( output_path, output_recordings_file, **output_args ) as writer: - if vad_spec is not None: logging.info("opening VAD stream: %s", vad_spec) v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) @@ -147,8 +146,7 @@ def process_audio_files( u2td.save(write_time_durs_spec) -if __name__ == "__main__": - +def main(): parser = ArgumentParser( description="Process pipes in wav.scp file, optionally applies vad and save all audios in the same format" ) @@ -204,3 +202,7 @@ def process_audio_files( logging.debug(args) process_audio_files(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/split_dataset_into_trials_and_cohort.py b/hyperion/bin/split_dataset_into_trials_and_cohort.py index 24ec10bf..50c2f1f2 100755 --- a/hyperion/bin/split_dataset_into_trials_and_cohort.py +++ b/hyperion/bin/split_dataset_into_trials_and_cohort.py @@ -6,8 +6,6 @@ import logging from pathlib import Path -from hyperion.hyp_defs import config_logger -from hyperion.utils import Dataset from jsonargparse import ( ActionConfigFile, ActionParser, @@ -16,8 +14,11 @@ namespace_to_dict, ) -if __name__ == "__main__": +from hyperion.hyp_defs import config_logger +from hyperion.utils import Dataset + +def main(): parser = ArgumentParser( description=( """Split speakers in dataset into test speaker to create ASV trials and @@ -66,3 +67,7 @@ trials_dataset, cohort_dataset = dataset.split_into_trials_and_cohort(**args) trials_dataset.save(trials_dir) cohort_dataset.save(cohort_dir) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_qmf.py b/hyperion/bin/train_qmf.py index a97e8a5f..42aabe0c 100755 --- a/hyperion/bin/train_qmf.py +++ b/hyperion/bin/train_qmf.py @@ -6,25 +6,25 @@ Trains calibration for SRE18 tel condition """ -import sys +import logging import os +import sys +import time +from pathlib import Path + +import numpy as np from jsonargparse import ( - ArgumentParser, ActionConfigFile, ActionParser, + ArgumentParser, namespace_to_dict, ) -import time -import logging -from pathlib import Path -import numpy as np - -from hyperion.hyp_defs import float_cpu, config_logger -from 
hyperion.utils.trial_scores import TrialScores -from hyperion.utils.trial_key import TrialKey -from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.hyp_defs import config_logger, float_cpu from hyperion.np.classifiers import BinaryLogisticRegression as LR +from hyperion.np.metrics import compute_act_dcf, compute_min_dcf +from hyperion.utils.trial_key import TrialKey +from hyperion.utils.trial_scores import TrialScores def print_q_stats(scr, q_names): @@ -110,7 +110,7 @@ def train_qmf( scr_out.save(output_file) -if __name__ == "__main__": +def main(): parser = ArgumentParser(description="Trains QMF calibration") parser.add_argument("--score-file", required=True) @@ -133,3 +133,7 @@ def train_qmf( logging.debug(args) train_qmf(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py index 8930b299..c00c4633 100755 --- a/hyperion/bin/train_wav2rnn_transducer.py +++ b/hyperion/bin/train_wav2rnn_transducer.py @@ -14,15 +14,20 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) +from torch.nn.utils.rnn import pad_sequence + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory from hyperion.torch.models import Wav2RNNRNNTransducer from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) -from torch.nn.utils.rnn import pad_sequence model_dict = { "rnn_rnn_transducer": Wav2RNNRNNTransducer, @@ -72,14 +77,12 @@ def init_data(partition, rank, num_gpus, **kwargs): num_workers = data_kwargs["data_loader"]["num_workers"] num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) - largs = ({ - "num_workers": num_workers_per_gpu, - "pin_memory": True - } if num_gpus > 0 else {}) - data_loader = torch.utils.data.DataLoader(dataset, - batch_sampler=sampler, - **largs, - collate_fn=transducer_collate) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader( + dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate + ) return data_loader @@ -97,7 +100,6 @@ def init_model(blank_id, vocab_size, rank, model_class, **kwargs): def train_model(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -105,8 +107,8 @@ def train_model(gpu_id, args): kwargs = namespace_to_dict(args) torch.manual_seed(args.seed) set_float_cpu("float32") - #torch.backends.cudnn.deterministic = True - #torch.backends.cudnn.benchmark = False + # torch.backends.cudnn.deterministic = True + # torch.backends.cudnn.benchmark = False torch.backends.cudnn.enabled = False ddp_args = ddp.filter_ddp_args(**kwargs) @@ -115,8 +117,11 @@ def train_model(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - model = init_model(train_loader.dataset.sp.piece_to_id("<blk>"), - train_loader.dataset.sp.get_piece_size(), **kwargs) + model = init_model( + train_loader.dataset.sp.piece_to_id("<blk>"), + train_loader.dataset.sp.get_piece_size(), + **kwargs, + ) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: @@ -159,8 +164,7 @@ def make_parser(model_class):
help="num_workers of data loader", ) data_parser = ArgumentParser(prog="") - data_parser.add_argument("--train", - action=ActionParser(parser=train_parser)) + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) @@ -176,34 +180,27 @@ def make_parser(model_class): type=str, ) - parser.link_arguments("data.train.data_loader.num_workers", - "data.val.data_loader.num_workers") + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) - parser.link_arguments("data.train.dataset.bpe_model", - "data.val.dataset.bpe_model") + parser.link_arguments("data.train.dataset.bpe_model", "data.val.dataset.bpe_model") model_class.add_class_args(parser, prefix="model") - Trainer.add_class_args(parser, - prefix="trainer", - train_modes=model_class.valid_train_modes()) + Trainer.add_class_args( + parser, prefix="trainer", train_modes=model_class.valid_train_modes() + ) ddp.add_ddp_args(parser) - parser.add_argument("--seed", - type=int, - default=1123581321, - help="random seed") - parser.add_argument("-v", - "--verbose", - dest="verbose", - default=1, - choices=[0, 1, 2, 3], - type=int) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) return parser -if __name__ == "__main__": - parser = ArgumentParser( - description="Train RNN Transducer model from audio files") +def main(): + parser = ArgumentParser(description="Train RNN Transducer model from audio files") parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() @@ -232,3 +229,7 @@ def make_parser(model_class): # torch docs recommend using forkserver # multiprocessing.set_start_method("forkserver") train_model(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_wav2vec2rnn_transducer.py b/hyperion/bin/train_wav2vec2rnn_transducer.py index 7018c406..5b802454 100755 --- a/hyperion/bin/train_wav2vec2rnn_transducer.py +++ b/hyperion/bin/train_wav2vec2rnn_transducer.py @@ -14,23 +14,29 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) +from torch.nn.utils.rnn import pad_sequence + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory -from hyperion.torch.models import (HFWav2Vec2ConformerV1RNNTransducer, - HFWav2Vec2RNNRNNTransducer, - HFWav2Vec2RNNTransducer) +from hyperion.torch.models import ( + HFWav2Vec2ConformerV1RNNTransducer, + HFWav2Vec2RNNRNNTransducer, + HFWav2Vec2RNNTransducer, +) from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) -from torch.nn.utils.rnn import pad_sequence model_dict = { "hf_wav2vec2rnn_transducer": HFWav2Vec2RNNTransducer, "hf_wav2vec2rnn_rnn_transducer": HFWav2Vec2RNNRNNTransducer, - "hf_wav2vec2conformer_v1_rnn_transducer": - HFWav2Vec2ConformerV1RNNTransducer, + "hf_wav2vec2conformer_v1_rnn_transducer": HFWav2Vec2ConformerV1RNNTransducer, # "hf_hubert2rnn_transducer": HFWav2Vec2RNNTransducer, # "hf_hubert2rnn_rnn_transducer": 
Hubert2RNNRNNTransducer, # "hf_wavlm2rnn_transducer": HFHubert2RNNTransducer, @@ -88,14 +94,12 @@ def init_data(partition, rank, num_gpus, **kwargs): num_workers = data_kwargs["data_loader"]["num_workers"] num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) - largs = ({ - "num_workers": num_workers_per_gpu, - "pin_memory": True - } if num_gpus > 0 else {}) - data_loader = torch.utils.data.DataLoader(dataset, - batch_sampler=sampler, - **largs, - collate_fn=transducer_collate) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader( + dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate + ) return data_loader @@ -113,7 +117,6 @@ def init_model(blank_id, vocab_size, rank, model_class, **kwargs): def train_model(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -121,8 +124,8 @@ def train_model(gpu_id, args): kwargs = namespace_to_dict(args) torch.manual_seed(args.seed) set_float_cpu("float32") - #torch.backends.cudnn.deterministic = True - #torch.backends.cudnn.benchmark = False + # torch.backends.cudnn.deterministic = True + # torch.backends.cudnn.benchmark = False torch.backends.cudnn.enabled = False ddp_args = ddp.filter_ddp_args(**kwargs) @@ -137,13 +140,16 @@ def train_model(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - model = init_model(train_loader.dataset.sp.piece_to_id(""), - train_loader.dataset.sp.get_piece_size(), **kwargs) + model = init_model( + train_loader.dataset.sp.piece_to_id(""), + train_loader.dataset.sp.get_piece_size(), + **kwargs, + ) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: logging.info("trainer args={}".format(trn_args)) - metrics = {} #{"acc": CategoricalAccuracy()} + metrics = {} # {"acc": CategoricalAccuracy()} trainer = Trainer( model, device=device, @@ -181,8 +187,7 @@ def make_parser(model_class): help="num_workers of data loader", ) data_parser = ArgumentParser(prog="") - data_parser.add_argument("--train", - action=ActionParser(parser=train_parser)) + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) @@ -198,34 +203,29 @@ def make_parser(model_class): type=str, ) - parser.link_arguments("data.train.data_loader.num_workers", - "data.val.data_loader.num_workers") + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) - parser.link_arguments("data.train.dataset.bpe_model", - "data.val.dataset.bpe_model") + parser.link_arguments("data.train.dataset.bpe_model", "data.val.dataset.bpe_model") model_class.add_class_args(parser, prefix="model") - Trainer.add_class_args(parser, - prefix="trainer", - train_modes=model_class.valid_train_modes()) + Trainer.add_class_args( + parser, prefix="trainer", train_modes=model_class.valid_train_modes() + ) ddp.add_ddp_args(parser) - parser.add_argument("--seed", - type=int, - default=1123581321, - help="random seed") - parser.add_argument("-v", - "--verbose", - dest="verbose", - default=1, - choices=[0, 1, 2, 3], - type=int) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) return parser -if __name__ == "__main__": +def main(): 
parser = ArgumentParser( - description="Train Wav2Vec2Transducer model from audio files") + description="Train Wav2Vec2Transducer model from audio files" + ) parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() @@ -254,3 +254,7 @@ def make_parser(model_class): # torch docs recommend using forkserver # multiprocessing.set_start_method("forkserver") train_model(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_wav2vec2transducer.py b/hyperion/bin/train_wav2vec2transducer.py index 55f3b996..77a22bb8 100755 --- a/hyperion/bin/train_wav2vec2transducer.py +++ b/hyperion/bin/train_wav2vec2transducer.py @@ -14,6 +14,14 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) +from torch.nn.utils.rnn import pad_sequence + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory @@ -21,9 +29,6 @@ from hyperion.torch.models import HFWav2Vec2Transducer from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) -from torch.nn.utils.rnn import pad_sequence model_dict = { "hf_wav2vec2transducer": HFWav2Vec2Transducer, @@ -73,14 +78,12 @@ def init_data(partition, rank, num_gpus, **kwargs): num_workers = data_kwargs["data_loader"]["num_workers"] num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) - largs = ({ - "num_workers": num_workers_per_gpu, - "pin_memory": True - } if num_gpus > 0 else {}) - data_loader = torch.utils.data.DataLoader(dataset, - batch_sampler=sampler, - **largs, - collate_fn=transducer_collate) + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader( + dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate + ) return data_loader @@ -98,7 +101,6 @@ def init_model(blank_id, vocab_size, rank, model_class, **kwargs): def train_model(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -106,8 +108,8 @@ def train_model(gpu_id, args): kwargs = namespace_to_dict(args) torch.manual_seed(args.seed) set_float_cpu("float32") - #torch.backends.cudnn.deterministic = True - #torch.backends.cudnn.benchmark = False + # torch.backends.cudnn.deterministic = True + # torch.backends.cudnn.benchmark = False torch.backends.cudnn.enabled = False ddp_args = ddp.filter_ddp_args(**kwargs) @@ -122,13 +124,16 @@ def train_model(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) - model = init_model(train_loader.dataset.sp.piece_to_id(""), - train_loader.dataset.sp.get_piece_size(), **kwargs) + model = init_model( + train_loader.dataset.sp.piece_to_id(""), + train_loader.dataset.sp.get_piece_size(), + **kwargs, + ) trn_args = Trainer.filter_args(**kwargs["trainer"]) if rank == 0: logging.info("trainer args={}".format(trn_args)) - metrics = {} #{"acc": CategoricalAccuracy()} + metrics = {} # {"acc": CategoricalAccuracy()} trainer = Trainer( model, device=device, @@ -166,8 +171,7 @@ def make_parser(model_class): help="num_workers of data loader", ) data_parser = ArgumentParser(prog="") - data_parser.add_argument("--train", - action=ActionParser(parser=train_parser)) + 
data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) @@ -183,34 +187,29 @@ def make_parser(model_class): type=str, ) - parser.link_arguments("data.train.data_loader.num_workers", - "data.val.data_loader.num_workers") + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) - parser.link_arguments("data.train.dataset.bpe_model", - "data.val.dataset.bpe_model") + parser.link_arguments("data.train.dataset.bpe_model", "data.val.dataset.bpe_model") model_class.add_class_args(parser, prefix="model") - Trainer.add_class_args(parser, - prefix="trainer", - train_modes=model_class.valid_train_modes()) + Trainer.add_class_args( + parser, prefix="trainer", train_modes=model_class.valid_train_modes() + ) ddp.add_ddp_args(parser) - parser.add_argument("--seed", - type=int, - default=1123581321, - help="random seed") - parser.add_argument("-v", - "--verbose", - dest="verbose", - default=1, - choices=[0, 1, 2, 3], - type=int) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) return parser -if __name__ == "__main__": +def main(): parser = ArgumentParser( - description="Train Wav2Vec2Transducer model from audio files") + description="Train Wav2Vec2Transducer model from audio files" + ) parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() @@ -239,3 +238,7 @@ def make_parser(model_class): # torch docs recommend using forkserver # multiprocessing.set_start_method("forkserver") train_model(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index f132a35c..e6dd3d3e 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -14,6 +14,13 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory @@ -25,12 +32,6 @@ ) from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) model_dict = { "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, @@ -40,7 +41,6 @@ def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) sampler_args = kwargs["sampler"] @@ -83,7 +83,6 @@ def init_model(num_classes, rank, model_class, **kwargs): def train_model(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -105,7 +104,11 @@ def train_model(gpu_id, args): logging.info(f"trainer args={trn_args}") metrics = {"acc": CategoricalAccuracy()} trainer = Trainer( - model, device=device, metrics=metrics, ddp=world_size > 1, **trn_args, + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, ) trainer.load_last_checkpoint() trainer.fit(train_loader, val_loader) @@ -162,8 +165,7 @@ def make_parser(model_class): return parser -if __name__ == "__main__": - +def main(): parser = 
ArgumentParser(description="Train Wav2Vec2XVector model from audio files") parser.add_argument("--cfg", action=ActionConfigFile) @@ -193,3 +195,7 @@ def make_parser(model_class): # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") train_model(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_wav2xvector.py b/hyperion/bin/train_wav2xvector.py index ddf292b8..7373a338 100755 --- a/hyperion/bin/train_wav2xvector.py +++ b/hyperion/bin/train_wav2xvector.py @@ -9,6 +9,13 @@ from pathlib import Path import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory @@ -23,12 +30,6 @@ # from hyperion.torch.models import TransformerXVectorV1 as TFXVec from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import ( - ActionConfigFile, - ActionParser, - ArgumentParser, - namespace_to_dict, -) xvec_dict = { "resnet": RXVec, @@ -41,7 +42,6 @@ def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) sampler_args = kwargs["sampler"] @@ -84,7 +84,6 @@ def init_xvector(num_classes, rank, xvec_class, **kwargs): def train_xvec(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -107,7 +106,11 @@ def train_xvec(gpu_id, args): logging.info("trainer args={}".format(trn_args)) metrics = {"acc": CategoricalAccuracy()} trainer = Trainer( - model, device=device, metrics=metrics, ddp=world_size > 1, **trn_args, + model, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, ) trainer.load_last_checkpoint() trainer.fit(train_loader, val_loader) @@ -164,8 +167,7 @@ def make_parser(xvec_class): return parser -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Train Wav2XVector from audio files") parser.add_argument("--cfg", action=ActionConfigFile) @@ -194,3 +196,7 @@ def make_parser(xvec_class): # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") train_xvec(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_xvector_from_feats.py b/hyperion/bin/train_xvector_from_feats.py index 71bba080..a2acdf4c 100755 --- a/hyperion/bin/train_xvector_from_feats.py +++ b/hyperion/bin/train_xvector_from_feats.py @@ -13,6 +13,13 @@ import numpy as np import torch import torch.nn as nn +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.data import FeatSeqDataset as SD @@ -25,8 +32,6 @@ from hyperion.torch.models import TransformerXVectorV1 as TFXVec from hyperion.torch.trainers import XVectorTrainer as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) xvec_dict = { "resnet": RXVec, @@ -39,7 +44,6 @@ def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] sd_args = SD.filter_args(**kwargs["dataset"]) sampler_args = Sampler.filter_args(**kwargs["sampler"]) @@ -80,7 +84,6 @@ def init_xvector(num_classes, rank, xvec_class, **kwargs): def 
train_xvec(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -164,8 +167,7 @@ def make_parser(xvec_class): return parser -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Train XVector from audio files") parser.add_argument("--cfg", action=ActionConfigFile) @@ -196,3 +198,7 @@ def make_parser(xvec_class): # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") train_xvec(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py index b2e36cac..c3f6170d 100755 --- a/hyperion/bin/train_xvector_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -9,6 +9,13 @@ from pathlib import Path import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory @@ -22,8 +29,6 @@ from hyperion.torch.narchs import AudioFeatsMVN as AF from hyperion.torch.trainers import XVectorTrainerFromWav as Trainer from hyperion.torch.utils import ddp -from jsonargparse import (ActionConfigFile, ActionParser, ArgumentParser, - namespace_to_dict) xvec_dict = { "resnet": RXVec, @@ -36,7 +41,6 @@ def init_data(partition, rank, num_gpus, **kwargs): - kwargs = kwargs["data"][partition] ad_args = AD.filter_args(**kwargs["dataset"]) sampler_args = kwargs["sampler"] @@ -90,7 +94,6 @@ def init_xvector(num_classes, rank, xvec_class, **kwargs): def train_xvec(gpu_id, args): - config_logger(args.verbose) del args.verbose logging.debug(args) @@ -176,8 +179,7 @@ def make_parser(xvec_class): return parser -if __name__ == "__main__": - +def main(): parser = ArgumentParser(description="Train XVector from audio files") parser.add_argument("--cfg", action=ActionConfigFile) @@ -206,3 +208,7 @@ def make_parser(xvec_class): # torch docs recommend using forkserver multiprocessing.set_start_method("forkserver") train_xvec(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/io/__init__.py b/hyperion/io/__init__.py index 14b1b35f..aa5ac653 100644 --- a/hyperion/io/__init__.py +++ b/hyperion/io/__init__.py @@ -16,10 +16,10 @@ from .hyp_data_reader import * from .hyp_data_writer import * from .kaldi_data_reader import * -from .packed_audio_reader import (RandomAccessPackedAudioReader, - SequentialPackedAudioReader) +from .packed_audio_reader import ( + RandomAccessPackedAudioReader, + SequentialPackedAudioReader, +) from .packed_audio_writer import PackedAudioWriter from .segment_vad_reader import SegmentVADReader from .vad_rw_factory import VADReaderFactory - -# from .queues import * diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py index 2186522e..d1cf7f68 100644 --- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py +++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py @@ -8,7 +8,6 @@ from ....hyp_defs import float_cpu from ....utils.math_funcs import logsumexp, softmax -from ....utils.queues import GeneratorQueue from ..core import PDF @@ -110,86 +109,6 @@ def fit( else: return elbo, elbo / x.shape[0], elbo_val, elbo_val / x.shape[0] - def fit_generator( - self, - generator, - train_steps, - epochs=10, - val_data=None, - val_steps=0, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - ): - """Trains the model from data read by a generator 
function. - This function is deprecated. - - Args: - generator: train data generator function returning a tuple - (x, u_x, sample_weight), (x, u_x), (x, sample_weight) or x. - train_steps: number of training steps / epoch - epochs: number of epochs. - val_data: val. data generator function returning a tuple - (x, u_x, sample_weight), (x, u_x), (x, sample_weight) or x. - val_steps: number of validation steps / epoch - max_queue_size: max. size of the generator queue. - workers: number of workers in the generator. - use_multiprocessing: use multi-processing in the generator queue. - - Returns: - log p(X) of the training data. - log p(x) per sample. - log p(X) of the val. data, if present. - log p(x) of the val. data per sample, if present. - """ - - do_validation = bool(val_data) - val_gen = hasattr(val_data, "next") or hasattr(val_data, "__next__") - if val_gen and not val_steps: - raise ValueError( - "When using a generator for validation data, " - "you must specify a value for " - "`val_steps`." - ) - - if do_validation and not val_gen: - x, u_x_val, sample_weight_val = self.tuple2data(val_data) - log_h_val = self.accum_log_h(x, sample_weight_val) - - elbo = np.zeros((epochs,), dtype=float_cpu()) - elbo_val = np.zeros((epochs,), dtype=float_cpu()) - for epoch in range(epochs): - N, u_x, log_h = self.Estep_generator( - generator, - train_steps, - return_log_h=True, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - ) - - self.Mstep(N, u_x) - elbo[epoch] = self.elbo(None, N=N, u_x=u_x, log_h=log_h) - - if val_data is not None: - if val_gen: - N, u_x, log_h_val = self.Estep_generator( - val_data, - train_steps, - return_log_h=True, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - ) - else: - N, u_x = self.Estep(val_data, u_x_val, sample_weight_val) - elbo_val[epoch] = self.elbo(None, N=N, u_x=u_x, log_h=log_h_val) - - if val_data is None: - return elbo, elbo / x.shape[0] - else: - return elbo, elbo / x.shape[0], elbo_val, elbo_val / x.shape[0] - def log_h(self, x): """Computes log h(x) of the exp. family.""" return 0 @@ -404,7 +323,6 @@ def _accum_suff_stats_segments_prob_1batch( def _accum_suff_stats_segments_prob_nbatches( self, x, prob, sample_weight, batch_size ): - sw_i = None for i1 in range(0, x.shape[0], batch_size): i2 = np.minimum(i1 + batch_size, x.shape[0]) @@ -458,7 +376,6 @@ def accum_suff_stats_sorttime( def _accum_suff_stats_sorttime_1batch( self, x, frame_length, frame_shift, u_x=None, sample_weight=None ): - K = len(self.pi) num_frames = x.shape[0] num_segments = int(np.floor((num_frames - frame_length) / frame_shift + 1)) @@ -494,7 +411,6 @@ def _accum_suff_stats_sorttime_1batch( def _accum_suff_stats_sorttime_nbatches( self, x, frame_length, frame_shift, sample_weight, batch_size ): - K = len(self.pi) num_frames = x.shape[0] num_segments = int(np.floor((num_frames - frame_length) / frame_shift + 1)) @@ -539,65 +455,6 @@ def Estep(self, x, u_x=None, sample_weight=None, batch_size=None): """ return self.accum_suff_stats(x, u_x, sample_weight, batch_size) - def Estep_generator( - self, - generator, - num_steps, - return_log_h, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - ): - """Expectation step, where data is read from a generator function. - - Args: - generator: data generator function returning a tuple - (x, u_x, sample_weight), (x, u_x), (x, sample_weight) or x. - num_steps: number of steps / epoch - return_log_h: returns accumlated log h(x). 
- max_queue_size: max. size of the generator queue. - workers: number of workers in the generator. - use_multiprocessing: use multi-processing in the generator queue. - - Returns: - N zero order sufficient statistics (number of samples). - Accumlated sufficient statistics \sum u(x). - Accumlated log h(x) (optional). - """ - wait_time = 0.01 # in secs - queue = None - N = None - acc_u_x = None - log_h = 0 - try: - queue = GeneratorQueue( - generator, use_multiprocessing=use_multiprocessing, wait_time=wait_time - ) - queue.start(workers=workers, max_queue_size=max_queue_size) - queue_generator = queue.get() - - cur_step = 0 - for cur_step in range(num_steps): - data = next(queue_generator) - x, u_x, sample_weight = self.tuple2data(data) - N_i, u_x_i = self.Estep(x, u_x, sample_weight) - if return_log_h: - log_h += self.accum_log_h(x) - if cur_step == 0: - N = N_i - acc_u_x = u_x_i - else: - N += N_i - acc_u_x += u_x_i - finally: - if queue is not None: - queue.stop() - - if return_log_h: - return N, acc_u_x, log_h - else: - return N, acc_u_x - def sum_suff_stats(self, N, u_x): """Sums suff. stats from multiple sub-processes. @@ -754,28 +611,6 @@ def get_config(self): base_config = super(ExpFamilyMixture, self).get_config() return dict(list(base_config.items()) + list(config.items())) - @staticmethod - def tuple2data(data): - if isinstance(data, tuple): - if len(data) == 2: - x, u_x = data - if u_x.ndim == 2: - sample_weight = None - elif u_x.ndim == 1: - sample_weight = u_x - u_x = None - else: - raise ValueError("Generator output: " + str(data)) - elif len(data) == 3: - x, u_x, sample_weight = data - else: - raise ValueError("Generator output: " + str(data)) - else: - x = data - u_x = None - sample_weight = None - return x, u_x, sample_weight - @staticmethod def compute_A_nat(eta): """Computes A_theta from the natural param.""" diff --git a/hyperion/torch/lr_schedulers/red_lr_on_plateau.py b/hyperion/torch/lr_schedulers/red_lr_on_plateau.py index 7a2e82f8..3f7b2ec7 100644 --- a/hyperion/torch/lr_schedulers/red_lr_on_plateau.py +++ b/hyperion/torch/lr_schedulers/red_lr_on_plateau.py @@ -7,7 +7,11 @@ from functools import partial import torch -from torch._six import inf + +try: + from torch import inf +except ImportError: + from torch._six import inf from .lr_scheduler import LRScheduler diff --git a/hyperion/utils/queues.py b/hyperion/utils/queues.py deleted file mode 100644 index 8bfd0166..00000000 --- a/hyperion/utils/queues.py +++ /dev/null @@ -1,287 +0,0 @@ -""" - Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" - -import copy -import multiprocessing -import threading -import time -import warnings -from abc import abstractmethod - -import numpy as np -import six - -try: - import queue -except ImportError: - import Queue as queue - - -class SequenceQueue(object): - """Base class to enqueue inputs. - - The task of an Queue is to use parallelism to speed up preprocessing. - This is done with processes or threads. - - # Examples - - ```python - enqueuer = SequenceQueue(...) - enqueuer.start() - datas = enqueuer.get() - for data in datas: - # Use the inputs; training, evaluating, predicting. - # ... stop sometime. - enqueuer.close() - ``` - - The `enqueuer.get()` should be an infinite stream of datas. - - """ - - @abstractmethod - def is_running(self): - raise NotImplemented - - @abstractmethod - def start(self, workers=1, max_queue_size=10): - """Starts the handler's workers.
- - # Arguments - workers: number of worker threads - max_queue_size: queue size - (when full, threads could block on `put()`). - """ - raise NotImplemented - - @abstractmethod - def stop(self, timeout=None): - """Stop running threads and wait for them to exit, if necessary. - - Should be called by the same thread which called start(). - - # Arguments - timeout: maximum time to wait on thread.join() - """ - raise NotImplemented - - @abstractmethod - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - - # Returns - Generator yielding tuples `(inputs, targets)` - or `(inputs, targets, sample_weights)`. - """ - raise NotImplemented - - -class OrderedQueue(SequenceQueue): - """Builds a Queue from a Sequence. - - Used in `fit_generator`, `evaluate_generator`, `predict_generator`. - - # Arguments - sequence: A `keras.utils.data_utils.Sequence` object. - use_multiprocessing: use multiprocessing if True, otherwise threading - scheduling: Sequential querying of datas if 'sequential', random otherwise. - """ - - def __init__(self, sequence, use_multiprocessing=False, scheduling="sequential"): - self.sequence = sequence - self.use_multiprocessing = use_multiprocessing - self.scheduling = scheduling - self.workers = 0 - self.executor = None - self.queue = None - self.run_thread = None - self.stop_signal = None - - def is_running(self): - return self.stop_signal is not None and not self.stop_signal.is_set() - - def start(self, workers=1, max_queue_size=10): - """Start the handler's workers. - - # Arguments - workers: number of worker threads - max_queue_size: queue size - (when full, workers could block on `put()`) - """ - if self.use_multiprocessing: - self.executor = multiprocessing.Pool(workers) - else: - self.executor = ThreadPool(workers) - self.queue = queue.Queue(max_queue_size) - self.stop_signal = threading.Event() - self.run_thread = threading.Thread(target=self._run) - self.run_thread.daemon = True - self.run_thread.start() - - def _run(self): - """Function to submit request to the executor and queue the `Future` objects.""" - sequence = list(range(len(self.sequence))) - while True: - if self.scheduling is not "sequential": - random.shuffle(sequence) - for i in sequence: - if self.stop_signal.is_set(): - return - self.queue.put( - self.executor.apply_async(get_index, (self.sequence, i)), block=True - ) - - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - - # Returns - Generator yielding tuples (inputs, targets) - or (inputs, targets, sample_weights) - """ - try: - while self.is_running(): - inputs = self.queue.get(block=True).get() - if inputs is not None: - yield inputs - except Exception as e: - self.stop() - raise StopIteration(e) - - def stop(self, timeout=None): - """Stops running threads and wait for them to exit, if necessary. - - Should be called by the same thread which called `start()`. - - # Arguments - timeout: maximum time to wait on `thread.join()` - """ - self.stop_signal.set() - with self.queue.mutex: - self.queue.queue.clear() - self.queue.unfinished_tasks = 0 - self.queue.not_full.notify() - self.executor.close() - self.executor.join() - self.run_thread.join(timeout) - - -class GeneratorQueue(SequenceQueue): - """Builds a queue out of a data generator. - - Used in `fit_generator`, `evaluate_generator`, `predict_generator`. 
- - # Arguments - generator: a generator function which endlessly yields data - use_multiprocessing: use multiprocessing if True, otherwise threading - wait_time: time to sleep in-between calls to `put()` - random_seed: Initial seed for workers, - will be incremented by one for each workers. - """ - - def __init__( - self, generator, use_multiprocessing=False, wait_time=0.05, random_seed=None - ): - self.wait_time = wait_time - self._generator = generator - self._use_multiprocessing = use_multiprocessing - self._threads = [] - self._stop_event = None - self.queue = None - self.random_seed = random_seed - - def start(self, workers=1, max_queue_size=10): - """Kicks off threads which add data from the generator into the queue. - - # Arguments - workers: number of worker threads - max_queue_size: queue size - (when full, threads could block on `put()`) - """ - - def data_generator_task(): - while not self._stop_event.is_set(): - try: - if self._use_multiprocessing or self.queue.qsize() < max_queue_size: - generator_output = next(self._generator) - self.queue.put(generator_output) - else: - time.sleep(self.wait_time) - except Exception: - self._stop_event.set() - raise - - try: - if self._use_multiprocessing: - self.queue = multiprocessing.Queue(maxsize=max_queue_size) - self._stop_event = multiprocessing.Event() - else: - self.queue = queue.Queue() - self._stop_event = threading.Event() - - for _ in range(workers): - if self._use_multiprocessing: - # Reset random seed else all children processes - # share the same seed - np.random.seed(self.random_seed) - thread = multiprocessing.Process(target=data_generator_task) - thread.daemon = True - if self.random_seed is not None: - self.random_seed += 1 - else: - thread = threading.Thread(target=data_generator_task) - self._threads.append(thread) - thread.start() - except: - self.stop() - raise - - def is_running(self): - return self._stop_event is not None and not self._stop_event.is_set() - - def stop(self, timeout=None): - """Stops running threads and wait for them to exit, if necessary. - - Should be called by the same thread which called `start()`. - - # Arguments - timeout: maximum time to wait on `thread.join()`. - """ - if self.is_running(): - self._stop_event.set() - - for thread in self._threads: - if thread.is_alive(): - if self._use_multiprocessing: - thread.terminate() - else: - thread.join(timeout) - - if self._use_multiprocessing: - if self.queue is not None: - self.queue.close() - - self._threads = [] - self._stop_event = None - self.queue = None - - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - - # Returns - A generator - """ - while self.is_running(): - if not self.queue.empty(): - inputs = self.queue.get() - if inputs is not None: - yield inputs - else: - time.sleep(self.wait_time) diff --git a/setup.py b/setup.py index 9780586d..e1fb35cc 100644 --- a/setup.py +++ b/setup.py @@ -15,15 +15,26 @@ # limitations under the License. 
# -import setuptools from pathlib import Path +import setuptools + project_root = Path(__file__).parent -with open(project_root / "apps.txt") as f: - apps = f.read().splitlines() +# with open(project_root / "apps.txt") as f: +# apps = f.read().splitlines() -apps = [str(project_root / "hyperion" / "bin" / app) for app in apps] +# apps = [str(project_root / "hyperion" / "bin" / app) for app in apps] +binaries = (project_root / "hyperion" / "bin").glob("*.py") +console_scripts = [] +for binary in binaries: + stem = binary.stem + script_name = stem.replace("hyperion_", "").replace("_", "-") + if script_name[0] == "-": + continue + module = f"hyperion.bin.{stem}:main" + console_script = f"hyperion-{script_name} = {module}" + console_scripts.append(console_script) with open(project_root / "requirements.txt") as f: requirements = f.read().splitlines() @@ -77,10 +88,22 @@ def get_version(): "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ], python_requires=">=3.7", install_requires=requirements, - scripts=apps, + entry_points={ + "console_scripts": console_scripts, + } + # entry_points={ + # "console_scripts": [ + # "hyperion-prepare-data = hyperion.bin.prepare_data:main", + # "hyperion-train-wav2xvector = hyperion.bin.train_wav2xvector:main", + # ] + # }, + # scripts=apps, ) From 610547682764789844af201c1a16bccc6b8d34ab Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Sun, 10 Sep 2023 20:01:36 -0400 Subject: [PATCH 108/154] make it work with cuda 11 --- README.md | 11 +++++++++-- hyp_utils/conda_env.sh | 32 +++++++++++++++++--------------- hyperion/torch/utils/ddp.py | 13 +++++-------- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 4838157b..d56406d7 100644 --- a/README.md +++ b/README.md @@ -26,14 +26,21 @@ The full API is described in the documentation page [https://hyperion-ml.readthe ### Prerequisites We use anaconda or miniconda, though you should be able to make it work in other python distributions - To start, you should create a new enviroment and install PyTorch>=1.9, (older versions are not supported any longer) e.g.: + To start, you should create a new environment and install PyTorch: ``` conda create --name ${your_env} python=3.11 conda activate ${your_env} -conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 cudatoolkit=10.2 -c pytorch conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia ``` +For systems with a CUDA 10.2 driver: +``` +conda create --name ${your_env} python=3.10 +conda activate ${your_env} +conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=10.2 -c pytorch +``` + + ### Installing Hyperion - First, clone the repo: diff --git a/hyp_utils/conda_env.sh b/hyp_utils/conda_env.sh index 8d5c67c1..90ffa369 100755 --- a/hyp_utils/conda_env.sh +++ b/hyp_utils/conda_env.sh @@ -52,22 +52,24 @@ fi # echo "LRU_CACHE_CAPACITY=$LRU_CACHE_CAPACITY" conda activate $conda_env -command="python" +command="" if [ $num_gpus -gt 0 ];then - # set CUDA_VISIBLE_DEVICES - if [ !
-z "$SGE_HGR_gpu" ]; then - echo "SGE_HGR_gpu=$SGE_HGR_gpu" - export CUDA_VISIBLE_DEVICES=$(echo $SGE_HGR_gpu | sed 's@ @,@g') - else - # seach location of free-gpu program in the PATH or hyp_utils directory - free_gpu=$(which free-gpu) - if [ -z "$free_gpu" ];then - free_gpu=$(which hyp_utils/free-gpu) - fi - - if [ ! -z "$free_gpu" ];then - # if free-gpu found set env var, otherwise we assume that you can use any gpu - export CUDA_VISIBLE_DEVICES=$($free_gpu -n $num_gpus) + if [ -z "$CUDA_VISIBLE_DEVICES" ];then + # set CUDA_VISIBLE_DEVICES + if [ ! -z "$SGE_HGR_gpu" ]; then + echo "SGE_HGR_gpu=$SGE_HGR_gpu" + export CUDA_VISIBLE_DEVICES=$(echo $SGE_HGR_gpu | sed 's@ @,@g') + else + # seach location of free-gpu program in the PATH or hyp_utils directory + free_gpu=$(which free-gpu) + if [ -z "$free_gpu" ];then + free_gpu=$(which hyp_utils/free-gpu) + fi + + if [ ! -z "$free_gpu" ];then + # if free-gpu found set env var, otherwise we assume that you can use any gpu + export CUDA_VISIBLE_DEVICES=$($free_gpu -n $num_gpus) + fi fi fi echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" diff --git a/hyperion/torch/utils/ddp.py b/hyperion/torch/utils/ddp.py index 1aefb3d4..4f006c0a 100644 --- a/hyperion/torch/utils/ddp.py +++ b/hyperion/torch/utils/ddp.py @@ -6,19 +6,16 @@ import logging import os -from fairscale.nn.data_parallel import \ - FullyShardedDataParallel as FullyShardedDDP -from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP - import torch import torch.distributed as dist import torch.nn as nn +from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP +from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP from .devices import open_device def add_ddp_args(parser): - parser.add_argument( "--num-gpus", type=int, default=1, help="number of gpus, if 0 it uses cpu" ) @@ -50,7 +47,6 @@ def filter_ddp_args(**kwargs): def ddp_init( gpu_id, num_gpus, node_id=0, num_nodes=1, master_addr="localhost", master_port=None ): - rank = node_id * num_gpus + gpu_id world_size = num_nodes * num_gpus @@ -62,15 +58,16 @@ def ddp_init( os.environ["MASTER_PORT"] = master_port logging.info( - f"init ddp rank={rank} world_size={world_size} master={master_addr}:{master_port} gpu_id={gpu_id}" + f"init ddp rank={rank} world_size={world_size} master={master_addr}:{master_port} gpu_id={gpu_id}" ) dist.init_process_group( "nccl", rank=rank, world_size=world_size, ) + torch.cuda.set_device(rank) torch.tensor([0]).to(gpu_id) - device = torch.device('cuda', gpu_id) + device = torch.device("cuda", gpu_id) return device, rank, world_size # return gpu_id, rank, world_size From 392cd30f6bae594e9121bde48379aae787d16e6f Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 11 Sep 2023 11:41:35 -0400 Subject: [PATCH 109/154] started vox/v2.1 recipe and fix some readmes --- egs/voxceleb/v1.1/README.md | 2 + egs/voxceleb/v1.2/README.md | 249 ++++++-------- .../train_cfwseresnet34_xvec_stage1_v3.0.yaml | 72 ++++ .../train_cfwseresnet34_xvec_stage2_v3.0.yaml | 69 ++++ .../train_cwseresnet34_xvec_stage1_v3.0.yaml | 72 ++++ .../train_cwseresnet34_xvec_stage2_v3.0.yaml | 69 ++++ .../train_fwseresnet34_xvec_stage1_v3.0.yaml | 72 ++++ .../train_fwseresnet34_xvec_stage2_v3.0.yaml | 69 ++++ ...rain_idrnd_resnet100_xvec_stage1_v3.0.yaml | 73 ++++ ...rain_idrnd_resnet100_xvec_stage2_v3.0.yaml | 69 ++++ .../conf/train_resnet34_xvec_stage1_v3.0.yaml | 71 ++++ .../conf/train_resnet34_xvec_stage2_v3.0.yaml | 69 ++++ .../train_tseresnet34_xvec_stage1_v3.0.yaml | 72 ++++ 
.../train_tseresnet34_xvec_stage2_v3.0.yaml | 69 ++++ .../config_fbank80_stmn_cfwseresnet34.v3.0.sh | 44 +++ .../config_fbank80_stmn_cwseresnet34.v3.0.sh | 45 +++ .../config_fbank80_stmn_fwseresnet34.v3.0.sh | 44 +++ ...onfig_fbank80_stmn_idrnd_resnet100.v3.0.sh | 44 +++ .../config_fbank80_stmn_resnet34.v3.0.sh | 44 +++ .../config_fbank80_stmn_tseresnet34.v3.0.sh | 44 +++ egs/voxceleb/v2.1/cmd.sh | 28 ++ egs/voxceleb/v2.1/conf/clsp.conf | 11 + egs/voxceleb/v2.1/conf/coe_gpu_bigmem.conf | 11 + egs/voxceleb/v2.1/conf/coe_gpu_long.conf | 13 + egs/voxceleb/v2.1/conf/coe_gpu_rtx.conf | 11 + egs/voxceleb/v2.1/conf/coe_gpu_short.conf | 11 + egs/voxceleb/v2.1/conf/coe_gpu_v100.conf | 11 + egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml | 35 ++ ...lsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml | 59 ++++ ...2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml | 59 ++++ ...c2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml | 59 ++++ ...baseplus9l_ecapatdnn512x3_stage1_v2.0.yaml | 59 ++++ ...lmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml | 59 ++++ ...lmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml | 63 ++++ ...lmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml | 73 ++++ ...lmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml | 59 ++++ ...lmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml | 63 ++++ ...lmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml | 73 ++++ ...avlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml | 63 ++++ ...avlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml | 73 ++++ ...wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml | 59 ++++ ...wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml | 63 ++++ ...wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml | 73 ++++ egs/voxceleb/v2.1/conf/vad_16k.yaml | 8 + ...v2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml | 45 +++ ...wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml | 44 +++ .../wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml | 44 +++ .../wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml | 45 +++ .../wavlmbaseplus_ecapatdnn512x3_v2.0.yaml | 44 +++ .../wavlmlarge12l_ecapatdnn512x3_v2.0.yaml | 45 +++ .../conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml | 44 +++ egs/voxceleb/v2.1/datapath.sh | 23 ++ egs/voxceleb/v2.1/default_config.sh | 1 + ...wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | 54 +++ ...g_wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.sh | 54 +++ ...ig_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | 54 +++ ...fig_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | 54 +++ ...onfig_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | 54 +++ ...onfig_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | 54 +++ .../config_wavlmlarge_ecapatdnn512x3_v2.0.sh | 54 +++ egs/voxceleb/v2.1/hyp_utils | 1 + egs/voxceleb/v2.1/path.sh | 5 + egs/voxceleb/v2.1/run_001_prepare_data.sh | 46 +++ egs/voxceleb/v2.1/run_002_compute_evad.sh | 66 ++++ .../v2.1/run_003_prepare_noises_rirs.sh | 102 ++++++ .../v2.1/run_004_prepare_xvec_train_data.sh | 76 +++++ egs/voxceleb/v2.1/run_005_train_xvector.sh | 78 +++++ egs/voxceleb/v2.1/run_006_extract_xvectors.sh | 103 ++++++ egs/voxceleb/v2.1/run_007_eval_be.sh | 321 ++++++++++++++++++ egs/voxceleb/v2/README.md | 10 +- egs/voxceleb/v2/default_config.sh | 2 +- 71 files changed, 3829 insertions(+), 152 deletions(-) create mode 100644 egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml create 
mode 100644 egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage1_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_cwseresnet34.v3.0.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.0.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.0.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh create mode 100755 egs/voxceleb/v2.1/cmd.sh create mode 100644 egs/voxceleb/v2.1/conf/clsp.conf create mode 100644 egs/voxceleb/v2.1/conf/coe_gpu_bigmem.conf create mode 100644 egs/voxceleb/v2.1/conf/coe_gpu_long.conf create mode 100644 egs/voxceleb/v2.1/conf/coe_gpu_rtx.conf create mode 100644 egs/voxceleb/v2.1/conf/coe_gpu_short.conf create mode 100644 egs/voxceleb/v2.1/conf/coe_gpu_v100.conf create mode 100644 egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/vad_16k.yaml create mode 100644 egs/voxceleb/v2.1/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml create mode 100644 
egs/voxceleb/v2.1/datapath.sh create mode 120000 egs/voxceleb/v2.1/default_config.sh create mode 100644 egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.sh create mode 100644 egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2.1/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2.1/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2.1/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh create mode 100644 egs/voxceleb/v2.1/global_conf/config_wavlmlarge_ecapatdnn512x3_v2.0.sh create mode 120000 egs/voxceleb/v2.1/hyp_utils create mode 100755 egs/voxceleb/v2.1/path.sh create mode 100755 egs/voxceleb/v2.1/run_001_prepare_data.sh create mode 100755 egs/voxceleb/v2.1/run_002_compute_evad.sh create mode 100755 egs/voxceleb/v2.1/run_003_prepare_noises_rirs.sh create mode 100755 egs/voxceleb/v2.1/run_004_prepare_xvec_train_data.sh create mode 100755 egs/voxceleb/v2.1/run_005_train_xvector.sh create mode 100755 egs/voxceleb/v2.1/run_006_extract_xvectors.sh create mode 100755 egs/voxceleb/v2.1/run_007_eval_be.sh diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index 3b9eeaa9..efdb77c1 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -1,5 +1,7 @@ # VoxCeleb V1.1 +This recipe will be deprecated; use V1.2 instead. + Recipe for the VoxCeleb Speaker Verification Task ## Differences w.r.t VoxCeleb V1 recipe diff --git a/egs/voxceleb/v1.2/README.md b/egs/voxceleb/v1.2/README.md index 1ee9468f..6e8ba07a 100644 --- a/egs/voxceleb/v1.2/README.md +++ b/egs/voxceleb/v1.2/README.md @@ -1,4 +1,4 @@ -# VoxCeleb V1.1 +# VoxCeleb V1.2 Recipe for the VoxCeleb Speaker Verification Task @@ -9,7 +9,7 @@ In recipe version V1: - Augmentation is performed using Kaldi scripts and wav-reverbate tool - Babble noise is created on-the-fly when computing features by mixing 3-7 single speaker files. -In this recipe: +In V1.1: - We compute speech augmentations and acoustic features are computed always on-the-fly, we don't dump any features to disk. - Augmentation is performed using Hyperion SpeechAugment class. @@ -18,6 +18,11 @@ In this recipe: which mimics the proportions of noise and RIR types, and SNRs used in the V1 of the recipe. - Babble noise is created offline by mixing 3-10 single speaker files. +In V1.2: + - Feature extractor is embedded into the PyTorch model in classes derived from the Wav2XVector base class. + - Kaldi format is replaced by a new format based on pandas tables + - Kaldi-style bash scripts are removed and replaced by python scripts + - Most python scripts are called using Hyperion entry points ## Citing @@ -30,13 +35,11 @@ In this recipe: ## Test data - Test data is VoxCeleb 1 - - We evaluate 6 conditions: + - We evaluate the 3 conditions (with cleaned lists): - VoxCeleb-O (Original): Original Voxceleb test set with 40 speakers - - Voxceleb-O-cleaned: VoxCeleb-O cleaned-up of some errors - VoxCeleb-E (Entire): List using all utterances of VoxCeleb1 - - Voxceleb-E-cleaned: VoxCeleb-E cleaned-up of some errors - VoxCeleb-H (Hard): List of hard trials between all utterances of VoxCeleb1, same gender and nationality trials.
- - Voxceleb-H-cleaned: VoxCeleb-H cleaned-up of some errors + ## Usage @@ -44,9 +47,9 @@ In this recipe: - By default it will use Light ResNet (16 base channels) - For better performance use full ResNet (64 base channels) using `config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh` file as ```bash -run_011_train_xvector.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh -run_030_extract_xvectors.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh --use-gpu true -run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +run_005_train_xvector.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +run_006_extract_xvectors.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh --use-gpu true +run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh ``` - To train with mixed precision training use config file `config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh` @@ -66,25 +69,26 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr - Creates Babble noise from MUSAN speech to be used by SpeechAugment class. - Prepares RIRs by compacting them into HDF5 files, to be used by SpeechAugment class. - - `run_010_prepare_xvec_train_data.sh` + - `run_004_prepare_xvec_train_data.sh` - Transforms all the audios that we are going to use to train the x-vector into a common format, e.g., .flac. - Removes silence from the audios - Removes utterances shorter than 4secs and speakers with less than 8 utterances. - Creates training and validation lists for x-vector training - - `run_011_train_xvector.sh` + - `run_005_train_xvector.sh` - Trains the x-vector network - - `run_030_extract_xvectors.sh` + - `run_006_extract_xvectors.sh` - Extracts x-vectors for VoxCeleb2 or VoxCeleb2+augmentation for PLDA training - Extracts x-vectors for VoxCeleb1 test sets - - `run_040_eval_be.sh` + - `run_007_eval_be.sh` - Trains PLDA and evals PLDA and cosine scoring back-ends ## Results + ### VoxCeleb 1 Original-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -95,9 +99,28 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.68 | 0.052 | 0.088 | | | | | Cosine + AS-Norm | 0.63 | 0.049 | 0.083 | | | | | Cosine + QMF | 0.57 | 0.037 | 0.071 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch.
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | || | +| config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.77 | 0.048 | 0.071 | +| | | | Cosine + AS-Norm | 0.70 | 0.039 | 0.048 | +| | | | Cosine + QMF | 0.62 | 0.034 | 0.042 | +| config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.76 | 0.048 | 0.071 | +| | | | Cosine + AS-Norm | 0.70 | 0.041 | 0.061 | +| | | | Cosine + QMF | 0.62 | 0.037 | 0.056 | +| config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.77 | 0.048 | 0.077 | +| | | | Cosine + AS-Norm | 0.68 | 0.040 | 0.062 | +| | | | Cosine + QMF | 0.62 | 0.036 | 0.063 | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.78 | 0.053 | 0.082 | +| | | | Cosine + AS-Norm | 0.70 | 0.043 | 0.076 | +| | | | Cosine + QMF | 0.63 | 0.042 | 0.071 | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.78 | 0.051 | 0.095 | +| | | | Cosine + AS-Norm | 0.72 | 0.046 | 0.070 | +| | | | Cosine + QMF | 0.67 | 0.039 | 0.074 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.56 | 0.040 | 0.065 | +| | | | Cosine + AS-Norm | 0.52 | 0.033 | 0.045 | +| | | | Cosine + QMF | 0.45 | 0.027 | 0.043 | +| config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.60 | 0.043 | 0.071 | +| | | | Cosine + AS-Norm | 0.53 | 0.034 | 0.063 | +| | | | Cosine + QMF | 0.49 | 0.033 | 0.054 | + ### VoxCeleb 1 Entire-Clean trial list @@ -109,9 +132,27 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.85 | 0.055 | 0.100 | | | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.087 | | | | | Cosine + QMF | 0.76 | 0.047 | 0.083 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch.
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.86 | 0.054 | 0.098 | +| | | | Cosine + AS-Norm | 0.81 | 0.049 | 0.087 | +| | | | Cosine + QMF | 0.77 | 0.046 | 0.082 | +| config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.89 | 0.058 | 0.098 | +| | | | Cosine + AS-Norm | 0.84 | 0.053 | 0.087 | +| | | | Cosine + QMF | 0.80 | 0.050 | 0.081 | +| config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.83 | 0.053 | 0.098 | +| | | | Cosine + AS-Norm | 0.78 | 0.047 | 0.085 | +| | | | Cosine + QMF | 0.74 | 0.045 | 0.081 | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.91 | 0.057 | 0.100 | +| | | | Cosine + AS-Norm | 0.85 | 0.052 | 0.089 | +| | | | Cosine + QMF | 0.81 | 0.049 | 0.085 | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.94 | 0.059 | 0.105 | +| | | | Cosine + AS-Norm | 0.88 | 0.053 | 0.093 | +| | | | Cosine + QMF | 0.84 | 0.051 | 0.088 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.71 | 0.044 | 0.076 | +| | | | Cosine + AS-Norm | 0.66 | 0.040 | 0.069 | +| | | | Cosine + QMF | 0.63 | 0.037 | 0.067 | +| config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.75 | 0.047 | 0.077 | +| | | | Cosine + AS-Norm | 0.70 | 0.042 | 0.072 | +| | | | Cosine + QMF | 0.68 | 0.040 | 0.069 | ### VoxCeleb 1 Hard-Clean trial list @@ -123,9 +164,28 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.66 | 0.103 | 0.168 | | | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.151 | | | | | Cosine + QMF | 1.44 | 0.087 | 0.145 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch.
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | | | | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | +| config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.62 | 0.098 | 0.164 | +| | | | Cosine + AS-Norm | 1.45 | 0.085 | 0.142 | +| | | | Cosine + QMF | 1.36 | 0.082 | 0.137 | +| config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.70 | 0.1 | 0.165 | +| | | | Cosine + AS-Norm | 1.50 | 0.086 | 0.138 | +| | | | Cosine + QMF | 1.44 | 0.085 | 0.139 | +| config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.59 | 0.096 | 0.165 | +| | | | Cosine + AS-Norm | 1.41 | 0.083 | 0.143 | +| | | | Cosine + QMF | 1.34 | 0.079 | 0.136 | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.75 | 0.104 | 0.171 | +| | | | Cosine + AS-Norm | 1.56 | 0.091 | 0.152 | +| | | | Cosine + QMF | 1.50 | 0.087 | 0.145 | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.76 | 0.104 | 0.174 | +| | | | Cosine + AS-Norm | 1.58 | 0.092 | 0.152 | +| | | | Cosine + QMF | 1.51 | 0.089 | 0.149 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.30 | 0.076 | 0.125 | +| | | | Cosine + AS-Norm | 1.15 | 0.066 | 0.109 | +| | | | Cosine + QMF | 1.11 | 0.065 | 0.103 | +| config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.41 | 0.081 | 0.132 | +| | | | Cosine + AS-Norm | 1.28 | 0.071 | 0.116 | +| | | | Cosine + QMF | 1.21 | 0.069 | 0.113 | + ### VoxSRC2022 dev @@ -137,127 +197,24 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.33 | 0.156 | 0.260 | | | | | Cosine + AS-Norm | 2.19 | 0.144 | 0.263 | | | | | Cosine + QMF | 2.06 | 0.137 | 0.251 | -| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | || | -| | | | Cosine + AS-Norm | | | | -| | | | Cosine + QMF | | | | - -## Results before 2023 - -### VoxCeleb 1 Original-Clean trial list - -| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | -| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.00 | 0.129 | 0.216 | -| | | | Cosine | 2.04 | 0.138 | 0.210 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.35 | 0.091 | 0.159 | -| | | | Cosine | 1.22 | 0.082 | 0.129 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 1.19 | 0.074 | 0.124 | -| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 1.30 | 0.090 | 0.160 | -| | | | Cosine | 1.44 | 0.100 | 0.173 | -| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.091 | 0.143 | -| | | | Cosine | 1.17 | 0.081 | 0.110 | -| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | 1.37 | 0.104 | 0.179 | -| | | | Cosine | 1.31 | 0.080 | 0.139 | -| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | 1.29 | 0.088 | 0.129 | -| | | | Cosine | 1.23 | 0.083 | 0.136 | -| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 1.20 | 0.095 | 0.156 | -| | | | Cosine | 1.29 | 0.089 | 0.146 | -| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.20 | 0.084 | 0.136 | -| | | | Cosine | 1.18 | 0.078 | 0.115 | -| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.11 | 0.084 | 0.145 | -| | | | Cosine | 1.12 | 0.073 | 0.131 | -| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16
ArcFace s=30/m=0.3 | PLDA | 1.53 | 0.104 | 0.189 | -| | | | Cosine | 1.31 | 0.084 | 0.132 | -| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 0.98 | 0.066 | 0.116 | -| | | | Cosine | 1.12 | 0.071 | 0.103 | -| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 1.05 | 0.077 | 0.123 | -| | | | Cosine | 0.96 | 0.065 | 0.110 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.04 | 0.071 | 0.118 | -| | | | Cosine | 0.93 | 0.067 | 0.108 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1_swa.sh | Res2Net50 width=26x8 | + SWA | PLDA | 0.90 | 0.067 | 0.118 | -| | | | Cosine | 0.85 | 0.060 | 0.094 | -| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.44 | 0.102 | 0.169 | -| | | | Cosine | 1.29 | 0.084 | 0.140 | -| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.071 | 0.116 | -| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.074 | 0.116 | -| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.09 | 0.081 | 0.150 | - - -### VoxCeleb 1 Entire-Clean trial list - -| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | -| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.86 | 0.124 | 0.210 | -| | | | Cosine | 1.93 | 0.122 | 0.201 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.43 | 0.091 | 0.159 | -| | | | Cosine | 1.24 | 0.080 | 0.136 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 1.19 | 0.077 | 0.132 | -| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 1.27 | 0.084 | 0.150 | -| | | | Cosine | 1.30 | 0.082 | 0.150 | -| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 1.30 | 0.083 | 0.146 | -| | | | Cosine | 1.09 | 0.071 | 0.124 | -| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | 1.45 | 0.097 | 0.165 | -| | | | Cosine | 1.15 | 0.076 | 0.132 | -| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | 1.47 | 0.094 | 0.165 | -| | | | Cosine | 1.27 | 0.082 | 0.148 | -| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 1.31 | 0.086 | 0.149 | -| | | | Cosine | 1.22 | 0.079 | 0.134 | -| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.27 | 0.082 | 0.145 | -| | | | Cosine | 1.16 | 0.074 | 0.130 | -| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.077 | 0.136 | -| | | | Cosine | 1.11 | 0.071 | 0.125 | -| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16
ArcFace s=30/m=0.3 | PLDA | 1.46 | 0.097 | 0.173 | -| | | | Cosine | 1.24 | 0.080 | 0.140 | -| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 1.11 | 0.071 | 0.127 | -| | | | Cosine | 1.05 | 0.067 | 0.117 | -| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 1.23 | 0.078 | 0.134 | -| | | | Cosine | 1.05 | 0.069 | 0.121 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.18 | 0.075 | 0.131 | -| | | | Cosine | 0.98 | 0.063 | 0.110 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 1.17 | 0.072 | 0.123 | -| | | | Cosine | 0.94 | 0.061 | 0.107 | -| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.56 | 0.095 | 0.166 | -| | | | Cosine | 1.27 | 0.079 | 0.142 | -| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.19 | 0.077 | 0.137 | -| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.073 | 0.129 | -| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.068 | 0.120 | - - -### VoxCeleb 1 Hard-Clean trial list - -| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | -| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | -| config_fbank80_stmn_lresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | LResNet34 | ArcFace s=30/m=0.3 | PLDA | 3.29 | 0.195 | 0.318 | -| | | | Cosine | 3.27 | 0.188 | 0.303 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.66 | 0.160 | 0.258 | -| | | | Cosine | 2.32 | 0.139 | 0.232 | -| config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | ResNet34 | + SWA | Cosine | 2.19 | 0.133 | 0.215 | -| config_fbank80_stmn_resnet50_arcs30m0.3_adam_lr0.05_amp.v1.sh | ResNet50 | ArcFace s=30/m=0.3 | PLDA | 2.33 | 0.139 | 0.227 | -| | | | Cosine | 2.33 | 0.142 | 0.235 | -| config_fbank80_stmn_tseresnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-ResNet34 | ArcFace s=30/m=0.3 | PLDA | 2.46 | 0.142 | 0.237 | -| | | | Cosine | 2.14 | 0.126 | 0.203 | -| config_fbank80_stmn_effnetb4_v2_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b4 v2 | EfficientNet-b4 with strides=1122121
ArcFace s=30/m=0.3 | 2.57 | 0.153 | 0.255 | -| | | | Cosine | 2.11 | 0.127 | 0.205 | -| config_fbank80_stmn_effnetb7_v2_eina_hln_arcs30m0.3_adam_lr0.01_amp.v1.sh | EfficientNet-b7 v2 | EfficientNet-b7 with strides=1122121
Instance-Norm with affine transform in Encoder
Layer-Norm in head
ArcFace s=30/m=0.3 | 2.64 | 0.157 | 0.244 | -| | | | Cosine | 2.33 | 0.141 | 0.232 | -| config_fbank80_stmn_res2net34w16s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=16x4 | ArcFace s=30/m=0.3 | PLDA | 2.42 | 0.144 | 0.245 | -| | | | Cosine | 2.26 | 0.133 | 0.224 -| config_fbank80_stmn_res2net34w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net34 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 2.39 | 0.141 | 0.235 | -| | | | Cosine | 2.17 | 0.128 | 0.215 -| config_fbank80_stmn_res2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x4 | ArcFace s=30/m=0.3 | PLDA | 2.28 | 0.131 | 0.225 | -| | | | Cosine | 2.11 | 0.124 | 0.204 | -| config_fbank80_stmn_seres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | SE-Res2Net50 | se-r=16
ArcFace s=30/m=0.3 | PLDA | 2.77 | 0.172 | 0.271 | -| | | | Cosine | 2.45 | 0.141 | 0.225 | -| config_fbank80_stmn_tseres2net50w26s4_arcs30m0.3_adam_lr0.05_amp.v1.sh | Time-SE-Res2Net50 | se-r=256
ArcFace s=30/m=0.3 | PLDA | 2.07 | 0.124 | 0.201 | -| | | | Cosine | 1.95 | 0.113 | 0.181 | -| config_fbank80_stmn_res2net50w13s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=13x8 | ArcFace s=30/m=0.3 | PLDA | 2.34 | 0.136 | 0.230 | -| | | | Cosine | 1.99 | 0.119 | 0.196 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 2.18 | 0.127 | 0.211 | -| | | | Cosine | 1.89 | 0.112 | 0.184 | -| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1_swa.sh | Res2Net50 width=26x8 | + SWA | PLDA | 2.14 | 0.125 | 0.209 | -| | | | Cosine | 1.84 | 0.110 | 0.186 | -| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 2.78 | 0.156 | 0.252 | -| | | | Cosine | 2.26 | 0.134 | 0.214 | -| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 2.24 | 0.134 | 0.221 | -| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.20 | 0.132 | 0.219 | -| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.02 | 0.123 | 0.203 | +| config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.19 | 0.142 | 0.242 | +| | | | Cosine + AS-Norm | 2.00 | 0.133 | 0.254 | +| | | | Cosine + QMF | 1.86 | 0.126 | 0.229 | +| config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.34 | 0.145 | 0.246 | +| | | | Cosine + AS-Norm | 2.10 | 0.135 | 0.248 | +| | | | Cosine + QMF | 2.01 | 0.127 | 0.218 | +| config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.25 | 0.136 | 0.239 | +| | | | Cosine + AS-Norm | 1.99 | 0.127 | 0.232 | +| | | | Cosine + QMF | 1.87 | 0.119 | 0.216 | +| config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.36 | 0.153 | 0.259 | +| | | | Cosine + AS-Norm | 2.18 | 0.139 | 0.249 | +| | | | Cosine + QMF | 2.08 | 0.128 | 0.222 | +| config_fbank80_stmn_cfwseresnet34.v3.0.sh | CwFwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.49 | 0.158 | 0.265 | +| | | | Cosine + AS-Norm | 2.29 | 0.145 | 0.251 | +| | | | Cosine + QMF | 2.17 | 0.133 | 0.230 | +| config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.92 | 0.124 | 0.208 | +| | | | Cosine + AS-Norm | 1.71 | 0.109 | 0.212 | +| | | | Cosine + QMF | 1.62 | 0.103 | 0.192 | +| config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.96 | 0.124 | 0.211 | +| | | | Cosine + AS-Norm | 1.79 | 0.118 | 0.239 | +| | | | Cosine + QMF | 1.68 | 0.114 | 0.216 | diff --git a/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..f4306e2e --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,72 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: cfwseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 32 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..0923a608 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + 
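  # Editor's annotation, not part of the committed config: the swa_* keys
  # just below enable stochastic weight averaging; averaging starts at
  # epoch 10 and the LR anneals to swa_lr=1e-4 over 2 epochs, which is
  # presumably why the global configs point at swa_model_ep0016.pth.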
epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..b5458f9d --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,72 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: seresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 32 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 25 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..0923a608 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..01b2cc50 --- /dev/null +++ 
b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,72 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: fwseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 4 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..0923a608 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..74553395 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + 
min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: fwseidrndresnet100 + in_channels: 1 + in_feats: 80 + conv_channels: 128 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.05 + se_r: 4 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 30 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..11d33ae2 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..6659b2f6 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,71 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + 
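      # Editor's annotation, not part of the committed config: with min and
      # max chunk length both set to 2.0 s, seg_chunk_sampler draws fixed
      # 2-second chunks in batches of at least 64 during stage-1 training.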
data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: resnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..0923a608 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..58d22733 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage1_v3.0.yaml @@ -0,0 +1,72 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: tseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + 
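    # Editor's annotation, not part of the committed config: cos_scale and
    # margin map to the ArcFace s and m hyper-parameters; here m ramps from
    # 0 to 0.2 over the first 5 epochs, and the stage-2 configs raise it to
    # 0.3 and add intertop_margin=0.1, apparently an extra margin applied
    # against the top competing classes.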
dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 256 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 25 + eff_batch_size: 256 diff --git a/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml new file mode 100644 index 00000000..0923a608 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh new file mode 100644 index 00000000..56d18bd0 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_cfwseresnet34.v3.0.sh @@ -0,0 +1,44 @@ +# Channel-freq-wise-SE-ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_cfwseresnet34.v3.0 + +nnet_s1_base_cfg=conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_cwseresnet34.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_cwseresnet34.v3.0.sh new file mode 100644 index 
00000000..68849f78 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_cwseresnet34.v3.0.sh @@ -0,0 +1,45 @@ +# Channel-wise ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_cwseresnet34.v3.0 + +nnet_s1_base_cfg=conf/train_cwseresnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0025.pth + + +nnet_s2_base_cfg=conf/train_cwseresnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh new file mode 100644 index 00000000..f962c2b3 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.0.sh @@ -0,0 +1,44 @@ +# Freq-wise-SE ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_fwseresnet34.v3.0 + +nnet_s1_base_cfg=conf/train_fwseresnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_fwseresnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=false #true +do_qmf=false #true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.0.sh new file mode 100644 index 00000000..6ea334b4 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.0.sh @@ -0,0 +1,44 @@ +# IdRnd ResNet100 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_idrnd_resnet100.v3.0 + +nnet_s1_base_cfg=conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0029.pth + +nnet_s2_base_cfg=conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + 
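  # Editor's annotation, not part of the committed script: with
  # plda_num_augs=0 the back-end trains on the clean voxceleb2cat_train
  # embeddings; a positive value would instead select an augmented list
  # voxceleb2cat_train_augx${plda_num_augs}.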
plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.0.sh new file mode 100644 index 00000000..bb5d990c --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.0.sh @@ -0,0 +1,44 @@ +# ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_resnet34.v3.0 + +nnet_s1_base_cfg=conf/train_resnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name.kk2 +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_resnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh new file mode 100644 index 00000000..2528d13f --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_tseresnet34.v3.0.sh @@ -0,0 +1,44 @@ +# TSE-ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_tseresnet34.v3.0 + +nnet_s1_base_cfg=conf/train_tseresnet34_xvec_stage1_v3.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0025.pth + +nnet_s2_base_cfg=conf/train_tseresnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=false #true +do_qmf=false #true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2.1/cmd.sh b/egs/voxceleb/v2.1/cmd.sh new file mode 100755 index 00000000..040f458b --- /dev/null +++ b/egs/voxceleb/v2.1/cmd.sh @@ -0,0 +1,28 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. 
Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +if [ "$(hostname -d)" == "cm.gemini" ];then + #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" + export cuda_eval_cmd="$train_cmd" +fi + + + diff --git a/egs/voxceleb/v2.1/conf/clsp.conf b/egs/voxceleb/v2.1/conf/clsp.conf new file mode 100644 index 00000000..4ed38246 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/voxceleb/v2.1/conf/coe_gpu_bigmem.conf b/egs/voxceleb/v2.1/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/voxceleb/v2.1/conf/coe_gpu_long.conf b/egs/voxceleb/v2.1/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/voxceleb/v2.1/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/voxceleb/v2.1/conf/coe_gpu_rtx.conf b/egs/voxceleb/v2.1/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff 
--git a/egs/voxceleb/v2.1/conf/coe_gpu_short.conf b/egs/voxceleb/v2.1/conf/coe_gpu_short.conf new file mode 100644 index 00000000..81de5cb7 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/voxceleb/v2.1/conf/coe_gpu_v100.conf b/egs/voxceleb/v2.1/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml b/egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..ad991124 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + 
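  # Editor's annotation, not part of the committed config:
  # eff_batch_size=1024 with min_batch_size=128 implies gradient
  # accumulation across optimizer steps/GPUs, and train_mode
  # hf-feats-frozen-nograd appears to keep the HuggingFace speech encoder's
  # feature extractor frozen, with no gradients computed through it.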
log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml new file mode 100644 index 00000000..0b1d0454 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..254ff796 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..52be6db5 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + 
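      # Editor's annotation, not part of the committed config:
      # target_sample_freq resamples all audio to 16 kHz, and wav_scale=1
      # leaves waveforms in their native [-1, 1] float range, which is
      # presumably what the HuggingFace wav2vec2/WavLM front-ends expect.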
sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..bd3e7f86 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmbaseplus_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml new file mode 100644 index 00000000..69a8322b --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml @@ -0,0 +1,63 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + 
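    # Editor's annotation, not part of the committed config: stage 2
    # fine-tunes the whole network (train_mode: full below), so the learning
    # rate drops to 5e-2 from the 0.4 used while the speech encoder was
    # frozen in stage 1, and only 8 epochs are run.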
lr: 5e-2 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 5e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 8 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml new file mode 100644 index 00000000..3443591a --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 1e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 4 + eff_batch_size: 256 + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..abe5da6e --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmlarge12l_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git 
a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml new file mode 100644 index 00000000..7287188c --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml @@ -0,0 +1,63 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5e-2 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 5e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 8 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml new file mode 100644 index 00000000..3443591a --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 1e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 4 + eff_batch_size: 256 + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml new file mode 100644 index 00000000..69a8322b --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml @@ -0,0 +1,63 @@ +data: + train: + dataset: + 
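+      # The dataset layout is shared by all the stage configs: segments carry
+      # a class_id (speaker) label, reverb/noise augmentation is applied on
+      # the fly, and audio is resampled to 16 kHz.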
class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5e-2 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 5e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 8 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml new file mode 100644 index 00000000..5e1260ad --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 1e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 4 + eff_batch_size: 256 + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml new file mode 100644 index 00000000..2addaa1e --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - 
class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: wavlmlarge_ecapatdnn512x3_v2.0.yaml +trainer: + optim: + opt_type: sgd + lr: 0.4 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 2600 + #min_lr: 4e-4 + min_lr: 1e-6 + warmup_steps: 2600 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml new file mode 100644 index 00000000..69a8322b --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml @@ -0,0 +1,63 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: class_id + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5e-2 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 5e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 8 + eff_batch_size: 512 + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml new file mode 100644 index 00000000..5e1260ad --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: class_id + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + 
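+    # Stage 3 is the large-margin fine-tuning step: the ArcFace margin grows
+    # from 0.2 to 0.4 with no warmup, and training uses 6 s chunks with
+    # class-weighted sampling and hard-prototype mining (see the sampler above).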
margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 1e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 4 + eff_batch_size: 256 + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/vad_16k.yaml b/egs/voxceleb/v2.1/conf/vad_16k.yaml new file mode 100644 index 00000000..5fb0111c --- /dev/null +++ b/egs/voxceleb/v2.1/conf/vad_16k.yaml @@ -0,0 +1,8 @@ +sample_frequency: 16000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: 5.5 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 diff --git a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..c3466259 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,45 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-xls-r-300m + drop_layers_gt: 12 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml new file mode 100644 index 00000000..d9c9b782 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml @@ -0,0 +1,44 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-xls-r-300m +xvector: + resnet_enc: + in_feats: 1024 + in_conv_channels: 1024 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 1024 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 3072 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..dc3737e3 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,44 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-xls-r-300m +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 
4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..d7e3388f --- /dev/null +++ b/egs/voxceleb/v2.1/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,45 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus + drop_layers_gt: 9 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..b2430d97 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,44 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-base-plus +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..5025f047 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,45 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-large + drop_layers_gt: 12 +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + 
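+  # intertop_k / intertop_margin apply an extra margin penalty to the
+  # highest-scoring competing speakers (inter-top-k penalty), on top of the
+  # ArcFace margin above.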
intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml new file mode 100644 index 00000000..0a6303f5 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml @@ -0,0 +1,44 @@ +hf_feats: + pretrained_model_path: microsoft/wavlm-large +xvector: + resnet_enc: + in_feats: 765 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 2 + intertop_margin: 0.1 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/datapath.sh b/egs/voxceleb/v2.1/datapath.sh new file mode 100644 index 00000000..a7eb575c --- /dev/null +++ b/egs/voxceleb/v2.1/datapath.sh @@ -0,0 +1,23 @@ +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + # voxceleb1_root=/export/corpora5/VoxCeleb1_v1 #voxceleb1 v1 + voxceleb1_root=/export/corpora5/VoxCeleb1_v2 #voxceleb1 v2 + voxceleb2_root=/export/corpora5/VoxCeleb2 + musan_root=/export/corpora5/JHU/musan +elif [ "$(hostname --domain)" == "cm.gemini" ];then + # voxceleb1_root=/expscratch/dsnyder/VoxCeleb1 #voxceleb1 v1 + voxceleb1_root=/exp/jvillalba/corpora/voxceleb1 #voxceleb1 v2 + voxceleb2_root=/expscratch/dgromero/corpora-open/vox2 + voxsrc22_root=/exp/jvillalba/corpora/voxsrc22 + musan_root=/expscratch/dgromero/corpora-open/musan +else + echo "Put your database paths here" + exit 1 +fi + + diff --git a/egs/voxceleb/v2.1/default_config.sh b/egs/voxceleb/v2.1/default_config.sh new file mode 120000 index 00000000..f2d8812d --- /dev/null +++ b/egs/voxceleb/v2.1/default_config.sh @@ -0,0 +1 @@ +global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh \ No newline at end of file diff --git a/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..67a4665e --- /dev/null +++ b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# Wav2Vec2 Multilingual 300M params layers 2-12 + +# hugging face model +hf_model_name=wav2vec2xlsr300m12l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + 
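+# Note: stages 2 and 3 reuse the wavlmlarge fine-tuning YAMLs; those configs
+# only override the x-vector margins, sampler and trainer, so they are
+# independent of the HF front-end chosen in stage 1.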
+nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.sh new file mode 100644 index 00000000..b4130fad --- /dev/null +++ b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.sh @@ -0,0 +1,54 @@ +# Wav2Vec2 Multilingual 300M params + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn1024x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..80ee785b --- /dev/null +++ b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# Wav2Vec2 Multilingual 300M params + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + 
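+    # with plda_num_augs > 0, the back-end is trained on the augmented copy
+    # of vox2, voxceleb2cat_train_augx${plda_num_augs}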
plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2.1/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..c2b30f68 --- /dev/null +++ b/egs/voxceleb/v2.1/global_conf/config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus9l + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2.1/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..373535c2 --- /dev/null +++ b/egs/voxceleb/v2.1/global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,54 @@ +# WavLM base trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3 + +# hugging face model +hf_model_name=wavlmbaseplus + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wavlm2resnet1d + +nnet_s1_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v2.1/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..530096cc 
--- /dev/null
+++ b/egs/voxceleb/v2.1/global_conf/config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh
@@ -0,0 +1,54 @@
+# WavLM large (layers 2-12) trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3
+
+# hugging face model
+hf_model_name=wavlmlarge12l
+
+#vad
+vad_config=conf/vad_16k.yaml
+
+# x-vector training
+nnet_data=voxceleb2cat_train
+
+# x-vector cfg
+
+nnet_type=hf_wavlm2resnet1d
+
+nnet_s1_base_cfg=conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml
+nnet_s1_args=""
+
+nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0
+nnet_s1_name=$nnet_name.s1
+
+nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name
+nnet_s1=$nnet_s1_dir/model_ep0035.pth
+
+nnet_s2_base_cfg=conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml
+nnet_s2_args=""
+nnet_s2_name=${nnet_name}.s2
+nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name
+nnet_s2=$nnet_s2_dir/model_ep0008.pth
+
+nnet_s3_base_cfg=conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml
+nnet_s3_args=""
+nnet_s3_name=${nnet_name}.s3
+nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name
+nnet_s3=$nnet_s3_dir/model_ep0004.pth
+
+# back-end
+do_plda=false
+do_snorm=true
+do_qmf=true
+do_voxsrc22=true
+
+plda_aug_config=conf/reverb_noise_aug.yaml
+plda_num_augs=0
+if [ $plda_num_augs -eq 0 ]; then
+    plda_data=voxceleb2cat_train
+else
+    plda_data=voxceleb2cat_train_augx${plda_num_augs}
+fi
+plda_type=splda
+lda_dim=200
+plda_y_dim=150
+plda_z_dim=200
+
diff --git a/egs/voxceleb/v2.1/global_conf/config_wavlmlarge_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wavlmlarge_ecapatdnn512x3_v2.0.sh
new file mode 100644
index 00000000..1b276bcd
--- /dev/null
+++ b/egs/voxceleb/v2.1/global_conf/config_wavlmlarge_ecapatdnn512x3_v2.0.sh
@@ -0,0 +1,54 @@
+# WavLM large trained on 60k LibriLight + 10k GigaSpeech + 24k Voxpopuli + ECAPA-TDNN 512x3
+
+# hugging face model
+hf_model_name=wavlmlarge
+
+#vad
+vad_config=conf/vad_16k.yaml
+
+# x-vector training
+nnet_data=voxceleb2cat_train
+
+# x-vector cfg
+
+nnet_type=hf_wavlm2resnet1d
+
+nnet_s1_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml
+nnet_s1_args=""
+
+nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0
+nnet_s1_name=$nnet_name.s1
+
+nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name
+nnet_s1=$nnet_s1_dir/model_ep0035.pth
+
+nnet_s2_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml
+nnet_s2_args=""
+nnet_s2_name=${nnet_name}.s2
+nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name
+nnet_s2=$nnet_s2_dir/model_ep0008.pth
+
+nnet_s3_base_cfg=conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml
+nnet_s3_args=""
+nnet_s3_name=${nnet_name}.s3
+nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name
+nnet_s3=$nnet_s3_dir/model_ep0004.pth
+
+# back-end
+do_plda=false
+do_snorm=true
+do_qmf=true
+do_voxsrc22=true
+
+plda_aug_config=conf/reverb_noise_aug.yaml
+plda_num_augs=0
+if [ $plda_num_augs -eq 0 ]; then
+    plda_data=voxceleb2cat_train
+else
+    plda_data=voxceleb2cat_train_augx${plda_num_augs}
+fi
+plda_type=splda
+lda_dim=200
+plda_y_dim=150
+plda_z_dim=200
+
diff --git a/egs/voxceleb/v2.1/hyp_utils b/egs/voxceleb/v2.1/hyp_utils
new file mode 120000
index 00000000..f6d1eb7a
--- /dev/null
+++ b/egs/voxceleb/v2.1/hyp_utils
@@ -0,0 +1 @@
+../../../hyp_utils
\ No newline at end of file
diff --git a/egs/voxceleb/v2.1/path.sh b/egs/voxceleb/v2.1/path.sh
new file mode 100755
index 00000000..6994fdab
--- /dev/null
+++ b/egs/voxceleb/v2.1/path.sh
@@ -0,0 +1,5 @@
+
+export HYP_ROOT=$(readlink -f `pwd -P`/../../..)
+export TOOLS_ROOT=$HYP_ROOT/tools
+
+. 
$TOOLS_ROOT/path.sh diff --git a/egs/voxceleb/v2.1/run_001_prepare_data.sh b/egs/voxceleb/v2.1/run_001_prepare_data.sh new file mode 100755 index 00000000..563d3c2d --- /dev/null +++ b/egs/voxceleb/v2.1/run_001_prepare_data.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. datapath.sh +. $config_file + +if [ $stage -le 1 ];then + # Prepare the VoxCeleb2 dataset for training. + hyperion-prepare-data voxceleb2 --subset dev --corpus-dir $voxceleb2_root \ + --cat-videos --use-kaldi-ids \ + --output-dir data/voxceleb2cat_train +fi + +if [ $stage -le 2 ];then + # prepare voxceleb1 for test + hyperion-prepare-data voxceleb1 --task test --corpus-dir $voxceleb1_root \ + --use-kaldi-ids \ + --output-dir data/voxceleb1_test +fi + +if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then + hyperion-prepare-data voxsrc22 --subset dev --corpus-dir $voxsrc22_root \ + --vox1-corpus-dir $voxceleb1_root \ + --output-dir data/voxsrc22_dev +fi + +# if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then + # hyperion-prepare-data voxsrc22 --subset test --corpus-dir $voxsrc22_root \ + # --vox1-corpus-dir $voxceleb1_root \ + # --output-dir data/voxsrc22_test +# fi + +if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then + # split vox2 into 2 parts, for cohort and qmf training + hyperion-split-dataset-into-trials-and-cohort --data-dir data/voxceleb2cat_train +fi diff --git a/egs/voxceleb/v2.1/run_002_compute_evad.sh b/egs/voxceleb/v2.1/run_002_compute_evad.sh new file mode 100755 index 00000000..acccace3 --- /dev/null +++ b/egs/voxceleb/v2.1/run_002_compute_evad.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e +nodes=fs01 +vad_dir=`pwd`/exp/vad_e +vad_config=conf/vad_16k.yaml +nj=40 + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. 
$config_file + +if [ -z "$vad_config" ];then + echo "We are not using VAD in this configuration" + exit 0 +fi + +if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" +fi + + +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + # This only does something at CLSP grid + for name in voxceleb2cat_train voxceleb1_test $extra_data + do + hyp_utils/create_data_split_dirs.sh \ + $vad_dir/$name \ + $USER/hyp-data/voxceleb/v1.2/vad $nodes + done +fi + +#Train datasets +if [ $stage -le 2 ];then + for name in voxceleb2cat_train voxceleb1_test $extra_data + do + # This creates links to distribute data in CLSP grid + # If you are not at CLSP grid, it does nothing and can be deleted + hyp_utils/create_data_split_links.sh $vad_dir/$name/vad.JOB.ark $nj + echo "compute vad for $name" + $train_cmd JOB=1:$nj $vad_dir/$name/log/vad.JOB.log \ + hyp_utils/conda_env.sh \ + hyperion-compute-energy-vad --cfg $vad_config \ + --recordings-file data/$name/recordings.csv \ + --output-spec ark,csv:$vad_dir/$name/vad.JOB.ark,$vad_dir/$name/vad.JOB.csv \ + --part-idx JOB --num-parts $nj || exit 1 + + hyperion-tables cat \ + --table-type features \ + --output-file $vad_dir/$name/vad.csv --num-tables $nj + hyperion-dataset add_features \ + --dataset data/$name \ + --features-name vad \ + --features-file $vad_dir/$name/vad.csv + done +fi + + diff --git a/egs/voxceleb/v2.1/run_003_prepare_noises_rirs.sh b/egs/voxceleb/v2.1/run_003_prepare_noises_rirs.sh new file mode 100755 index 00000000..73c7ed82 --- /dev/null +++ b/egs/voxceleb/v2.1/run_003_prepare_noises_rirs.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nj=10 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh
+
+# We prepare the noise files and RIR for online speech augmentation
+if [ $stage -le 1 ]; then
+  for name in noise music speech
+  do
+    hyperion-prepare-data musan \
+      --corpus-dir $musan_root \
+      --subset $name \
+      --output-dir data/musan_$name
+  done
+fi
+
+if [ $stage -le 2 ]; then
+  # # Prepare to distribute data over multiple machines
+  # # This only does something at CLSP grid
+  # hyp_utils/create_data_split_dirs.sh $vad_dir $USER/hyp-data/voxceleb/v1.2/vad $nodes
+
+  for name in musan_noise musan_music
+  do
+    input_data_dir=data/$name
+    output_data_dir=data/${name}_proc_audio
+    output_dir=exp/proc_audio/$name
+    $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${name}.JOB.log \
+      hyp_utils/conda_env.sh \
+      hyperion-preprocess-audio-files \
+      --audio-format flac \
+      --part-idx JOB --num-parts $nj \
+      --recordings-file $input_data_dir/recordings.csv \
+      --output-path $output_dir \
+      --output-recordings-file $output_dir/recordings.JOB.csv
+
+    hyperion-tables cat \
+      --table-type recordings \
+      --output-file $output_dir/recordings.csv --num-tables $nj
+    hyperion-dataset set_recordings \
+      --dataset $input_data_dir \
+      --recordings-file $output_dir/recordings.csv \
+      --output-dataset $output_data_dir
+
+
+  done
+fi
+
+if [ $stage -le 3 ]; then
+  # Create Babble noise from MUSAN speech files
+  for name in musan_speech
+  do
+    input_data_dir=data/$name
+    output_data_dir=data/${name}_babble
+    output_dir=exp/proc_audio/${name}_babble
+    $train_cmd $output_dir/log/make_babble_noise_${name}.log \
+      hyp_utils/conda_env.sh \
+      hyperion-make-babble-noise-audio-files \
+      --audio-format flac \
+      --min-spks 3 --max-spks 10 --num-reuses 5 \
+      --recordings-file $input_data_dir/recordings.csv \
+      --output-path $output_dir \
+      --output-recordings-file $output_data_dir/recordings.csv
+    hyperion-dataset make_from_recordings \
+      --dataset $output_data_dir \
+      --recordings-file $output_data_dir/recordings.csv
+  done
+fi
+
+if [ $stage -le 4 ]; then
+  if [ ! -d "RIRS_NOISES" ]; then
+    # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+    wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+    unzip rirs_noises.zip
+  fi
+  hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/simulated_rirs/smallroom --output-dir data/rirs_smallroom
+  hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/simulated_rirs/mediumroom --output-dir data/rirs_mediumroom
+  hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/real_rirs_isotropic_noises --output-dir data/rirs_real
+  for rirs in rirs_smallroom rirs_mediumroom rirs_real
+  do
+    output_dir=exp/rirs/$rirs
+    data_dir=data/$rirs
+    $train_cmd $output_dir/log/pack_rirs_${rirs}.log \
+      hyp_utils/conda_env.sh \
+      hyperion-pack-wav-rirs --input $data_dir/recordings.csv \
+      --output h5,csv:$output_dir/rirs.h5,$output_dir/rirs.csv || exit 1;
+    hyperion-dataset add_features --dataset $data_dir \
+      --features-name rirs --features-file $output_dir/rirs.csv
+
+  done
+fi
+
diff --git a/egs/voxceleb/v2.1/run_004_prepare_xvec_train_data.sh b/egs/voxceleb/v2.1/run_004_prepare_xvec_train_data.sh
new file mode 100755
index 00000000..4e0c5b19
--- /dev/null
+++ b/egs/voxceleb/v2.1/run_004_prepare_xvec_train_data.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Copyright
+#  2020  Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+nodes=b1
+nj=40
+stage=1
+config_file=default_config.sh
+
+. parse_options.sh || exit 1;
+. 
$config_file + +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + # This only does something at CLSP grid + hyp_utils/create_data_split_dirs.sh \ + exp/xvector_audios/$nnet_data \ + $USER/hyp-data/voxceleb/v1.2/xvector_audios/$nnet_data $nodes +fi + +if [ $stage -le 2 ];then + output_dir=exp/proc_audio/$nnet_data + # This creates links to distribute data in CLSP grid + # If you are not at CLSP grid, it does nothing and can be deleted + hyp_utils/create_audios_split_links.sh $output_dir data/$nnet_data/recordings.csv flac + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$nnet_data/vad.csv" + update_durs="--update-seg-durs" + fi + + $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${nnet_data}.JOB.log \ + hyp_utils/conda_env.sh \ + hyperion-preprocess-audio-files \ + --audio-format flac --remove-dc-offset $vad_args \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$nnet_data/recordings.csv \ + --output-path $output_dir \ + --output-recordings-file $output_dir/recordings.JOB.csv + + hyperion-tables cat \ + --table-type recordings \ + --output-file $output_dir/recordings.csv --num-tables $nj + + hyperion-dataset set_recordings $update_durs \ + --dataset data/$nnet_data \ + --recordings-file $output_dir/recordings.csv \ + --output-dataset data/${nnet_data}_proc_audio \ + --remove-features vad +fi + +if [ $stage -le 3 ];then + hyperion-dataset remove_short_segments \ + --dataset data/${nnet_data}_proc_audio \ + --output-dataset data/${nnet_data}_filtered \ + --length-name duration --min-length 2.0 + + hyperion-dataset remove_classes_few_segments \ + --dataset data/${nnet_data}_filtered \ + --class-name speaker --min-segs 4 +fi + +if [ $stage -le 4 ];then + hyperion-dataset split_train_val \ + --dataset data/${nnet_data}_filtered \ + --val-prob 0.03 \ + --joint-classes speaker --min-train-samples 1 \ + --seed 1123581321 \ + --train-dataset data/${nnet_data}_xvector_train \ + --val-dataset data/${nnet_data}_xvector_val +fi + diff --git a/egs/voxceleb/v2.1/run_005_train_xvector.sh b/egs/voxceleb/v2.1/run_005_train_xvector.sh new file mode 100755 index 00000000..2479d565 --- /dev/null +++ b/egs/voxceleb/v2.1/run_005_train_xvector.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh + +train_data_dir=data/${nnet_data}_xvector_train +val_data_dir=data/${nnet_data}_xvector_val + +#add extra args from the command line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" +fi +if [ "$use_tb" == "true" ];then + extra_args="$extra_args --trainer.use-tensorboard" +fi +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)" +fi + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +# Network Training +if [ $stage -le 1 ]; then + + mkdir -p $nnet_s1_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + hyperion-train-wav2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ + --data.train.dataset.segments-file $train_data_dir/segments.csv \ + --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ + --data.val.dataset.segments-file $val_data_dir/segments.csv \ + --trainer.exp-path $nnet_s1_dir \ + --num-gpus $ngpu \ + +fi + + +# Large Margin Fine-tuning +if [ $stage -le 2 ]; then + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" + fi + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + hyperion-finetune-wav2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ + --data.train.dataset.segments-file $train_data_dir/segments.csv \ + --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ + --data.val.dataset.segments-file $val_data_dir/segments.csv \ + --in-model-file $nnet_s1 \ + --trainer.exp-path $nnet_s2_dir \ + --num-gpus $ngpu \ + +fi diff --git a/egs/voxceleb/v2.1/run_006_extract_xvectors.sh b/egs/voxceleb/v2.1/run_006_extract_xvectors.sh new file mode 100755 index 00000000..0dc58048 --- /dev/null +++ b/egs/voxceleb/v2.1/run_006_extract_xvectors.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=2 +config_file=default_config.sh +use_gpu=false +xvec_chunk_length=120.0 +. parse_options.sh || exit 1; +. 
$config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu --chunk-length $xvec_chunk_length" + xvec_cmd="$cuda_eval_cmd --gpu 1 --mem 6G" + num_gpus=1 +else + xvec_cmd="$train_cmd --mem 12G" + num_gpus=0 +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + +xvector_dir=exp/xvectors/$nnet_name + +if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qmf" == "true" || "$do_pca" == "true") ]]; then + # Extract xvectors for training LDA/PLDA + nj=100 + for name in voxceleb2cat_train + do + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$name/vad.csv" + fi + output_dir=$xvector_dir/$name + echo "Extracting x-vectors for $name" + $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$name/recordings.csv \ + --random-utt-length --min-utt-length 2 --max-utt-length 30 \ + --model-path $nnet \ + --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv + hyperion-tables cat \ + --table-type features \ + --output-file $output_dir/xvector.csv --num-tables $nj + + done +fi + +if [ $stage -le 2 ]; then + # Extracts x-vectors for evaluation + nj=100 + if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" + fi + for name in voxceleb1_test $extra_data + do + num_segs=$(wc -l data/$name/segments.csv | awk '{ print $1-1}') + nj=$(($num_segs < 100 ? $num_segs:100)) + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$name/vad.csv" + fi + output_dir=$xvector_dir/$name + echo "Extracting x-vectors for $name" + $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$name/recordings.csv \ + --model-path $nnet \ + --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv + hyperion-tables cat \ + --table-type features \ + --output-file $output_dir/xvector.csv --num-tables $nj + + done +fi + + diff --git a/egs/voxceleb/v2.1/run_007_eval_be.sh b/egs/voxceleb/v2.1/run_007_eval_be.sh new file mode 100755 index 00000000..53621488 --- /dev/null +++ b/egs/voxceleb/v2.1/run_007_eval_be.sh @@ -0,0 +1,321 @@ +#!/bin/bash +# Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=2 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh
+
+if [ $nnet_stage -eq 1 ];then
+  nnet=$nnet_s1
+  nnet_name=$nnet_s1_name
+elif [ $nnet_stage -eq 2 ];then
+  nnet=$nnet_s2
+  nnet_name=$nnet_s2_name
+elif [ $nnet_stage -eq 3 ];then
+  nnet=$nnet_s3
+  nnet_name=$nnet_s3_name
+elif [ $nnet_stage -eq 4 ];then
+  nnet=$nnet_s4
+  nnet_name=$nnet_s4_name
+elif [ $nnet_stage -eq 5 ];then
+  nnet=$nnet_s5
+  nnet_name=$nnet_s5_name
+elif [ $nnet_stage -eq 6 ];then
+  nnet=$nnet_s6
+  nnet_name=$nnet_s6_name
+fi
+
+plda_label=${plda_type}y${plda_y_dim}_v1
+be_name=lda${lda_dim}_${plda_label}_${plda_data}
+
+xvector_dir=exp/xvectors/$nnet_name
+be_dir=exp/be/$nnet_name/$be_name
+score_dir=exp/scores/$nnet_name
+score_plda_dir=$score_dir/${be_name}/plda
+score_cosine_dir=$score_dir/cosine
+score_cosine_snorm_dir=$score_dir/cosine_snorm
+score_cosine_qmf_dir=$score_dir/cosine_qmf
+
+if [ $stage -le 3 ];then
+
+  echo "Eval Voxceleb 1 with Cosine scoring"
+  num_parts=8
+  for((i=1;i<=$num_parts;i++));
+  do
+    for((j=1;j<=$num_parts;j++));
+    do
+      $train_cmd $score_cosine_dir/log/voxceleb1_${i}_${j}.log \
+        hyp_utils/conda_env.sh \
+        hyperion-eval-cosine-scoring-backend \
+        --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \
+        --ndx-file data/voxceleb1_test/trials.csv \
+        --enroll-map-file data/voxceleb1_test/enrollment.csv \
+        --score-file $score_cosine_dir/voxceleb1_scores.csv \
+        --enroll-part-idx $i --num-enroll-parts $num_parts \
+        --test-part-idx $j --num-test-parts $num_parts &
+    done
+  done
+  wait
+  hyperion-merge-scores --output-file $score_cosine_dir/voxceleb1_scores.csv \
+    --num-enroll-parts $num_parts --num-test-parts $num_parts
+
+  $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \
+    hyperion-eval-verification-metrics \
+    --score-files $score_cosine_dir/voxceleb1_scores.csv \
+    --key-files data/voxceleb1_test/trials_{o,e,h}.csv \
+    --score-names voxceleb1 \
+    --key-names O E H \
+    --sparse \
+    --output-file $score_cosine_dir/voxceleb1_results.csv
+
+  cat $score_cosine_dir/voxceleb1_results.csv
+fi
+
+if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then
+  echo "Eval voxsrc22 dev with Cosine scoring"
+  $train_cmd $score_cosine_dir/log/voxsrc22_dev.log \
+    hyp_utils/conda_env.sh \
+    hyperion-eval-cosine-scoring-backend \
+    --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \
+    --ndx-file data/voxsrc22_dev/trials.csv \
+    --enroll-map-file data/voxsrc22_dev/enrollment.csv \
+    --score-file $score_cosine_dir/voxsrc22_dev_scores.csv
+
+  # $train_cmd $score_cosine_dir/log/voxsrc22_eval.log \
+  #   hyp_utils/conda_env.sh \
+  #   hyperion-eval-cosine-scoring-backend \
+  #   --feats-file csv:$xvector_dir/voxsrc22_eval/xvector.csv \
+  #   --ndx-file data/voxsrc22_eval/trials.csv \
+  #   --enroll-map-file data/voxsrc22_eval/enrollment.csv \
+  #   --score-file $score_cosine_dir/voxsrc22_eval_scores.csv
+
+  $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxsrc22_dev.log \
+    hyperion-eval-verification-metrics \
+    --score-files $score_cosine_dir/voxsrc22_dev_scores.csv \
+    --key-files data/voxsrc22_dev/trials.csv \
+    --score-names voxsrc22_dev \
+    --key-names all \
+    --output-file $score_cosine_dir/voxsrc22_dev_results.csv
+
+  cat $score_cosine_dir/voxsrc22_dev_results.csv
+
+fi
+
+if [ "$do_snorm" == "true" ];then
+  if [ $stage -le 5 ];then
+    echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm"
+    num_parts=16
+    for((i=1;i<=$num_parts;i++));
+    do
+      for((j=1;j<=$num_parts;j++));
+      do
+        $train_cmd --mem 22G $score_cosine_snorm_dir/log/voxceleb1_${i}_${j}.log \
+          hyp_utils/conda_env.sh \
+          hyperion-eval-cosine-scoring-backend \
+          --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \
+          --ndx-file data/voxceleb1_test/trials.csv \
+          --enroll-map-file data/voxceleb1_test/enrollment.csv \
+          --score-file $score_cosine_snorm_dir/voxceleb1_scores.csv \
+          --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \
+          --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \
+          --cohort-nbest 1000 --avg-cohort-by speaker \
+          --enroll-part-idx $i --num-enroll-parts $num_parts \
+          --test-part-idx $j --num-test-parts $num_parts &
+      done
+      sleep 5s
+    done
+    wait
+    hyperion-merge-scores --output-file $score_cosine_snorm_dir/voxceleb1_scores.csv \
+      --num-enroll-parts $num_parts --num-test-parts $num_parts
+
+    $train_cmd --mem 12G --num-threads 6 $score_cosine_snorm_dir/log/score_voxceleb1.log \
+      hyperion-eval-verification-metrics \
+      --score-files $score_cosine_snorm_dir/voxceleb1_scores.csv \
+      --key-files data/voxceleb1_test/trials_{o,e,h}.csv \
+      --score-names voxceleb1 \
+      --key-names O E H \
+      --sparse \
+      --output-file $score_cosine_snorm_dir/voxceleb1_results.csv
+
+    cat $score_cosine_snorm_dir/voxceleb1_results.csv
+  fi
+
+  if [ $stage -le 6 ] && [ "$do_voxsrc22" == "true" ];then
+    echo "Eval voxsrc22 dev with Cosine scoring + AS-Norm"
+    num_parts=16
+    for((i=1;i<=$num_parts;i++));
+    do
+      for((j=1;j<=$num_parts;j++));
+      do
+        $train_cmd $score_cosine_snorm_dir/log/voxsrc22_dev_${i}_${j}.log \
+          hyp_utils/conda_env.sh \
+          hyperion-eval-cosine-scoring-backend \
+          --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \
+          --ndx-file data/voxsrc22_dev/trials.csv \
+          --enroll-map-file data/voxsrc22_dev/enrollment.csv \
+          --score-file $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \
+          --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \
+          --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \
+          --cohort-nbest 1000 --avg-cohort-by speaker \
+          --enroll-part-idx $i --num-enroll-parts $num_parts \
+          --test-part-idx $j --num-test-parts $num_parts &
+        sleep 5s
+      done
+      sleep 10s
+    done
+    wait
+    hyperion-merge-scores --output-file $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \
+      --num-enroll-parts $num_parts --num-test-parts $num_parts
+
+    $train_cmd --mem 12G --num-threads 6 $score_cosine_snorm_dir/log/score_voxsrc22_dev.log \
+      hyperion-eval-verification-metrics \
+      --score-files $score_cosine_snorm_dir/voxsrc22_dev_scores.csv \
+      --key-files data/voxsrc22_dev/trials.csv \
+      --score-names voxsrc22_dev \
+      --key-names all \
+      --output-file $score_cosine_snorm_dir/voxsrc22_dev_results.csv
+
+    cat $score_cosine_snorm_dir/voxsrc22_dev_results.csv
+
+  fi
+
+fi
+
+if [ "$do_qmf" == "true" ];then
+  if [ $stage -le 7 ];then
+    echo "Train QMF in Vox2"
+    echo "...Calculating quality measures for Vox2"
+    num_parts=8
+    for((i=1;i<=$num_parts;i++));
+    do
+      for((j=1;j<=$num_parts;j++));
+      do
+        $train_cmd $score_cosine_qmf_dir/log/voxceleb2_trials_${i}_${j}.log \
+          hyp_utils/conda_env.sh \
+          hyperion-eval-cosine-scoring-backend-with-qmf \
+          --feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \
+          --ndx-file data/voxceleb2cat_train_trials/trials.csv \
+          --enroll-map-file data/voxceleb2cat_train_trials/enrollments.csv \
+          --score-file $score_cosine_qmf_dir/voxceleb2_scores.csv \
+          --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \
+          --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \
+          --cohort-nbest 1000 --avg-cohort-by speaker \
+          --enroll-part-idx $i --num-enroll-parts $num_parts \
+          --test-part-idx $j --num-test-parts $num_parts &
+      done
+      sleep 5s
+    done
+    wait
+    hyperion-merge-scores --output-file $score_cosine_qmf_dir/voxceleb2_scores.snorm.csv \
+      --num-enroll-parts $num_parts --num-test-parts $num_parts
+
+    hyperion-train-qmf --score-file $score_cosine_qmf_dir/voxceleb2_scores.snorm.csv \
+      --key-file data/voxceleb2cat_train_trials/trials.csv \
+      --model-file $score_cosine_qmf_dir/qmf.h5
+
+  fi
+
+  if [ $stage -le 8 ];then
+    echo "Eval Voxceleb 1 with Cosine scoring + Adaptive SNorm + QMF"
+    num_parts=16
+    for((i=1;i<=$num_parts;i++));
+    do
+      for((j=1;j<=$num_parts;j++));
+      do
+        $train_cmd --mem 22G $score_cosine_qmf_dir/log/voxceleb1_${i}_${j}.log \
+          hyp_utils/conda_env.sh \
+          hyperion-eval-cosine-scoring-backend-with-qmf \
+          --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \
+          --ndx-file data/voxceleb1_test/trials.csv \
+          --enroll-map-file data/voxceleb1_test/enrollment.csv \
+          --score-file $score_cosine_qmf_dir/voxceleb1_scores.csv \
+          --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \
+          --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \
+          --cohort-nbest 1000 --avg-cohort-by speaker \
+          --qmf-file $score_cosine_qmf_dir/qmf.h5 \
+          --enroll-part-idx $i --num-enroll-parts $num_parts \
+          --test-part-idx $j --num-test-parts $num_parts &
+      done
+      sleep 5s
+    done
+    wait
+    for suffix in "" .snorm .snorm.qmf
+    do
+      (
+        hyperion-merge-scores --output-file $score_cosine_qmf_dir/voxceleb1_scores$suffix.csv \
+          --num-enroll-parts $num_parts --num-test-parts $num_parts
+
+        $train_cmd --mem 12G --num-threads 6 $score_cosine_qmf_dir/log/score_voxceleb1$suffix.log \
+          hyperion-eval-verification-metrics \
+          --score-files $score_cosine_qmf_dir/voxceleb1_scores$suffix.csv \
+          --key-files data/voxceleb1_test/trials_{o,e,h}.csv \
+          --score-names voxceleb1 \
+          --key-names O E H \
+          --sparse \
+          --output-file $score_cosine_qmf_dir/voxceleb1_results$suffix.csv
+
+        echo "$score_cosine_qmf_dir/voxceleb1_results$suffix.csv:"
+        cat $score_cosine_qmf_dir/voxceleb1_results$suffix.csv
+      ) &
+    done
+    wait
+  fi
+
+  if [ $stage -le 9 ] && [ "$do_voxsrc22" == "true" ];then
+    echo "Eval voxsrc22 dev with Cosine scoring + QMF"
+    num_parts=16
+    for((i=1;i<=$num_parts;i++));
+    do
+      for((j=1;j<=$num_parts;j++));
+      do
+        $train_cmd $score_cosine_qmf_dir/log/voxsrc22_dev_${i}_${j}.log \
+          hyp_utils/conda_env.sh \
+          hyperion-eval-cosine-scoring-backend-with-qmf \
+          --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \
+          --ndx-file data/voxsrc22_dev/trials.csv \
+          --enroll-map-file data/voxsrc22_dev/enrollment.csv \
+          --score-file $score_cosine_qmf_dir/voxsrc22_dev_scores.csv \
+          --cohort-segments-file data/voxceleb2cat_train_cohort/segments.csv \
+          --cohort-feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \
+          --cohort-nbest 1000 --avg-cohort-by speaker \
+          --qmf-file $score_cosine_qmf_dir/qmf.h5 \
+          --enroll-part-idx $i --num-enroll-parts $num_parts \
+          --test-part-idx $j --num-test-parts $num_parts &
+        sleep 5s
+      done
+      sleep 10s
+    done
+    wait
+    for suffix in "" .snorm .snorm.qmf
+    do
+      (
+        hyperion-merge-scores --output-file $score_cosine_qmf_dir/voxsrc22_dev_scores$suffix.csv \
+          --num-enroll-parts $num_parts --num-test-parts $num_parts
+
+        $train_cmd --mem 12G --num-threads 6 $score_cosine_qmf_dir/log/score_voxsrc22_dev$suffix.log \
+          hyperion-eval-verification-metrics \
+          --score-files $score_cosine_qmf_dir/voxsrc22_dev_scores$suffix.csv \
+          --key-files data/voxsrc22_dev/trials.csv \
+          --score-names voxsrc22_dev \
+          --key-names all \
+          --output-file 
$score_cosine_qmf_dir/voxsrc22_dev_results$suffix.csv + + echo "$score_cosine_qmf_dir/voxsrc22_dev_results$suffix.csv:" + cat $score_cosine_qmf_dir/voxsrc22_dev_results$suffix.csv + ) & + done + wait + fi + +fi + diff --git a/egs/voxceleb/v2/README.md b/egs/voxceleb/v2/README.md index a005b6e8..0bafe85e 100644 --- a/egs/voxceleb/v2/README.md +++ b/egs/voxceleb/v2/README.md @@ -26,12 +26,12 @@ Recipe for the VoxCeleb Speaker Verification Task using Wav2Vec2, WavLM or Huber ## Usage - Run the run_0*.sh scripts in sequence - - By default it will use + - By default it will use config global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh - For better performance use ```bash -run_011_train_xvector.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh -run_030_extract_xvectors.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh --use-gpu true -run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr0.05_amp.v1.sh +run_011_train_xvector.sh --config-file global_conf/other_config.sh +run_030_extract_xvectors.sh --config-file global_conf/other_config.sh --use-gpu true +run_040_eval_be.sh --config-file global_conf/other_config.sh ``` @@ -155,7 +155,7 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | | | | Cosine + QMF | 2.38 | 0.159 | 0.266 | | config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.62 | 0.153 | 0.251 | | | | | Cosine + AS-Norm | 2.53 | 0.149 | 0.247 | -| | | | Cosine + QMF | 0.242 | 0.144 | 0.231 | +| | | | Cosine + QMF | 2.42 | 0.144 | 0.231 | | config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.25 | 0.136 | 0.225 | | | | | Cosine + AS-Norm | 2.01 | 0.125 | 0.209 | | | | | Cosine + QMF | 1.92 | 0.117 | 0.200 | diff --git a/egs/voxceleb/v2/default_config.sh b/egs/voxceleb/v2/default_config.sh index abcc2a2e..f2d8812d 120000 --- a/egs/voxceleb/v2/default_config.sh +++ b/egs/voxceleb/v2/default_config.sh @@ -1 +1 @@ -global_conf/config_wavlmbaseplus_ecapatdnn512x3_v1.0.sh \ No newline at end of file +global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh \ No newline at end of file From ed35173f534f98cb85b609642226b99d17163ddb Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 11 Sep 2023 12:12:49 -0400 Subject: [PATCH 110/154] vox/v2.1 recipe done, not tested --- egs/voxceleb/v2.1/run_005_train_xvector.sh | 27 ++++++++++++++++--- egs/voxceleb/v2.1/run_006_extract_xvectors.sh | 9 ++++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/egs/voxceleb/v2.1/run_005_train_xvector.sh b/egs/voxceleb/v2.1/run_005_train_xvector.sh index 2479d565..eb1c591e 100755 --- a/egs/voxceleb/v2.1/run_005_train_xvector.sh +++ b/egs/voxceleb/v2.1/run_005_train_xvector.sh @@ -44,7 +44,7 @@ if [ $stage -le 1 ]; then $cuda_cmd \ --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - hyperion-train-wav2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + hyperion-train-wav2vec2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ --data.train.dataset.segments-file $train_data_dir/segments.csv \ --data.train.dataset.class-files $train_data_dir/speaker.csv \ @@ -56,7 +56,7 @@ if [ $stage -le 1 ]; then fi -# Large Margin Fine-tuning +# Finetune full model if [ 
$stage -le 2 ]; then if [ "$use_wandb" == "true" ];then extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" @@ -65,7 +65,7 @@ if [ $stage -le 2 ]; then $cuda_cmd \ --gpu $ngpu $nnet_s2_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - hyperion-finetune-wav2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + hyperion-finetune-wav2vec2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ --data.train.dataset.segments-file $train_data_dir/segments.csv \ --data.train.dataset.class-files $train_data_dir/speaker.csv \ @@ -76,3 +76,24 @@ fi + +# Large margin finetuning of the full model +if [ $stage -le 3 ]; then + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)" + fi + mkdir -p $nnet_s3_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s3_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + hyperion-finetune-wav2vec2xvector $nnet_type --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ + --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ + --data.train.dataset.segments-file $train_data_dir/segments.csv \ + --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ + --data.val.dataset.segments-file $val_data_dir/segments.csv \ + --in-model-file $nnet_s2 \ + --trainer.exp-path $nnet_s3_dir \ + --num-gpus $ngpu \ + +fi diff --git a/egs/voxceleb/v2.1/run_006_extract_xvectors.sh b/egs/voxceleb/v2.1/run_006_extract_xvectors.sh index 0dc58048..2cfe27fe 100755 --- a/egs/voxceleb/v2.1/run_006_extract_xvectors.sh +++ b/egs/voxceleb/v2.1/run_006_extract_xvectors.sh @@ -8,15 +8,16 @@ set -e stage=1 -nnet_stage=2 +nnet_stage=3 config_file=default_config.sh use_gpu=false +hf_chunk_length=120.0 #seconds xvec_chunk_length=120.0 . parse_options.sh || exit 1; . 
$config_file if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu --chunk-length $xvec_chunk_length" + xvec_args="--use-gpu true --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length" xvec_cmd="$cuda_eval_cmd --gpu 1 --mem 6G" num_gpus=1 else @@ -58,7 +59,7 @@ if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qm echo "Extracting x-vectors for $name" $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ + hyperion-extract-wav2vec2xvectors ${xvec_args} ${vad_args} \ --part-idx JOB --num-parts $nj \ --recordings-file data/$name/recordings.csv \ --random-utt-length --min-utt-length 2 --max-utt-length 30 \ @@ -88,7 +89,7 @@ if [ $stage -le 2 ]; then echo "Extracting x-vectors for $name" $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ + hyperion-extract-wav2vec2xvectors ${xvec_args} ${vad_args} \ --part-idx JOB --num-parts $nj \ --recordings-file data/$name/recordings.csv \ --model-path $nnet \ From 8760d055520609a57bc69ac9fc05ef159e9f336a Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 12 Sep 2023 14:06:02 -0400 Subject: [PATCH 111/154] implemented lora in w2v2, not tested --- hyperion/io/bin_vad_reader.py | 4 +- hyperion/np/augment/noise_augment.py | 2 +- hyperion/torch/layers/__init__.py | 13 +- hyperion/torch/layers/lora.py | 80 +++++ .../models/wav2xvectors/hf_wav2xvector.py | 26 +- hyperion/torch/tpm/hf/hf_wav2vec2.py | 18 +- hyperion/torch/tpm/hf/hf_wav2vec_base.py | 320 ++++++++++++++---- hyperion/utils/dataset.py | 68 +++- requirements.txt | 4 +- 9 files changed, 425 insertions(+), 110 deletions(-) create mode 100644 hyperion/torch/layers/lora.py diff --git a/hyperion/io/bin_vad_reader.py b/hyperion/io/bin_vad_reader.py index 82e2a0c5..8ce91d15 100644 --- a/hyperion/io/bin_vad_reader.py +++ b/hyperion/io/bin_vad_reader.py @@ -59,7 +59,7 @@ def read( vad = self.r.read(keys) output_vad = [] for i in range(len(keys)): - vad_i = vad[i].astype(np.bool, copy=False) + vad_i = vad[i].astype(bool, copy=False) offset_i = offset[i] if offset_is_list else offset num_frames_i = num_frames[i] if num_frames_is_list else num_frames vad_i = self._get_bin_vad_slice(vad_i, offset_i, num_frames_i) @@ -77,7 +77,7 @@ def read_timestamps(self, keys, merge_tol=0.001): vad = self.r.read(keys) ts = [] for i in range(len(keys)): - vad_i = vad[i].astype(np.bool, copy=False) + vad_i = vad[i].astype(bool, copy=False) ts_i = bin_vad_to_timestamps( vad_i, self.frame_length / 1000, diff --git a/hyperion/np/augment/noise_augment.py b/hyperion/np/augment/noise_augment.py index 1cc1a0be..92bd57dd 100644 --- a/hyperion/np/augment/noise_augment.py +++ b/hyperion/np/augment/noise_augment.py @@ -55,7 +55,7 @@ def __init__( @staticmethod def _power(x): """Computes power of x in dB.""" - return 10 * np.log10((x ** 2).sum()) + return 10 * np.log10((x**2).sum() + 1e-10) @staticmethod def snr(x, n): diff --git a/hyperion/torch/layers/__init__.py b/hyperion/torch/layers/__init__.py index 6b508b0e..bea52c95 100644 --- a/hyperion/torch/layers/__init__.py +++ b/hyperion/torch/layers/__init__.py @@ -4,20 +4,23 @@ """ from .activation_factory import ActivationFactory -from .attention import (LocalScaledDotProdAttRelPosEncV1, - LocalScaledDotProdAttV1, ScaledDotProdAttRelPosEncV1, - ScaledDotProdAttV1) +from .attention import ( + 
LocalScaledDotProdAttRelPosEncV1, + LocalScaledDotProdAttV1, + ScaledDotProdAttRelPosEncV1, + ScaledDotProdAttV1, +) from .audio_feats import * from .audio_feats_factory import AudioFeatsFactory from .calibrators import LinBinCalibrator from .dropout import DropConnect1d, DropConnect2d, Dropout1d from .global_pool import * from .interpolate import Interpolate +from .lora import LoRAFactory from .margin_losses import ArcLossOutput, CosLossOutput, SubCenterArcLossOutput from .mvn import MeanVarianceNorm from .norm_layer_factory import NormLayer1dFactory, NormLayer2dFactory from .pool_factory import GlobalPool1dFactory -from .pos_encoder import (ConvPosEncoder, NoPosEncoder, PosEncoder, - RelPosEncoder) +from .pos_encoder import ConvPosEncoder, NoPosEncoder, PosEncoder, RelPosEncoder from .spec_augment import AxisMasker, SpecAugment, SpecWarper from .subpixel_convs import ICNR1d, ICNR2d, SubPixelConv1d, SubPixelConv2d diff --git a/hyperion/torch/layers/lora.py b/hyperion/torch/layers/lora.py new file mode 100644 index 00000000..1436caf5 --- /dev/null +++ b/hyperion/torch/layers/lora.py @@ -0,0 +1,80 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from typing import Union + +import loralib as lora +import torch.nn as nn +from loralib import * + + +class LoRAFactory: + def create_from_pretrained( + layer: Union[nn.Embedding, nn.Linear, nn.Conv1d, nn.Conv2d, nn.Conv3d], + r: int = 8, + lora_alpha: int = 1, + lora_dropout: float = 0.0, + merge_weights: bool = True, + ): + if isinstance(layer, nn.Embedding): + lora_layer = lora.Embedding( + layer.num_embeddings, + layer.embedding_dim, + padding_idx=layer.padding_idx, + max_norm=layer.max_norm, + norm_type=layer.norm_type, + scale_grad_by_freq=layer.scale_grad_by_freq, + sparse=layer.sparse, + r=r, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + merge_weights=merge_weights, + ) + lora_layer.weight.data = layer.weight.data + + elif isinstance(layer, nn.Linear): + bias = layer.bias is not None + lora_layer = lora.Linear( + layer.in_features, + layer.out_features, + bias=bias, + r=r, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + merge_weights=merge_weights, + ) + lora_layer.weight.data = layer.weight.data + if bias: + lora_layer.bias.data = layer.bias.data + + elif isinstance(layer, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): + if isinstance(layer, nn.Conv1d): + lora_class = lora.Conv1d + elif isinstance(layer, nn.Conv2d): + lora_class = lora.Conv2d + elif isinstance(layer, nn.Conv3d): + lora_class = lora.Conv3d + + bias = layer.bias is not None + lora_layer = lora_class( + layer.in_channels, + layer.out_channels, + layer.kernel_size, + stride=layer.stride, + padding=layer.padding, + dilation=layer.dilation, + groups=layer.groups, + bias=bias, + padding_mode=layer.padding_mode, + r=r, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + merge_weights=merge_weights, + ) + lora_layer.weight.data = layer.weight.data + if bias: + lora_layer.bias.data = layer.bias.data + + return lora_layer diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index 24ab5bbb..925f1172 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -5,10 +5,9 @@ import contextlib import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser 
from ...torch_model import TorchModel from ...utils import remove_silence @@ -29,7 +28,6 @@ class HFWav2XVector(TorchModel): def __init__( self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg" ): - super().__init__() self.hf_feats = hf_feats self.xvector = xvector @@ -222,7 +220,6 @@ def extract_embed( embed_layer=None, detach_chunks=False, ): - if vad_samples is not None: x, x_lengths = remove_silence(x, vad_samples, x_lengths) @@ -256,6 +253,9 @@ def freeze_hf_feats(self): def freeze_hf_feature_encoder(self): self.hf_feats.freeze_feature_encoder() + def freeze_hf_except_lora(self, bias=None): + self.hf_feats.freeze_except_lora(bias) + def has_param_groups(self): return self.hf_feats.has_param_groups() @@ -296,6 +296,15 @@ def set_train_mode(self, mode): elif mode == "hf-feat-extractor-frozen": self.unfreeze() self.freeze_hf_feature_encoder() + elif mode == "hf-lora": + self.unfreeze() + self.freeze_hf_except_lora() + elif mode == "hf-all-bias-lora": + self.unfreeze() + self.freeze_hf_except_lora(bias="all") + elif mode == "hf-lora-with-bias": + self.unfreeze() + self.freeze_hf_except_lora(bias="lora_only") else: raise ValueError(f"invalid train_mode={mode}") @@ -310,7 +319,6 @@ def set_train_mode(self, mode): self._train_mode = mode def _train(self, train_mode: str): - if train_mode in ["full", "frozen"]: super()._train(train_mode) elif train_mode == "ft-embed-affine": @@ -322,6 +330,9 @@ def _train(self, train_mode: str): "ft-xvector-nograd", "hf-feats-frozen-nograd", "hf-feat-extractor-frozen", + "hf-lora", + "hf-all-bias-lora", + "hf-lora-with-bias", ]: self.hf_feats.train() self.xvector._train("full") @@ -339,6 +350,9 @@ def valid_train_modes(): "ft-xvector-nograd", "hf-feats-frozen-nograd", "hf-feat-extractor-frozen", + "hf-lora", + "hf-all-bias-lora", + "hf-lora-with-bias", ] @staticmethod @@ -353,7 +367,6 @@ def filter_args(**kwargs): return args def get_config(self): - hf_cfg = self.hf_feats.get_config() xvec_cfg = self.xvector.get_config() del hf_cfg["class_name"] @@ -375,7 +388,6 @@ def change_config(self, hf_feats, xvector): @staticmethod def add_class_args(parser, prefix=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index 26da7beb..dd5de2fe 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -6,11 +6,10 @@ import os from typing import Callable, List, Optional, Tuple, Union -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from transformers import Wav2Vec2Config, Wav2Vec2Model - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import Wav2Vec2Config, Wav2Vec2Model from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase @@ -204,8 +203,13 @@ def __init__( sample_frequency: int = 16000, feat_extract_lr: Optional[float] = None, encoder_lr: Optional[float] = None, + use_lora: bool = False, + lora_components: List[str] = ["q_proj", "v_proj"], + lora_rank: int = 4, + lora_alpha: int = 1, + lora_dropout: float = 0.0, + lora_merge_weights: bool = True, ): - super().__init__( pretrained_model_path=pretrained_model_path, normalize_input=normalize_input, @@ -223,6 +227,12 @@ def __init__( sample_frequency=sample_frequency, feat_extract_lr=feat_extract_lr, encoder_lr=encoder_lr, + use_lora=use_lora, + lora_components=lora_components, + 
lora_rank=lora_rank, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + lora_merge_weights=lora_merge_weights, ) if pretrained_model_path is not None and not ignore_pretrained: diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index a9c4ddef..2c8d239f 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -8,12 +8,13 @@ from turtle import right from typing import List, Optional, Tuple, Union -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Processor - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Processor +from ....utils.misc import filter_func_args +from ...layers import LoRAFactory from ...torch_model import TorchModel from ...utils import scale_seq_lengths, seq_lengths_to_mask from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs @@ -55,6 +56,12 @@ class HFWav2VecBase(TorchModel): sample_frequency: (`int`) waveform sample frequency used to train the model. feat_extract_lr: learning rate for conv feature extractor, serves to set a lr different than the global one. encoder_lr: learning rate for the wav2vec encoder, serves to set a lr different than the global one. + use_lora: use low-rank adapters + lora_components: list of components where we apply LoRA, eg [Wq, Wv] + lora_rank: rank of LoRA + lora_alpha: scale for LoRA + lora_dropout: dropout rate for LoRA + lora_merge_weights: lora weights are merged with the pretrained weights at inference. """ def __init__( @@ -75,6 +82,12 @@ def __init__( sample_frequency: int = 16000, feat_extract_lr: Optional[float] = None, encoder_lr: Optional[float] = None, + use_lora: bool = False, + lora_components: List[str] = ["q_proj", "v_proj"], + lora_rank: int = 4, + lora_alpha: int = 1, + lora_dropout: float = 0.0, + lora_merge_weights: bool = True, ): super().__init__() self.pretrained_model_path = pretrained_model_path @@ -90,6 +103,12 @@ def __init__( self.left_encoder_context = left_encoder_context self.feat_extract_lr = feat_extract_lr self.encoder_lr = encoder_lr + self.use_lora = use_lora + self.lora_components = lora_components + self.lora_rank = lora_rank + self.lora_alpha = lora_alpha + self.lora_dropout = lora_dropout + self.lora_merge_weights = lora_merge_weights if pretrained_model_path is not None and not ignore_pretrained: rank = ddp_get_rank() @@ -153,6 +172,16 @@ def __init__( self._feature_encoder_context = None self._frame_shift = None + self.hf_model = None + + if use_lora: + self._make_lora_layers( + lora_components, + lora_rank, + lora_alpha, + lora_dropout, + lora_merge_weights, + ) def __deepcopy__(self, memo): """Reimplementation of deepcopy for Hugging Face models. 
@@ -225,18 +254,36 @@ def change_config( self, override_dropouts: bool, override_spec_augment: bool, + override_lora: bool, feat_extract_lr: Optional[float] = None, encoder_lr: Optional[float] = None, + use_lora: bool = False, + lora_components: List[str] = ["q_proj", "v_proj"], + lora_rank: int = 4, + lora_alpha: int = 1, + lora_dropout: float = 0.0, + lora_merge_weights: bool = True, **kwargs, ): if override_spec_augment: - logging.info("overriding speech augment") + logging.info(f"overriding speech augment with args={kwargs}") self.change_spec_augment(**kwargs) if override_dropouts: - logging.info("overriding hf model dropouts") + logging.info(f"overriding hf model dropouts with args={kwargs}") self.change_dropouts(**kwargs) + if override_lora: + logging.info("overriding LoRA config") + self.change_lora( + use_lora=use_lora, + lora_components=lora_components, + lora_rank=lora_rank, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + lora_merge_weights=lora_merge_weights, + ) + self.feat_extract_lr = feat_extract_lr self.encoder_lr = encoder_lr @@ -259,12 +306,109 @@ def change_spec_augment( self.hf_model.config.mask_feature_length = mask_feature_length self.hf_model.config.mask_feature_min_masks = mask_feature_min_masks + def change_lora( + self, + use_lora: bool = False, + lora_components: List[str] = ["q_proj", "v_proj"], + lora_rank: int = 4, + lora_alpha: int = 1, + lora_dropout: float = 0.0, + lora_merge_weights: bool = True, + ): + if not self.use_lora: + if use_lora: + self._make_lora_layers( + lora_components, + lora_rank, + lora_alpha, + lora_dropout, + lora_merge_weights, + ) + pass + else: + # TODO + pass + else: + if use_lora: + # TODO + pass + else: + # TODO + pass + + self.use_lora = use_lora + self.lora_components = lora_components + self.lora_rank = lora_rank + self.lora_alpha = lora_alpha + self.lora_dropout = lora_dropout + self.lora_merge_weights = lora_merge_weights + + def _make_lora_layers( + self, + lora_components: List[str], + lora_rank: int, + lora_alpha: int, + lora_dropout: float, + lora_merge_weights: bool, + ): + counts = {k: 0 for k in lora_components} + self._recursive_replace_layer_by_lora( + self.hf_model, + counts, + lora_components, + lora_rank, + lora_alpha, + lora_dropout, + lora_merge_weights, + ) + for k, v in counts.items(): + logging.info("count of LoRA layers for %s = %d", k, v) + assert v > 0, f"did not make any {k} LoRA" + + @staticmethod + def _recursive_replace_layer_by_lora( + model: nn.Module, + counts: dict, + lora_components: List[str], + lora_rank: int, + lora_alpha: int, + lora_dropout: float, + lora_merge_weights: bool, + ): + for name, module in model.named_children(): + if len(list(module.children())) > 0: + HFWav2VecBase._recursive_replace_layer_by_lora( + module, + counts, + lora_components, + lora_rank, + lora_alpha, + lora_dropout, + lora_merge_weights, + ) + if isinstance(module, nn.Linear) and name in lora_components: + lora_layer = LoRAFactory.create_from_pretrained( + module, + r=lora_rank, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + merge_weights=lora_merge_weights, + ) + setattr(model, name, lora_layer) + counts[name] += 1 + def change_dropouts(self, **kwargs): pass # needs to be overloaded def freeze_feature_encoder(self): self.hf_model.freeze_feature_encoder() + def freeze_except_lora(self, bias=None): + bias = "none" if bias is None else bias + from ...layers.lora import mark_only_lora_as_trainable + + mark_only_lora_as_trainable(self.hf_model, bias=bias) + def has_param_groups(self): return 
self.feat_extract_lr is not None or self.encoder_lr is not None @@ -302,14 +446,14 @@ def _normalize(self, x, x_mask=None): """Normalizes the audio to have zero mean and unit variance.""" if x_mask is None: x = x - x.mean(dim=1, keepdim=True) - std = torch.sqrt((x ** 2).mean(dim=1, keepdim=True) + 1e-7) + std = torch.sqrt((x**2).mean(dim=1, keepdim=True) + 1e-7) x = x / std else: x_mask = x_mask.to(dtype=x.dtype) x_samples = torch.mean(x_mask, dim=1, keepdim=True) x_mean = torch.mean(x * x_mask, dim=1, keepdim=True) / x_samples - x2_mean = torch.mean(x ** 2 * x_mask, dim=1, keepdim=True) / x_samples - std = torch.sqrt(x2_mean - x_mean ** 2 + 1e-7) + x2_mean = torch.mean(x**2 * x_mask, dim=1, keepdim=True) / x_samples + std = torch.sqrt(x2_mean - x_mean**2 + 1e-7) x = (x - x_mean) / std return x @@ -544,14 +688,6 @@ def forward_long_impl( else scale_seq_lengths(x_lengths, max_out_length, max_in_length) ) output["hidden_states_lengths"] = feat_lengths - # print( - # "lens", - # mol0, - # max_out_length, - # output.last_hidden_state.size(1), - # output.hidden_states[0].size(1), - # flush=True, - # ) return output def get_config(self): @@ -572,6 +708,14 @@ def get_config(self): "left_encoder_context": self.left_encoder_context, "right_encoder_context": self.right_encoder_context, "sample_frequency": self.sample_frequency, + "feat_extract_lr": self.feat_extract_lr, + "encoder_lr": self.encoder_lr, + "use_lora": self.use_lora, + "lora_components": self.lora_components, + "lora_rank": self.lora_rank, + "lora_alpha": self.lora_alpha, + "lora_dropout": self.lora_dropout, + "lora_merge_weights": self.lora_merge_weights, } base_config = super().get_config() @@ -584,24 +728,78 @@ def save(self, file_path: str): @staticmethod def filter_args(**kwargs): - valid_args = ( - "pretrained_model_path", - "normalize_input", - "use_input_attention_mask", - "cache_dir", - "force_download", - "resume_download", - "revision", - "drop_layers_gt", - "ignore_pretrained", - "override_dropouts", - "override_spec_augment", - "left_encoder_context", - "right_encoder_context", - "sample_frequency", + return filter_func_args(HFWav2VecBase.__init__, **kwargs) + # valid_args = ( + # "pretrained_model_path", + # "normalize_input", + # "use_input_attention_mask", + # "cache_dir", + # "force_download", + # "resume_download", + # "revision", + # "drop_layers_gt", + # "ignore_pretrained", + # "override_dropouts", + # "override_spec_augment", + # "left_encoder_context", + # "right_encoder_context", + # "sample_frequency", + # ) + # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + # return args + + @staticmethod + def _add_lr_args(parser): + parser.add_argument( + "--feat-extractor-lr", + default=None, + type=float, + help=( + "lr for conv feature extractor, it serves to set a lr " + "different than the global one." + ), + ) + parser.add_argument( + "--encoder-lr", + default=None, + type=float, + help=( + "lr for transformer encoder, it serves to set a lr " + "different than the global one." 
+ ), + ) + + @staticmethod + def _add_lora_args(parser): + parser.add_argument( + "--use-lora", + default=False, + action=ActionYesNo, + help="use low-rank adapters", + ) + parser.add_argument( + "--lora-components", + default=["q_proj", "v_proj"], + nargs="+", + choices=[ + "k_proj", + "q_proj", + "v_proj", + "out_proj", + "intermediate_dense", + "output_dense", + ], + help="list of components where we apply LoRA, eg [Wq, Wv]", + ) + parser.add_argument("--lora-rank", default=4, help="rank of LoRA") + parser.add_argument("--lora-alpha", default=1.0, help="scale for LoRA") + parser.add_argument("--lora-dropout", default=0.0, help="dropout rate for LoRA") + parser.add_argument( + "--lora-merge-weights", + default=True, + action=ActionYesNo, + help="lora weights are merged with the pretrained weights at inference.", ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - return args @staticmethod def add_class_args(parser, prefix=None, skip=set()): @@ -703,36 +901,22 @@ def add_class_args(parser, prefix=None, skip=set()): "when the signal is evaluated chunk by chunk." ), ) - parser.add_argument( - "--feat-extractor-lr", - default=None, - type=float, - help=( - "lr for conv feature extractor, it serves to set a lr " - "different than the global one." - ), - ) - parser.add_argument( - "--encoder-lr", - default=None, - type=float, - help=( - "lr for transformer encoder, it serves to set a lr " - "different than the global one." - ), - ) + + HFWav2VecBase._add_lr_args(parser) + HFWav2VecBase._add_lora_args(parser) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) @staticmethod def filter_finetune_args(**kwargs): - valid_args = ( - "override_dropouts", - "override_spec_augment", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - return args + return filter_func_args(HFWav2VecBase.change_config, **kwargs) + # valid_args = ( + # "override_dropouts", + # "override_spec_augment", + # ) + # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + # return args @staticmethod def add_finetune_args(parser, prefix=None, skip=set()): @@ -759,23 +943,13 @@ def add_finetune_args(parser, prefix=None, skip=set()): ), ) parser.add_argument( - "--feat-extractor-lr", - default=None, - type=float, - help=( - "lr for conv feature extractor, it serves to set a lr " - "different than the global one." - ), - ) - parser.add_argument( - "--encoder-lr", - default=None, - type=float, - help=( - "lr for transformer encoder, it serves to set a lr " - "different than the global one." 
- ), + "--override-lora", + default=False, + action=ActionYesNo, + help=("whether to change the config of LoRA layers in the model."), ) + HFWav2VecBase._add_lr_args(parser) + HFWav2VecBase._add_lora_args(parser) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py index dd446576..51f0f37a 100644 --- a/hyperion/utils/dataset.py +++ b/hyperion/utils/dataset.py @@ -55,7 +55,6 @@ def __init__( sparse_trials: bool = False, table_sep: Optional[str] = None, ): - if isinstance(segments, SegmentSet): self._segments = segments self._segments_path = None @@ -82,10 +81,12 @@ def __init__( features, FeatureSet ) self._enrollments, self._enrollments_paths = self._parse_dict_args( - enrollments, EnrollmentMap, + enrollments, + EnrollmentMap, ) self._trials, self._trials_paths = self._parse_dict_args( - trials, (TrialKey, TrialNdx, SparseTrialKey), + trials, + (TrialKey, TrialNdx, SparseTrialKey), ) self.sparse_trials = sparse_trials @@ -711,7 +712,8 @@ def add_features(self, features_name: str, features: Union[PathLike, FeatureSet] raise ValueError() def set_segments( - self, segments: Union[PathLike, SegmentSet], update_seg_durs: bool, + self, + segments: Union[PathLike, SegmentSet], ): if isinstance(segments, (str, Path)): self._segments = None @@ -723,7 +725,9 @@ def set_segments( raise ValueError() def set_recordings( - self, recordings: Union[PathLike, RecordingSet], update_seg_durs: bool, + self, + recordings: Union[PathLike, RecordingSet], + update_seg_durs: bool = False, ): if isinstance(recordings, (str, Path)): self._recordings = None @@ -753,7 +757,9 @@ def add_classes(self, classes_name: str, classes: Union[PathLike, ClassInfo]): raise ValueError() def add_enrollments( - self, enrollments_name: str, enrollments: Union[PathLike, EnrollmentMap], + self, + enrollments_name: str, + enrollments: Union[PathLike, EnrollmentMap], ): if self._enrollments is None: self._enrollments = {} @@ -793,7 +799,9 @@ def remove_features(self, features_name: str): del self._features[features_name] del self._features_paths[features_name] - def remove_recordings(self,): + def remove_recordings( + self, + ): if self._recordings_path is not None: self._files_to_delete.append(self._recordings_path) @@ -820,7 +828,8 @@ def remove_classes(self, classes_name: str): del self._classes_paths[classes_name] def remove_enrollments( - self, enrollments_name: str, + self, + enrollments_name: str, ): if self._enrollments_paths[enrollments_name] is not None: self._files_to_delete.append(self._enrollments_paths[enrollments_name]) @@ -829,7 +838,8 @@ def remove_enrollments( del self._enrollments_paths[enrollments_name] def remove_trials( - self, trials_name: str, + self, + trials_name: str, ): if self._trials_paths[trials_name] is not None: self._files_to_delete.append(self._trials_paths[trials_name]) @@ -981,14 +991,20 @@ def split_into_trials_and_cohort( segments_male = SegmentSet(segments[segments["gender"] == "m"]) segments_female = SegmentSet(segments[segments["gender"] == "f"]) trials_male, enroll_male, cohort_male = self._split_into_trials_and_cohort( - segments_male, num_tar_trials, num_trial_speakers, seed, + segments_male, + num_tar_trials, + num_trial_speakers, + seed, ) ( trials_female, enroll_female, cohort_female, ) = self._split_into_trials_and_cohort( - segments_female, num_tar_trials, num_trial_speakers, seed, + segments_female, + num_tar_trials, + num_trial_speakers, + seed, ) trials = 
TrialKey.merge([trials_male, trials_female]) enroll = EnrollmentMap.cat([enroll_male, enroll_female]) @@ -996,7 +1012,10 @@ def split_into_trials_and_cohort( else: segments = self.segments() trials, enroll, cohort = self._split_into_trials_and_cohort( - segments, num_tar_trials, num_trial_speakers, seed, + segments, + num_tar_trials, + num_trial_speakers, + seed, ) dataset_trials = self.clone() @@ -1019,7 +1038,10 @@ def remove_short_segments(self, min_length: float, length_name: str = "duration" self.clean() def remove_classes_few_segments( - self, class_name: str, min_segs: int, rebuild_idx: bool = False, + self, + class_name: str, + min_segs: int, + rebuild_idx: bool = False, ): segments = self.segments() classes, counts = np.unique(segments[class_name], return_counts=True) @@ -1082,7 +1104,10 @@ def _segments_split_joint_classes( return train_segs, val_segs def _segments_split_disjoint_classes( - self, val_prob: float, disjoint_classes: List[str], rng: np.random.Generator, + self, + val_prob: float, + disjoint_classes: List[str], + rng: np.random.Generator, ): segments = self.segments() classes = segments[disjoint_classes].apply("-".join, axis=1) @@ -1165,15 +1190,24 @@ def split_train_val( train_segs, val_segs = self._segments_split(val_prob, rng) elif joint_classes is not None and disjoint_classes is None: train_segs, val_segs = self._segments_split_joint_classes( - val_prob, joint_classes, min_train_samples, rng, + val_prob, + joint_classes, + min_train_samples, + rng, ) elif joint_classes is None and disjoint_classes is not None: train_segs, val_segs = self._segments_split_disjoint_classes( - val_prob, disjoint_classes, rng, + val_prob, + disjoint_classes, + rng, ) else: train_segs, val_segs = self._segments_split_joint_and_disjoint_classes( - val_prob, joint_classes, disjoint_classes, min_train_samples, rng, + val_prob, + joint_classes, + disjoint_classes, + min_train_samples, + rng, ) train_ds = self.clone() diff --git a/requirements.txt b/requirements.txt index c3410829..1e1aea9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,6 @@ memory_profiler gdown fairscale==0.4.4 tensorboard>=2.5.0 -yapf jsonargparse>=3.5.0 wandb>=0.10.30 librosa>=0.8.1 @@ -22,3 +21,6 @@ twine wheel transformers>=4.16.2 sentencepiece>=0.1.97 +loralib +lhotse + From a75610ee27acf2cd15ecc38151f5efff6fa09623 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 13 Sep 2023 10:59:46 -0400 Subject: [PATCH 112/154] vox2.1 working and lora --- egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml | 13 ++-- ...lsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml | 13 ++-- ...2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml | 13 ++-- ...c2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml | 13 ++-- ...baseplus9l_ecapatdnn512x3_stage1_v2.0.yaml | 13 ++-- ...lmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml | 13 ++-- ...lmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml | 13 ++-- ...lmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml | 13 ++-- ...lmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml | 13 ++-- ...lmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml | 13 ++-- ...lmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml | 13 ++-- ...avlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml | 13 ++-- ...avlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml | 13 ++-- ...wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml | 13 ++-- ...wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml | 13 ++-- ...wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml | 13 ++-- ...rge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml | 71 ++++++++++++++++++ ...rge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml | 74 +++++++++++++++++++ 
...vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh | 55 ++++++++++++++ hyperion/torch/layers/lora.py | 52 +++++++++++-- hyperion/torch/models/xvectors/xvector.py | 36 +-------- hyperion/torch/tpm/hf/hf_wav2vec_base.py | 22 +++++- 22 files changed, 378 insertions(+), 140 deletions(-) create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml create mode 100644 egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh diff --git a/egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml b/egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml index 4fdf8068..86f55073 100644 --- a/egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml +++ b/egs/voxceleb/v2.1/conf/reverb_noise_aug.yaml @@ -4,32 +4,31 @@ reverb_aug: rir_types: smallroom: weight: 1 - rir_path: scp:data/rirs_smallroom/rirs.scp + rir_path: csv:data/rirs_smallroom/rirs.csv rir_norm: max mediumroom: weight: 1 - rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_path: csv:data/rirs_mediumroom/rirs.csv rir_norm: max realroom: weight: 1 - rir_path: scp:data/rirs_real/rirs.scp + rir_path: csv:data/rirs_real/rirs.csv rir_norm: max noise_aug: noise_prob: 0.7 noise_types: noise: weight: 1 - noise_path: data/musan_noise_proc_audio/wav.scp + noise_path: data/musan_noise_proc_audio/recordings.csv min_snr: 0 max_snr: 18 music: weight: 1 - noise_path: data/musan_music_proc_audio/wav.scp + noise_path: data/musan_music_proc_audio/recordings.csv min_snr: 3 max_snr: 18 babble: weight: 1 - noise_path: data/musan_speech_babble/wav.scp + noise_path: data/musan_speech_babble/recordings.csv min_snr: 3 max_snr: 18 - diff --git a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml index ad991124..ffd2f374 100644 --- a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m12l_ecapatdnn512x3_stage1_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml @@ -55,5 +55,6 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 1024 + target_key: speaker train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml index 0b1d0454..7dcc56ef 100644 --- a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - 
speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml @@ -55,5 +55,6 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 1024 + target_key: speaker train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml index 254ff796..3f5c46bc 100644 --- a/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml @@ -55,5 +55,6 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 1024 + target_key: speaker train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml index 52be6db5..9e1d0928 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus9l_ecapatdnn512x3_stage1_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml @@ -55,5 +55,6 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 1024 + target_key: speaker train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml index 
bd3e7f86..0d0dc398 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage1_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 32 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 32 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: wavlmbaseplus_ecapatdnn512x3_v2.0.yaml @@ -55,5 +55,6 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 1024 + target_key: speaker train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml index 69a8322b..8504db9e 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 32 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 32 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: @@ -60,4 +60,5 @@ trainer: log_interval: 1000 epochs: 8 eff_batch_size: 512 + target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml index 3443591a..dda0c632 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -15,7 +15,7 @@ data: max_chunk_length: 6.0 min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker weight_exponent: 0.5 weight_mode: data-prior seg_weight_mode: data-prior @@ -25,11 +25,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -38,7 +38,7 @@ data: max_chunk_length: 3.0 min_chunk_length: 3.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker weight_exponent: 0.5 weight_mode: data-prior seg_weight_mode: data-prior @@ -70,4 +70,5 @@ trainer: 
log_interval: 1000 epochs: 4 eff_batch_size: 256 + target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml index abe5da6e..46ee7d18 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage1_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: wavlmlarge12l_ecapatdnn512x3_v2.0.yaml @@ -55,5 +55,6 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 1024 + target_key: speaker train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml index 7287188c..db36f8ee 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 64 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 64 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: @@ -60,4 +60,5 @@ trainer: log_interval: 1000 epochs: 8 eff_batch_size: 512 + target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml index 3443591a..dda0c632 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -15,7 +15,7 @@ data: max_chunk_length: 6.0 min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker weight_exponent: 0.5 weight_mode: data-prior seg_weight_mode: data-prior @@ -25,11 +25,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 
wav_scale: 1 sampler: @@ -38,7 +38,7 @@ data: max_chunk_length: 3.0 min_chunk_length: 3.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker weight_exponent: 0.5 weight_mode: data-prior seg_weight_mode: data-prior @@ -70,4 +70,5 @@ trainer: log_interval: 1000 epochs: 4 eff_batch_size: 256 + target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml index 69a8322b..8504db9e 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 32 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 32 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: @@ -60,4 +60,5 @@ trainer: log_interval: 1000 epochs: 8 eff_batch_size: 512 + target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml index 5e1260ad..ad56e80d 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -15,7 +15,7 @@ data: max_chunk_length: 6.0 min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker weight_exponent: 0.5 weight_mode: data-prior seg_weight_mode: data-prior @@ -25,11 +25,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -38,7 +38,7 @@ data: max_chunk_length: 3.0 min_chunk_length: 3.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker weight_exponent: 0.5 weight_mode: data-prior seg_weight_mode: data-prior @@ -70,4 +70,5 @@ trainer: log_interval: 1000 epochs: 4 eff_batch_size: 256 + target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml index 2addaa1e..40341a27 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage1_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker 
data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 128 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: wavlmlarge_ecapatdnn512x3_v2.0.yaml @@ -55,5 +55,6 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 1024 + target_key: speaker train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml index 69a8322b..8504db9e 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -14,17 +14,17 @@ data: min_batch_size: 32 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -32,7 +32,7 @@ data: min_batch_size: 32 max_chunk_length: 3.0 min_chunk_length: 3.0 - class_name: class_id + class_name: speaker data_loader: num_workers: 8 model: @@ -60,4 +60,5 @@ trainer: log_interval: 1000 epochs: 8 eff_batch_size: 512 + target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml index 5e1260ad..ad56e80d 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -15,7 +15,7 @@ data: max_chunk_length: 6.0 min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker weight_exponent: 0.5 weight_mode: data-prior seg_weight_mode: data-prior @@ -25,11 +25,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker target_sample_freq: 16000 wav_scale: 1 sampler: @@ -38,7 +38,7 @@ data: max_chunk_length: 3.0 min_chunk_length: 3.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker weight_exponent: 0.5 weight_mode: data-prior seg_weight_mode: data-prior @@ -70,4 +70,5 @@ trainer: log_interval: 1000 epochs: 4 eff_batch_size: 256 + target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml new file mode 100644 index 00000000..b5b9b6b6 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml @@ -0,0 +1,71 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + 
target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: speaker + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: speaker + data_loader: + num_workers: 8 +model: + hf_feats: + override_lora: true + use_lora: true + lora_rank: 4 + lora_components: + - q_proj + - v_proj + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 5e-2 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 5e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 8 + eff_batch_size: 512 + target_key: speaker + train_mode: hf-lora diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml new file mode 100644 index 00000000..a39445ff --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml @@ -0,0 +1,74 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 2e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 5000 + hold_steps: 6000 + min_lr: 1e-4 + warmup_steps: 6000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 4 + eff_batch_size: 256 + target_key: speaker + train_mode: hf-lora diff --git a/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh new file mode 100644 index 00000000..96ef76c5 --- /dev/null +++ b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh @@ -0,0 +1,55 @@ +# Wav2Vec2 Multilingual 300M params + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + 
+nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage1_v2.0.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn512x3_v2.0 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml +nnet_s2_args="" +nnet_name=${hf_model_name}_loraqv_ecapatdnn512x3_v2.0 +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0004.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/hyperion/torch/layers/lora.py b/hyperion/torch/layers/lora.py index 1436caf5..18401669 100644 --- a/hyperion/torch/layers/lora.py +++ b/hyperion/torch/layers/lora.py @@ -7,7 +7,47 @@ import loralib as lora import torch.nn as nn -from loralib import * +from loralib import mark_only_lora_as_trainable + + +def repr_lora(self, str_base): + if isinstance(self.lora_dropout, nn.Dropout): + lora_dropout = self.lora_dropout.p + else: + lora_dropout = 0 + + str_lora = f", r={self.r}, alpha={self.lora_alpha}, dropout={lora_dropout}, merge_weights={self.merge_weights})" + return str_base[:-1] + str_lora + + +class LinearLoRA(lora.Linear): + def __repr__(self): + str_base = super().__repr__() + return repr_lora(self, str_base) + + +class EmbeddingLoRA(lora.Embedding): + def __repr__(self): + str_base = super().__repr__() + return repr_lora(self, str_base) + + +class Conv1dLoRA(lora.Conv1d): + def __repr__(self): + str_base = super().__repr__() + return repr_lora(self, str_base) + + +class Conv2dLoRA(lora.Conv2d): + def __repr__(self): + str_base = super().__repr__() + return repr_lora(self, str_base) + + +class Conv3dLoRA(lora.Conv3d): + def __repr__(self): + str_base = super().__repr__() + return repr_lora(self, str_base) class LoRAFactory: @@ -19,7 +59,7 @@ def create_from_pretrained( merge_weights: bool = True, ): if isinstance(layer, nn.Embedding): - lora_layer = lora.Embedding( + lora_layer = EmbeddingLoRA( layer.num_embeddings, layer.embedding_dim, padding_idx=layer.padding_idx, @@ -36,7 +76,7 @@ def create_from_pretrained( elif isinstance(layer, nn.Linear): bias = layer.bias is not None - lora_layer = lora.Linear( + lora_layer = LinearLoRA( layer.in_features, layer.out_features, bias=bias, @@ -51,11 +91,11 @@ def create_from_pretrained( elif isinstance(layer, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): if isinstance(layer, nn.Conv1d): - lora_class = lora.Conv1d + lora_class = Conv1dLoRA elif isinstance(layer, nn.Conv2d): - lora_class = lora.Conv2d + lora_class = Conv2dLoRA elif isinstance(layer, nn.Conv3d): - lora_class = lora.Conv3d + lora_class = Conv3dLoRA bias = layer.bias is not None lora_layer = lora_class( diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index d67785d2..9ccd0d31 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -6,10 +6,9 @@ from enum import Enum from typing import Optional -from jsonargparse import ActionParser, 
ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ....utils.misc import filter_func_args from ...layer_blocks import TDNNBlock @@ -52,7 +51,6 @@ def __init__( in_feats=None, proj_feats=None, ): - super().__init__() # encoder network @@ -407,7 +405,6 @@ def extract_embed_slidwin( embed_layer=None, detach_chunks=False, ): - if feat_frame_shift is not None: # assume win_length/shift are in secs, transform to frames # pass feat times from msecs to secs @@ -464,7 +461,6 @@ def compute_slidwin_timestamps( feat_frame_shift=10, feat_snip_edges=False, ): - P = self.compute_slidwin_left_padding( win_length, win_shift, @@ -495,7 +491,6 @@ def compute_slidwin_left_padding( feat_frame_shift=10, feat_snip_edges=False, ): - # pass feat times from msecs to secs feat_frame_shift = feat_frame_shift / 1000 feat_frame_length = feat_frame_length / 1000 @@ -526,7 +521,6 @@ def compute_slidwin_left_padding( return P1 + P2 def get_config(self): - enc_cfg = self.encoder_net.get_config() pool_cfg = PF.get_config(self.pool_net) @@ -694,42 +688,14 @@ def valid_train_modes(): @staticmethod def filter_args(**kwargs): - # get arguments for pooling pool_args = PF.filter_args(**kwargs["pool_net"]) args = filter_func_args(ClassifHead.__init__, kwargs) args["pool_net"] = pool_args return args - # valid_args = ( - # "num_classes", - # "embed_dim", - # "num_embed_layers", - # "hid_act", - # "loss_type", - # "cos_scale", - # "margin", - # "margin_warmup_epochs", - # "intertop_k", - # "intertop_margin", - # "num_subcenters", - # "use_norm", - # "norm_before", - # "in_feats", - # "proj_feats", - # "dropout_rate", - # "norm_layer", - # "head_norm_layer", - # "head_use_in_norm", - # ) - # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - # args["pool_net"] = pool_args - # return args - @staticmethod def add_class_args(parser, prefix=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index 2c8d239f..a981d1ec 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -545,6 +545,24 @@ def forward_impl( """ max_in_length = x.size(-1) x, x_mask = self._preprocess(x, x_lengths) + if ddp_get_rank() == 0: + lora_layer = self.hf_model.encoder.layers[0].attention.v_proj + # print( + # "lora\nw=", + # lora_layer.weight[:3, :3], + # "\na=", + # lora_layer.lora_A[:3, :3], + # "\nb=", + # lora_layer.lora_B[:3, :3], + # "\n", + # "merged=", + # lora_layer.merged, + # "training=", + # lora_layer.training, + # flush=True, + # ) + assert self.training == lora_layer.training + assert self.training == (not lora_layer.merged) output = self.hf_model( x, x_mask, @@ -728,7 +746,7 @@ def save(self, file_path: str): @staticmethod def filter_args(**kwargs): - return filter_func_args(HFWav2VecBase.__init__, **kwargs) + return filter_func_args(HFWav2VecBase.__init__, kwargs) # valid_args = ( # "pretrained_model_path", # "normalize_input", @@ -910,7 +928,7 @@ def add_class_args(parser, prefix=None, skip=set()): @staticmethod def filter_finetune_args(**kwargs): - return filter_func_args(HFWav2VecBase.change_config, **kwargs) + return filter_func_args(HFWav2VecBase.change_config, kwargs) # valid_args = ( # "override_dropouts", # "override_spec_augment", From c23103ee406a833726516ff8ac35b3a06382e97e Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 13 Sep 2023 
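The `filter_args`/`filter_finetune_args` fixes above change `filter_func_args(HFWav2VecBase.__init__, **kwargs)` to `filter_func_args(HFWav2VecBase.__init__, kwargs)`. Assuming the helper follows the usual pattern of keeping only the keyword arguments a function's signature accepts (a sketch, not hyperion's actual implementation), unpacking the dict with `**` would have passed its contents as separate keyword arguments instead of the dict the helper expects:

```python
import inspect


def filter_func_args(func, kwargs):
    """Sketch of the presumed contract: keep only the entries of `kwargs`
    that appear in func's signature."""
    valid = inspect.signature(func).parameters
    return {k: v for k, v in kwargs.items() if k in valid}


def example(a, b=1):
    pass


print(filter_func_args(example, {"a": 0, "b": 1, "c": 2}))  # {'a': 0, 'b': 1}
# filter_func_args(example, **{"a": 0}) would raise TypeError: the helper
# takes the dict positionally, not arbitrary keyword arguments.
```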
19:32:11 -0400 Subject: [PATCH 113/154] lora in wavlm and hubert --- hyperion/torch/tpm/hf/hf_hubert.py | 25 ++++++++++++++++++++----- hyperion/torch/tpm/hf/hf_wav2vec2.py | 8 +++++++- hyperion/torch/tpm/hf/hf_wavlm.py | 25 ++++++++++++++++++++----- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py index 2957e433..32355bf6 100644 --- a/hyperion/torch/tpm/hf/hf_hubert.py +++ b/hyperion/torch/tpm/hf/hf_hubert.py @@ -6,11 +6,10 @@ import os from typing import Callable, List, Optional, Tuple, Union -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from transformers import HubertConfig, HubertModel - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import HubertConfig, HubertModel from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase @@ -137,6 +136,12 @@ class HFHubert(HFWav2VecBase): sample_frequency: (`int`) waveform sample frequency used to train the model. feat_extract_lr: learning rate for conv feature extractor, serves to set a lr different than the global one. encoder_lr: learning rate for the wav2vec encoder, serves to set a lr different than the global one. + use_lora: use low-rank adapters + lora_components: list of components where we apply LoRA, eg [Wq, Wv] + lora_rank: rank of LoRA + lora_alpha: scale for LoRA + lora_dropout: dropout rate for LoRA + lora_merge_weights: lora weights are merged with the pretrained weights at inference. """ def __init__( @@ -186,8 +191,12 @@ def __init__( sample_frequency: int = 16000, feat_extract_lr: Optional[float] = None, encoder_lr: Optional[float] = None, + lora_components: List[str] = ["q_proj", "v_proj"], + lora_rank: int = 4, + lora_alpha: int = 1, + lora_dropout: float = 0.0, + lora_merge_weights: bool = True, ): - super().__init__( pretrained_model_path=pretrained_model_path, normalize_input=normalize_input, @@ -205,6 +214,12 @@ def __init__( sample_frequency=sample_frequency, feat_extract_lr=feat_extract_lr, encoder_lr=encoder_lr, + use_lora=use_lora, + lora_components=lora_components, + lora_rank=lora_rank, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + lora_merge_weights=lora_merge_weights, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -618,7 +633,7 @@ def add_class_args(parser, prefix=None, skip=set()): @staticmethod def filter_finetune_args(**kwargs): - args_base = HFWav2VecBase.filter_args(**kwargs) + args_base = HFWav2VecBase.filter_finetune_args(**kwargs) valid_args = ( "hidden_dropout", "activation_dropout", diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index dd5de2fe..bc98f460 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -149,6 +149,12 @@ class HFWav2Vec2(HFWav2VecBase): sample_frequency: (`int`) waveform sample frequency used to train the model. feat_extract_lr: learning rate for conv feature extractor, serves to set a lr different than the global one. encoder_lr: learning rate for the wav2vec encoder, serves to set a lr different than the global one. + use_lora: use low-rank adapters + lora_components: list of components where we apply LoRA, eg [Wq, Wv] + lora_rank: rank of LoRA + lora_alpha: scale for LoRA + lora_dropout: dropout rate for LoRA + lora_merge_weights: lora weights are merged with the pretrained weights at inference. 
""" def __init__( @@ -697,7 +703,7 @@ def add_class_args(parser, prefix=None, skip=set()): @staticmethod def filter_finetune_args(**kwargs): - args_base = HFWav2VecBase.filter_args(**kwargs) + args_base = HFWav2VecBase.filter_finetune_args(**kwargs) valid_args = ( "hidden_dropout", "activation_dropout", diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py index e1b67d81..400e6a8b 100644 --- a/hyperion/torch/tpm/hf/hf_wavlm.py +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -6,11 +6,10 @@ import os from typing import Callable, List, Optional, Tuple, Union -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from transformers import WavLMConfig, WavLMModel - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from transformers import WavLMConfig, WavLMModel from ...utils.ddp import ddp_get_rank, ddp_wait_for_all_procs from .hf_wav2vec_base import HFWav2VecBase @@ -150,6 +149,12 @@ class HFWavLM(HFWav2VecBase): sample_frequency: (`int`) waveform sample frequency used to train the model. feat_extract_lr: learning rate for conv feature extractor, serves to set a lr different than the global one. encoder_lr: learning rate for the wav2vec encoder, serves to set a lr different than the global one. + use_lora: use low-rank adapters + lora_components: list of components where we apply LoRA, eg [Wq, Wv] + lora_rank: rank of LoRA + lora_alpha: scale for LoRA + lora_dropout: dropout rate for LoRA + lora_merge_weights: lora weights are merged with the pretrained weights at inference. """ def __init__( @@ -204,8 +209,12 @@ def __init__( sample_frequency: int = 16000, feat_extract_lr: Optional[float] = None, encoder_lr: Optional[float] = None, + lora_components: List[str] = ["q_proj", "v_proj"], + lora_rank: int = 4, + lora_alpha: int = 1, + lora_dropout: float = 0.0, + lora_merge_weights: bool = True, ): - super().__init__( pretrained_model_path=pretrained_model_path, normalize_input=normalize_input, @@ -223,6 +232,12 @@ def __init__( sample_frequency=sample_frequency, feat_extract_lr=feat_extract_lr, encoder_lr=encoder_lr, + use_lora=use_lora, + lora_components=lora_components, + lora_rank=lora_rank, + lora_alpha=lora_alpha, + lora_dropout=lora_dropout, + lora_merge_weights=lora_merge_weights, ) if pretrained_model_path is not None and not ignore_pretrained: @@ -687,7 +702,7 @@ def add_class_args(parser, prefix=None, skip=set()): @staticmethod def filter_finetune_args(**kwargs): - args_base = HFWav2VecBase.filter_args(**kwargs) + args_base = HFWav2VecBase.filter_finetune_args(**kwargs) valid_args = ( "hidden_dropout", "activation_dropout", From 81c540b1492ec7b42299f0ebb871f6af66d11304 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 15 Sep 2023 12:35:56 -0400 Subject: [PATCH 114/154] fix bug in w2v constructors with lora --- ...v2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh | 6 +++--- egs/voxceleb/v2.1/run_006_extract_xvectors.sh | 2 +- hyperion/torch/tpm/hf/hf_hubert.py | 10 ++++++++++ hyperion/torch/tpm/hf/hf_wav2vec2.py | 9 +++++++++ hyperion/torch/tpm/hf/hf_wav2vec_base.py | 17 ++++------------- hyperion/torch/tpm/hf/hf_wavlm.py | 10 ++++++++++ 6 files changed, 37 insertions(+), 17 deletions(-) diff --git a/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh index 96ef76c5..1985b8e6 100644 --- a/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh +++ 
b/egs/voxceleb/v2.1/global_conf/config_wav2vec2xlsr300m_loraqv_ecapatdnn512x3_v2.0.sh @@ -37,9 +37,9 @@ nnet_s3=$nnet_s3_dir/model_ep0004.pth # back-end do_plda=false -do_snorm=true -do_qmf=true -do_voxsrc22=true +#do_snorm=true +#do_qmf=true +#do_voxsrc22=true plda_aug_config=conf/reverb_noise_aug.yaml plda_num_augs=0 diff --git a/egs/voxceleb/v2.1/run_006_extract_xvectors.sh b/egs/voxceleb/v2.1/run_006_extract_xvectors.sh index 2cfe27fe..72b019cd 100755 --- a/egs/voxceleb/v2.1/run_006_extract_xvectors.sh +++ b/egs/voxceleb/v2.1/run_006_extract_xvectors.sh @@ -17,7 +17,7 @@ xvec_chunk_length=120.0 . $config_file if [ "$use_gpu" == "true" ];then - xvec_args="--use-gpu true --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length" + xvec_args="--use-gpu --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length" xvec_cmd="$cuda_eval_cmd --gpu 1 --mem 6G" num_gpus=1 else diff --git a/hyperion/torch/tpm/hf/hf_hubert.py b/hyperion/torch/tpm/hf/hf_hubert.py index 32355bf6..638bf561 100644 --- a/hyperion/torch/tpm/hf/hf_hubert.py +++ b/hyperion/torch/tpm/hf/hf_hubert.py @@ -191,6 +191,7 @@ def __init__( sample_frequency: int = 16000, feat_extract_lr: Optional[float] = None, encoder_lr: Optional[float] = None, + use_lora: bool = False, lora_components: List[str] = ["q_proj", "v_proj"], lora_rank: int = 4, lora_alpha: int = 1, lora_dropout: float = 0.0, lora_merge_weights: bool = True, @@ -298,6 +299,15 @@ def __init__( if drop_layers_gt is not None: self.drop_upper_layers(drop_layers_gt) + if use_lora: + self._make_lora_layers( + lora_components, + lora_rank, + lora_alpha, + lora_dropout, + lora_merge_weights, + ) + self.ignore_pretrained = True @property diff --git a/hyperion/torch/tpm/hf/hf_wav2vec2.py b/hyperion/torch/tpm/hf/hf_wav2vec2.py index bc98f460..5b59d79a 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec2.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec2.py @@ -322,6 +322,15 @@ def __init__( if drop_layers_gt is not None: self.drop_upper_layers(drop_layers_gt) + if use_lora: + self._make_lora_layers( + lora_components, + lora_rank, + lora_alpha, + lora_dropout, + lora_merge_weights, + ) + self.ignore_pretrained = True @property diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index a981d1ec..e0bcee1c 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -174,15 +174,6 @@ self._frame_shift = None self.hf_model = None - if use_lora: - self._make_lora_layers( - lora_components, - lora_rank, - lora_alpha, - lora_dropout, - lora_merge_weights, - ) - def __deepcopy__(self, memo): """Reimplementation of deepcopy for Hugging Face models. The weight_norm in the Conv. Pos. Encoder of Wav2Vec models makes the default deepcopy fail.
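This hunk is the actual bug named in the commit message: the LoRA wrapping used to run in `HFWav2VecBase.__init__`, where `self.hf_model` is still `None` (the base class only reserves the attribute; the subclass builds the Hugging Face model afterwards). Moving the `_make_lora_layers` call into the subclass constructors, after the model exists and after `drop_upper_layers`, lets the wrapping act on real layers. A minimal sketch of the ordering constraint (names are illustrative, not hyperion's API):

```python
import torch.nn as nn


class Base(nn.Module):
    def __init__(self):
        super().__init__()
        self.hf_model = None  # filled in by the subclass later, so LoRA
        # wrapping here would have nothing to wrap


class Child(Base):
    def __init__(self, use_lora: bool = False):
        super().__init__()
        self.hf_model = nn.Linear(4, 4)  # stand-in for the HF encoder
        if use_lora:
            # safe: runs after the model (and any layer dropping) exists
            self.wrap_with_lora(self.hf_model)

    def wrap_with_lora(self, module: nn.Module) -> None:
        print(f"wrapping {module} with LoRA adapters")


Child(use_lora=True)  # prints the wrapped module
```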
@@ -545,8 +536,8 @@ def forward_impl( """ max_in_length = x.size(-1) x, x_mask = self._preprocess(x, x_lengths) - if ddp_get_rank() == 0: - lora_layer = self.hf_model.encoder.layers[0].attention.v_proj + # if ddp_get_rank() == 0: + # lora_layer = self.hf_model.encoder.layers[0].attention.v_proj # print( # "lora\nw=", # lora_layer.weight[:3, :3], @@ -561,8 +552,8 @@ def forward_impl( # lora_layer.training, # flush=True, # ) - assert self.training == lora_layer.training - assert self.training == (not lora_layer.merged) + # assert self.training == lora_layer.training + # assert self.training == (not lora_layer.merged) output = self.hf_model( x, x_mask, diff --git a/hyperion/torch/tpm/hf/hf_wavlm.py b/hyperion/torch/tpm/hf/hf_wavlm.py index 400e6a8b..1db5fa23 100644 --- a/hyperion/torch/tpm/hf/hf_wavlm.py +++ b/hyperion/torch/tpm/hf/hf_wavlm.py @@ -209,6 +209,7 @@ def __init__( sample_frequency: int = 16000, feat_extract_lr: Optional[float] = None, encoder_lr: Optional[float] = None, + use_lora: bool = False, lora_components: List[str] = ["q_proj", "v_proj"], lora_rank: int = 4, lora_alpha: int = 1, @@ -321,6 +322,15 @@ def __init__( if drop_layers_gt is not None: self.drop_upper_layers(drop_layers_gt) + if use_lora: + self._make_lora_layers( + lora_components, + lora_rank, + lora_alpha, + lora_dropout, + lora_merge_weights, + ) + self.ignore_pretrained = True @property From cb9fa3c9a6d1d6293f0869060bc3273f5beb41bb Mon Sep 17 00:00:00 2001 From: System User Date: Mon, 30 Oct 2023 19:07:10 -0400 Subject: [PATCH 115/154] lre22 fixed v1 done --- egs/lre22/fixed.v1.8k/README.md | 43 + egs/lre22/fixed.v1.8k/cmd.sh | 25 + egs/lre22/fixed.v1.8k/conf/clsp.conf | 11 + .../fixed.v1.8k/conf/coe_gpu_bigmem.conf | 11 + egs/lre22/fixed.v1.8k/conf/coe_gpu_long.conf | 13 + egs/lre22/fixed.v1.8k/conf/coe_gpu_rtx.conf | 11 + egs/lre22/fixed.v1.8k/conf/coe_gpu_short.conf | 11 + egs/lre22/fixed.v1.8k/conf/coe_gpu_v100.conf | 11 + .../conf/fbank64_specaug1_stmn_8k.yaml | 24 + .../fixed.v1.8k/conf/fbank64_stmn_8k.yaml | 12 + .../fixed.v1.8k/conf/reverb_noise_aug.yaml | 35 + ...rain_ecapatdnn2048x4_xvec_stage1_v1.0.yaml | 101 + ...rain_ecapatdnn2048x4_xvec_stage2_v1.0.yaml | 69 + ...rain_fwseres2net50s8_xvec_stage1_v1.0.yaml | 78 + ...rain_fwseres2net50s8_xvec_stage1_v1.1.yaml | 78 + ...rain_fwseres2net50s8_xvec_stage2_v1.0.yaml | 69 + egs/lre22/fixed.v1.8k/conf/vad_8k.yaml | 9 + egs/lre22/fixed.v1.8k/datapath.sh | 46 + egs/lre22/fixed.v1.8k/default_config.sh | 1 + ...onfig_fbank64_stmn_ecapatdnn2048x4_v1.0.sh | 24 + ...onfig_fbank64_stmn_fwseres2net50s8_v1.0.sh | 28 + egs/lre22/fixed.v1.8k/hyp_utils | 1 + .../apply_tel_codecs_to_kaldi_datadir.py | 215 ++ egs/lre22/fixed.v1.8k/local/download_focal.sh | 27 + .../fixed.v1.8k/local/download_focal.sh~ | 27 + .../local/download_lre22_scorer.sh | 24 + .../local/download_lre22_scorer.sh~ | 25 + .../local/eval_calibration_lre22.sh | 42 + .../fixed.v1.8k/local/eval_fusion_lre22.sh | 46 + egs/lre22/fixed.v1.8k/local/make_musan.py | 189 ++ egs/lre22/fixed.v1.8k/local/make_musan.sh | 48 + egs/lre22/fixed.v1.8k/local/make_rirs_data.sh | 29 + .../fixed.v1.8k/local/make_sre16_train_dev.sh | 65 + .../local/make_sre16_train_eval.sh | 66 + .../local/make_sre18_dev_unlabeled.sh | 38 + .../fixed.v1.8k/local/make_sre18_train_dev.sh | 57 + .../local/make_sre18_train_eval.sh | 60 + .../fixed.v1.8k/local/make_sre19cmn2_eval.sh | 54 + egs/lre22/fixed.v1.8k/local/merge_scores.py | 32 + egs/lre22/fixed.v1.8k/local/prepare_adi17.py | 164 ++ egs/lre22/fixed.v1.8k/local/prepare_ast.py | 144 ++ 
egs/lre22/fixed.v1.8k/local/prepare_babel.py | 108 + .../fixed.v1.8k/local/prepare_common_voice.py | 146 ++ .../local/prepare_common_voice_accents.py | 132 + .../local/prepare_common_voice_accents_cat.py | 174 ++ .../local/prepare_common_voice_cat.py | 180 ++ egs/lre22/fixed.v1.8k/local/prepare_lre17.py | 140 ++ .../fixed.v1.8k/local/prepare_lre22_dev.py | 108 + .../fixed.v1.8k/local/prepare_lre22_eval.py | 98 + .../local/prepare_some_data_for_lre.py | 135 ++ .../local/prepare_some_data_for_lre_cat.py | 204 ++ .../local/prepare_sre21av_dev_audio.py | 215 ++ .../local/prepare_sre21av_eval_audio.py | 243 ++ .../local/prepare_sre_cts_superset.py | 185 ++ .../fixed.v1.8k/local/prepare_voxlingua107.py | 130 + egs/lre22/fixed.v1.8k/local/score_lre22.sh | 29 + egs/lre22/fixed.v1.8k/local/split_dev.py | 80 + .../local/split_segments_train_val.py | 160 ++ .../local/train_calibration_lre22.sh | 46 + .../fixed.v1.8k/local/train_fusion_lre22.sh | 36 + egs/lre22/fixed.v1.8k/local/validate_lre22.sh | 17 + egs/lre22/fixed.v1.8k/path.sh | 5 + .../dev_splits/fold_0/test_segments.csv | 2114 +++++++++++++++++ .../dev_splits/fold_0/train_segments.csv | 2088 ++++++++++++++++ .../dev_splits/fold_1/test_segments.csv | 2088 ++++++++++++++++ .../dev_splits/fold_1/train_segments.csv | 2114 +++++++++++++++++ .../resources/lre17_ara-ary/segs_ara-ary.csv | 1306 ++++++++++ egs/lre22/fixed.v1.8k/run_001_prepare_data.sh | 93 + egs/lre22/fixed.v1.8k/run_002_compute_evad.sh | 58 + .../run_003_prepare_noises_rirs.sh | 66 + egs/lre22/fixed.v1.8k/run_004_apply_codecs.sh | 25 + .../run_010_prepare_xvec_train_data.sh | 96 + .../fixed.v1.8k/run_011_train_xvector.sh | 164 ++ .../fixed.v1.8k/run_011_train_xvector.sh~ | 161 ++ .../fixed.v1.8k/run_030_extract_xvectors.sh | 215 ++ egs/lre22/fixed.v1.8k/run_040_be_final.sh | 434 ++++ egs/lre22/fixed.v1.8k/run_050_fusion_v1.sh | 43 + egs/lre22/fixed.v1.8k/steps | 1 + egs/lre22/fixed.v1.8k/steps_be/eval_be_v1.py | 117 + egs/lre22/fixed.v1.8k/steps_be/eval_be_v2.py | 117 + egs/lre22/fixed.v1.8k/steps_be/eval_be_v5.py | 129 + egs/lre22/fixed.v1.8k/steps_be/eval_fusion.m | 17 + egs/lre22/fixed.v1.8k/steps_be/train_be_v1.py | 136 ++ egs/lre22/fixed.v1.8k/steps_be/train_be_v2.py | 136 ++ egs/lre22/fixed.v1.8k/steps_be/train_be_v3.py | 204 ++ egs/lre22/fixed.v1.8k/steps_be/train_be_v4.py | 199 ++ egs/lre22/fixed.v1.8k/steps_be/train_be_v5.py | 274 +++ egs/lre22/fixed.v1.8k/steps_be/train_be_v6.py | 196 ++ egs/lre22/fixed.v1.8k/steps_be/train_be_v7.py | 315 +++ egs/lre22/fixed.v1.8k/steps_be/train_be_v8.py | 317 +++ egs/lre22/fixed.v1.8k/steps_be/train_be_v9.py | 220 ++ egs/lre22/fixed.v1.8k/steps_be/train_fusion.m | 16 + egs/lre22/fixed.v1.8k/steps_xvec | 1 + egs/lre22/fixed.v1.8k/utils | 1 + egs/lre22/open.v1.8k/README.md | 43 + egs/lre22/open.v1.8k/cmd.sh | 28 + egs/lre22/open.v1.8k/conf/clsp.conf | 11 + egs/lre22/open.v1.8k/conf/coe_gpu_bigmem.conf | 11 + egs/lre22/open.v1.8k/conf/coe_gpu_long.conf | 13 + egs/lre22/open.v1.8k/conf/coe_gpu_rtx.conf | 11 + egs/lre22/open.v1.8k/conf/coe_gpu_short.conf | 11 + egs/lre22/open.v1.8k/conf/coe_gpu_v100.conf | 11 + .../conf/fbank64_specaug1_stmn_8k.yaml | 24 + .../open.v1.8k/conf/fbank64_stmn_8k.yaml | 12 + .../conf/other_conf/fbank64_stmn_8k.yaml | 12 + .../conf/other_conf/fbank80_stmn_16k.yaml | 12 + .../conf/other_conf/reverb_noise_aug.yaml | 35 + ...ecapatdnn2048-5120x8_xvec_stage1_v2.4.yaml | 124 + ...ecapatdnn2048-5120x8_xvec_stage2_v2.4.yaml | 79 + ...ecapatdnn2048-5120x8_xvec_stage3_v2.4.yaml | 79 + 
...rain_ecapatdnn2048x4_xvec_stage1_v1.0.yaml | 99 + ...rain_ecapatdnn2048x4_xvec_stage1_v2.1.yaml | 95 + ...rain_ecapatdnn2048x4_xvec_stage1_v2.2.yaml | 97 + ...rain_ecapatdnn2048x4_xvec_stage1_v2.3.yaml | 77 + ...rain_ecapatdnn2048x4_xvec_stage1_v2.4.yaml | 97 + ...rain_ecapatdnn2048x4_xvec_stage1_v3.0.yaml | 98 + ...rain_ecapatdnn2048x4_xvec_stage1_v3.1.yaml | 98 + ...rain_ecapatdnn2048x4_xvec_stage1_v3.2.yaml | 98 + ...rain_ecapatdnn2048x4_xvec_stage1_v3.5.yaml | 98 + ...in_ecapatdnn2048x4_xvec_stage2_v2.1.1.yaml | 79 + ...in_ecapatdnn2048x4_xvec_stage2_v2.1.2.yaml | 79 + ...rain_ecapatdnn2048x4_xvec_stage2_v2.1.yaml | 79 + ...rain_ecapatdnn2048x4_xvec_stage2_v2.3.yaml | 75 + ...rain_ecapatdnn2048x4_xvec_stage2_v2.4.yaml | 79 + ...rain_ecapatdnn2048x4_xvec_stage3_v2.1.yaml | 79 + ...rain_ecapatdnn2048x4_xvec_stage3_v2.4.yaml | 79 + ...train_tseres2net50s8_xvec_stage1_v2.1.yaml | 75 + ...train_tseres2net50s8_xvec_stage1_v2.2.yaml | 77 + ...train_tseres2net50s8_xvec_stage1_v2.3.yaml | 77 + ...train_tseres2net50s8_xvec_stage2_v2.1.yaml | 79 + ...train_tseres2net50s8_xvec_stage2_v2.3.yaml | 75 + .../open.v1.8k/conf/other_conf/vad_16k.yaml | 8 + .../open.v1.8k/conf/other_conf/vad_8k.yaml | 8 + .../open.v1.8k/conf/reverb_noise_aug.yaml | 35 + ...rain_ecapatdnn2048x4_xvec_stage1_v1.0.yaml | 105 + ...rain_fwseres2net50s8_xvec_stage1_v1.0.yaml | 82 + egs/lre22/open.v1.8k/conf/vad_8k.yaml | 9 + egs/lre22/open.v1.8k/datapath.sh | 87 + egs/lre22/open.v1.8k/default_config.sh | 1 + ...onfig_fbank64_stmn_ecapatdnn2048x4_v1.0.sh | 20 + ...onfig_fbank64_stmn_fwseres2net50s8_v1.0.sh | 45 + egs/lre22/open.v1.8k/hyp_utils | 1 + egs/lre22/open.v1.8k/local | 1 + egs/lre22/open.v1.8k/path.sh | 5 + egs/lre22/open.v1.8k/resources | 1 + egs/lre22/open.v1.8k/run_001_prepare_data.sh | 342 +++ egs/lre22/open.v1.8k/run_002_compute_evad.sh | 64 + .../open.v1.8k/run_003_prepare_noises_rirs.sh | 66 + egs/lre22/open.v1.8k/run_004_apply_codecs.sh | 28 + .../run_010_prepare_xvec_train_data.sh | 91 + egs/lre22/open.v1.8k/run_011_train_xvector.sh | 92 + .../open.v1.8k/run_030_extract_xvectors.sh | 219 ++ egs/lre22/open.v1.8k/run_040_be_final.sh | 434 ++++ egs/lre22/open.v1.8k/steps | 1 + egs/lre22/open.v1.8k/steps_be | 1 + egs/lre22/open.v1.8k/steps_xvec | 1 + egs/lre22/open.v1.8k/utils | 1 + egs/lre22/open.v2.8k/cmd.sh | 28 + egs/lre22/open.v2.8k/conf/clsp.conf | 11 + egs/lre22/open.v2.8k/conf/coe_gpu_bigmem.conf | 11 + egs/lre22/open.v2.8k/conf/coe_gpu_long.conf | 13 + egs/lre22/open.v2.8k/conf/coe_gpu_rtx.conf | 11 + egs/lre22/open.v2.8k/conf/coe_gpu_short.conf | 11 + egs/lre22/open.v2.8k/conf/coe_gpu_v100.conf | 11 + .../open.v2.8k/conf/reverb_noise_aug.yaml | 35 + ...2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml | 59 + ...ec2xlsr300m_ecapatdnn1024x3_subcenter.yaml | 47 + egs/lre22/open.v2.8k/datapath.sh | 87 + egs/lre22/open.v2.8k/default_config.sh | 1 + ...ig_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh | 36 + egs/lre22/open.v2.8k/hyp_utils | 1 + egs/lre22/open.v2.8k/local | 1 + egs/lre22/open.v2.8k/path.sh | 5 + egs/lre22/open.v2.8k/resources | 1 + egs/lre22/open.v2.8k/run_001_prepare_data.sh | 330 +++ egs/lre22/open.v2.8k/run_002_compute_evad.sh | 64 + .../open.v2.8k/run_003_prepare_noises_rirs.sh | 66 + egs/lre22/open.v2.8k/run_004_apply_codecs.sh | 28 + .../run_010_prepare_xvec_train_data.sh | 78 + egs/lre22/open.v2.8k/run_011_train_xvector.sh | 128 + egs/lre22/open.v2.8k/steps | 1 + egs/lre22/open.v2.8k/steps_be | 1 + egs/lre22/open.v2.8k/steps_xvec | 1 + egs/lre22/open.v2.8k/utils | 1 + 
egs/sre21-av-a/v1.8k/run_011_train_xvector.sh | 47 - hyperion/np/transforms/skl_tsne.py | 3 +- hyperion/utils/info_table.py | 2 +- hyperion/utils/scp_list.py | 2 +- 188 files changed, 23219 insertions(+), 50 deletions(-) create mode 100644 egs/lre22/fixed.v1.8k/README.md create mode 100755 egs/lre22/fixed.v1.8k/cmd.sh create mode 100644 egs/lre22/fixed.v1.8k/conf/clsp.conf create mode 100644 egs/lre22/fixed.v1.8k/conf/coe_gpu_bigmem.conf create mode 100644 egs/lre22/fixed.v1.8k/conf/coe_gpu_long.conf create mode 100644 egs/lre22/fixed.v1.8k/conf/coe_gpu_rtx.conf create mode 100644 egs/lre22/fixed.v1.8k/conf/coe_gpu_short.conf create mode 100644 egs/lre22/fixed.v1.8k/conf/coe_gpu_v100.conf create mode 100644 egs/lre22/fixed.v1.8k/conf/fbank64_specaug1_stmn_8k.yaml create mode 100644 egs/lre22/fixed.v1.8k/conf/fbank64_stmn_8k.yaml create mode 100644 egs/lre22/fixed.v1.8k/conf/reverb_noise_aug.yaml create mode 100644 egs/lre22/fixed.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml create mode 100644 egs/lre22/fixed.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml create mode 100644 egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml create mode 100644 egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml create mode 100644 egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage2_v1.0.yaml create mode 100644 egs/lre22/fixed.v1.8k/conf/vad_8k.yaml create mode 100644 egs/lre22/fixed.v1.8k/datapath.sh create mode 120000 egs/lre22/fixed.v1.8k/default_config.sh create mode 100644 egs/lre22/fixed.v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh create mode 100644 egs/lre22/fixed.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh create mode 120000 egs/lre22/fixed.v1.8k/hyp_utils create mode 100755 egs/lre22/fixed.v1.8k/local/apply_tel_codecs_to_kaldi_datadir.py create mode 100755 egs/lre22/fixed.v1.8k/local/download_focal.sh create mode 100755 egs/lre22/fixed.v1.8k/local/download_focal.sh~ create mode 100755 egs/lre22/fixed.v1.8k/local/download_lre22_scorer.sh create mode 100755 egs/lre22/fixed.v1.8k/local/download_lre22_scorer.sh~ create mode 100755 egs/lre22/fixed.v1.8k/local/eval_calibration_lre22.sh create mode 100755 egs/lre22/fixed.v1.8k/local/eval_fusion_lre22.sh create mode 100755 egs/lre22/fixed.v1.8k/local/make_musan.py create mode 100755 egs/lre22/fixed.v1.8k/local/make_musan.sh create mode 100755 egs/lre22/fixed.v1.8k/local/make_rirs_data.sh create mode 100755 egs/lre22/fixed.v1.8k/local/make_sre16_train_dev.sh create mode 100755 egs/lre22/fixed.v1.8k/local/make_sre16_train_eval.sh create mode 100755 egs/lre22/fixed.v1.8k/local/make_sre18_dev_unlabeled.sh create mode 100755 egs/lre22/fixed.v1.8k/local/make_sre18_train_dev.sh create mode 100755 egs/lre22/fixed.v1.8k/local/make_sre18_train_eval.sh create mode 100755 egs/lre22/fixed.v1.8k/local/make_sre19cmn2_eval.sh create mode 100755 egs/lre22/fixed.v1.8k/local/merge_scores.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_adi17.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_ast.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_babel.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_common_voice.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_common_voice_accents.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_common_voice_accents_cat.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_common_voice_cat.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_lre17.py create mode 100755 
egs/lre22/fixed.v1.8k/local/prepare_lre22_dev.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_lre22_eval.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_some_data_for_lre.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_some_data_for_lre_cat.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_sre21av_dev_audio.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_sre21av_eval_audio.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_sre_cts_superset.py create mode 100755 egs/lre22/fixed.v1.8k/local/prepare_voxlingua107.py create mode 100755 egs/lre22/fixed.v1.8k/local/score_lre22.sh create mode 100755 egs/lre22/fixed.v1.8k/local/split_dev.py create mode 100755 egs/lre22/fixed.v1.8k/local/split_segments_train_val.py create mode 100755 egs/lre22/fixed.v1.8k/local/train_calibration_lre22.sh create mode 100755 egs/lre22/fixed.v1.8k/local/train_fusion_lre22.sh create mode 100755 egs/lre22/fixed.v1.8k/local/validate_lre22.sh create mode 100755 egs/lre22/fixed.v1.8k/path.sh create mode 100644 egs/lre22/fixed.v1.8k/resources/dev_splits/fold_0/test_segments.csv create mode 100644 egs/lre22/fixed.v1.8k/resources/dev_splits/fold_0/train_segments.csv create mode 100644 egs/lre22/fixed.v1.8k/resources/dev_splits/fold_1/test_segments.csv create mode 100644 egs/lre22/fixed.v1.8k/resources/dev_splits/fold_1/train_segments.csv create mode 100644 egs/lre22/fixed.v1.8k/resources/lre17_ara-ary/segs_ara-ary.csv create mode 100755 egs/lre22/fixed.v1.8k/run_001_prepare_data.sh create mode 100755 egs/lre22/fixed.v1.8k/run_002_compute_evad.sh create mode 100755 egs/lre22/fixed.v1.8k/run_003_prepare_noises_rirs.sh create mode 100755 egs/lre22/fixed.v1.8k/run_004_apply_codecs.sh create mode 100755 egs/lre22/fixed.v1.8k/run_010_prepare_xvec_train_data.sh create mode 100755 egs/lre22/fixed.v1.8k/run_011_train_xvector.sh create mode 100755 egs/lre22/fixed.v1.8k/run_011_train_xvector.sh~ create mode 100755 egs/lre22/fixed.v1.8k/run_030_extract_xvectors.sh create mode 100755 egs/lre22/fixed.v1.8k/run_040_be_final.sh create mode 100755 egs/lre22/fixed.v1.8k/run_050_fusion_v1.sh create mode 120000 egs/lre22/fixed.v1.8k/steps create mode 100755 egs/lre22/fixed.v1.8k/steps_be/eval_be_v1.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/eval_be_v2.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/eval_be_v5.py create mode 100644 egs/lre22/fixed.v1.8k/steps_be/eval_fusion.m create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v1.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v2.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v3.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v4.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v5.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v6.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v7.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v8.py create mode 100755 egs/lre22/fixed.v1.8k/steps_be/train_be_v9.py create mode 100644 egs/lre22/fixed.v1.8k/steps_be/train_fusion.m create mode 120000 egs/lre22/fixed.v1.8k/steps_xvec create mode 120000 egs/lre22/fixed.v1.8k/utils create mode 100644 egs/lre22/open.v1.8k/README.md create mode 100755 egs/lre22/open.v1.8k/cmd.sh create mode 100644 egs/lre22/open.v1.8k/conf/clsp.conf create mode 100644 egs/lre22/open.v1.8k/conf/coe_gpu_bigmem.conf create mode 100644 egs/lre22/open.v1.8k/conf/coe_gpu_long.conf create mode 100644 egs/lre22/open.v1.8k/conf/coe_gpu_rtx.conf create mode 100644 
egs/lre22/open.v1.8k/conf/coe_gpu_short.conf create mode 100644 egs/lre22/open.v1.8k/conf/coe_gpu_v100.conf create mode 100644 egs/lre22/open.v1.8k/conf/fbank64_specaug1_stmn_8k.yaml create mode 100644 egs/lre22/open.v1.8k/conf/fbank64_stmn_8k.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/fbank64_stmn_8k.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/fbank80_stmn_16k.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/reverb_noise_aug.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage1_v2.4.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage2_v2.4.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage3_v2.4.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.1.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.2.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.3.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.4.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.2.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.5.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.1.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.2.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.3.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.4.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage3_v2.1.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage3_v2.4.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.1.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.2.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.3.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage2_v2.1.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage2_v2.3.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/vad_16k.yaml create mode 100644 egs/lre22/open.v1.8k/conf/other_conf/vad_8k.yaml create mode 100644 egs/lre22/open.v1.8k/conf/reverb_noise_aug.yaml create mode 100644 egs/lre22/open.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml create mode 100644 egs/lre22/open.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml create mode 100644 egs/lre22/open.v1.8k/conf/vad_8k.yaml create mode 100644 egs/lre22/open.v1.8k/datapath.sh create mode 120000 egs/lre22/open.v1.8k/default_config.sh create mode 100644 egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh create mode 
100644 egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh create mode 120000 egs/lre22/open.v1.8k/hyp_utils create mode 120000 egs/lre22/open.v1.8k/local create mode 100755 egs/lre22/open.v1.8k/path.sh create mode 120000 egs/lre22/open.v1.8k/resources create mode 100755 egs/lre22/open.v1.8k/run_001_prepare_data.sh create mode 100755 egs/lre22/open.v1.8k/run_002_compute_evad.sh create mode 100755 egs/lre22/open.v1.8k/run_003_prepare_noises_rirs.sh create mode 100755 egs/lre22/open.v1.8k/run_004_apply_codecs.sh create mode 100755 egs/lre22/open.v1.8k/run_010_prepare_xvec_train_data.sh create mode 100755 egs/lre22/open.v1.8k/run_011_train_xvector.sh create mode 100755 egs/lre22/open.v1.8k/run_030_extract_xvectors.sh create mode 100755 egs/lre22/open.v1.8k/run_040_be_final.sh create mode 120000 egs/lre22/open.v1.8k/steps create mode 120000 egs/lre22/open.v1.8k/steps_be create mode 120000 egs/lre22/open.v1.8k/steps_xvec create mode 120000 egs/lre22/open.v1.8k/utils create mode 100755 egs/lre22/open.v2.8k/cmd.sh create mode 100644 egs/lre22/open.v2.8k/conf/clsp.conf create mode 100644 egs/lre22/open.v2.8k/conf/coe_gpu_bigmem.conf create mode 100644 egs/lre22/open.v2.8k/conf/coe_gpu_long.conf create mode 100644 egs/lre22/open.v2.8k/conf/coe_gpu_rtx.conf create mode 100644 egs/lre22/open.v2.8k/conf/coe_gpu_short.conf create mode 100644 egs/lre22/open.v2.8k/conf/coe_gpu_v100.conf create mode 100644 egs/lre22/open.v2.8k/conf/reverb_noise_aug.yaml create mode 100644 egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml create mode 100644 egs/lre22/open.v2.8k/conf/wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml create mode 100644 egs/lre22/open.v2.8k/datapath.sh create mode 120000 egs/lre22/open.v2.8k/default_config.sh create mode 100644 egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh create mode 120000 egs/lre22/open.v2.8k/hyp_utils create mode 120000 egs/lre22/open.v2.8k/local create mode 100644 egs/lre22/open.v2.8k/path.sh create mode 120000 egs/lre22/open.v2.8k/resources create mode 100755 egs/lre22/open.v2.8k/run_001_prepare_data.sh create mode 100755 egs/lre22/open.v2.8k/run_002_compute_evad.sh create mode 100755 egs/lre22/open.v2.8k/run_003_prepare_noises_rirs.sh create mode 100755 egs/lre22/open.v2.8k/run_004_apply_codecs.sh create mode 100755 egs/lre22/open.v2.8k/run_010_prepare_xvec_train_data.sh create mode 100755 egs/lre22/open.v2.8k/run_011_train_xvector.sh create mode 120000 egs/lre22/open.v2.8k/steps create mode 120000 egs/lre22/open.v2.8k/steps_be create mode 120000 egs/lre22/open.v2.8k/steps_xvec create mode 120000 egs/lre22/open.v2.8k/utils diff --git a/egs/lre22/fixed.v1.8k/README.md b/egs/lre22/fixed.v1.8k/README.md new file mode 100644 index 00000000..877f99ca --- /dev/null +++ b/egs/lre22/fixed.v1.8k/README.md @@ -0,0 +1,43 @@ +# LRE22 Fixed Condition V1 + +Recipe for the NIST LRE22 fixed condition based on the JHU-MIT Submission. + +## Citing +``` +@inproceedings{villalba23_interspeech, + author={Jesús Villalba and Jonas Borgstrom and Maliha Jahan and Saurabh Kataria and Leibny Paola Garcia and Pedro Torres-Carrasquillo and Najim Dehak}, + title={{Advances in Language Recognition in Low Resource African Languages: The JHU-MIT Submission for NIST LRE22}}, + year=2023, + booktitle={Proc.
INTERSPEECH 2023}, + pages={521--525}, + doi={10.21437/Interspeech.2023-1094} +} +``` + +## Training Data + + - x-Vector networks trained on: + - VoxLingua107 + - NIST LRE17 Train + Dev + Eval / CTS + AfV + - Gaussian back-end trained on: + - NIST LRE22 dev with 2-fold cross-val + x10 augmentations + +## Usage + + - Run the run_0*.sh scripts in sequence + - By default it uses ECAPA-TDNN 4 layers of 2048 dim. + - To change the default network run scripts with the config-file argument: +```bash +run_011_train_xvector.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh +run_030_extract_xvectors.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh --use-gpu true +run_040_be_final.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh +``` + +## Results + +| Config | Model Type | Model Details | Back-end | Dev MinCp | Dev ActCp | Eval MinCp | Eval ActCp | +| ------ | ---------- | ------------- | -------- | :-------: | :-------: | :--------: | :--------: | +| config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh | ECAPA-TDNN 2048x4 | Stage-2 | GBE | 0.207 | 0.209 | 0.198 | 0.199 | +| config_fbank64_stmn_fwseres2net50s8_v1.0.sh | fw-SE Res2Net50 scale=8 | Stage-2 | GBE | 0.227 | 0.229 | 0.213 | 0.215 | +| Fusion ECAPA-TDNN + FwSE Res2Net50 | | | FoCal | 0.182 | 0.183 | 0.180 | 0.181 | + diff --git a/egs/lre22/fixed.v1.8k/cmd.sh b/egs/lre22/fixed.v1.8k/cmd.sh new file mode 100755 index 00000000..4b4e8ae7 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/cmd.sh @@ -0,0 +1,25 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
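The README above scores every x-vector system with a Gaussian back-end (GBE) trained on the LRE22 dev data. For reference, here is a minimal sketch of that kind of classifier, per-language means with a shared within-class covariance; this is the generic idea, not hyperion's `LinearGBE` implementation:

```python
import numpy as np


def train_gbe(X: np.ndarray, y: np.ndarray, num_classes: int):
    """Class means and shared within-class precision from embeddings X."""
    mu = np.stack([X[y == c].mean(axis=0) for c in range(num_classes)])
    Xc = X - mu[y]  # center each embedding by its class mean
    prec = np.linalg.inv(Xc.T @ Xc / len(X))
    return mu, prec


def gbe_scores(X: np.ndarray, mu: np.ndarray, prec: np.ndarray) -> np.ndarray:
    """Per-class log-likelihoods, up to a shared class-independent constant."""
    return X @ prec @ mu.T - 0.5 * np.sum((mu @ prec) * mu, axis=1)


X = np.random.randn(140, 20)
y = np.repeat(np.arange(14), 10)  # 14 target languages in LRE22
mu, prec = train_gbe(X, y, 14)
print(gbe_scores(X[:2], mu, prec).shape)  # (2, 14)
```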
+ +if [ "$(hostname -d)" == "cm.gemini" ];then + #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 40G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" + export cuda_eval_cmd="$train_cmd" +fi diff --git a/egs/lre22/fixed.v1.8k/conf/clsp.conf b/egs/lre22/fixed.v1.8k/conf/clsp.conf new file mode 100644 index 00000000..4ed38246 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/lre22/fixed.v1.8k/conf/coe_gpu_bigmem.conf b/egs/lre22/fixed.v1.8k/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/lre22/fixed.v1.8k/conf/coe_gpu_long.conf b/egs/lre22/fixed.v1.8k/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/lre22/fixed.v1.8k/conf/coe_gpu_rtx.conf b/egs/lre22/fixed.v1.8k/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/lre22/fixed.v1.8k/conf/coe_gpu_short.conf b/egs/lre22/fixed.v1.8k/conf/coe_gpu_short.conf new file mode 100644 index 
00000000..81de5cb7 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/lre22/fixed.v1.8k/conf/coe_gpu_v100.conf b/egs/lre22/fixed.v1.8k/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/lre22/fixed.v1.8k/conf/fbank64_specaug1_stmn_8k.yaml b/egs/lre22/fixed.v1.8k/conf/fbank64_specaug1_stmn_8k.yaml new file mode 100644 index 00000000..fce3804a --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/fbank64_specaug1_stmn_8k.yaml @@ -0,0 +1,24 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 8000 + frame_length: 25 + low_freq: 20 + high_freq: 3700 + num_filters: 64 + snip_edges: false + use_energy: false +spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. 
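+  # (added note) a single frequency mask of up to 8 of the 64 mel bins, plus the
+  # single time mask above of up to 5 frames, both filled with the feature mean
+  # (mask_method below); i.e. standard SpecAugment on the log-filterbanks.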
+ freq_mask_min_width: 0 + freq_mask_max_width: 8 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean +mvn: + context: 150 + norm_var: false diff --git a/egs/lre22/fixed.v1.8k/conf/fbank64_stmn_8k.yaml b/egs/lre22/fixed.v1.8k/conf/fbank64_stmn_8k.yaml new file mode 100644 index 00000000..dfd0d3e5 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/fbank64_stmn_8k.yaml @@ -0,0 +1,12 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 8000 + frame_length: 25 + low_freq: 20 + high_freq: 3700 + num_filters: 64 + snip_edges: false + use_energy: false +mvn: + context: 150 + norm_var: false diff --git a/egs/lre22/fixed.v1.8k/conf/reverb_noise_aug.yaml b/egs/lre22/fixed.v1.8k/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/lre22/fixed.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml b/egs/lre22/fixed.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..22620f03 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml @@ -0,0 +1,101 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_specaug1_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + norm_before: false + dropout_rate: 0.2 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.0 + intertop_margin: 0.0 + margin_warmup_epochs: 3.0 + dropout_rate: 0.2 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + warmup_steps: 15000 + min_lr: 1.0e-06 + update_lr_on_opt_step: true + use_amp: true + swa_start: 12 + swa_lr: 1e-5 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 15 + 
#eff_batch_size: 512 + eff_batch_size: 256 diff --git a/egs/lre22/fixed.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml b/egs/lre22/fixed.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..25e7b213 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + weight_exponent: 0.3 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + weight_exponent: 0.3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_specaug1_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + margin_warmup_epochs: 0 + intertop_margin: 0. +trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 8000 + hold_steps: 10000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 10000 + use_amp: true + swa_start: 14 + swa_lr: 1e-5 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 18 + eff_batch_size: 256 diff --git a/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml b/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..d900ec9b --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml @@ -0,0 +1,78 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_specaug1_stmn_8k.yaml +model: + resnet_type: fwseres2net50 + in_channels: 1 + in_feats: 64 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 3.25 + res2net_scale: 8 + se_r: 4 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.0 + intertop_margin: 0.0 + margin_warmup_epochs: 3.0 + dropout_rate: 0.05 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + warmup_steps: 15000 + min_lr: 1.0e-06 + update_lr_on_opt_step: true + use_amp: true + swa_start: 12 + swa_lr: 1e-5 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 8 + eff_batch_size: 256 diff --git 
a/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml b/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml new file mode 100644 index 00000000..28b1f641 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml @@ -0,0 +1,78 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_specaug1_stmn_8k.yaml +model: + resnet_type: fwseres2net50 + in_channels: 1 + in_feats: 64 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 3.25 + res2net_scale: 8 + se_r: 4 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.0 + intertop_margin: 0.0 + margin_warmup_epochs: 3.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + warmup_steps: 15000 + min_lr: 1.0e-06 + update_lr_on_opt_step: true + use_amp: true + swa_start: 12 + swa_lr: 1e-5 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 diff --git a/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage2_v1.0.yaml b/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage2_v1.0.yaml new file mode 100644 index 00000000..2e6d3a6c --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage2_v1.0.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + weight_exponent: 0.3 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + weight_exponent: 0.3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_specaug1_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + margin_warmup_epochs: 0 + intertop_margin: 0. 
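The `loss_type: subcenter-arc-softmax` head configured above (cf. `cos_scale`, `margin`, `num_subcenters: 2`) combines ArcFace's additive angular margin with K learnable subcenters per class; stage 2 here runs with `margin: 0.`, so only the subcenter max-pooling is active. A hedged sketch of the forward computation, assuming weight rows grouped per class, not hyperion's implementation:

```python
import torch
import torch.nn.functional as F


def subcenter_arc_logits(x, W, labels, s=30.0, m=0.0, k=2):
    """Illustrative forward pass: cosine to k subcenters per class, max-pooled,
    with an additive angular margin m on the target class, scaled by s.
    W: (num_classes * k, dim), subcenter rows grouped per class (assumed)."""
    cos = F.linear(F.normalize(x), F.normalize(W))  # (batch, num_classes * k)
    cos = cos.view(len(x), -1, k).amax(dim=-1)  # best subcenter per class
    theta = torch.acos(cos.clamp(-1 + 1e-7, 1 - 1e-7))
    theta[torch.arange(len(x)), labels] += m  # margin on the true class only
    return s * torch.cos(theta)


logits = subcenter_arc_logits(
    torch.randn(8, 192), torch.randn(14 * 2, 192), torch.randint(0, 14, (8,))
)
print(logits.shape)  # (8, 14)
```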
+trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 8000 + hold_steps: 10000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 10000 + use_amp: true + swa_start: 14 + swa_lr: 1e-5 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 7 + eff_batch_size: 256 diff --git a/egs/lre22/fixed.v1.8k/conf/vad_8k.yaml b/egs/lre22/fixed.v1.8k/conf/vad_8k.yaml new file mode 100644 index 00000000..1cfe34b0 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/conf/vad_8k.yaml @@ -0,0 +1,9 @@ +sample_frequency: 8000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: -4.89 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 +wav_scale: 1 diff --git a/egs/lre22/fixed.v1.8k/datapath.sh b/egs/lre22/fixed.v1.8k/datapath.sh new file mode 100644 index 00000000..d6a81520 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/datapath.sh @@ -0,0 +1,46 @@ +# Copyright +# 2021 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + +#paths to databases + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + ldc_root3=/export/fs02/corpora3/LDC + ldc_root5=/export/corpora5/LDC + ldc_root=/export/corpora6/LDC + #voxceleb1_root=/export/corpora5/VoxCeleb1_v2 + #voxceleb2_root=/export/corpora5/VoxCeleb2 + sre16_eval_root=$ldc_root5/LDC2018E30/data/eval/R149_0_1 + #janus_root=$ldc_root/LDC2019E55/Janus_Multimedia_Dataset + #sre_superset_root=$ldc_root/LDC2021E08 + #sre21_dev_root=$ldc_root/LDC2021E09 + #sre21_eval_root=$ldc_root/LDC2021E10 + lre17_train_root=$ldc_root/LDC2022E16_2017_NIST_Language_Recognition_Evaluation_Training_and_Development_Sets + lre17_eval_root=$ldc_root/LDC2022E17_2017_NIST_Language_Recognition_Evaluation_Test_Set + lre22_dev_root=$ldc_root/LDC2022E14_2022_NIST_Language_Recognition_Evaluation_Development_Data + lre22_eval_root=/export/corpora6/lre22_test_data_v2 + voxlingua_root=/export/corpora6/voxlingua107 + musan_root=/export/corpora5/JHU/musan +elif [ "$(hostname --domain)" == "cm.gemini" ];then + ldc_root=/export/common/data/corpora/LDC + sre_root=/export/common/data/corpora/NIST/SRE + my_root=/exp/jvillalba/corpora + #voxceleb1_root=/exp/jvillalba/corpora/voxceleb1 + #voxceleb2_root=/expscratch/dgromero/corpora/vox2 + sre16_dev_root=/exp/jvillalba/corpora/LDC2019S20/data/dev/R148_0_0 + sre16_eval_root=/exp/jvillalba/corpora/LDC2019S20/data/eval/R149_0_1 + #janus_root=$sre_root/SRE19/LDC2019E55_Janus_Multimedia_Dataset + sre_superset_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E08 + sre21_dev_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E09 + sre21_eval_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E10 + lre17_train_root=$my_root/LDC2022E16_2017_NIST_Language_Recognition_Evaluation_Training_and_Development_Sets + lre17_eval_root=$my_root/LDC2022E17_2017_NIST_Language_Recognition_Evaluation_Test_Set + lre22_dev_root=$my_root/LDC2022E14_2022_NIST_Language_Recognition_Evaluation_Development_Data + lre22_eval_root=$my_root/lre22_test_data_v2 + voxlingua_root=$my_root/voxlingua107 + musan_root=/expscratch/dgromero/corpora/musan +else + echo "Put your database paths here" + exit 1 +fi diff --git a/egs/lre22/fixed.v1.8k/default_config.sh b/egs/lre22/fixed.v1.8k/default_config.sh new file mode 120000 index 00000000..506bebe6 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/default_config.sh @@ -0,0 +1 @@ +global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh \ No newline at end of file diff --git 
a/egs/lre22/fixed.v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh b/egs/lre22/fixed.v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh new file mode 100644 index 00000000..b9cd45a5 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh @@ -0,0 +1,24 @@ +# acoustic features +feat_config=conf/fbank64_stmn_8k.yaml +feat_type=fbank64_stmn + +#vad +vad_config=conf/vad_8k.yaml + +# x-vector training +nnet_data=voxlingua107_lre17_noary + +# x-vector cfg +nnet_type=resnet1d +nnet_stages=2 +nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml + +nnet_name=${feat_type}_ecapatdnn2048x4_v1.0 +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/swa_model_ep0016.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0007.pth diff --git a/egs/lre22/fixed.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh b/egs/lre22/fixed.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh new file mode 100644 index 00000000..afac4198 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh @@ -0,0 +1,28 @@ +# acoustic features +feat_config=conf/fbank64_stmn_8k.yaml +feat_type=fbank64_stmn + +#vad +vad_config=conf/vad_8k.yaml + +# x-vector training +nnet_data=voxlingua107_lre17_noary + +# x-vector cfg + +nnet_type=resnet +nnet_stages=2 +nnet_s1_base_cfg=conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml + +nnet_name=${feat_type}_fwseres2net50s8_v1.0 +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0008.pth + + +nnet_s2_base_cfg=conf/train_fwseres2net50s8_xvec_stage2_v1.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0007.pth + + diff --git a/egs/lre22/fixed.v1.8k/hyp_utils b/egs/lre22/fixed.v1.8k/hyp_utils new file mode 120000 index 00000000..f6d1eb7a --- /dev/null +++ b/egs/lre22/fixed.v1.8k/hyp_utils @@ -0,0 +1 @@ +../../../hyp_utils \ No newline at end of file diff --git a/egs/lre22/fixed.v1.8k/local/apply_tel_codecs_to_kaldi_datadir.py b/egs/lre22/fixed.v1.8k/local/apply_tel_codecs_to_kaldi_datadir.py new file mode 100755 index 00000000..c0e2b9d3 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/apply_tel_codecs_to_kaldi_datadir.py @@ -0,0 +1,215 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import shutil +from tqdm import tqdm +import time +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger +from hyperion.utils import RecordingSet, SegmentSet + +valid_codecs = ["gsm", "g711mu", "g711a", "g722", "g723_1", "g726", "opus"] + +sox_options = {"gsm": "-r 8000 -e gsm-full-rate -t gsm"} +ffmpeg_options = { + "g711a": "-ar 8000 -acodec pcm_alaw -f wav", + "g711mu": "-ar 8000 -acodec pcm_mulaw -f wav", + "g722": "-ar 8000 -acodec g722 -f wav", + "g723_1": "-ar 8000 -acodec g723_1 -b:a 6300 -f wav", + "g726": "-ar 8000 -acodec g726 -f wav", + "opus": "-ar 8000 -acodec libopus -application voip -f opus", +} + + +def apply_sox_codec(storage_path, codec): + + option = sox_options[codec] + storage_path = storage_path.rstrip() + if 
storage_path[-1] == "|": + storage_path = f"{storage_path} sox -t wav - {option} - |" + else: + storage_path = f"sox {storage_path} {option} - |" + + storage_path = f"{storage_path} sox {option} - -t wav -e signed-integer -b 16 - |" + return storage_path + + +def apply_ffmpeg_codec(storage_path, codec, g726_css, opus_brs, rng): + + option = ffmpeg_options[codec] + if codec == "g726": + code_size = rng.choice(g726_css) + option = f"{option} -code_size {code_size}" + elif codec == "opus": + br = rng.choice(opus_brs) + option = f"{option} -b:a {br}" + + storage_path = storage_path.rstrip() + if storage_path[-1] == "|": + storage_path = f"{storage_path} ffmpeg -i - {option} - |" + else: + storage_path = f"ffmpeg -i {storage_path} {option} - |" + + storage_path = f"{storage_path} ffmpeg -i - -ar 8000 -c:a pcm_s16le -f wav - |" + return storage_path + + +def apply_codec(storage_path, codec, g726_css, opus_brs, rng): + + if codec in ["gsm"]: + storage_path = apply_sox_codec(storage_path, codec) + else: + storage_path = apply_ffmpeg_codec(storage_path, codec, g726_css, + opus_brs, rng) + + return storage_path + + +def apply_codecs( + input_dir, + output_dir, + codecs, + keep_orig, + g726_min_code_size, + opus_brs, + seed, + verbose, +): + config_logger(verbose) + logging.info("Applying codecs %s -> %s", input_dir, output_dir) + rng = np.random.RandomState(seed=seed) + input_dir = Path(input_dir) + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + + g726_css = list(range(g726_min_code_size, 6)) + logging.info("making wav.scp") + recs = RecordingSet.load(input_dir / "wav.scp") + recs["orig_id"] = recs["id"] + if keep_orig: + recs_orig = recs.clone() + + codec_idx = 0 + ids = [] + s_paths = [] + for i in tqdm(range(len(recs))): + t1 = time.time() + row = recs.iloc[i] + t2 = time.time() + codec_i = codecs[codec_idx % len(codecs)] + codec_idx += 1 + t3 = time.time() + # recs.loc[row.id, "id"] = f"{row.id}-{codec_i}" + ids.append(f"{row.id}-{codec_i}") + t4 = time.time() + sp = apply_codec(row["storage_path"], codec_i, g726_css, opus_brs, rng) + + t5 = time.time() + # recs.loc[row.id, "storage_path"] = sp + s_paths.append(sp) + t6 = time.time() + + recs["id"] = ids + recs["storage_path"] = s_paths + + mapping = recs[["orig_id", "id"]] + mapping.set_index("orig_id", inplace=True, drop=False) + if keep_orig: + recs = RecordingSet.merge(recs_orig, recs) + recs.sort() + + logging.info("making utt2orig_utt") + recs[["id", "orig_id"]].to_csv(output_dir / "utt2orig_utt", + sep=" ", + header=False, + index=False) + + recs.save(output_dir / "wav.scp") + u2x_files = [] + for pattern in ["utt2*", "vad.scp", "feats.scp"]: + files_p = glob.glob(str(input_dir / pattern)) + u2x_files.extend(files_p) + + for f in u2x_files: + logging.info("making %s", Path(f).name) + u2x = SegmentSet.load(f) + if keep_orig: + u2x_orig = u2x.clone() + + u2x["id"] = mapping.loc[u2x["id"], "id"] + if keep_orig: + u2x = SegmentSet.merge(u2x_orig, u2x) + u2x.sort() + + output_file = output_dir / Path(f).name + u2x.save(output_file) + + spk_files = glob.glob(str(input_dir / "spk2gender")) + for f in spk_files: + logging.info("making %s", Path(f).name) + output_file = output_dir / Path(f).name + shutil.copy2(f, output_file) + + logging.info("making utt2spk") + u2s = SegmentSet.load(output_dir / "utt2spk") + spks = u2s["class_id"].unique() + df_spk = u2s.df.sort_values(by="class_id") + df_spk.set_index("class_id", inplace=True) + + with open(output_dir / "spk2utt", "w") as f: + for spk in spks: + seg_ids = 
df_spk.loc[spk, "id"]
+            # .loc returns a scalar for a single segment and a Series
+            # for several; join only in the multi-segment case
+            if not isinstance(seg_ids, str):
+                seg_ids = " ".join(seg_ids)
+            f.write(f"{spk} {seg_ids}\n")
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(
+        description="Apply telephone codecs to kaldi data dir")
+    parser.add_argument("--input-dir",
+                        required=True,
+                        help="Path to the original kaldi dataset")
+    parser.add_argument("--output-dir",
+                        required=True,
+                        help="Codec augmented directory")
+    parser.add_argument(
+        "--codecs",
+        default=valid_codecs,
+        nargs="+",
+        choices=valid_codecs,
+        help="List of codecs to apply",
+    )
+    parser.add_argument(
+        "--g726-min-code-size",
+        default=2,
+        type=int,
+        choices=[2, 3, 4, 5],
+        help="minimum code-size for g726",
+    )
+    parser.add_argument(
+        "--opus-brs",
+        default=[4500, 5500, 7700, 9500, 12500, 16000, 32000],
+        nargs="+",
+        help="opus codec bit rates",
+    )
+    parser.add_argument("--keep-orig", default=False, action=ActionYesNo)
+    parser.add_argument("--seed", default=1234, type=int, help="random seed")
+    parser.add_argument("-v",
+                        "--verbose",
+                        dest="verbose",
+                        default=1,
+                        choices=[0, 1, 2, 3],
+                        type=int)
+
+    args = parser.parse_args()
+    apply_codecs(**namespace_to_dict(args))
diff --git a/egs/lre22/fixed.v1.8k/local/download_focal.sh b/egs/lre22/fixed.v1.8k/local/download_focal.sh
new file mode 100755
index 00000000..13b86e57
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/download_focal.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright 2022 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0
+#
+# Downloads Niko Brummer's FoCal Multiclass
+
+set -e
+tool=FoCal_MultiClass_V1
+s_dir=focal_multiclass
+
+# shareable link:
+# https://drive.google.com/file/d/13rPUqS68NdEF5NB0vsL7bDEju5dhmmDZ/view?usp=sharing
+
+
+wget --no-check-certificate "https://drive.google.com/uc?export=download&id=13rPUqS68NdEF5NB0vsL7bDEju5dhmmDZ" -O $tool.zip
+unzip $tool.zip -d $s_dir
+
+if [ ! -f $s_dir/v1.0/readme.txt ];then
+    echo "the focal tool wasn't downloaded correctly, download manually"
+    exit 1
+fi
+
+rm -f $tool.zip
+
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/download_lre22_scorer.sh b/egs/lre22/fixed.v1.8k/local/download_lre22_scorer.sh
new file mode 100755
index 00000000..344a6a34
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/download_lre22_scorer.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Copyright 2022 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0
+#
+# Downloads NIST scoring tools for LRE22
+
+set -e
+tool=lre-scorer
+s_dir=lre-scorer
+
+# shareable link:
+# https://drive.google.com/file/d/13pvUhFPGLgqId5yB8i25X__LFXKIU-ju/view?usp=sharing
+
+wget --no-check-certificate "https://drive.google.com/uc?export=download&id=13pvUhFPGLgqId5yB8i25X__LFXKIU-ju" -O $tool.tar.gz
+tar xzvf $tool.tar.gz
+
+if [ ! -f $s_dir/scorerLRE22.py ];then
+    echo "the scoring tool wasn't downloaded correctly, download manually"
+    exit 1
+fi
+
+rm -f $tool.tar.gz
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/eval_calibration_lre22.sh b/egs/lre22/fixed.v1.8k/local/eval_calibration_lre22.sh
new file mode 100755
index 00000000..2c28e70e
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/eval_calibration_lre22.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. path.sh
+
+if [ $# -ne 2 ];then
+    echo "Usage: $0 <score-dir> <model-file>"
+    exit 1
+fi
+
+score_dir=$1
+model_file=$2
+nocal_dir=$score_dir/nocal
+cal_dir=$score_dir/cal_v1
+
+dev_file=$nocal_dir/lre22_dev_scores.tsv
+dev_cal_file=$cal_dir/lre22_dev_scores.tsv
+eval_file=$nocal_dir/lre22_eval_scores.tsv
+eval_cal_file=$cal_dir/lre22_eval_scores.tsv
+mkdir -p $cal_dir
+
+
+if [ "$(hostname --domain)" == "cm.gemini" ];then
+    module load matlab
+fi
+
+if [ -f $dev_file ];then
+    echo "
+addpath('./steps_be');
+addpath(genpath('$PWD/focal_multiclass/v1.0'));
+eval_fusion({'$dev_file'}, '$dev_cal_file', '$model_file');
+" | matlab -nodisplay -nosplash > $cal_dir/eval_lre22_dev.log
+fi
+
+if [ -f $eval_file ];then
+    echo "
+addpath('./steps_be');
+addpath(genpath('$PWD/focal_multiclass/v1.0'));
+eval_fusion({'$eval_file'}, '$eval_cal_file', '$model_file');
+" | matlab -nodisplay -nosplash > $cal_dir/eval_lre22_eval.log
+fi
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/eval_fusion_lre22.sh b/egs/lre22/fixed.v1.8k/local/eval_fusion_lre22.sh
new file mode 100755
index 00000000..284cac7e
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/eval_fusion_lre22.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. path.sh
+
+if [ $# -ne 3 ];then
+    echo "Usage: $0 <score-dirs> <model-file> <output-dir>"
+    exit 1
+fi
+
+score_dirs="$1"
+model_file=$2
+output_dir=$3
+mkdir -p $output_dir
+
+dev_files=""
+eval_files=""
+for d in $score_dirs
+do
+    dev_files="$dev_files,'$d/lre22_dev_scores.tsv'"
+    eval_files="$eval_files,'$d/lre22_eval_scores.tsv'"
+done
+dev_files=${dev_files#,}
+eval_files=${eval_files#,}
+dev_file_1=$(echo $dev_files | awk -F "'" '{ print $2 }')
+eval_file_1=$(echo $eval_files | awk -F "'" '{ print $2 }')
+dev_fus_file=$output_dir/lre22_dev_scores.tsv
+eval_fus_file=$output_dir/lre22_eval_scores.tsv
+
+if [ "$(hostname --domain)" == "cm.gemini" ];then
+    module load matlab
+fi
+
+if [ -f $dev_file_1 ];then
+    echo "
+addpath('./steps_be');
+addpath(genpath('$PWD/focal_multiclass/v1.0'));
+eval_fusion({$dev_files}, '$dev_fus_file', '$model_file');
+" | matlab -nodisplay -nosplash > $output_dir/eval_lre22_dev.log
+fi
+
+if [ -f $eval_file_1 ];then
+    echo "
+addpath('./steps_be');
+addpath(genpath('$PWD/focal_multiclass/v1.0'));
+eval_fusion({$eval_files}, '$eval_fus_file', '$model_file');
+" | matlab -nodisplay -nosplash > $output_dir/eval_lre22_eval.log
+fi
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/make_musan.py b/egs/lre22/fixed.v1.8k/local/make_musan.py
new file mode 100755
index 00000000..b0ae6846
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/make_musan.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+# Copyright 2015 David Snyder
+# Copyright 2019 Johns Hopkins University (Jesus Villalba) (added fs support)
+# Apache 2.0.
+#
+# This file is meant to be invoked by make_musan.sh.
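+#
+# Expected invocation, matching main() below and the call in
+# make_musan.sh:
+#   make_musan.py <musan-root> <fs 8|16> <out-data-dir> <use-vocals Y|N>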
+
+import os, sys
+
+
+def process_music_annotations(path):
+    utt2spk = {}
+    utt2vocals = {}
+    lines = open(path, "r").readlines()
+    for line in lines:
+        utt, genres, vocals, musician = line.rstrip().split()[:4]
+        # For this application, the musician ID isn't important
+        utt2spk[utt] = utt
+        utt2vocals[utt] = vocals == "Y"
+    return utt2spk, utt2vocals
+
+
+def prepare_music(root_dir, fs, use_vocals):
+    utt2vocals = {}
+    utt2spk = {}
+    utt2wav = {}
+    num_good_files = 0
+    num_bad_files = 0
+    music_dir = os.path.join(root_dir, "music")
+    for root, dirs, files in os.walk(music_dir):
+        for file in files:
+            file_path = os.path.join(root, file)
+            if file.endswith(".wav"):
+                utt = str(file).replace(".wav", "")
+                utt2wav[utt] = file_path
+            elif str(file) == "ANNOTATIONS":
+                utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
+                utt2spk.update(utt2spk_part)
+                utt2vocals.update(utt2vocals_part)
+    utt2spk_str = ""
+    utt2wav_str = ""
+    for utt in utt2vocals:
+        if utt in utt2wav:
+            if use_vocals or not utt2vocals[utt]:
+                utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
+                if fs == 8:
+                    utt2wav_str = (
+                        utt2wav_str
+                        + utt
+                        + " sox -t wav "
+                        + utt2wav[utt]
+                        + " -r 8k -t wav - |\n"
+                    )
+                else:
+                    utt2wav_str = (
+                        utt2wav_str
+                        + utt
+                        + " sox -t wav "
+                        + utt2wav[utt]
+                        + " -r 16k -t wav - |\n"
+                    )
+                num_good_files += 1
+        else:
+            print("Missing file", utt)
+            num_bad_files += 1
+    print(
+        "In music directory, processed",
+        num_good_files,
+        "files;",
+        num_bad_files,
+        "had missing wav data",
+    )
+    return utt2spk_str, utt2wav_str
+
+
+def prepare_speech(root_dir, fs):
+    utt2spk = {}
+    utt2wav = {}
+    num_good_files = 0
+    num_bad_files = 0
+    speech_dir = os.path.join(root_dir, "speech")
+    for root, dirs, files in os.walk(speech_dir):
+        for file in files:
+            file_path = os.path.join(root, file)
+            if file.endswith(".wav"):
+                utt = str(file).replace(".wav", "")
+                utt2wav[utt] = file_path
+                utt2spk[utt] = utt
+    utt2spk_str = ""
+    utt2wav_str = ""
+    for utt in utt2spk:
+        if utt in utt2wav:
+            utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
+            if fs == 8:
+                utt2wav_str = (
+                    utt2wav_str
+                    + utt
+                    + " sox -t wav "
+                    + utt2wav[utt]
+                    + " -r 8k -t wav - |\n"
+                )
+            else:
+                utt2wav_str = (
+                    utt2wav_str
+                    + utt
+                    + " sox -t wav "
+                    + utt2wav[utt]
+                    + " -r 16k -t wav - |\n"
+                )
+            num_good_files += 1
+        else:
+            print("Missing file", utt)
+            num_bad_files += 1
+    print(
+        "In speech directory, processed",
+        num_good_files,
+        "files;",
+        num_bad_files,
+        "had missing wav data",
+    )
+    return utt2spk_str, utt2wav_str
+
+
+def prepare_noise(root_dir, fs):
+    utt2spk = {}
+    utt2wav = {}
+    num_good_files = 0
+    num_bad_files = 0
+    noise_dir = os.path.join(root_dir, "noise")
+    for root, dirs, files in os.walk(noise_dir):
+        for file in files:
+            file_path = os.path.join(root, file)
+            if file.endswith(".wav"):
+                utt = str(file).replace(".wav", "")
+                utt2wav[utt] = file_path
+                utt2spk[utt] = utt
+    utt2spk_str = ""
+    utt2wav_str = ""
+    for utt in utt2spk:
+        if utt in utt2wav:
+            utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
+            if fs == 8:
+                utt2wav_str = (
+                    utt2wav_str
+                    + utt
+                    + " sox -t wav "
+                    + utt2wav[utt]
+                    + " -r 8k -t wav - |\n"
+                )
+            else:
+                utt2wav_str = (
+                    utt2wav_str
+                    + utt
+                    + " sox -t wav "
+                    + utt2wav[utt]
+                    + " -r 16k -t wav - |\n"
+                )
+            num_good_files += 1
+        else:
+            print("Missing file", utt)
+            num_bad_files += 1
+    print(
+        "In noise directory, processed",
+        num_good_files,
+        "files;",
+        num_bad_files,
+        "had missing wav data",
+    )
+    return utt2spk_str, utt2wav_str
+
+
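+# Note: prepare_music, prepare_speech and prepare_noise above repeat
+# the same sox resampling recipe; a shared helper would express it
+# once. Hypothetical sketch (not used by this script):
+#
+#   def wav_entry(utt, path, fs):
+#       rate = "8k" if fs == 8 else "16k"
+#       return f"{utt} sox -t wav {path} -r {rate} -t wav - |\n"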
+def main(): + in_dir = sys.argv[1] + fs = int(sys.argv[2]) + out_dir = sys.argv[3] + use_vocals = sys.argv[4] == "Y" + utt2spk_music, utt2wav_music = prepare_music(in_dir, fs, use_vocals) + utt2spk_speech, utt2wav_speech = prepare_speech(in_dir, fs) + utt2spk_noise, utt2wav_noise = prepare_noise(in_dir, fs) + utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise + utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise + wav_fi = open(os.path.join(out_dir, "wav.scp"), "w") + wav_fi.write(utt2wav) + utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), "w") + utt2spk_fi.write(utt2spk) + + +if __name__ == "__main__": + main() diff --git a/egs/lre22/fixed.v1.8k/local/make_musan.sh b/egs/lre22/fixed.v1.8k/local/make_musan.sh new file mode 100755 index 00000000..4a6d30f9 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/make_musan.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright 2015 David Snyder +# Copyright 2019 Johns Hopkins University (Jesus Villalba) (added fs support) +# Apache 2.0. +# +# This script, called by ../run.sh, creates the MUSAN +# data directory. The required dataset is freely available at +# http://www.openslr.org/17/ + +set -e +use_vocals='Y' + +. parse_options.sh || exit 1; + +if [ $# -ne 3 ];then + echo "Usage: $0 [options] "; + echo "e.g.: $0 /export/corpora/JHU/musan 8 data" + exit 1; +fi + +in_dir=$1 +fs=$2 +data_dir=$3 + +mkdir -p $data_dir/musan.tmp + +echo "Preparing ${data_dir}/musan..." +mkdir -p ${data_dir}/musan +local/make_musan.py ${in_dir} $fs ${data_dir}/musan ${use_vocals} + +utils/fix_data_dir.sh ${data_dir}/musan + +grep "music" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_music +grep "speech" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_speech +grep "noise" ${data_dir}/musan/utt2spk > $data_dir/musan.tmp/utt2spk_noise +utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_music \ + ${data_dir}/musan ${data_dir}/musan_music +utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_speech \ + ${data_dir}/musan ${data_dir}/musan_speech +utils/subset_data_dir.sh --utt-list $data_dir/musan.tmp/utt2spk_noise \ + ${data_dir}/musan ${data_dir}/musan_noise + +utils/fix_data_dir.sh ${data_dir}/musan_music +utils/fix_data_dir.sh ${data_dir}/musan_speech +utils/fix_data_dir.sh ${data_dir}/musan_noise + +rm -rf $data_dir/musan.tmp + diff --git a/egs/lre22/fixed.v1.8k/local/make_rirs_data.sh b/egs/lre22/fixed.v1.8k/local/make_rirs_data.sh new file mode 100755 index 00000000..c6652eda --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/make_rirs_data.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# +# Copyright 2020 Johns Hopkins University (Jesus Villalba) +# +# Apache 2.0. 
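+#
+# Reads $rir_dir/rir_list and writes wav.scp (resampling the RIRs to
+# 8 kHz with sox when fs is 8) plus a rir2room map into the data dir.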
+set -e
+
+if [ $# != 3 ]; then
+    echo "Usage: $0 <rir-dir> <fs 8/16> <data-dir>"
+    echo "e.g.: $0 RIRS_NOISES/simulated_rirs/smallroom 16 data/rirs_smallroom"
+    exit 1
+fi
+
+rir_dir=$1
+fs=$2
+data_dir=$3
+
+mkdir -p $data_dir
+
+rir_list=$rir_dir/rir_list
+if [ "$fs" -eq 16 ];then
+    awk '{ key=$5; sub(/.*\//,"",key); print key,$5 }' $rir_list > $data_dir/wav.scp
+else
+    awk '{
+key=$5; sub(/.*\//,"",key);
+print key,"sox "$5" -r 8000 -t wav -b 16 -e signed-integer - |" }' \
+        $rir_list > $data_dir/wav.scp
+fi
+awk '{ key=$5; sub(/.*\//,"",key); print key,$4 }' $rir_list > $data_dir/rir2room
+
diff --git a/egs/lre22/fixed.v1.8k/local/make_sre16_train_dev.sh b/egs/lre22/fixed.v1.8k/local/make_sre16_train_dev.sh
new file mode 100755
index 00000000..f861a8f4
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/make_sre16_train_dev.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Copyright 2020 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0
+
+if [ $# != 3 ]; then
+    echo "Usage: $0 <input-path> <fs 8/16> <output-path>"
+    exit 1
+fi
+input_path=$1
+fs=$2
+output_path=$3
+
+docs=$input_path/docs
+meta=$input_path/metadata
+call2lang=$meta/calls.tsv
+call2spk=$meta/call_sides.tsv
+spk2gender=$meta/subjects.tsv
+segm_file=$docs/sre16_dev_segment_key.tsv
+
+tel_up=""
+if [ $fs -eq 16 ];then
+    tel_up=" sox -t wav - -t wav -r 16k - |"
+fi
+
+#Dev: Mandarin and Cebuano
+for lang in cmn ceb
+do
+    output_dir=$output_path/sre16_train_dev_$lang
+    mkdir -p $output_dir
+    awk -v c2l=$call2lang -v c2s=$call2spk -v s2g=$spk2gender -v l=$lang -F "\t" 'BEGIN{
+while(getline < c2l)
+{
+    if($2 == l){ calls[$1]=1 }
+}
+while(getline < c2s) { spk[$1]=$3 }
+while(getline < s2g) { gender[$1]=tolower($2) }
+}
+{ if($2 in calls) { s=spk[$2]; print $1, s, gender[s] }}' $segm_file > $output_dir/table
+
+    awk '{ print $2"-"$1,$2}' $output_dir/table | sort -k1,1 > $output_dir/utt2spk
+    utils/utt2spk_to_spk2utt.pl $output_dir/utt2spk > $output_dir/spk2utt
+    awk '{ print $2,$3}' $output_dir/table | sort -k1,1 -u > $output_dir/spk2gender
+    awk -v lang=$lang 'BEGIN{if(lang=="cmn"){lang_ldc="zho-cmn"} else { lang_ldc="ceb-ceb" }} { print $1,lang_ldc}' $output_dir/utt2spk > $output_dir/utt2lang
+
+    find -L $input_path -name "*.sph" > $output_dir/wav.scp.tmp
+
+    awk -v fwav=$output_dir/wav.scp.tmp 'BEGIN{
+while(getline < fwav)
+{
+    bn=$1;
+    sub(/.*\//,"",bn);
+    sub(/\.sph$/,"",bn);
+    wav[bn]=$1;
+}
+}
+{ print $2"-"$1,"sph2pipe -f wav -p -c 1 "wav[$1]" |'"$tel_up"'"}' $output_dir/table | \
+        sort -k1,1 > $output_dir/wav.scp
+
+    rm -f $output_dir/wav.scp.tmp
+    utils/fix_data_dir.sh $output_dir
+    utils/validate_data_dir.sh --no-text --no-feats $output_dir
+done
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/make_sre16_train_eval.sh b/egs/lre22/fixed.v1.8k/local/make_sre16_train_eval.sh
new file mode 100755
index 00000000..3589a60e
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/make_sre16_train_eval.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# Copyright 2020 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0
+
+if [ $# != 3 ]; then
+    echo "Usage: $0 <input-path> <fs 8/16> <output-path>"
+    exit 1
+fi
+input_path=$1
+fs=$2
+output_path=$3
+
+docs=$input_path/docs
+meta=$input_path/metadata
+call2lang=$meta/calls.tsv
+call2spk=$meta/call_sides.tsv
+spk2gender=$meta/subjects.tsv
+segm_file=$docs/sre16_eval_segment_key.tsv
+
+tel_up=""
+if [ $fs -eq 16 ];then
+    tel_up=" sox -t wav - -t wav -r 16k - |"
+fi
+
+#Eval: Cantonese and Tagalog
+for lang in yue tgl
+do
+    output_dir=$output_path/sre16_train_eval_$lang
+    mkdir -p $output_dir
+    awk -v c2l=$call2lang -v c2s=$call2spk -v s2g=$spk2gender -v l=$lang -F "\t" 'BEGIN{
+while(getline < c2l)
+{
+    if($2 == l){ calls[$1]=1 }
+}
+while(getline < c2s) { spk[$1]=$3 }
+while(getline < s2g) { gender[$1]=tolower($2) }
+}
+{ if($2 in calls) { s=spk[$2]; print $1, s, gender[s] }}' $segm_file > $output_dir/table
+
+    awk '{ print $2"-"$1,$2}' $output_dir/table | sort -k1,1 > $output_dir/utt2spk
+    utils/utt2spk_to_spk2utt.pl $output_dir/utt2spk > $output_dir/spk2utt
+    awk '{ print $2,$3}' $output_dir/table | sort -k1,1 -u > $output_dir/spk2gender
+    awk -v lang=$lang 'BEGIN{if(lang=="yue"){lang_ldc="zho-yue"} else { lang_ldc="tl-tl" }} { print $1,lang_ldc}' $output_dir/utt2spk > $output_dir/utt2lang
+
+
+    find -L $input_path -name "*.sph" > $output_dir/wav.scp.tmp
+
+    awk -v fwav=$output_dir/wav.scp.tmp 'BEGIN{
+while(getline < fwav)
+{
+    bn=$1;
+    sub(/.*\//,"",bn);
+    sub(/\.sph$/,"",bn);
+    wav[bn]=$1;
+}
+}
+{ print $2"-"$1,"sph2pipe -f wav -p -c 1 "wav[$1]" |'"$tel_up"'"}' $output_dir/table | \
+        sort -k1,1 > $output_dir/wav.scp
+
+    rm -f $output_dir/wav.scp.tmp
+    utils/fix_data_dir.sh $output_dir
+    utils/validate_data_dir.sh --no-text --no-feats $output_dir
+done
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/make_sre18_dev_unlabeled.sh b/egs/lre22/fixed.v1.8k/local/make_sre18_dev_unlabeled.sh
new file mode 100755
index 00000000..5d49bba7
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/make_sre18_dev_unlabeled.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Copyright 2018 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0
+
+if [ $# != 3 ]; then
+    echo "Usage: $0 <input-path> <fs 8/16> <output-path>"
+    exit 1
+fi
+input_path=$1
+fs=$2
+output_path=$3
+
+docs=$input_path/docs
+#enroll_file=$docs/sre18_dev_enrollment.tsv
+#enroll_diar_file=$docs/sre18_dev_enrollment_diarization.tsv
+segm_file=$docs/sre18_dev_segment_key.tsv
+#trial_file=$docs/sre18_dev_trials.tsv
+#key_file=$docs/sre18_dev_trial_key.tsv
+
+tel_up=""
+if [ $fs -eq 16 ];then
+    tel_up=" sox -t wav - -t wav -r 16k - |"
+fi
+
+#Unlabeled
+unlab_dir=$output_path/sre18_dev_unlabeled
+mkdir -p $unlab_dir
+awk '/unlabeled/ { print $1,"sph2pipe -f wav -p -c 1 '$input_path'/data/unlabeled/"$1" |'"$tel_up"'"}' $segm_file | \
+    sort -k1,1 > $unlab_dir/wav.scp
+awk '/unlabeled/ { print $1,$1}' $segm_file | sort -k1,1 > $unlab_dir/utt2spk
+cp $unlab_dir/utt2spk $unlab_dir/spk2utt
+awk '{ print $1,"ara-aeb" }' $unlab_dir/utt2spk > $unlab_dir/utt2lang
+
+utils/fix_data_dir.sh $unlab_dir
+utils/validate_data_dir.sh --no-text --no-feats $unlab_dir
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/make_sre18_train_dev.sh b/egs/lre22/fixed.v1.8k/local/make_sre18_train_dev.sh
new file mode 100755
index 00000000..9e6ff763
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/make_sre18_train_dev.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Copyright 2018 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0
+
+if [ $# != 3 ]; then
+    echo "Usage: $0 <input-path> <fs 8/16> <output-path>"
+    exit 1
+fi
+input_path=$1
+fs=$2
+output_path=$3
+
+docs=$input_path/docs
+segm_file=$docs/sre18_dev_segment_key.tsv
+
+tel_up=""
+vid_down=""
+if [ $fs -eq 16 ];then
+    tel_up=" sox -t wav - -t wav -r 16k - |"
+fi
+
+
+#Dev CMN2
+output_dir=$output_path/sre18_train_dev_cmn2
+mkdir -p $output_dir
+awk '$7=="cmn2" && $4 != "unlabeled" { print $2"-"$1,$2}' $segm_file | sort -k1,1 > $output_dir/utt2spk
+utils/utt2spk_to_spk2utt.pl $output_dir/utt2spk > $output_dir/spk2utt
+awk '{ print $1,"ara-aeb" }' $output_dir/utt2spk > $output_dir/utt2lang
+
+find -L $input_path -name "*.sph" > $output_dir/wav.scp.tmp
+
+awk -v fwav=$output_dir/wav.scp.tmp 'BEGIN{
+while(getline < fwav)
+{
+    bn=$1;
+    sub(/.*\//,"",bn);
+    wav[bn]=$1;
+}
+}
+$7=="cmn2" && $4 != "unlabeled" { print $2"-"$1,"sph2pipe -f wav -p -c 1 "wav[$1]" |'"$tel_up"'"}' $segm_file | \
+    sort -k1,1 > $output_dir/wav.scp
+
+rm -f $output_dir/wav.scp.tmp
+
+awk -v sf=$segm_file 'BEGIN{
+while(getline < sf)
+{
+    gender[$1]=substr($3,1,1)
+}
+}
+{ sub(/^[^-]*-/,"",$2); print $1,gender[$2] } ' $output_dir/spk2utt > $output_dir/spk2gender
+
+utils/fix_data_dir.sh $output_dir
+utils/validate_data_dir.sh --no-text --no-feats $output_dir
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/make_sre18_train_eval.sh b/egs/lre22/fixed.v1.8k/local/make_sre18_train_eval.sh
new file mode 100755
index 00000000..33ff5a5a
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/make_sre18_train_eval.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# Copyright 2018 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0
+
+if [ $# != 3 ]; then
+    echo "Usage: $0 <input-path> <fs 8/16> <output-path>"
+    exit 1
+fi
+input_path=$1
+fs=$2
+output_path=$3
+
+docs=$input_path/docs
+segm_file=$docs/sre18_eval_segment_key.tsv
+
+tel_up=""
+vid_down=""
+if [ $fs -eq 16 ];then
+    tel_up=" sox -t wav - -t wav -r 16k - |"
+    vid_down=" -r 16k "
+elif [ $fs -eq 8 ];then
+    vid_down=" -r 8k "
+fi
+
+
+#Eval CMN2
+output_dir=$output_path/sre18_train_eval_cmn2
+mkdir -p $output_dir
+awk '$7=="cmn2" && $4 != "unlabeled" { print $2"-"$1,$2}' $segm_file | sort -k1,1 > $output_dir/utt2spk
+utils/utt2spk_to_spk2utt.pl $output_dir/utt2spk > $output_dir/spk2utt
+awk '{ print $1,"ara-aeb" }' $output_dir/utt2spk > $output_dir/utt2lang
+
+find $input_path -name "*.sph" > $output_dir/wav.scp.tmp
+
+awk -v fwav=$output_dir/wav.scp.tmp 'BEGIN{
+while(getline < fwav)
+{
+    bn=$1;
+    sub(/.*\//,"",bn);
+    wav[bn]=$1;
+}
+}
+$7=="cmn2" && $4 != "unlabeled" { print $2"-"$1,"sph2pipe -f wav -p -c 1 "wav[$1]" |'"$tel_up"'"}' $segm_file | \
+    sort -k1,1 > $output_dir/wav.scp
+
+rm -f $output_dir/wav.scp.tmp
+
+awk -v sf=$segm_file 'BEGIN{
+while(getline < sf)
+{
+    gender[$1]=substr($3,1,1)
+}
+}
+{ sub(/^[^-]*-/,"",$2); print $1,gender[$2] } ' $output_dir/spk2utt > $output_dir/spk2gender
+
+utils/fix_data_dir.sh $output_dir
+utils/validate_data_dir.sh --no-text --no-feats $output_dir
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/make_sre19cmn2_eval.sh b/egs/lre22/fixed.v1.8k/local/make_sre19cmn2_eval.sh
new file mode 100755
index 00000000..d6f877f5
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/make_sre19cmn2_eval.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright 2019 Johns Hopkins University (Jesus Villalba)
+# Apache 2.0
+
+if [ $# != 3 ]; then
+    echo "Usage: $0 <input-path> <fs 8/16> <output-path>"
+    exit 1
+fi
+input_path=$1
+fs=$2
+output_path=$3
+
+docs=$input_path/docs
+enroll_file=$docs/sre19_cts_challenge_enrollment.tsv
+trial_file=$docs/sre19_cts_challenge_trials.tsv
+key_file=$docs/sre19_cts_challenge_trial_key.tsv
+
+tel_up=""
+vid_down=""
+if [ $fs -eq 16 ];then
+    tel_up=" sox -t wav - -t wav -r 16k - |"
+fi
+
+#Enrollment CMN2
+enroll_dir=$output_path/sre19_eval_enroll_cmn2
+mkdir -p $enroll_dir
+awk '/\.sph/ { print $1"-"$2,"sph2pipe -f wav -p -c 1 '$input_path'/data/enrollment/"$2" |'"$tel_up"'"}' $enroll_file | \
+    sort -k1,1 > $enroll_dir/wav.scp
+awk '!/modelid/ && /\.sph/ { print $1"-"$2,$1}' $enroll_file | sort -k1,1 > $enroll_dir/utt2spk
+utils/utt2spk_to_spk2utt.pl $enroll_dir/utt2spk > $enroll_dir/spk2utt
+awk '{ print $1,"ara-aeb" }' $enroll_dir/utt2spk > $enroll_dir/utt2lang
+
+utils/fix_data_dir.sh $enroll_dir
+utils/validate_data_dir.sh --no-text --no-feats $enroll_dir
+
+
+#Test set CMN2
+test_dir=$output_path/sre19_eval_test_cmn2
+mkdir -p $test_dir
+awk '/\.sph/ { print $2,"sph2pipe -f wav -p -c 1 '$input_path'/data/test/"$2" |'"$tel_up"'"}' $trial_file | \
+    sort -u -k1,1 > $test_dir/wav.scp
+awk '{ print $1,$1}' $test_dir/wav.scp | sort -k1,1 > $test_dir/utt2spk
+cp $test_dir/utt2spk $test_dir/spk2utt
+awk '{ print $1,"ara-aeb" }' $test_dir/utt2spk > $test_dir/utt2lang
+awk '!/modelid/ { print $1,$2,$4 }' $key_file > $test_dir/trials
+
+cp $trial_file $test_dir/trials.tsv
+cp $key_file $test_dir/trial_key.tsv
+
+utils/fix_data_dir.sh $test_dir
+utils/validate_data_dir.sh --no-text --no-feats $test_dir
+
+
diff --git a/egs/lre22/fixed.v1.8k/local/merge_scores.py b/egs/lre22/fixed.v1.8k/local/merge_scores.py
new file mode 100755
index 00000000..8d0df80e
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/merge_scores.py
@@ -0,0 +1,32 @@
+#!/bin/env python
+"""
+  Copyright 2021 Johns Hopkins University (Author: Jesus Villalba)
+  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+from jsonargparse import ArgumentParser, namespace_to_dict
+import logging
+from pathlib import Path
+import pandas as pd
+
+
+def merge_scores(in_score_files, out_score_file):
+
+    dfs = []
+    for f in in_score_files:
+        df_f = pd.read_csv(f, sep="\t")
+        dfs.append(df_f)
+
+    df = pd.concat(dfs)
+    df.sort_values(by="segmentid", inplace=True)
+    df.to_csv(out_score_file, sep="\t", index=False)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(
+        description="Merges several score files into a single one"
+    )
+    parser.add_argument("--in-score-files", nargs="+", required=True)
+    parser.add_argument("--out-score-file", required=True)
+    args = parser.parse_args()
+    merge_scores(**namespace_to_dict(args))
diff --git a/egs/lre22/fixed.v1.8k/local/prepare_adi17.py b/egs/lre22/fixed.v1.8k/local/prepare_adi17.py
new file mode 100755
index 00000000..c04d988b
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/prepare_adi17.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# prepare_adi17.py --corpus-dir /export/corpora6/ADI17 --output-dir data/adi17 --map-langs-to-lre-codes --target-fs 8000
+"""
+  Copyright 2021 Johns Hopkins University (Author: Jesus Villalba)
+  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo
+import logging
+from pathlib import Path
+import glob
+import numpy as np
+import pandas as pd
+
+from hyperion.hyp_defs import config_logger
+
+lre_map = {
+    "ALG": "ara-arq",
+    "EGY": "ara-arz",
+    "IRA": "ara-acm",
+    "JOR": "ara-jor",
+    "KSA": "ara-ksa",
+    "KUW": "ara-kuw",
+    "LEB": "ara-leb",
+    "LIB": "ara-ayl",
+    "MAU": "ara-mau",
+    "MOR": "ara-mor",
+    "OMA": "ara-oma",
+    "PAL": "ara-pal",
+    "QAT": "ara-qat",
+    "SUD": "ara-sud",
+    "SYR": "ara-syr",
+    "UAE": "ara-uae",
+    "YEM": "ara-yem"
+}
+
+
+def map_to_lre(langs):
+    return [lre_map[l] for l in langs]
+
+
+def make_kaldi(df, wav_dir, output_dir, target_fs):
+    # make wav.scp
+    logging.info("making wav.scp")
+    with open(output_dir / "wav.scp", "w") as f:
+        for _, row in df.iterrows():
+            segment_id = row["id"]
+            filename = row["filename"]
+            if target_fs != 16000:
+                wav = "sox {} -t wav -r {} - |".format(filename, target_fs)
+            else:
+                wav = filename
+
+            f.write("{} {}\n".format(segment_id, wav))
+
+    # Kaldi data directory files
+    # utt2xxx files
+    logging.info("saving Kaldi utt2xxx files")
+    columns = [
+        "id",
+        "id",
+        "language",
+    ]
+    files = [
+        "utt2spk",
+        "spk2utt",
+        "utt2lang",
+    ]
+    for c, f in zip(columns, files):
+        output_file = output_dir / f
+        if c in df:
+            df.to_csv(output_file,
+                      sep=" ",
+                      columns=["id", c],
+                      header=False,
+                      index=False)
+
+
+def prepare_adi17(corpus_dir,
output_dir, remove_langs, map_langs_to_lre_codes, + target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir + train_files = glob.glob(str(corpus_dir / "train_segments/*/*.wav"), + recursive=True) + train_ids = [Path(f).stem for f in train_files] + train_langs = [Path(f).parent.stem for f in train_files] + dev_files = glob.glob(str(corpus_dir / "dev_segments/*.wav"), + recursive=True) + test_files = glob.glob(str(corpus_dir / "test_segments/*.wav"), + recursive=True) + dev_test_files = dev_files + test_files + df_labels = pd.concat([ + pd.read_csv(str(corpus_dir / "adi17_official_dev_label.txt"), + delim_whitespace=True), + pd.read_csv(str(corpus_dir / "adi17_official_test_label.txt"), + delim_whitespace=True) + ]) + df_labels = df_labels.set_index("id") + dev_test_ids = [Path(f).stem for f in dev_test_files] + dev_test_langs = df_labels.loc[dev_test_ids, "label"].values + all_ids = train_ids + dev_test_ids + all_files = train_files + dev_test_files + all_langs = list(train_langs) + list(dev_test_langs) + if map_langs_to_lre_codes: + all_langs = map_to_lre(all_langs) + + all_ids = [f"{a}-{b}" for a, b in zip(all_langs, all_ids)] + df = pd.DataFrame({ + "id": all_ids, + "language": all_langs, + "filename": all_files + }) + if remove_langs is not None: + for lang in remove_langs: + df = df[df["language"] != lang] + + df["sample_coding"] = "pcm" + df["source"] = "afv" + df["corpus_id"] = corpus_dir.stem + df["sample_rate"] = target_fs + + # sort by segment id + df.sort_values(by="id", inplace=True) + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares ADI17 for training") + parser.add_argument("--corpus-dir", + required=True, + help="Path to the original dataset") + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument("--remove-langs", + default=None, + nargs="+", + help="languages to remove") + parser.add_argument( + "--map-langs-to-lre-codes", + default=False, + action=ActionYesNo, + help="use LRE17 language codes", + ) + + parser.add_argument("--target-fs", + default=8000, + type=int, + help="Target sampling frequency") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + args = parser.parse_args() + prepare_adi17(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_ast.py b/egs/lre22/fixed.v1.8k/local/prepare_ast.py new file mode 100755 index 00000000..957ee9bf --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_ast.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +# prepare_ast.py --corpus-dir /export/corpora6/LRE/AST2004 --output-dir data/ast --map-langs-to-lre-codes --target-fs 8000 +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + + +lre_map = { + "afr": "afr-afr", + "ndb": "nbl-nbl", + "oro": "orm-orm", + "tso": "tso-tso", + "ven": "ven-ven", + 
"xho": "xho-xho", + "zul": "zul-zul", + "tig": "tir-tir", + "sae": "eng-ens", + "ine": "eng-iaf", + "tun": "ara-aeb", + "alg": "ara-arq", + "lib": "ara-ayl", + "naf": "fra-ntf", + "aa": "afr-afr", + "ba": "afr-afr", + "ca": "afr-afr", + "ae": "eng-ens", + "be": "eng-ens", + "ce": "eng-ens", +} + + +def map_to_lre(langs): + return [lre_map[l] if l in lre_map else "{}-{}".format(l, l) for l in langs] + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + if target_fs != 16000: + wav = "sox -t raw -e a-law -r 8000 {} -t wav -e signed-integer -b 16 -r {} - |".format(filename, target_fs) + else: + wav = "sox -t raw -e a-law -r 8000 {} -t wav -e signed-integer -b 16 -r 16000 - |".format(filename) + + f.write("{} {}\n".format(segment_id, wav)) + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv( + output_file, sep=" ", columns=["id", c], header=False, index=False + ) + + +def prepare_ast( + corpus_dir, output_dir, remove_langs, map_langs_to_lre_codes, target_fs, verbose +): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir + files = glob.glob(str(corpus_dir / "*/*/*/*/*.alaw")) + langs = [(Path(f).parent.parent.parent.parent.stem).lower() for f in files] + files2 = glob.glob(str(corpus_dir / "*/*/*/*.alaw")) + langs2 = [(Path(f).parent.parent.parent.stem).lower() for f in files2] + files = files + files2 + langs = langs + langs2 + files = [f for f, l in zip(files, langs) if l not in ['ee']] + langs = [l for l in langs if l not in ['ee']] + if map_langs_to_lre_codes: + langs = map_to_lre(langs) + ids = ["{}-{}".format(l, Path(f).stem) for f, l in zip(files, langs)] + df = pd.DataFrame({"id": ids, "language": langs, "filename": files}) + if remove_langs is not None: + for lang in remove_langs: + df = df[df["language"] != lang] + + df["sample_coding"] = "pcm" + df["source"] = "cts" + df["corpus_id"] = corpus_dir.stem + df["sample_rate"] = target_fs + + # sort by segment id + df.sort_values(by="id", inplace=True) + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__":#ast + + parser = ArgumentParser(description="Prepares AST for training") + parser.add_argument( + "--corpus-dir", required=True, help="Path to the original dataset" + ) + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument( + "--remove-langs", default=None, nargs="+", help="languages to remove" + ) + parser.add_argument( + "--map-langs-to-lre-codes", + default=False, + action=ActionYesNo, + help="use LRE17 language codes", + ) + + parser.add_argument( + "--target-fs", default=8000, type=int, help="Target sampling frequency" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + prepare_ast(**namespace_to_dict(args)) diff --git 
a/egs/lre22/fixed.v1.8k/local/prepare_babel.py b/egs/lre22/fixed.v1.8k/local/prepare_babel.py
new file mode 100755
index 00000000..4eb18945
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/prepare_babel.py
@@ -0,0 +1,108 @@
+#!/bin/env python
+"""
+  Copyright 2021 Johns Hopkins University (Author: Jesus Villalba)
+  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+from jsonargparse import ArgumentParser, namespace_to_dict
+import logging
+from pathlib import Path
+import glob
+import re
+import numpy as np
+import pandas as pd
+
+from hyperion.hyp_defs import config_logger
+
+
+def make_kaldi(df, output_dir, target_fs):
+    # make wav.scp
+    logging.info("making wav.scp")
+    with open(output_dir / "wav.scp", "w") as f:
+        for _, row in df.iterrows():
+            segment_id = row["id"]
+            filename = row["filename"]
+            source = row["source"]
+            if source == "cts":
+                wav = f"sph2pipe -f wav -p -c 1 {filename} |"
+                if target_fs != 8000:
+                    wav = f"{wav} sox -t wav - -t wav -r {target_fs} - |"
+            else:
+                wav = f"sox {filename} -t wav -r {target_fs} - |"
+
+            f.write(f"{segment_id} {wav}\n")
+
+    # Kaldi data directory files
+    # utt2xxx files
+    logging.info("saving Kaldi utt2xxx files")
+    columns = [
+        "id",
+        "id",
+        "language",
+    ]
+    files = [
+        "utt2spk",
+        "spk2utt",
+        "utt2lang",
+    ]
+    for c, f in zip(columns, files):
+        output_file = output_dir / f
+        if c in df:
+            df.to_csv(output_file,
+                      sep=" ",
+                      columns=["id", c],
+                      header=False,
+                      index=False)
+
+
+def prepare_babel(corpus_dir, lang_code, output_dir, target_fs, verbose):
+    config_logger(verbose)
+    logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir)
+    corpus_dir = Path(corpus_dir)
+    logging.info("searching audio files")
+    wavs = glob.glob(str(corpus_dir / "**/audio/*.sph"), recursive=True)
+    logging.info("found %d files", len(wavs))
+    # glob already returns paths rooted at corpus_dir; just wrap in Path
+    wavs = [Path(w) for w in wavs]
+    seg_ids = [w.stem for w in wavs]
+    df = pd.DataFrame({"id": seg_ids, "filename": wavs})
+
+    # sort by segment id
+    df.sort_values(by="id", inplace=True)
+    df["corpus_id"] = "babel"
+    df["sample_rate"] = target_fs
+    df["language"] = lang_code
+    df["source"] = "cts"
+    logging.info("saving files")
+    output_dir = Path(output_dir)
+    output_dir.mkdir(exist_ok=True, parents=True)
+    output_file = output_dir / "segments.csv"
+    logging.info("saving %s", output_file)
+    df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False)
+
+    make_kaldi(df, output_dir, target_fs)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(
+        description="Prepares Babel datasets for training in LRE")
+    parser.add_argument("--corpus-dir",
+                        required=True,
+                        help="Path to the original dataset")
+    parser.add_argument(
+        "--lang-code",
+        required=True,
+        help="language code",
+    )
+    parser.add_argument("--output-dir", required=True, help="data path")
+    parser.add_argument("--target-fs",
+                        default=8000,
+                        type=int,
+                        help="Target sampling frequency")
+    parser.add_argument("-v",
+                        "--verbose",
+                        dest="verbose",
+                        default=1,
+                        choices=[0, 1, 2, 3],
+                        type=int)
+    args = parser.parse_args()
+    prepare_babel(**namespace_to_dict(args))
diff --git a/egs/lre22/fixed.v1.8k/local/prepare_common_voice.py b/egs/lre22/fixed.v1.8k/local/prepare_common_voice.py
new file mode 100755
index 00000000..411ae94a
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/local/prepare_common_voice.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+# prepare_common_voice.py --corpus-dir /export/corpora6/LRE/CommonVoice2020 --output-dir data/cv --map-langs-to-lre-codes --target-fs 8000
+"""
+  Copyright 2021 Johns Hopkins
University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import numpy as np +import pandas as pd +from hyperion.hyp_defs import config_logger + +lre_map = { + "afr": "afr-afr", + "ndb": "nbl-nbl", + "oro": "orm-orm", + "tso": "tso-tso", + "ven": "ven-ven", + "xho": "xho-xho", + "zul": "zul-zul", + "tig": "tir-tir", + "sae": "eng-ens", + "ine": "eng-ine", + "tun": "ara-aeb", + "alg": "ara-arq", + "lib": "ara-ayl", + "naf": "fra-ntf" +} + + +def map_to_lre(langs): + return [ + lre_map[l] if l in lre_map else "{}-{}".format(l, l) for l in langs + ] + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + if target_fs != 16000: + wav = "ffmpeg -i {} -acodec pcm_s16le -ar {} -f wav - |".format( + filename, target_fs) + else: + wav = "ffmpeg -i {} -acodec pcm_s16le -f wav - |".format( + filename) + + f.write("{} {}\n".format(segment_id, wav)) + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv(output_file, + sep=" ", + columns=["id", c], + header=False, + index=False) + + +def prepare_common_voice(corpus_dir, output_dir, keep_langs, + map_langs_to_lre_codes, target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir + files = glob.glob(str(corpus_dir / "**/clips/*.mp3"), recursive=True) + langs = [(Path(f).parent.parent.stem).lower() for f in files] + if map_langs_to_lre_codes: + langs = map_to_lre(langs) + ids = ["{}-{}".format(l, Path(f).stem) for f, l in zip(files, langs)] + df = pd.DataFrame({"id": ids, "language": langs, "filename": files}) + df = df[df["language"].isin(keep_langs)] + # if remove_langs is not None: + # for lang in remove_langs: + # df = df[df["language"] != lang] + + df["sample_coding"] = "pcm" + df["source"] = "afv" + df["corpus_id"] = "cv" + df["sample_rate"] = target_fs + + # sort by segment id + df.sort_values(by="id", inplace=True) + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares Common Voice for training") + parser.add_argument("--corpus-dir", + required=True, + help="Path to the original dataset") + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument("--keep-langs", + default=["tir-tir"], + nargs="+", + help="languages to keep") + + # parser.add_argument("--remove-langs", + # default=None, + # nargs="+", + # help="languages to remove") + parser.add_argument( + "--map-langs-to-lre-codes", + default=False, + action=ActionYesNo, + help="use LRE17 language codes", + ) + + parser.add_argument("--target-fs", + default=8000, + type=int, + help="Target sampling frequency") + parser.add_argument("-v", + "--verbose", + 
dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + args = parser.parse_args() + prepare_common_voice(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_common_voice_accents.py b/egs/lre22/fixed.v1.8k/local/prepare_common_voice_accents.py new file mode 100755 index 00000000..4c44b7f7 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_common_voice_accents.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python +# prepare_common_voice.py --corpus-dir /export/corpora6/LRE/CommonVoice2020 --output-dir data/cv --map-langs-to-lre-codes --target-fs 8000 +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import numpy as np +import pandas as pd +from hyperion.hyp_defs import config_logger + +en_map = {"indian": "eng-ine"} +fr_map = { + "france": "fra-fra", + "canada": "fra-can", + "algeria": "fra-ntf", + "morocco": "fra-ntf", + "tunisia": "fra-ntf", +} + +lre_map = { + "en": en_map, + "fr": fr_map, +} + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + if target_fs != 16000: + wav = "ffmpeg -i {} -acodec pcm_s16le -ar {} -f wav - |".format( + filename, target_fs) + else: + wav = "ffmpeg -i {} -acodec pcm_s16le -f wav - |".format( + filename) + + f.write("{} {}\n".format(segment_id, wav)) + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv(output_file, + sep=" ", + columns=["id", c], + header=False, + index=False) + + +def prepare_common_voice(corpus_dir, output_dir, lang, target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir + my_map = lre_map[lang] + df = pd.read_csv(corpus_dir / lang / "validated.tsv", sep="\t") + mask = None + for dialect in my_map.keys(): + mask_d = df["accent"] == dialect + if mask is None: + mask = mask_d + else: + mask = np.logical_or(mask, mask_d) + + df = df.loc[mask] + files = df["path"] + files = [corpus_dir / lang / "clips" / f for f in df["path"]] + langs = [my_map[l] for l in df["accent"]] + ids = ["{}-{}".format(l, Path(f).stem) for f, l in zip(files, langs)] + df = pd.DataFrame({"id": ids, "language": langs, "filename": files}) + + df["sample_coding"] = "pcm" + df["source"] = "afv" + df["corpus_id"] = "cv" + df["sample_rate"] = target_fs + + # sort by segment id + df.sort_values(by="id", inplace=True) + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Prepares Common Voice Accents for training in LRE22") + parser.add_argument("--corpus-dir", + required=True, + help="Path to the original dataset") + parser.add_argument("--output-dir", required=True, help="data path") + 
parser.add_argument("--lang", + default="en", + choices=["en", "fr"], + help="languages") + + parser.add_argument("--target-fs", + default=8000, + type=int, + help="Target sampling frequency") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + args = parser.parse_args() + prepare_common_voice(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_common_voice_accents_cat.py b/egs/lre22/fixed.v1.8k/local/prepare_common_voice_accents_cat.py new file mode 100755 index 00000000..bf9d79ed --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_common_voice_accents_cat.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python +# prepare_common_voice.py --corpus-dir /export/corpora6/LRE/CommonVoice2020 --output-dir data/cv --map-langs-to-lre-codes --target-fs 8000 +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import numpy as np +import pandas as pd +from hyperion.hyp_defs import config_logger + +en_map = {"indian": "eng-ine"} +fr_map = { + "france": "fra-fra", + "canada": "fra-can", + "algeria": "fra-ntf", + "morocco": "fra-ntf", + "tunisia": "fra-ntf", +} + +lre_map = { + "en": en_map, + "fr": fr_map, +} + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + list_dir = output_dir / "lists_cat" + list_dir.mkdir(parents=True, exist_ok=True) + for r in range(len(df)): + file_list = df.iloc[r].file_lists + with open(list_dir / f"{df.iloc[r].id}.txt", "w") as f: + for fn in file_list: + f.write("file %s\n" % fn) + + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = list_dir / f"{segment_id}.txt" + if target_fs != 16000: + wav = f"ffmpeg -f concat -safe 0 -i {filename} -acodec pcm_s16le -ar {target_fs} -f wav - |" + else: + wav = f"ffmpeg -f concat -safe 0 -i {filename} -acodec pcm_s16le -f wav - |" + + f.write("{} {}\n".format(segment_id, wav)) + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv(output_file, + sep=" ", + columns=["id", c], + header=False, + index=False) + + +def prepare_common_voice(corpus_dir, output_dir, lang, target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir + my_map = lre_map[lang] + df = pd.read_csv(corpus_dir / lang / "validated.tsv", sep="\t") + mask = None + for dialect in my_map.keys(): + mask_d = df["accent"] == dialect + if mask is None: + mask = mask_d + else: + mask = np.logical_or(mask, mask_d) + + df = df.loc[mask] + files = df["path"] + files = [corpus_dir / lang / "clips" / f for f in df["path"]] + langs = [my_map[l] for l in df["accent"]] + ids = ["{}-{}".format(l, Path(f).stem) for f, l in zip(files, langs)] + df = pd.DataFrame({ + "id": ids, + "language": langs, + "filename": files, + "speaker": df["client_id"] + }) + + # sort by speaker, id + df.sort_values(by=["speaker", "id"], inplace=True) + + file_lists = [] + file_list = [] + seg_count = 0 + prev_spk = "" + cat_segs = [] + cur_seg = 0 + for r in 
range(len(df)): + row = df.iloc[r] + if seg_count == 5 or (row.speaker != prev_spk and seg_count > 0): + file_lists.append(file_list) + cat_segs.append(cur_seg) + file_list = [] + seg_count = 0 + cur_seg = r + + file_list.append(row.filename) + seg_count += 1 + prev_spk = row.speaker + + if file_list: + file_lists.append(file_list) + cat_segs.append(cur_seg) + + df_cat = df.iloc[cat_segs].drop(["filename"], axis=1) + df_cat["file_lists"] = file_lists + + df_cat["sample_coding"] = "pcm" + df_cat["source"] = "afv" + df_cat["corpus_id"] = "cv" + df_cat["sample_rate"] = target_fs + + # sort by segment id + df_cat.sort_values(by="id", inplace=True) + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df_cat.drop(["file_lists"], axis=1).to_csv(output_file, + sep=",", + index=False) + + make_kaldi(df_cat, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Prepares Common Voice Accents for training in LRE22") + parser.add_argument("--corpus-dir", + required=True, + help="Path to the original dataset") + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument("--lang", + default="en", + choices=["en", "fr"], + help="languages") + + parser.add_argument("--target-fs", + default=8000, + type=int, + help="Target sampling frequency") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + args = parser.parse_args() + prepare_common_voice(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_common_voice_cat.py b/egs/lre22/fixed.v1.8k/local/prepare_common_voice_cat.py new file mode 100755 index 00000000..0790be25 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_common_voice_cat.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python +# prepare_common_voice.py --corpus-dir /export/corpora6/LRE/CommonVoice2020 --output-dir data/cv --map-langs-to-lre-codes --target-fs 8000 +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import numpy as np +import pandas as pd +from hyperion.hyp_defs import config_logger + +lre_map = { + "afr": "afr-afr", + "ndb": "nbl-nbl", + "oro": "orm-orm", + "tso": "tso-tso", + "ven": "ven-ven", + "xho": "xho-xho", + "zul": "zul-zul", + "tig": "tir-tir", + "sae": "eng-ens", + "ine": "eng-ine", + "tun": "ara-aeb", + "alg": "ara-arq", + "lib": "ara-ayl", + "naf": "fra-ntf" +} + + +def map_to_lre(langs): + return [ + lre_map[l] if l in lre_map else "{}-{}".format(l, l) for l in langs + ] + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + list_dir = output_dir / "lists_cat" + list_dir.mkdir(parents=True, exist_ok=True) + for r in range(len(df)): + file_list = df.iloc[r].file_lists + with open(list_dir / f"{df.iloc[r].id}.txt", "w") as f: + for fn in file_list: + f.write("file %s\n" % fn) + + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = list_dir / f"{segment_id}.txt" + if target_fs != 16000: + wav = f"ffmpeg -f concat -safe 0 -i {filename} -acodec pcm_s16le -ar {target_fs} -f wav - |" + else: + wav = f"ffmpeg -f concat -safe 0 -i {filename} -acodec pcm_s16le -f wav - |" + + 
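+            # each list file referenced here follows ffmpeg's concat demuxer syntax, + # one "file <path>" entry per clip, e.g. (hypothetical paths): + #   file clips/common_voice_en_000001.mp3 + #   file clips/common_voice_en_000002.mp3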
f.write("{} {}\n".format(segment_id, wav)) + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv(output_file, + sep=" ", + columns=["id", c], + header=False, + index=False) + + +def prepare_common_voice(corpus_dir, output_dir, keep_langs, + map_langs_to_lre_codes, target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir + files = glob.glob(str(corpus_dir / "**/clips/*.mp3"), recursive=True) + langs = [(Path(f).parent.parent.stem).lower() for f in files] + if map_langs_to_lre_codes: + langs = map_to_lre(langs) + ids = ["{}-{}".format(l, Path(f).stem) for f, l in zip(files, langs)] + df = pd.DataFrame({"id": ids, "language": langs, "filename": files}) + df = df[df["language"].isin(keep_langs)] + # if remove_langs is not None: + # for lang in remove_langs: + # df = df[df["language"] != lang] + + df["sample_coding"] = "pcm" + df["source"] = "afv" + df["corpus_id"] = "cv" + df["sample_rate"] = target_fs + + # sort by segment id + df.sort_values(by="id", inplace=True) + + file_lists = [] + file_list = [] + seg_count = 0 + prev_lang = "" + cat_segs = [] + cur_seg = 0 + for r in range(len(df)): + row = df.iloc[r] + if seg_count == 5 or (row.language != prev_lang and seg_count > 0): + file_lists.append(file_list) + cat_segs.append(cur_seg) + file_list = [] + seg_count = 0 + cur_seg = r + + file_list.append(row.filename) + seg_count += 1 + prev_lang = row.language + + if file_list: + file_lists.append(file_list) + cat_segs.append(cur_seg) + + df_cat = df.iloc[cat_segs].drop(["filename"], axis=1) + df_cat["file_lists"] = file_lists + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df_cat.drop(["file_lists"], axis=1).to_csv(output_file, + sep=",", + index=False) + + make_kaldi(df_cat, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares Common Voice for training") + parser.add_argument("--corpus-dir", + required=True, + help="Path to the original dataset") + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument("--keep-langs", + default=["tir-tir"], + nargs="+", + help="languages to keep") + + # parser.add_argument("--remove-langs", + # default=None, + # nargs="+", + # help="languages to remove") + parser.add_argument( + "--map-langs-to-lre-codes", + default=False, + action=ActionYesNo, + help="use LRE17 language codes", + ) + + parser.add_argument("--target-fs", + default=8000, + type=int, + help="Target sampling frequency") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + args = parser.parse_args() + prepare_common_voice(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_lre17.py b/egs/lre22/fixed.v1.8k/local/prepare_lre17.py new file mode 100755 index 00000000..18eaa1d2 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_lre17.py @@ -0,0 +1,140 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, 
namespace_to_dict +import logging +from pathlib import Path +import re +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + source = row["source"] + if source == "cts": + wav = f"sph2pipe -f wav -p -c 1 {filename} |" + if target_fs != 8000: + wav = f"{wav} sox -t wav - -t wav -r {target_fs} - |" + else: + wav = f"sox {filename} -t wav -r {target_fs} - |" + + f.write(f"{segment_id} {wav}\n") + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + "duration", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + "utt2speech_dur", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv( + output_file, sep=" ", columns=["id", c], header=False, index=False + ) + + +def prepare_lre17(corpus_dir, subset, source, output_dir, target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s - %s -> %s", corpus_dir, subset, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir / "data" / subset + if subset == "eval": + table_info = corpus_dir / "docs" / "lre17_eval_segment_keys.tsv" + else: + table_info = corpus_dir / "docs" / f"{subset}_info.tab" + df = pd.read_csv(table_info, sep="\t") + df.rename( + columns={ + "language_code": "language", + "segmentid": "id", + "file_duration": "duration", + }, + inplace=True, + ) + + if subset == "eval": + df["data_source"] = df["data_source"].str.lower() + df["sample_coding"] = df["data_source"].apply( + lambda x: "mulaw" if x == "mls14" else "pcm" + ) + df.loc[df["speech_duration"].isnull(), "speech_duration"] = 1000 + df["length_condition"] = df.pop("speech_duration").astype("int32") + + if subset in ["dev", "eval"]: + # drop the 3 and 10 sec files since they are contained in the 30 sec files + df = df[df["length_condition"] > 10] + if source != "all": + df = df[df["data_source"] == source] + + # move segment column to first position + first_col = df.pop("id") + df.insert(0, "id", first_col) + + # sort by segment id + df.sort_values(by="id", inplace=True) + + if subset == "train": + df["filename"] = df.apply(lambda x: wav_dir / x.language / x.id, axis=1) + else: + df["filename"] = df.apply(lambda x: wav_dir / x.id, axis=1) + df["source"] = df["id"].apply(lambda x: "cts" if re.match(r".*\.sph", x) else "afv") + df["corpus_id"] = "lre17" + df["sample_rate"] = target_fs + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares LDC2022E16/17 LRE17 for training") + parser.add_argument( + "--corpus-dir", required=True, help="Path to the original dataset" + ) + parser.add_argument( + "--subset", + required=True, + help="train/dev/eval", + choices=["train", "dev", "eval"], + ) + parser.add_argument( + "--source", + default="all", + help="all/mls14/vast", + choices=["all", "mls14", "vast"], + ) + + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument( + "--target-fs", default=8000, 
type=int, help="Target sampling frequency" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + prepare_lre17(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_lre22_dev.py b/egs/lre22/fixed.v1.8k/local/prepare_lre22_dev.py new file mode 100755 index 00000000..825f9b67 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_lre22_dev.py @@ -0,0 +1,108 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict +import logging +from pathlib import Path +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + source = row["source"] + if source == "cts": + wav = f"sph2pipe -f wav -p -c 1 {filename} |" + if target_fs != 8000: + wav = f"{wav} sox -t wav - -t wav -r {target_fs} - |" + else: + wav = f"sox {filename} -t wav -r {target_fs} - |" + + f.write(f"{segment_id} {wav}\n") + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + "duration", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + "utt2speech_dur", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv( + output_file, sep=" ", columns=["id", c], header=False, index=False + ) + + +def prepare_lre22(corpus_dir, output_dir, target_fs, verbose): + config_logger(verbose) + subset = "dev" + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir / "data" / subset + table_info = corpus_dir / "metadata" / "lre22_dev_metadata.tsv" + df = pd.read_csv(table_info, sep="\t") + df.rename( + columns={ + "language_code": "language", + "file_name": "id", + "duration_sec": "duration", + }, + inplace=True, + ) + + # move segment column to first positon + first_col = df.pop("id") + df.insert(0, "id", first_col) + + # sort by segment id + df.sort_values(by="id", inplace=True) + + df["filename"] = df.apply(lambda x: wav_dir / f"{x.id}.sph", axis=1) + df["source_coding"] = "alaw" + df["source"] = "cts" + df["corpus_id"] = "lre22" + df["sample_rate"] = target_fs + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares LDC2022E14 LRE22") + parser.add_argument( + "--corpus-dir", required=True, help="Path to the original dataset" + ) + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument( + "--target-fs", default=8000, type=int, help="Target sampling frequency" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + prepare_lre22(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_lre22_eval.py b/egs/lre22/fixed.v1.8k/local/prepare_lre22_eval.py new file mode 100755 index 
00000000..39aa06de --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_lre22_eval.py @@ -0,0 +1,98 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict +import logging +from pathlib import Path +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + source = row["source"] + if source == "cts": + wav = f"sph2pipe -f wav -p -c 1 {filename} |" + if target_fs != 8000: + wav = f"{wav} sox -t wav - -t wav -r {target_fs} - |" + else: + wav = f"sox {filename} -t wav -r {target_fs} - |" + + f.write(f"{segment_id} {wav}\n") + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + ] + files = [ + "utt2spk", + "spk2utt", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv( + output_file, sep=" ", columns=["id", c], header=False, index=False + ) + + +def prepare_lre22(corpus_dir, output_dir, target_fs, verbose): + config_logger(verbose) + subset = "eval" + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir / "data" / subset + table_info = corpus_dir / "docs" / "lre22_eval_trials.tsv" + df = pd.read_csv(table_info, sep="\t") + df.rename( + columns={ + "segmentid": "id", + }, + inplace=True, + ) + + # sort by segment id + df.sort_values(by="id", inplace=True) + + df["filename"] = df.apply(lambda x: wav_dir / f"{x.id}.sph", axis=1) + df["source_coding"] = "alaw" + df["source"] = "cts" + df["corpus_id"] = "lre22" + df["sample_rate"] = target_fs + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares LRE22 eval data") + parser.add_argument( + "--corpus-dir", required=True, help="Path to the original dataset" + ) + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument( + "--target-fs", default=8000, type=int, help="Target sampling frequency" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + prepare_lre22(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_some_data_for_lre.py b/egs/lre22/fixed.v1.8k/local/prepare_some_data_for_lre.py new file mode 100755 index 00000000..d3eb68f1 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_some_data_for_lre.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python +# prepare_data.py --corpus-dir /export/corpora6/LRE/FLEURS2022 --output-dir data/fleurs --map-langs-to-lre-codes --target-fs 8000 +# prepare_data.py --corpus-dir /export/corpora6/LRE/Lwazi2009 --output-dir data/lwazi --map-langs-to-lre-codes --target-fs 8000 +# prepare_data.py --corpus-dir /export/corpora6/LRE/NCHLT2014 --output-dir data/nchlt --map-langs-to-lre-codes --target-fs 8000 +# prepare_data.py --corpus-dir 
/export/corpora6/LRE/AMMI2020 --output-dir data/ammi --map-langs-to-lre-codes --target-fs 8000 +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + + +lre_map = { + "afr": "afr-afr", + "ndb": "nbl-nbl", + "oro": "orm-orm", + "tso": "tso-tso", + "ven": "ven-ven", + "xho": "xho-xho", + "zul": "zul-zul", + "tig": "tir-tir", + "sae": "eng-ens", + "ine": "eng-iaf", + "tun": "ara-aeb", + "alg": "ara-arq", + "lib": "ara-ayl", + "naf": "fra-ntf" +} + + +def map_to_lre(langs): + return [lre_map[l] if l in lre_map else "{}-{}".format(l,l) for l in langs] + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + if target_fs != 16000: + wav = "sox {} -t wav -r {} - |".format(filename, target_fs) + else: + wav = filename + + f.write("{} {}\n".format(segment_id, wav)) + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv( + output_file, sep=" ", columns=["id", c], header=False, index=False + ) + + +def prepare_data( + corpus_dir, output_dir, remove_langs, map_langs_to_lre_codes, target_fs, verbose +): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir + files = glob.glob(str(corpus_dir / "*/*/*/*/*.wav")) + langs = [(Path(f).parent.parent.parent.parent.stem).lower() for f in files] + if map_langs_to_lre_codes: + langs = map_to_lre(langs) + ids = ["{}-{}".format(l, Path(f).stem) for f, l in zip(files, langs)] + df = pd.DataFrame({"id": ids, "language": langs, "filename": files}) + if remove_langs is not None: + for lang in remove_langs: + df = df[df["language"] != lang] + + df["sample_coding"] = "pcm" + df["source"] = "afv" + df["corpus_id"] = corpus_dir.stem + df["sample_rate"] = target_fs + + # sort by segment id + df.sort_values(by="id", inplace=True) + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares NCHLT, FLEURS, Lwazi, and AMMI corpus for training") + parser.add_argument( + "--corpus-dir", required=True, help="Path to the original dataset" + ) + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument( + "--remove-langs", default=None, nargs="+", help="languages to remove" + ) + parser.add_argument( + "--map-langs-to-lre-codes", + default=False, + action=ActionYesNo, + help="use LRE17 language codes", + ) + + parser.add_argument( + "--target-fs", default=8000, type=int, help="Target sampling frequency" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + 
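+    # namespace_to_dict converts the parsed jsonargparse namespace into a plain + # dict, so each CLI flag is passed to prepare_data as a keyword argument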
prepare_data(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_some_data_for_lre_cat.py b/egs/lre22/fixed.v1.8k/local/prepare_some_data_for_lre_cat.py new file mode 100755 index 00000000..df62f18a --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_some_data_for_lre_cat.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +# prepare_data.py --corpus-dir /export/corpora6/LRE/FLEURS2022 --output-dir data/fleurs --map-langs-to-lre-codes --target-fs 8000 +# prepare_data.py --corpus-dir /export/corpora6/LRE/Lwazi2009 --output-dir data/lwazi --map-langs-to-lre-codes --target-fs 8000 +# prepare_data.py --corpus-dir /export/corpora6/LRE/NCHLT2014 --output-dir data/nchlt --map-langs-to-lre-codes --target-fs 8000 +# prepare_data.py --corpus-dir /export/corpora6/LRE/AMMI2020 --output-dir data/ammi --map-langs-to-lre-codes --target-fs 8000 +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + +lre_map = { + "afr": "afr-afr", + "ndb": "nbl-nbl", + "oro": "orm-orm", + "tso": "tso-tso", + "ven": "ven-ven", + "xho": "xho-xho", + "zul": "zul-zul", + "tig": "tir-tir", + "sae": "eng-ens", + "ine": "eng-iaf", + "tun": "ara-aeb", + "alg": "ara-arq", + "lib": "ara-ayl", + "naf": "fra-ntf" +} + +buggy_files = { + "200630-192328_tir_c85_elicit_7", + "200701-120846_tir_c85_elicit_35", + "200701-133352_tir_c85_elicit_57", + "200701-134903_tir_c85_elicit_19", + "200701-134903_tir_c85_elicit_32", + "200701-234652_tir_c85_elicit_78", + "200702-083859_tir_c85_elicit_18", + "200702-125252_tir_c85_elicit_46", + "200702-161120_tir_c85_elicit_4", + "200702-161120_tir_c85_elicit_7", + "200702-172026_tir_c85_elicit_31", + "200702-182933_tir_c85_elicit_133", + "200702-182933_tir_c85_elicit_88", + "200702-193310_tir_c85_elicit_2", + "200702-194850_tir_c85_elicit_88", + "200702-200911_tir_c85_elicit_171", +} + + +def map_to_lre(langs): + return [ + lre_map[l] if l in lre_map else "{}-{}".format(l, l) for l in langs + ] + + +def make_kaldi(df, wav_dir, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + list_dir = output_dir / "lists_cat" + list_dir.mkdir(parents=True, exist_ok=True) + for r in range(len(df)): + file_list = df.iloc[r].file_lists + with open(list_dir / f"{df.iloc[r].id}.txt", "w") as f: + for fn in file_list: + f.write("file %s\n" % fn) + + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = list_dir / f"{segment_id}.txt" + if target_fs != 16000: + wav = f"ffmpeg -f concat -safe 0 -i {filename} -acodec pcm_s16le -ar {target_fs} -f wav - |" + else: + wav = f"ffmpeg -f concat -safe 0 -i {filename} -acodec pcm_s16le -f wav - |" + + f.write("{} {}\n".format(segment_id, wav)) + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv(output_file, + sep=" ", + columns=["id", c], + header=False, + index=False) + + +def prepare_data(corpus_dir, output_dir, remove_langs, map_langs_to_lre_codes, + target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", 
corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir + files = glob.glob(str(corpus_dir / "**/*.wav"), recursive=True) + langs = [(Path(f).parent.parent.parent.parent.stem).lower() for f in files] + if map_langs_to_lre_codes: + langs = map_to_lre(langs) + ids = ["{}-{}".format(l, Path(f).stem) for f, l in zip(files, langs)] + val = np.array([Path(f).stem not in buggy_files for f in files]) + non_val = np.any(~val) + df = pd.DataFrame({"id": ids, "language": langs, "filename": files}) + if non_val: + df = df.loc[val] + logging.info("removed buggy files, kept %d / %d segments", len(df), + len(val)) + if remove_langs is not None: + for lang in remove_langs: + df = df[df["language"] != lang] + + # sort by segment id + df.sort_values(by=["language", "id"], inplace=True) + + file_lists = [] + file_list = [] + seg_count = 0 + prev_lang = "" + cat_segs = [] + cur_seg = 0 + for r in range(len(df)): + row = df.iloc[r] + if seg_count == 5 or (row.language != prev_lang and seg_count > 0): + file_lists.append(file_list) + cat_segs.append(cur_seg) + file_list = [] + seg_count = 0 + cur_seg = r + + file_list.append(row.filename) + seg_count += 1 + prev_lang = row.language + + if file_list: + file_lists.append(file_list) + cat_segs.append(cur_seg) + + df_cat = df.iloc[cat_segs].drop(["filename"], axis=1) + df_cat["file_lists"] = file_lists + + # sort by segment id + df_cat.sort_values(by="id", inplace=True) + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df_cat.drop(["file_lists"], axis=1).to_csv(output_file, + sep=",", + index=False) + + make_kaldi(df_cat, wav_dir, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description= + "Prepares NCHLT, FLEURS, Lwazi, and AMMI corpus for training") + parser.add_argument("--corpus-dir", + required=True, + help="Path to the original dataset") + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument("--remove-langs", + default=None, + nargs="+", + help="languages to remove") + parser.add_argument( + "--map-langs-to-lre-codes", + default=False, + action=ActionYesNo, + help="use LRE17 language codes", + ) + + parser.add_argument("--target-fs", + default=8000, + type=int, + help="Target sampling frequency") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + args = parser.parse_args() + prepare_data(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_sre21av_dev_audio.py b/egs/lre22/fixed.v1.8k/local/prepare_sre21av_dev_audio.py new file mode 100755 index 00000000..bc2c3001 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_sre21av_dev_audio.py @@ -0,0 +1,215 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict +import logging +from pathlib import Path +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + +from enum import Enum + + +class LangTrialCond(Enum): + ENG_ENG = 1 + ENG_CMN = 2 + ENG_YUE = 3 + CMN_CMN = 4 + CMN_YUE = 5 + YUE_YUE = 6 + OTHER_OTHER = 7 + OTHER_ENG = 8 + OTHER_CMN = 9 + OTHER_YUE = 10 + + @staticmethod + def is_eng(val): + if val in ("ENG", "USE"): + return True + return False + + @staticmethod + def 
get_side_cond(val): + if val == "ENG" or val == "USE": + return "ENG" + if "YUE" in val: + return "YUE" + if "CMN" in val: + return "CMN" + + return "OTHER" + + @staticmethod + def get_trial_cond(enr, test): + enr = LangTrialCond.get_side_cond(enr) + test = LangTrialCond.get_side_cond(test) + trial = enr + "_" + test + try: + return LangTrialCond[trial] + except KeyError: + # the enum names unordered pairs, so retry with the sides swapped + trial = test + "_" + enr + return LangTrialCond[trial] + + +class SourceTrialCond(Enum): + CTS_CTS = 1 + CTS_AFV = 2 + AFV_AFV = 3 + + @staticmethod + def get_trial_cond(enr, test): + trial = enr.upper() + "_" + test.upper() + try: + return SourceTrialCond[trial] + except KeyError: + trial = test.upper() + "_" + enr.upper() + return SourceTrialCond[trial] + + +def write_wav(df, target_fs, wav_dir, output_file): + with open(output_file, "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + ext = segment_id.split(".")[-1] + if ext == "flac": + if target_fs == 16000: + wav = f"{wav_dir}/{segment_id}" + else: + wav = f"sox {wav_dir}/{segment_id} -t wav -r {target_fs} - |" + elif ext == "mp4": + wav = f"ffmpeg -v 8 -i {wav_dir}/{segment_id} -vn -ar {target_fs} -ac 1 -f wav - |" + else: + wav = f"sph2pipe -f wav -p -c 1 {wav_dir}/{segment_id} |" + if target_fs != 8000: + wav = f"{wav} sox -t wav - -t wav -r {target_fs} - |" + f.write(f"{segment_id} {wav}\n") + + +def make_enroll_dir(df_segms, wav_dir, target_fs, source, output_path): + # fix source: flac segments are audio from video (afv) + df_segms.loc[df_segms["id"].str.match(r".*\.flac$"), "source_type"] = "afv" + enroll_dir = Path(output_path + f"_enroll_{source}") + wav_dir = wav_dir / "enrollment" + logging.info("making enrollment dir %s", enroll_dir) + enroll_dir.mkdir(parents=True, exist_ok=True) + df_segms = (df_segms[(df_segms["partition"] == "enrollment") + & (df_segms["source_type"] == source) & + (df_segms["language"] != "other")].drop( + ["partition"], axis=1).sort_values(by="id")) + segment_file = enroll_dir / "segments.csv" + df_segms.to_csv(segment_file, sep=",", index=False) + + with open(enroll_dir / "utt2spk", "w") as f1, open(enroll_dir / "spk2utt", + "w") as f2: + for u in df_segms["id"]: + f1.write(f"{u} {u}\n") + f2.write(f"{u} {u}\n") + + with open(enroll_dir / "utt2lang", "w") as f: + for u, s in zip(df_segms["id"], df_segms["language"]): + f.write(f"{u} {s}\n") + + write_wav(df_segms, target_fs, wav_dir, enroll_dir / "wav.scp") + + +def make_test_dir(df_segms, wav_dir, target_fs, source, output_path): + if source == "na": + # fix source: mp4 segments are audio from video (afv) + df_segms.loc[df_segms["id"].str.match(r".*\.mp4$"), + "source_type"] = "afv" + source = "afv" + + test_dir = Path(output_path + f"_test_{source}") + wav_dir = wav_dir / "test" + logging.info("making test dir %s", test_dir) + test_dir.mkdir(parents=True, exist_ok=True) + df_segms = (df_segms[(df_segms["partition"] == "test") + & (df_segms["source_type"] == source) & + (df_segms["language"] != "other")].drop( + ["partition"], axis=1).sort_values(by="id")) + + segment_file = test_dir / "segments.csv" + df_segms.to_csv(segment_file, sep=",", index=False) + + with open(test_dir / "utt2spk", "w") as f1, open(test_dir / "spk2utt", + "w") as f2: + for u in df_segms["id"]: + f1.write(f"{u} {u}\n") + f2.write(f"{u} {u}\n") + + with open(test_dir / "utt2lang", "w") as f: + for u, s in zip(df_segms["id"], df_segms["language"]): + f.write(f"{u} {s}\n") + + with open(test_dir / "spk2gender", "w") as f: + for u, g in zip(df_segms["id"], df_segms["gender"]): + g = g[0] + f.write(f"{u} {g}\n") + + write_wav(df_segms, target_fs, wav_dir, test_dir / "wav.scp") + + +def 
prepare_sre21av_dev_audio(corpus_dir, output_path, av_output_path, + target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_path) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir / "data" / "audio" + segments_file = corpus_dir / "docs" / "sre21_dev_segment_key.tsv" + df_segms = pd.read_csv(segments_file, sep="\t") + df_segms.rename( + columns={ + "segmentid": "id", + "subjectid": "speaker_id" + }, + inplace=True, + ) + df_segms.replace({"language": "english"}, {"language": "eng-zho"}, + inplace=True) + df_segms.replace({"language": "cantonese"}, {"language": "zho-yue"}, + inplace=True) + df_segms.replace({"language": "mandarin"}, {"language": "zho-cmn"}, + inplace=True) + + enroll_file = corpus_dir / "docs" / "sre21_audio_dev_enrollment.tsv" + + make_enroll_dir(df_segms, wav_dir, target_fs, "cts", output_path) + make_enroll_dir(df_segms, wav_dir, target_fs, "afv", output_path) + make_test_dir(df_segms, wav_dir, target_fs, "cts", output_path) + make_test_dir(df_segms, wav_dir, target_fs, "afv", output_path) + + wav_dir = corpus_dir / "data" / "video" + make_test_dir(df_segms, wav_dir, target_fs, "na", av_output_path) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares SRE21 dev audio part") + + parser.add_argument("--corpus-dir", + required=True, + help="Path to the original dataset") + parser.add_argument("--output-path", + required=True, + help="Output data path prefix") + parser.add_argument( + "--av-output-path", + required=True, + help="Output data path prefix for audio visual", + ) + parser.add_argument("--target-fs", + default=16000, + type=int, + help="Target sampling frequency") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + args = parser.parse_args() + prepare_sre21av_dev_audio(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_sre21av_eval_audio.py b/egs/lre22/fixed.v1.8k/local/prepare_sre21av_eval_audio.py new file mode 100755 index 00000000..301eebf7 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_sre21av_eval_audio.py @@ -0,0 +1,243 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict +import logging +from pathlib import Path +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + +from enum import Enum + + +class LangTrialCond(Enum): + ENG_ENG = 1 + ENG_CMN = 2 + ENG_YUE = 3 + CMN_CMN = 4 + CMN_YUE = 5 + YUE_YUE = 6 + OTHER_OTHER = 7 + OTHER_ENG = 8 + OTHER_CMN = 9 + OTHER_YUE = 10 + + @staticmethod + def is_eng(val): + if val in ("ENG", "USE"): + return True + return False + + @staticmethod + def get_side_cond(val): + if val == "ENG" or val == "USE": + return "ENG" + if "YUE" in val: + return "YUE" + if "CMN" in val: + return "CMN" + + return "OTHER" + + @staticmethod + def get_trial_cond(enr, test): + enr = LangTrialCond.get_side_cond(enr) + test = LangTrialCond.get_side_cond(test) + trial = enr + "_" + test + try: + return LangTrialCond[trial] + except KeyError: + trial = test + "_" + enr + return LangTrialCond[trial] + + +class SourceTrialCond(Enum): + CTS_CTS = 1 + CTS_AFV = 2 + AFV_AFV = 3 + + @staticmethod + def get_trial_cond(enr, test): + trial = enr.upper() + "_" + test.upper() + try: + return SourceTrialCond[trial] + except KeyError: + trial = test.upper() + "_" + enr.upper() + return 
SourceTrialCond[trial] + + +def write_wav(df, target_fs, wav_dir, output_file): + with open(output_file, "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + ext = segment_id.split(".")[-1] + if ext == "flac": + if target_fs == 16000: + wav = f"{wav_dir}/{segment_id}" + else: + wav = f"sox {wav_dir}/{segment_id} -t wav -r {target_fs} - |" + elif ext == "mp4": + wav = f"ffmpeg -v 8 -i {wav_dir}/{segment_id} -vn -ar {target_fs} -ac 1 -f wav - |" + else: + wav = f"sph2pipe -f wav -p -c 1 {wav_dir}/{segment_id} |" + if target_fs != 8000: + wav = f"{wav} sox -t wav - -t wav -r {target_fs} - |" + f.write(f"{segment_id} {wav}\n") + + +def make_enroll_dir(df_segms, wav_dir, target_fs, source, output_path): + + # fix source + df_segms.loc[df_segms["id"].str.match(r".*\.flac$"), "source_type"] = "afv" + enroll_dir = Path(output_path + f"_enroll_{source}") + wav_dir = wav_dir / "enrollment" + logging.info("making enrollment dir %s", enroll_dir) + enroll_dir.mkdir(parents=True, exist_ok=True) + df_segms = (df_segms[(df_segms["partition"] == "enrollment") + & (df_segms["source_type"] == source) & + (df_segms["language"] != "other")].drop( + ["partition"], axis=1).sort_values(by="id")) + segment_file = enroll_dir / "segments.csv" + df_segms.to_csv(segment_file, sep=",", index=False) + + with open(enroll_dir / "utt2spk", "w") as f1, open(enroll_dir / "spk2utt", + "w") as f2: + for u in df_segms["id"]: + f1.write(f"{u} {u}\n") + f2.write(f"{u} {u}\n") + + with open(enroll_dir / "utt2lang", "w") as f: + for u, s in zip(df_segms["id"], df_segms["language"]): + f.write(f"{u} {s}\n") + + write_wav(df_segms, target_fs, wav_dir, enroll_dir / "wav.scp") + + +def make_test_dir(df_segms, wav_dir, target_fs, source, output_path): + + if source == "na": + # fix source + df_segms.loc[df_segms["id"].str.match(r".*\.mp4$"), + "source_type"] = "afv" + source = "afv" + + test_dir = Path(output_path + f"_test_{source}") + wav_dir = wav_dir / "test" + logging.info("making test dir %s", test_dir) + test_dir.mkdir(parents=True, exist_ok=True) + df_segms = (df_segms[(df_segms["partition"] == "test") + & (df_segms["source_type"] == source) & + (df_segms["language"] != "other")].drop( + ["partition"], axis=1).sort_values(by="id")) + + segment_file = test_dir / "segments.csv" + df_segms.to_csv(segment_file, sep=",", index=False) + + with open(test_dir / "utt2spk", "w") as f1, open(test_dir / "spk2utt", + "w") as f2: + for u in df_segms["id"]: + f1.write(f"{u} {u}\n") + f2.write(f"{u} {u}\n") + + with open(test_dir / "utt2lang", "w") as f: + for u, s in zip(df_segms["id"], df_segms["language"]): + f.write(f"{u} {s}\n") + + with open(test_dir / "spk2gender", "w") as f: + for u, g in zip(df_segms["id"], df_segms["gender"]): + g = g[0] + f.write(f"{u} {g}\n") + + write_wav(df_segms, target_fs, wav_dir, test_dir / "wav.scp") + + +def prepare_sre21av_eval_audio(corpus_dir, output_path, av_output_path, + target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_path) + corpus_dir = Path(corpus_dir) + wav_dir = corpus_dir / "data" / "audio" + segments_file = corpus_dir / "docs" / "sre21_eval_segment_key.tsv" + df_segms = pd.read_csv(segments_file, sep="\t") + df_segms.rename( + columns={ + "segmentid": "id", + "subjectid": "speaker_id" + }, + inplace=True, + ) + df_segms.replace({"language": "english"}, {"language": "eng-zho"}, + inplace=True) + df_segms.replace({"language": "cantonese"}, {"language": "zho-yue"}, + inplace=True) + df_segms.replace({"language": 
"mandarin"}, {"language": "zho-cmn"}, + inplace=True) + + # enroll_file = corpus_dir / "docs" / "sre21_audio_eval_enrollment.tsv" + # df_enr = pd.read_csv(enroll_file, sep="\t") + # df_enr.rename( + # columns={ + # "segmentid": "id", + # "modelid": "model_id" + # }, + # inplace=True, + # ) + # key_file = corpus_dir / "docs" / "sre21_audio_eval_trial_key.tsv" + # df_key = pd.read_csv(key_file, sep="\t") + # df_key.rename( + # columns={ + # "segmentid": "id", + # "modelid": "model_id" + # }, + # inplace=True, + # ) + + make_enroll_dir(df_segms, wav_dir, target_fs, "cts", output_path) + make_enroll_dir(df_segms, wav_dir, target_fs, "afv", output_path) + make_test_dir(df_segms, wav_dir, target_fs, "cts", output_path) + make_test_dir(df_segms, wav_dir, target_fs, "afv", output_path) + + key_file = corpus_dir / "docs" / "sre21_audio-visual_eval_trial_key.tsv" + # df_key = pd.read_csv(key_file, sep="\t") + # df_key.rename( + # columns={ + # "segmentid": "id", + # "modelid": "model_id" + # }, + # inplace=True, + # ) + wav_dir = corpus_dir / "data" / "video" + make_test_dir(df_segms, wav_dir, target_fs, "na", av_output_path) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares SRE21 eval audio part") + + parser.add_argument("--corpus-dir", + required=True, + help="Path to the original dataset") + parser.add_argument("--output-path", + required=True, + help="Output data path prefix") + parser.add_argument( + "--av-output-path", + required=True, + help="Output data path prefix for audio visual", + ) + parser.add_argument("--target-fs", + default=16000, + type=int, + help="Target sampling frequency") + parser.add_argument("-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int) + args = parser.parse_args() + prepare_sre21av_eval_audio(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_sre_cts_superset.py b/egs/lre22/fixed.v1.8k/local/prepare_sre_cts_superset.py new file mode 100755 index 00000000..af299781 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_sre_cts_superset.py @@ -0,0 +1,185 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict +import logging +from pathlib import Path +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + +multigender_spks = [ + "111774", + "111781", + "112778", + "112783", + "112879", + "113153", + "113213", + "113603", + "128673", + "128770", +] + +lre_map = { + "USE": "eng-usg", + "AMH": "am-am", + "BEN": "bn-bn", + "FAR": "far-far", + "HIN": "hi-hi", + "INE": "eng-ine", + "ITA": "it-it", + "JPN": "ja-ja", + "KAT": "ka-ka", + "KHM": "km-km", + "KOR": "ko-ko", + "LAO": "lo-lo", + "PAN": "pa-pa", + "POL": "qsl-pol", + "RUS": "qsl-rus", + "TAM": "ta-ta", + "TGL": "tl-tl", + "THA": "th-th", + "TIR": "tir-tir", + "URD": "ur-ur", + "UZB": "uz-uz", + "VIE": "vi-vi", + "CMN": "zho-cmn", + "YUE": "zho-yue", + "WUU": "zho-wuu", + "NAN": "zho-nan", +} + + +def fix_multigender_spks(df): + + logging.info("Fixing multigender speakers") + n0 = len(df) + for spk in multigender_spks: + male_idx = (df["speaker_id"] == spk) & (df["gender"] == "male") + female_idx = (df["speaker_id"] == spk) & (df["gender"] == "female") + num_male = np.sum(male_idx) + num_female = np.sum(female_idx) + if num_male > num_female: + df = df[~female_idx] + else: + df = df[~male_idx] + + logging.info("Fixed multigender 
speakers, %d/%d segments remained", len(df), n0) + return df + + +def prepare_sre_cts_superset(corpus_dir, output_dir, target_fs, verbose): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + wav_dir = Path(corpus_dir) / "data" + table_file = Path(corpus_dir) / "docs/cts_superset_segment_key.tsv" + df = pd.read_csv(table_file, sep="\t") + df.drop(["segmentid", "speakerid"], axis=1, inplace=True) + df.rename( + columns={ + "subjectid": "speaker_id", + "sessionid": "session_id", + "corpusid": "corpus_id", + "phoneid": "phone_id", + }, + inplace=True, + ) + df["speaker_id"] = df["speaker_id"].astype("str") + df = fix_multigender_spks(df) + + logging.info("remove generic ENG or mixed langs") + n0 = len(df) + df = df[df["language"] != "ENG"] + df = df[df["language"] != "SPA"] + df = df[df["language"] != "UND"] + df = df[~df["language"].str.contains(r"\.")] + logging.info("%d out of %d segments remained", len(df), n0) + logging.info("renaming languages like LRE") + for k, v in lre_map.items(): + idx = df["language"] == k + df.loc[idx, "language"] = v + + df["id"] = df["filename"].str.replace("/", "-") + # put segment_id as first column + cols = df.columns.tolist() + cols = cols[-1:] + cols[:-1] + df = df[cols] + logging.info("sorting by segment_id") + df.sort_values(by="id", inplace=True) + + logging.info("saving segments.csv") + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + output_file = output_dir / "segments.csv" + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "speaker_id", + "speech_duration", + "session_id", + "corpus_id", + "phone_id", + "language", + ] + files = [ + "utt2spk", + "utt2speech_dur", + "utt2session", + "utt2corpus", + "utt2phone", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + df.to_csv(output_file, sep=" ", columns=["id", c], header=False, index=False) + + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + wav = f"sph2pipe -f wav -p -c 1 {wav_dir}/{filename} |" + if target_fs != 8000: + wav = f"{wav} sox -t wav - -t wav -r {target_fs} - |" + f.write(f"{segment_id} {wav}\n") + + # speaker table + logging.info("saving speaker files") + spk_df = df[["speaker_id", "gender"]].drop_duplicates() + output_file = output_dir / "speaker.csv" + spk_df.to_csv(output_file, sep=",", index=False) + gender = df["gender"].str.replace("female", "f").str.replace("male", "m") + spk_df["gender"] = gender + output_file = output_dir / "spk2gender" + spk_df.to_csv(output_file, sep=" ", header=False, index=False) + + with open(output_dir / "spk2utt", "w") as f: + for spk in df["speaker_id"].unique(): + segment_ids = " ".join(df[df["speaker_id"] == spk].id.values) + f.write(f"{spk} {segment_ids}\n") + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares SRE superset LDC2021E08") + + parser.add_argument( + "--corpus-dir", required=True, help="Path to the original dataset" + ) + parser.add_argument("--output-dir", required=True, help="Output data path") + parser.add_argument( + "--target-fs", default=8000, type=int, help="Target sampling frequency" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + 
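+    # example invocation (hypothetical paths): + #   prepare_sre_cts_superset.py --corpus-dir /export/corpora/LDC2021E08 \ + #     --output-dir data/sre_cts_superset --target-fs 8000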
prepare_sre_cts_superset(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/prepare_voxlingua107.py b/egs/lre22/fixed.v1.8k/local/prepare_voxlingua107.py new file mode 100755 index 00000000..c4dc3894 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/prepare_voxlingua107.py @@ -0,0 +1,130 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict, ActionYesNo +import logging +from pathlib import Path +import glob +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger + + +lre_map = { + "pl": "qsl-pol", + "ru": "qsl-rus", + "cs": "qsl-cze", + "uk": "qsl-ukr", + "hr": "qsl-cro", + "bg": "qsl-bul", + "be": "qsl-bel", + "sk": "qsl-slk", + "sl": "qsl-slv", + "bs": "qsl-bos", + "sr": "qsl-ser", + "zh": "zho-cmn", + "fr": "fra-mix", + "af": "afr-afr", +} + + +def map_to_lre(langs): + return [lre_map[l] if l in lre_map else f"{l}-{l}" for l in langs] + + +def make_kaldi(df, output_dir, target_fs): + # make wav.scp + logging.info("making wav.scp") + with open(output_dir / "wav.scp", "w") as f: + for _, row in df.iterrows(): + segment_id = row["id"] + filename = row["filename"] + if target_fs != 16000: + wav = f"sox {filename} -t wav -r {target_fs} - |" + else: + wav = filename + + f.write(f"{segment_id} {wav}\n") + + # Kaldi data directory files + # utt2xxx files + logging.info("saving Kaldi utt2xxx files") + columns = [ + "id", + "id", + "language", + ] + files = [ + "utt2spk", + "spk2utt", + "utt2lang", + ] + for c, f in zip(columns, files): + output_file = output_dir / f + if c in df: + df.to_csv( + output_file, sep=" ", columns=["id", c], header=False, index=False + ) + + +def prepare_voxlingua107( + corpus_dir, output_dir, remove_langs, map_langs_to_lre_codes, target_fs, verbose +): + config_logger(verbose) + logging.info("Preparing corpus %s -> %s", corpus_dir, output_dir) + corpus_dir = Path(corpus_dir) + files = glob.glob(str(corpus_dir / "*/*.wav")) + langs = [Path(f).parent.stem for f in files] + if map_langs_to_lre_codes: + langs = map_to_lre(langs) + ids = [f"{l}-{Path(f).stem}" for f, l in zip(files, langs)] + df = pd.DataFrame({"id": ids, "language": langs, "filename": files}) + if remove_langs is not None: + for lang in remove_langs: + df = df[df["language"] != lang] + + df["sample_coding"] = "pcm" + df["source"] = "afv" + df["corpus_id"] = "voxlingua107" + df["sample_rate"] = target_fs + + # sort by segment id + df.sort_values(by="id", inplace=True) + + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + output_file = output_dir / "segments.csv" + logging.info("saving %s", output_file) + df.drop(["filename"], axis=1).to_csv(output_file, sep=",", index=False) + + make_kaldi(df, output_dir, target_fs) + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Prepares Voxlingua107 for training") + parser.add_argument( + "--corpus-dir", required=True, help="Path to the original dataset" + ) + parser.add_argument("--output-dir", required=True, help="data path") + parser.add_argument( + "--remove-langs", default=None, nargs="+", help="languages to remove" + ) + parser.add_argument( + "--map-langs-to-lre-codes", + default=False, + action=ActionYesNo, + help="use LRE17 language codes", + ) + + parser.add_argument( + "--target-fs", default=16000, type=int, help="Target sampling frequency" + ) + parser.add_argument( + "-v", "--verbose", 
dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + prepare_voxlingua107(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/score_lre22.sh b/egs/lre22/fixed.v1.8k/local/score_lre22.sh new file mode 100755 index 00000000..e6564da4 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/score_lre22.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +. path.sh + +if [ $# -ne 3 ];then + echo "Usage: $0 " + exit 1 +fi + +dev_eval=$1 +score_file=$(readlink -f $2) +output_file=$(readlink -f $3) +echo $dev_eval $score_file $output_file +output_dir=$(dirname $output_file) +mkdir -p $output_dir + +conda activate $HYP_ENV + +cd ./lre-scorer +echo "Scoring $score_file -> $output_file" +if [ "$dev_eval" == "dev" ];then + config=config.ini +else + config=config_eval.ini +fi + +python ./scoreit.py -s $score_file -o $output_file -e $config + +cd - diff --git a/egs/lre22/fixed.v1.8k/local/split_dev.py b/egs/lre22/fixed.v1.8k/local/split_dev.py new file mode 100755 index 00000000..5988e245 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/split_dev.py @@ -0,0 +1,80 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from pathlib import Path +from jsonargparse import ArgumentParser, namespace_to_dict +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet + + +def split_dev(segs_file, output_dir, num_folds, verbose): + config_logger(verbose) + segs = SegmentSet.load(segs_file) + assert "subclass_idx" in segs + class_ids = segs["class_id"] + _, class_idx = np.unique(class_ids, return_inverse=True) + logging.info("splitting segments into %d folds", num_folds) + folds = [[] for i in range(num_folds)] + for c in range(np.max(class_idx) + 1): + c_idx = class_idx == c + subclass_idx = segs.loc[c_idx, "subclass_idx"] + num_c = len(subclass_idx) + num_c_pf = num_c / num_folds + _, counts = np.unique(subclass_idx, return_counts=True) + acc_counts = np.cumsum(counts) + logging.info( + f"class {c} subclass-counts={counts}, subclass-acc-counts={acc_counts}" + ) + c_idx = np.nonzero(c_idx)[0] + first = 0 + for f in range(num_folds): + if f < num_folds - 1: + last = np.argmin(np.abs(acc_counts - (f + 1) * num_c_pf)) + else: + last = np.max(subclass_idx) + f_idx = np.logical_and(subclass_idx >= first, subclass_idx <= last) + folds[f].extend(c_idx[f_idx]) + logging.info( + ( + f"class {c} fold {f} add {np.sum(f_idx)} samples," + f"accum {len(folds[f])} samples, " + f"first-subclass={first}, last-subclass={last}" + ) + ) + first = last + 1 + + output_dir = Path(output_dir) + for f in range(num_folds): + logging.info( + "fold %d, train-samples=%d test-samples=%d", + f, + len(segs) - len(folds[f]), + len(folds[f]), + ) + f_dir = output_dir / f"fold_{f}" + f_dir.mkdir(parents=True, exist_ok=True) + mask = np.zeros((len(segs),), dtype=bool) + mask[folds[f]] = True + segs_test = SegmentSet(segs.loc[mask]) + segs_test.save(f_dir / "test_segments.csv") + segs_train = SegmentSet(segs.loc[~mask]) + segs_train.save(f_dir / "train_segments.csv") + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Splits LRE22 into folds") + parser.add_argument( + "--segs-file", required=True, help="Segments file with subclass_idx column", + ) + parser.add_argument("--output-dir", required=True, help="output path") + parser.add_argument("--num-folds", default=2, type=int, help="number of folds") + parser.add_argument("-v", 
"--verbose", default=1, choices=[0, 1, 2, 3], type=int) + + args = parser.parse_args() + split_dev(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/split_segments_train_val.py b/egs/lre22/fixed.v1.8k/local/split_segments_train_val.py new file mode 100755 index 00000000..922c868c --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/split_segments_train_val.py @@ -0,0 +1,160 @@ +#!/bin/env python +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from jsonargparse import ArgumentParser, namespace_to_dict +import logging +from pathlib import Path +import re +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger +from hyperion.utils import RecordingSet, FeatureSet, SegmentSet, ClassInfo + + +def split_train_val( + segments_file, + recordings_file, + feats_file, + durations_file, + ara_ary_seg_file, + in_class_name, + out_class_name, + val_percent, + remove_langs, + seed, + output_dir, + verbose, +): + + config_logger(verbose) + rng = np.random.RandomState(seed=seed) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + segs = SegmentSet.load(segments_file) + if durations_file is not None: + durs = SegmentSet.load(durations_file) + if "duration" in durs: + segs["duration"] = durs.loc[segs["id"], "duration"] + else: + segs["duration"] = durs.loc[segs["id"], "class_id"].astype(float) + + if remove_langs is not None: + for lang in remove_langs: + segs = segs[segs[in_class_name] != lang] + + segs = SegmentSet(segs) + + if ara_ary_seg_file is not None: + segs_ary = SegmentSet.load(ara_ary_seg_file) + segs.loc[segs_ary["id"], in_class_name] = segs_ary["class_id"] + n1 = len(segs) + noary_idx = segs[in_class_name] != "ara-ary" + segs = SegmentSet(segs.loc[noary_idx]) + logging.info("removing ara-ary segments remained %d / %d", len(segs), n1) + + logging.info("creating class_info file") + class_ids = segs[in_class_name].drop_duplicates().sort_values() + class_info = ClassInfo(pd.DataFrame({"id": class_ids})) + class_info.save(output_dir / "class_file.csv") + + logging.info("splitting segments into train and val") + segs.set_index(in_class_name) + val_mask = np.zeros((len(segs),), dtype=bool) + for c in class_info["id"]: + seg_idx_c = segs.get_loc(c) + num_val = int(val_percent * len(seg_idx_c) / 100) + val_idx = rng.choice(seg_idx_c, size=num_val, replace=False) + val_mask[val_idx] = True + logging.info( + "class %s total=%d train=%d val=%d", + c, + len(seg_idx_c), + len(seg_idx_c) - num_val, + num_val, + ) + + segs.reset_index() + if out_class_name is not None: + segs.rename(columns={in_class_name: out_class_name}, inplace=True) + + train_segs = SegmentSet(segs.loc[~val_mask]) + train_segs.save(output_dir / "train_segments.csv") + val_segs = SegmentSet(segs.loc[val_mask]) + val_segs.save(output_dir / "val_segments.csv") + + if recordings_file is not None: + logging.info("splitting recordings into train and val") + recs = RecordingSet.load(recordings_file) + train_recs = RecordingSet(recs.loc[train_segs.recording_ids(train_segs["id"])]) + train_recs.save(output_dir / "train_recordings.csv") + val_recs = RecordingSet(recs.loc[val_segs.recording_ids(val_segs["id"])]) + val_recs.save(output_dir / "val_recordings.csv") + + if feats_file is not None: + logging.info("splitting features into train and val") + feats = FeatureSet.load(feats_file) + train_feats = FeatureSet(feats.loc[train_segs["id"]]) + train_feats.save(output_dir / "train_feats.csv") + 
val_feats = FeatureSet(feats.loc[val_segs["id"]]) + val_feats.save(output_dir / "val_feats.csv") + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Split Segment list into training and validation" + ) + parser.add_argument( + "--segments-file", required=True, help="path to segments file", + ) + parser.add_argument( + "--recordings-file", + default=None, + help="if not None, splits recordings file into train and val", + ) + + parser.add_argument( + "--durations-file", + default=None, + help="if not None, add durations to segments file", + ) + + parser.add_argument( + "--feats-file", + default=None, + help="if not None, splits features file into train and val", + ) + parser.add_argument( + "--ara-ary-seg-file", + default=None, + help="segment-file with labels for Maghrebi Arabic", + ) + + parser.add_argument( + "--in-class-name", + default="class_id", + help="column name containing the class_id that we consider to make the partition", + ) + parser.add_argument( + "--out-class-name", + default=None, + help="if not None, we rename the class_id column in the output file", + ) + parser.add_argument( + "--val-percent", default=5.0, type=float, help="percentage of data used for val" + ) + parser.add_argument( + "--remove-langs", default=None, nargs="+", help="remove languages from training" + ) + parser.add_argument("--seed", default=1123, type=int, help="random seed") + + parser.add_argument("--output-dir", required=True, help="output directory") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + split_train_val(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/local/train_calibration_lre22.sh b/egs/lre22/fixed.v1.8k/local/train_calibration_lre22.sh new file mode 100755 index 00000000..227331b3 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/train_calibration_lre22.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +. path.sh + +if [ $# -ne 1 ];then + echo "Usage: $0 <score-dir>" + exit 1 +fi + +score_dir=$1 +nocal_dir=$score_dir/nocal +cal_dir=$score_dir/cal_v1 + +train_list=data/lre22_dev/utt2lang +train_file=$nocal_dir/lre22_dev_scores.tsv +train_cal_file=$cal_dir/lre22_dev_scores.tsv +eval_file=$nocal_dir/lre22_eval_scores.tsv +eval_cal_file=$cal_dir/lre22_eval_scores.tsv +mkdir -p $cal_dir +model_file=$cal_dir/cal.mat + +if [ "$(hostname --domain)" == "cm.gemini" ];then + module load matlab +fi + +echo " +addpath('steps_be'); +addpath(genpath('$PWD/focal_multiclass/v1.0')); +train_fusion('$train_list', {'$train_file'}, '$model_file'); +" | matlab -nodisplay -nosplash > $cal_dir/train.log + +echo " +addpath('./steps_be'); +addpath(genpath('$PWD/focal_multiclass/v1.0')); +eval_fusion({'$train_file'}, '$train_cal_file', '$model_file'); +" | matlab -nodisplay -nosplash > $cal_dir/eval_lre22_dev.log + +if [ -f $eval_file ];then + echo " +addpath('./steps_be'); +addpath(genpath('$PWD/focal_multiclass/v1.0')); +eval_fusion({'$eval_file'}, '$eval_cal_file', '$model_file'); +" | matlab -nodisplay -nosplash > $cal_dir/eval_lre22_eval.log +fi + + diff --git a/egs/lre22/fixed.v1.8k/local/train_fusion_lre22.sh b/egs/lre22/fixed.v1.8k/local/train_fusion_lre22.sh new file mode 100755 index 00000000..add44362 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/train_fusion_lre22.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +. 
path.sh + +if [ $# -ne 2 ];then + echo "Usage: $0 <score-dirs> <output-dir>" + exit 1 +fi + +score_dirs="$1" +output_dir=$2 + +train_list=data/lre22_dev/utt2lang +train_base=lre22_dev_scores.tsv +train_files=$(echo $score_dirs | awk 'BEGIN{OFS=","}{ for(i=1;i<=NF;i++){ $i="'\''"$i"/'$train_base\''" }; print $0}') + +train_fus_file=$output_dir/$train_base +mkdir -p $output_dir +model_file=$output_dir/fus.mat + +if [ "$(hostname --domain)" == "cm.gemini" ];then + module load matlab +fi + +echo " +addpath('steps_be'); +addpath(genpath('$PWD/focal_multiclass/v1.0')); +train_fusion('$train_list', {$train_files}, '$model_file'); +" | matlab -nodisplay -nosplash > $output_dir/train.log + +echo " +addpath('./steps_be'); +addpath(genpath('$PWD/focal_multiclass/v1.0')); +eval_fusion({$train_files}, '$train_fus_file', '$model_file'); +" | matlab -nodisplay -nosplash > $output_dir/eval_lre22_dev.log + diff --git a/egs/lre22/fixed.v1.8k/local/validate_lre22.sh b/egs/lre22/fixed.v1.8k/local/validate_lre22.sh new file mode 100755 index 00000000..fe039a5a --- /dev/null +++ b/egs/lre22/fixed.v1.8k/local/validate_lre22.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +. path.sh + +if [ $# -ne 1 ];then + echo "Usage: $0 <score-file>" + exit 1 +fi + +score_file=$(readlink -f $1) +conda activate $HYP_ENV + +cd ./lre-scorer +echo "Scoring $score_file -> $score_file.val" +python ./scoreit.py -s $score_file -o $score_file.val -v + +cd - diff --git a/egs/lre22/fixed.v1.8k/path.sh b/egs/lre22/fixed.v1.8k/path.sh new file mode 100755 index 00000000..6994fdab --- /dev/null +++ b/egs/lre22/fixed.v1.8k/path.sh @@ -0,0 +1,5 @@ + +export HYP_ROOT=$(readlink -f `pwd -P`/../../..) +export TOOLS_ROOT=$HYP_ROOT/tools + +. $TOOLS_ROOT/path.sh diff --git a/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_0/test_segments.csv b/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_0/test_segments.csv new file mode 100644 index 00000000..6518f24e --- /dev/null +++ b/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_0/test_segments.csv @@ -0,0 +1,2114 @@ +id,class_id,subclass_idx +lre22_dev_aadaq,afr-afr,5 +lre22_dev_aaxdt,xho-xho,14 +lre22_dev_abujj,xho-xho,15 +lre22_dev_acgiu,zul-zul,6 +lre22_dev_acnyv,ven-ven,7 +lre22_dev_adbku,ara-ayl,4 +lre22_dev_ademr,orm-orm,3 +lre22_dev_adgoy,xho-xho,4 +lre22_dev_adnpi,eng-ens,1 +lre22_dev_adqaa,ven-ven,10 +lre22_dev_adwzf,zul-zul,2 +lre22_dev_aeiuj,afr-afr,4 +lre22_dev_afhui,eng-ens,4 +lre22_dev_afuav,nbl-nbl,15 +lre22_dev_afvvg,ven-ven,10 +lre22_dev_afxjf,eng-iaf,10 +lre22_dev_agmwb,ara-aeb,10 +lre22_dev_agnik,eng-ens,3 +lre22_dev_ahcja,orm-orm,14 +lre22_dev_ahobp,afr-afr,13 +lre22_dev_ahupk,eng-ens,11 +lre22_dev_aicjg,xho-xho,12 +lre22_dev_aikrz,eng-ens,9 +lre22_dev_ailwo,orm-orm,7 +lre22_dev_aiqhl,tir-tir,10 +lre22_dev_aiuwf,ara-ayl,5 +lre22_dev_aizyr,ara-arq,0 +lre22_dev_ajbui,zul-zul,12 +lre22_dev_ajigk,ara-aeb,10 +lre22_dev_ajuwq,ara-ayl,3 +lre22_dev_akbly,nbl-nbl,3 +lre22_dev_akhwr,xho-xho,6 +lre22_dev_aksxd,nbl-nbl,6 +lre22_dev_aktcg,afr-afr,1 +lre22_dev_aktzw,eng-ens,11 +lre22_dev_akulq,orm-orm,14 +lre22_dev_alcie,orm-orm,11 +lre22_dev_alunz,xho-xho,6 +lre22_dev_amaec,tir-tir,10 +lre22_dev_amnvo,ara-arq,6 +lre22_dev_amxrk,zul-zul,9 +lre22_dev_anmuv,tso-tso,11 +lre22_dev_aomcz,ara-aeb,7 +lre22_dev_aooht,fra-ntf,11 +lre22_dev_aprbe,ara-arq,3 +lre22_dev_apxxx,orm-orm,12 +lre22_dev_aqdwu,ven-ven,6 +lre22_dev_aqejl,xho-xho,5 +lre22_dev_aqnyy,tso-tso,5 +lre22_dev_arjuc,afr-afr,5 +lre22_dev_arrkp,tir-tir,1 +lre22_dev_atdgp,zul-zul,13 +lre22_dev_atoxn,eng-ens,10 +lre22_dev_audls,afr-afr,6 +lre22_dev_auilj,ven-ven,11 +lre22_dev_auqgt,eng-iaf,3 
+lre22_dev_autlo,zul-zul,7 +lre22_dev_avait,zul-zul,3 +lre22_dev_avvik,nbl-nbl,14 +lre22_dev_awgem,ara-ayl,3 +lre22_dev_awgnb,fra-ntf,14 +lre22_dev_awvym,ara-ayl,9 +lre22_dev_axhbz,tir-tir,12 +lre22_dev_axici,tir-tir,8 +lre22_dev_axtpv,xho-xho,6 +lre22_dev_aygsz,ara-aeb,4 +lre22_dev_ayiif,ven-ven,7 +lre22_dev_azqvo,zul-zul,3 +lre22_dev_basml,eng-ens,11 +lre22_dev_bawje,tir-tir,6 +lre22_dev_bbana,zul-zul,7 +lre22_dev_bbtpz,ven-ven,5 +lre22_dev_bcbrw,eng-iaf,2 +lre22_dev_bchvx,zul-zul,9 +lre22_dev_bcllp,afr-afr,13 +lre22_dev_bcsmi,fra-ntf,6 +lre22_dev_bdqaw,ven-ven,6 +lre22_dev_bdwle,ara-arq,6 +lre22_dev_behbh,ara-ayl,4 +lre22_dev_bexda,ara-arq,6 +lre22_dev_bfbyn,ara-aeb,9 +lre22_dev_bfjgx,ara-ayl,7 +lre22_dev_bgbjo,nbl-nbl,1 +lre22_dev_bgebs,ara-ayl,5 +lre22_dev_bgnod,fra-ntf,3 +lre22_dev_bhezb,ara-ayl,7 +lre22_dev_bhyuy,afr-afr,13 +lre22_dev_bidge,tir-tir,12 +lre22_dev_bimnd,eng-ens,7 +lre22_dev_biyaj,ara-ayl,5 +lre22_dev_bjsgu,afr-afr,10 +lre22_dev_blmfp,eng-iaf,5 +lre22_dev_blohd,ven-ven,4 +lre22_dev_bmebz,ara-arq,4 +lre22_dev_bmjuo,ara-aeb,6 +lre22_dev_bmkrm,fra-ntf,10 +lre22_dev_bmzym,zul-zul,5 +lre22_dev_bnfuu,orm-orm,13 +lre22_dev_bnilb,zul-zul,8 +lre22_dev_bnxna,eng-ens,1 +lre22_dev_boikl,orm-orm,7 +lre22_dev_boisz,ven-ven,2 +lre22_dev_boqxy,zul-zul,13 +lre22_dev_bpqhd,tso-tso,2 +lre22_dev_briiw,ara-aeb,8 +lre22_dev_brohj,fra-ntf,1 +lre22_dev_brqdv,nbl-nbl,3 +lre22_dev_brwcj,afr-afr,6 +lre22_dev_bsclv,orm-orm,8 +lre22_dev_bsdbb,ara-arq,4 +lre22_dev_bstjt,nbl-nbl,10 +lre22_dev_btbke,ara-aeb,0 +lre22_dev_btcfj,ven-ven,12 +lre22_dev_btomw,ven-ven,6 +lre22_dev_btpvy,afr-afr,1 +lre22_dev_btrtb,ara-arq,4 +lre22_dev_btruf,zul-zul,8 +lre22_dev_btsll,ara-ayl,7 +lre22_dev_butrw,ara-ayl,6 +lre22_dev_buwrj,ara-ayl,2 +lre22_dev_bvlhb,fra-ntf,8 +lre22_dev_bvmql,xho-xho,10 +lre22_dev_bvnsc,tir-tir,10 +lre22_dev_bwrej,ven-ven,9 +lre22_dev_bxial,eng-ens,2 +lre22_dev_bxnbf,fra-ntf,9 +lre22_dev_bybim,afr-afr,6 +lre22_dev_byegp,orm-orm,15 +lre22_dev_byngq,ven-ven,9 +lre22_dev_byytf,fra-ntf,6 +lre22_dev_bzies,tso-tso,3 +lre22_dev_bzipd,afr-afr,7 +lre22_dev_cacop,nbl-nbl,5 +lre22_dev_caent,afr-afr,12 +lre22_dev_capsb,ven-ven,0 +lre22_dev_cawbw,orm-orm,12 +lre22_dev_cblep,ven-ven,3 +lre22_dev_cblig,fra-ntf,6 +lre22_dev_ccexy,ven-ven,7 +lre22_dev_ccsye,ara-aeb,8 +lre22_dev_cctyt,eng-iaf,11 +lre22_dev_ccuie,eng-ens,7 +lre22_dev_ccvzf,eng-iaf,1 +lre22_dev_cdlkq,tso-tso,8 +lre22_dev_cdtiu,ara-ayl,9 +lre22_dev_cemyb,tir-tir,12 +lre22_dev_ceprg,eng-iaf,9 +lre22_dev_ceqow,nbl-nbl,15 +lre22_dev_cfdsu,fra-ntf,7 +lre22_dev_cfhbm,ven-ven,3 +lre22_dev_cfsew,afr-afr,12 +lre22_dev_cgges,eng-iaf,11 +lre22_dev_cgjnr,eng-iaf,10 +lre22_dev_cgotg,eng-ens,11 +lre22_dev_cgovb,nbl-nbl,15 +lre22_dev_cgssg,tir-tir,7 +lre22_dev_chhsl,tir-tir,7 +lre22_dev_chjuh,nbl-nbl,9 +lre22_dev_chpoe,nbl-nbl,11 +lre22_dev_chtgu,ara-aeb,10 +lre22_dev_chtlt,eng-iaf,10 +lre22_dev_cigir,eng-ens,9 +lre22_dev_ciyeh,ara-ayl,2 +lre22_dev_cjswm,orm-orm,12 +lre22_dev_cjtdl,ven-ven,13 +lre22_dev_ckzie,ara-aeb,10 +lre22_dev_cldfc,ara-ayl,8 +lre22_dev_clxqz,ara-arq,9 +lre22_dev_cmahj,afr-afr,13 +lre22_dev_cmqxm,tir-tir,6 +lre22_dev_cmrdt,afr-afr,5 +lre22_dev_cmvpq,ara-ayl,2 +lre22_dev_cnbfw,eng-iaf,5 +lre22_dev_cnbvd,afr-afr,9 +lre22_dev_cnomp,orm-orm,15 +lre22_dev_cnrvj,xho-xho,11 +lre22_dev_cnszu,ara-ayl,4 +lre22_dev_cnudd,xho-xho,14 +lre22_dev_cnuoi,orm-orm,14 +lre22_dev_cnxjs,orm-orm,8 +lre22_dev_coarm,xho-xho,4 +lre22_dev_cocyn,zul-zul,6 +lre22_dev_colxc,zul-zul,13 +lre22_dev_cosfn,ara-aeb,10 +lre22_dev_cosgu,ara-ayl,7 +lre22_dev_cpjab,ara-aeb,10 
+lre22_dev_cpple,tso-tso,6 +lre22_dev_cqhjy,ara-ayl,3 +lre22_dev_cqkmy,ara-aeb,10 +lre22_dev_cqukb,tso-tso,9 +lre22_dev_cqusc,orm-orm,6 +lre22_dev_cqyzf,fra-ntf,13 +lre22_dev_crcwu,xho-xho,12 +lre22_dev_crqjz,nbl-nbl,10 +lre22_dev_crtpm,ara-arq,5 +lre22_dev_crucu,tir-tir,6 +lre22_dev_crvby,eng-iaf,12 +lre22_dev_crvoh,eng-ens,7 +lre22_dev_csjxv,ara-arq,3 +lre22_dev_ctfiv,ara-aeb,5 +lre22_dev_ctgpr,ven-ven,12 +lre22_dev_ctlrz,tir-tir,8 +lre22_dev_ctzhm,zul-zul,6 +lre22_dev_cudew,ven-ven,8 +lre22_dev_cusin,ara-arq,10 +lre22_dev_cvaad,eng-iaf,5 +lre22_dev_cvedm,zul-zul,12 +lre22_dev_cvgfx,eng-iaf,8 +lre22_dev_cvujh,ara-ayl,2 +lre22_dev_cweil,ara-aeb,10 +lre22_dev_cweuh,eng-ens,7 +lre22_dev_cwiro,afr-afr,6 +lre22_dev_cwtby,ara-arq,7 +lre22_dev_cxggy,afr-afr,4 +lre22_dev_cxnqr,tso-tso,7 +lre22_dev_cxpan,nbl-nbl,14 +lre22_dev_cxsxl,ara-aeb,10 +lre22_dev_cxyti,tso-tso,8 +lre22_dev_cypcg,zul-zul,12 +lre22_dev_czcmz,zul-zul,10 +lre22_dev_czdzw,orm-orm,7 +lre22_dev_czppj,zul-zul,10 +lre22_dev_czxff,zul-zul,9 +lre22_dev_czxld,fra-ntf,9 +lre22_dev_dajnt,zul-zul,12 +lre22_dev_dbcxi,orm-orm,9 +lre22_dev_dbdbv,tso-tso,8 +lre22_dev_dbdwv,orm-orm,5 +lre22_dev_dbgof,nbl-nbl,15 +lre22_dev_dblhh,eng-iaf,0 +lre22_dev_dbljb,xho-xho,12 +lre22_dev_dcibg,eng-iaf,4 +lre22_dev_dcobk,ara-arq,8 +lre22_dev_dcvcu,afr-afr,4 +lre22_dev_dcvyc,fra-ntf,14 +lre22_dev_ddfeo,ara-ayl,5 +lre22_dev_ddhaq,zul-zul,10 +lre22_dev_ddhes,afr-afr,6 +lre22_dev_ddsds,afr-afr,12 +lre22_dev_ddxvn,ven-ven,5 +lre22_dev_dfdrs,ven-ven,7 +lre22_dev_dfifl,ara-ayl,9 +lre22_dev_dfjek,ven-ven,4 +lre22_dev_dflco,zul-zul,12 +lre22_dev_dftta,tso-tso,6 +lre22_dev_dfxnq,eng-ens,11 +lre22_dev_dgjdi,orm-orm,8 +lre22_dev_dgqwo,tir-tir,9 +lre22_dev_dhapq,ara-aeb,8 +lre22_dev_dhdfk,eng-ens,8 +lre22_dev_dhfjj,ara-arq,4 +lre22_dev_dhlxh,ara-aeb,4 +lre22_dev_dhnne,eng-ens,10 +lre22_dev_dhtlz,eng-ens,6 +lre22_dev_diarz,ara-ayl,2 +lre22_dev_diggg,tir-tir,9 +lre22_dev_diqtw,ara-aeb,8 +lre22_dev_dixuw,orm-orm,9 +lre22_dev_diypf,orm-orm,13 +lre22_dev_djzsk,nbl-nbl,13 +lre22_dev_dksey,nbl-nbl,11 +lre22_dev_dlzwh,fra-ntf,12 +lre22_dev_dmdpv,eng-ens,2 +lre22_dev_dmeea,orm-orm,14 +lre22_dev_dmhdv,xho-xho,10 +lre22_dev_dmics,fra-ntf,14 +lre22_dev_dmiiu,ara-aeb,6 +lre22_dev_dmjxr,xho-xho,10 +lre22_dev_dmzxn,afr-afr,4 +lre22_dev_dngtw,ara-ayl,3 +lre22_dev_dnjdq,eng-ens,7 +lre22_dev_dnprz,zul-zul,12 +lre22_dev_dobdj,fra-ntf,0 +lre22_dev_dobwk,orm-orm,8 +lre22_dev_donqm,ara-arq,3 +lre22_dev_dpbyt,tso-tso,6 +lre22_dev_dpfns,ara-aeb,4 +lre22_dev_dpjjp,fra-ntf,7 +lre22_dev_dpomx,eng-iaf,5 +lre22_dev_dpwhs,eng-ens,8 +lre22_dev_dpygj,eng-iaf,8 +lre22_dev_dqzex,xho-xho,3 +lre22_dev_drcqx,eng-iaf,7 +lre22_dev_drfhb,ara-aeb,10 +lre22_dev_drfte,ara-arq,8 +lre22_dev_driks,eng-ens,11 +lre22_dev_drofs,fra-ntf,1 +lre22_dev_dslxl,ara-ayl,7 +lre22_dev_dsmwd,ven-ven,13 +lre22_dev_dsyyk,tir-tir,9 +lre22_dev_dthcb,zul-zul,12 +lre22_dev_dtumd,fra-ntf,5 +lre22_dev_dtwmj,afr-afr,7 +lre22_dev_duegm,tso-tso,9 +lre22_dev_dvirs,afr-afr,6 +lre22_dev_dvtzf,eng-iaf,7 +lre22_dev_dwcfi,ven-ven,7 +lre22_dev_dwfle,fra-ntf,7 +lre22_dev_dwgsv,tir-tir,6 +lre22_dev_dwlay,ara-arq,3 +lre22_dev_dwnit,xho-xho,15 +lre22_dev_dwvoh,tso-tso,6 +lre22_dev_dxgpq,afr-afr,12 +lre22_dev_dxhpf,ara-ayl,9 +lre22_dev_dxlhq,ara-arq,5 +lre22_dev_dxrcj,zul-zul,5 +lre22_dev_dywox,tir-tir,9 +lre22_dev_dzjrv,eng-iaf,8 +lre22_dev_dzsql,tso-tso,6 +lre22_dev_dzxkv,orm-orm,13 +lre22_dev_eabne,xho-xho,2 +lre22_dev_eacdl,fra-ntf,14 +lre22_dev_eaupg,eng-iaf,11 +lre22_dev_eawug,eng-iaf,6 +lre22_dev_ebbgx,nbl-nbl,15 +lre22_dev_ecber,afr-afr,10 
+lre22_dev_ecdgv,ara-arq,5 +lre22_dev_ecneb,afr-afr,6 +lre22_dev_ecxrr,tir-tir,9 +lre22_dev_edldw,tir-tir,10 +lre22_dev_edofc,afr-afr,6 +lre22_dev_edvaf,xho-xho,13 +lre22_dev_edydw,eng-ens,5 +lre22_dev_eejtn,zul-zul,4 +lre22_dev_eekzc,fra-ntf,4 +lre22_dev_eenhx,eng-iaf,9 +lre22_dev_efcgi,fra-ntf,0 +lre22_dev_efdoz,ven-ven,8 +lre22_dev_efioy,tso-tso,9 +lre22_dev_efiwx,eng-ens,9 +lre22_dev_efrlw,ven-ven,2 +lre22_dev_eghmh,eng-ens,11 +lre22_dev_ehhyu,nbl-nbl,10 +lre22_dev_eiomi,ven-ven,12 +lre22_dev_eisiy,orm-orm,8 +lre22_dev_ejaiq,ara-aeb,1 +lre22_dev_ejkmr,eng-iaf,5 +lre22_dev_ejthv,ven-ven,12 +lre22_dev_ejtyd,fra-ntf,14 +lre22_dev_ekfzq,ara-ayl,6 +lre22_dev_ekgjp,zul-zul,3 +lre22_dev_ekixu,nbl-nbl,2 +lre22_dev_ekjxx,ara-arq,6 +lre22_dev_ekvxc,eng-iaf,4 +lre22_dev_eldrg,orm-orm,11 +lre22_dev_elitc,ara-arq,3 +lre22_dev_emdtf,xho-xho,0 +lre22_dev_emhqx,tir-tir,4 +lre22_dev_emxnm,afr-afr,4 +lre22_dev_emzaa,xho-xho,3 +lre22_dev_engqe,xho-xho,15 +lre22_dev_ennjl,tso-tso,10 +lre22_dev_eokyg,nbl-nbl,2 +lre22_dev_epkwr,tir-tir,5 +lre22_dev_epojj,tir-tir,7 +lre22_dev_epsdk,nbl-nbl,12 +lre22_dev_epsfl,xho-xho,14 +lre22_dev_epuno,eng-ens,1 +lre22_dev_epylu,eng-iaf,10 +lre22_dev_ereen,ara-arq,10 +lre22_dev_eriaf,eng-ens,4 +lre22_dev_ermqx,ara-arq,2 +lre22_dev_escob,fra-ntf,9 +lre22_dev_esjsk,ara-ayl,7 +lre22_dev_esqti,xho-xho,9 +lre22_dev_etaln,zul-zul,12 +lre22_dev_etarn,nbl-nbl,6 +lre22_dev_etndu,ven-ven,13 +lre22_dev_etpdc,afr-afr,3 +lre22_dev_etsam,zul-zul,7 +lre22_dev_etwge,eng-ens,6 +lre22_dev_etxyc,orm-orm,12 +lre22_dev_eumsq,zul-zul,10 +lre22_dev_eusfl,orm-orm,8 +lre22_dev_eutkk,tso-tso,0 +lre22_dev_euxuy,orm-orm,13 +lre22_dev_evaon,ara-aeb,4 +lre22_dev_evkaz,eng-iaf,8 +lre22_dev_evret,fra-ntf,8 +lre22_dev_evvep,tso-tso,9 +lre22_dev_evvvd,tir-tir,10 +lre22_dev_ewems,ven-ven,7 +lre22_dev_ewijw,orm-orm,11 +lre22_dev_ewqpv,eng-iaf,6 +lre22_dev_ewywf,nbl-nbl,10 +lre22_dev_exaia,afr-afr,3 +lre22_dev_exbum,afr-afr,4 +lre22_dev_exhhd,ara-aeb,5 +lre22_dev_exkkf,afr-afr,3 +lre22_dev_extrh,zul-zul,6 +lre22_dev_exzyo,xho-xho,15 +lre22_dev_eyrzt,ara-ayl,1 +lre22_dev_eysdu,zul-zul,4 +lre22_dev_eyshz,xho-xho,1 +lre22_dev_eyuyq,ara-ayl,7 +lre22_dev_ezsyu,ven-ven,3 +lre22_dev_faahr,afr-afr,9 +lre22_dev_fabli,ven-ven,6 +lre22_dev_fatah,zul-zul,12 +lre22_dev_fccpw,orm-orm,12 +lre22_dev_fcpbu,xho-xho,8 +lre22_dev_fcqbx,tso-tso,3 +lre22_dev_fcwnw,fra-ntf,8 +lre22_dev_fdgia,orm-orm,10 +lre22_dev_febnk,eng-ens,5 +lre22_dev_fedau,eng-iaf,5 +lre22_dev_fehxn,xho-xho,8 +lre22_dev_fejsd,ven-ven,8 +lre22_dev_feqjc,eng-iaf,12 +lre22_dev_fesss,nbl-nbl,15 +lre22_dev_feuww,fra-ntf,8 +lre22_dev_fevex,zul-zul,2 +lre22_dev_ffban,ara-arq,6 +lre22_dev_ffefw,orm-orm,13 +lre22_dev_ffsps,fra-ntf,8 +lre22_dev_ffwid,tso-tso,11 +lre22_dev_fgbtr,nbl-nbl,15 +lre22_dev_fgmbr,ara-arq,6 +lre22_dev_fgmxd,eng-ens,9 +lre22_dev_fgnfs,tir-tir,12 +lre22_dev_fgrze,eng-ens,11 +lre22_dev_fhlhy,ara-aeb,7 +lre22_dev_fihvr,eng-iaf,7 +lre22_dev_fiizm,xho-xho,14 +lre22_dev_fiksd,fra-ntf,12 +lre22_dev_fitjt,tso-tso,6 +lre22_dev_fiuun,eng-ens,7 +lre22_dev_fjdul,ara-ayl,3 +lre22_dev_fjgrh,ven-ven,8 +lre22_dev_fkaqj,nbl-nbl,13 +lre22_dev_flfgv,ara-aeb,9 +lre22_dev_flirl,fra-ntf,13 +lre22_dev_fljab,fra-ntf,14 +lre22_dev_flnzm,tir-tir,11 +lre22_dev_flsmp,orm-orm,15 +lre22_dev_fmjvq,ven-ven,2 +lre22_dev_fmmxd,afr-afr,4 +lre22_dev_fnglh,afr-afr,13 +lre22_dev_fnsax,xho-xho,6 +lre22_dev_fojyn,eng-ens,5 +lre22_dev_foqgk,ven-ven,2 +lre22_dev_fovba,ara-arq,4 +lre22_dev_fozyj,ara-arq,2 +lre22_dev_fpavw,ara-aeb,8 +lre22_dev_fptba,eng-ens,3 +lre22_dev_fqdfc,tso-tso,11 
+lre22_dev_fqdhm,eng-iaf,8 +lre22_dev_fqfet,nbl-nbl,7 +lre22_dev_fqgty,fra-ntf,4 +lre22_dev_fqgyd,zul-zul,10 +lre22_dev_fqvup,tso-tso,2 +lre22_dev_frviu,ara-aeb,10 +lre22_dev_frwfk,nbl-nbl,9 +lre22_dev_fsygm,eng-iaf,5 +lre22_dev_ftfjv,orm-orm,11 +lre22_dev_ftjvg,afr-afr,12 +lre22_dev_ftmnu,ara-aeb,10 +lre22_dev_ftrcl,eng-ens,3 +lre22_dev_ftygz,eng-ens,8 +lre22_dev_fughv,eng-iaf,3 +lre22_dev_fuhuk,ara-ayl,5 +lre22_dev_fusyr,ven-ven,13 +lre22_dev_futhm,zul-zul,5 +lre22_dev_fvbzh,ara-ayl,7 +lre22_dev_fvecf,ven-ven,9 +lre22_dev_fvktn,fra-ntf,8 +lre22_dev_fvpts,orm-orm,6 +lre22_dev_fvsmm,eng-iaf,12 +lre22_dev_fvvgc,ara-arq,5 +lre22_dev_fwvzh,zul-zul,2 +lre22_dev_fwwsy,xho-xho,5 +lre22_dev_fxggn,fra-ntf,1 +lre22_dev_fxqfi,orm-orm,10 +lre22_dev_fxuqw,ara-ayl,3 +lre22_dev_fxwfc,eng-iaf,12 +lre22_dev_fymdc,tso-tso,4 +lre22_dev_fywir,tso-tso,10 +lre22_dev_fzjzu,xho-xho,14 +lre22_dev_fzpeh,ara-aeb,10 +lre22_dev_fztdi,tir-tir,9 +lre22_dev_gcced,ven-ven,6 +lre22_dev_gchqj,zul-zul,10 +lre22_dev_gctmk,xho-xho,12 +lre22_dev_gcupw,ven-ven,7 +lre22_dev_gdfdn,tir-tir,5 +lre22_dev_gdlpg,tir-tir,3 +lre22_dev_gdrwq,fra-ntf,14 +lre22_dev_gdvjh,afr-afr,5 +lre22_dev_gdvtc,eng-iaf,13 +lre22_dev_gdxck,orm-orm,4 +lre22_dev_gecgq,afr-afr,12 +lre22_dev_gevbs,nbl-nbl,13 +lre22_dev_gfqxw,tir-tir,11 +lre22_dev_gfujh,eng-ens,8 +lre22_dev_gfwqx,fra-ntf,10 +lre22_dev_ggchj,tir-tir,10 +lre22_dev_ggeie,ara-arq,8 +lre22_dev_ggqob,ara-aeb,9 +lre22_dev_ghllb,eng-ens,8 +lre22_dev_ghlqh,afr-afr,12 +lre22_dev_ghmuk,afr-afr,13 +lre22_dev_ghskg,tso-tso,4 +lre22_dev_ghwmw,ara-arq,2 +lre22_dev_giijn,ven-ven,6 +lre22_dev_gised,xho-xho,9 +lre22_dev_gisrt,tir-tir,9 +lre22_dev_gjptx,nbl-nbl,4 +lre22_dev_gjvkc,ara-arq,7 +lre22_dev_gjxkc,eng-iaf,13 +lre22_dev_gkywh,ara-aeb,7 +lre22_dev_glhtl,eng-iaf,3 +lre22_dev_glulw,ara-aeb,8 +lre22_dev_gmpja,nbl-nbl,3 +lre22_dev_gmpjm,nbl-nbl,12 +lre22_dev_gnkvz,eng-iaf,13 +lre22_dev_gnmcz,nbl-nbl,4 +lre22_dev_goggr,afr-afr,5 +lre22_dev_goqov,ara-aeb,8 +lre22_dev_gpzgq,tso-tso,9 +lre22_dev_gpzuz,fra-ntf,5 +lre22_dev_gqpul,ara-arq,10 +lre22_dev_gratu,tir-tir,7 +lre22_dev_grewx,afr-afr,9 +lre22_dev_grizt,eng-ens,2 +lre22_dev_grsam,afr-afr,11 +lre22_dev_grsyr,zul-zul,1 +lre22_dev_grxus,nbl-nbl,15 +lre22_dev_gsanj,ven-ven,13 +lre22_dev_gsbwz,nbl-nbl,9 +lre22_dev_gtwjj,tso-tso,4 +lre22_dev_gtxwq,orm-orm,12 +lre22_dev_gubts,ara-ayl,0 +lre22_dev_gvawh,xho-xho,11 +lre22_dev_gvfsb,ara-aeb,10 +lre22_dev_gvhgg,afr-afr,9 +lre22_dev_gvnaj,fra-ntf,8 +lre22_dev_gvysc,ara-aeb,10 +lre22_dev_gwfkz,xho-xho,2 +lre22_dev_gwnqp,xho-xho,7 +lre22_dev_gwumi,tso-tso,3 +lre22_dev_gwvcw,xho-xho,11 +lre22_dev_gwwxz,eng-iaf,1 +lre22_dev_gwzrc,eng-ens,11 +lre22_dev_gxtlx,fra-ntf,13 +lre22_dev_gxygl,tso-tso,9 +lre22_dev_gycld,orm-orm,4 +lre22_dev_gzakl,nbl-nbl,15 +lre22_dev_gzrgo,ara-arq,9 +lre22_dev_hbkul,orm-orm,6 +lre22_dev_hbodn,eng-ens,10 +lre22_dev_hbwgy,ara-arq,6 +lre22_dev_hbwyc,nbl-nbl,5 +lre22_dev_hczek,fra-ntf,7 +lre22_dev_hdpsb,nbl-nbl,6 +lre22_dev_hdvsb,ara-aeb,8 +lre22_dev_hetsy,xho-xho,10 +lre22_dev_hfgrm,ven-ven,12 +lre22_dev_hfurz,afr-afr,13 +lre22_dev_hfwyw,nbl-nbl,11 +lre22_dev_hgdqx,tso-tso,3 +lre22_dev_hgwdk,eng-ens,8 +lre22_dev_hgxqf,eng-iaf,8 +lre22_dev_hgyuk,ven-ven,11 +lre22_dev_hhetm,fra-ntf,14 +lre22_dev_hhjki,ara-arq,8 +lre22_dev_hhvtc,ara-arq,10 +lre22_dev_hhxqv,tso-tso,5 +lre22_dev_hiisb,nbl-nbl,15 +lre22_dev_hioxp,tso-tso,3 +lre22_dev_hjqaf,ara-aeb,9 +lre22_dev_hjqid,orm-orm,6 +lre22_dev_hjzwc,eng-iaf,3 +lre22_dev_hkdzu,ara-arq,9 +lre22_dev_hlatl,eng-iaf,12 +lre22_dev_hlywv,nbl-nbl,2 
+lre22_dev_hlzxa,ven-ven,7 +lre22_dev_hmvzg,ara-ayl,3 +lre22_dev_hnjgb,eng-ens,9 +lre22_dev_hntdv,eng-ens,11 +lre22_dev_hoish,tir-tir,2 +lre22_dev_hokbg,ara-ayl,6 +lre22_dev_hondp,eng-iaf,8 +lre22_dev_hpbve,tir-tir,11 +lre22_dev_hpdvc,fra-ntf,8 +lre22_dev_hpgst,orm-orm,5 +lre22_dev_hqbjb,xho-xho,5 +lre22_dev_hqdev,tso-tso,2 +lre22_dev_hqidg,tir-tir,1 +lre22_dev_hqids,afr-afr,9 +lre22_dev_hqltr,tir-tir,4 +lre22_dev_hqqhq,eng-ens,11 +lre22_dev_hrmcg,zul-zul,13 +lre22_dev_hrrcp,afr-afr,8 +lre22_dev_hstgi,xho-xho,9 +lre22_dev_hsvpq,ara-ayl,9 +lre22_dev_hswsy,ara-aeb,4 +lre22_dev_htcgm,eng-iaf,6 +lre22_dev_htedo,xho-xho,13 +lre22_dev_hthkx,eng-iaf,7 +lre22_dev_htohd,afr-afr,6 +lre22_dev_htxik,fra-ntf,0 +lre22_dev_huqbr,xho-xho,10 +lre22_dev_hvdom,afr-afr,8 +lre22_dev_hvkoa,afr-afr,13 +lre22_dev_hvnkg,tir-tir,9 +lre22_dev_hvocp,nbl-nbl,12 +lre22_dev_hvqzj,zul-zul,12 +lre22_dev_hvwph,afr-afr,3 +lre22_dev_hwaqg,zul-zul,8 +lre22_dev_hwgvu,ara-aeb,6 +lre22_dev_hwhlz,ven-ven,11 +lre22_dev_hwkes,fra-ntf,12 +lre22_dev_hwvna,eng-ens,2 +lre22_dev_hxfim,eng-iaf,12 +lre22_dev_hxmdw,afr-afr,10 +lre22_dev_hxrnp,zul-zul,6 +lre22_dev_hxvie,tir-tir,9 +lre22_dev_hxvju,zul-zul,3 +lre22_dev_hxzxm,zul-zul,6 +lre22_dev_hybef,nbl-nbl,14 +lre22_dev_hyfok,eng-ens,2 +lre22_dev_hyscv,ara-arq,4 +lre22_dev_hyzod,eng-iaf,6 +lre22_dev_hzdpb,tso-tso,7 +lre22_dev_hzjwn,ara-aeb,5 +lre22_dev_hzljv,tir-tir,8 +lre22_dev_hzomy,tso-tso,9 +lre22_dev_iaaar,tso-tso,9 +lre22_dev_iaimu,afr-afr,13 +lre22_dev_iakmg,orm-orm,15 +lre22_dev_iarxv,ara-aeb,9 +lre22_dev_iaywv,ara-ayl,6 +lre22_dev_ibcne,eng-ens,11 +lre22_dev_ibeth,zul-zul,2 +lre22_dev_ibwbi,tir-tir,9 +lre22_dev_ibyqr,tso-tso,7 +lre22_dev_iccwp,eng-iaf,6 +lre22_dev_ichmi,afr-afr,12 +lre22_dev_idjrt,zul-zul,8 +lre22_dev_iegng,afr-afr,8 +lre22_dev_iezrr,ara-ayl,7 +lre22_dev_ifaib,ara-ayl,5 +lre22_dev_ifhil,tso-tso,9 +lre22_dev_ifptd,ven-ven,12 +lre22_dev_ifriu,ara-aeb,6 +lre22_dev_ignvp,zul-zul,13 +lre22_dev_igxzy,eng-iaf,12 +lre22_dev_ihdva,fra-ntf,10 +lre22_dev_iiydv,eng-iaf,5 +lre22_dev_ijoyg,ara-ayl,9 +lre22_dev_ikghg,eng-iaf,7 +lre22_dev_ikijv,ven-ven,2 +lre22_dev_ilawb,ara-aeb,8 +lre22_dev_ilgnm,orm-orm,6 +lre22_dev_ilqhp,orm-orm,13 +lre22_dev_imrsx,tso-tso,8 +lre22_dev_inrfz,ara-arq,1 +lre22_dev_inrlw,eng-ens,1 +lre22_dev_inttm,tso-tso,8 +lre22_dev_iorip,ven-ven,13 +lre22_dev_ioryq,ara-aeb,8 +lre22_dev_iosse,afr-afr,1 +lre22_dev_ipahz,tir-tir,12 +lre22_dev_ipaup,tir-tir,10 +lre22_dev_ipllz,tir-tir,12 +lre22_dev_iprih,ara-aeb,4 +lre22_dev_iqkpj,tir-tir,6 +lre22_dev_iqowb,ara-aeb,0 +lre22_dev_iqzfp,orm-orm,15 +lre22_dev_irhue,tso-tso,8 +lre22_dev_irkvo,orm-orm,15 +lre22_dev_irnie,ara-aeb,8 +lre22_dev_irnxg,zul-zul,9 +lre22_dev_irsgt,ven-ven,2 +lre22_dev_isavf,nbl-nbl,0 +lre22_dev_isfpd,nbl-nbl,11 +lre22_dev_iskfd,ara-arq,4 +lre22_dev_isndz,ara-arq,6 +lre22_dev_istwz,nbl-nbl,15 +lre22_dev_isxpy,orm-orm,5 +lre22_dev_iszkk,tir-tir,9 +lre22_dev_itdot,ara-ayl,9 +lre22_dev_itfgh,eng-iaf,9 +lre22_dev_itlqd,tir-tir,12 +lre22_dev_itmbo,ara-aeb,10 +lre22_dev_itznp,ara-aeb,3 +lre22_dev_iucwv,zul-zul,5 +lre22_dev_iuowb,ara-aeb,8 +lre22_dev_iupes,zul-zul,4 +lre22_dev_iurgk,fra-ntf,4 +lre22_dev_ivcpr,nbl-nbl,12 +lre22_dev_ivrwa,ven-ven,3 +lre22_dev_ivvlb,afr-afr,11 +lre22_dev_ivwhm,tir-tir,6 +lre22_dev_iwoya,ara-aeb,4 +lre22_dev_iwpvu,orm-orm,5 +lre22_dev_ixpuq,ara-ayl,5 +lre22_dev_ixpyb,tso-tso,11 +lre22_dev_iyfiz,eng-iaf,5 +lre22_dev_iylyu,xho-xho,12 +lre22_dev_iyuli,zul-zul,13 +lre22_dev_iyupt,orm-orm,5 +lre22_dev_iyxjf,zul-zul,12 +lre22_dev_iyzgz,tso-tso,10 +lre22_dev_izepb,ara-arq,4 
+lre22_dev_izkix,ven-ven,10 +lre22_dev_izknz,ven-ven,12 +lre22_dev_jadfl,ara-arq,9 +lre22_dev_jafja,zul-zul,9 +lre22_dev_jamvn,ven-ven,1 +lre22_dev_jbach,eng-iaf,2 +lre22_dev_jbqcq,ara-aeb,6 +lre22_dev_jcxgo,afr-afr,6 +lre22_dev_jddrh,fra-ntf,13 +lre22_dev_jdjpg,tir-tir,12 +lre22_dev_jdtrb,eng-iaf,11 +lre22_dev_jdwjj,zul-zul,7 +lre22_dev_jdzqw,tir-tir,3 +lre22_dev_jeaev,nbl-nbl,8 +lre22_dev_jeobs,ara-aeb,9 +lre22_dev_jesxq,eng-ens,10 +lre22_dev_jgcla,ara-arq,2 +lre22_dev_jggxv,fra-ntf,3 +lre22_dev_jgntz,orm-orm,5 +lre22_dev_jhcao,ven-ven,7 +lre22_dev_jhgik,eng-ens,11 +lre22_dev_jhpkj,ara-arq,4 +lre22_dev_jhuof,orm-orm,15 +lre22_dev_jignq,ara-ayl,9 +lre22_dev_jjffc,ven-ven,13 +lre22_dev_jjkfe,eng-ens,9 +lre22_dev_jjqxi,ara-aeb,8 +lre22_dev_jjrgq,eng-iaf,4 +lre22_dev_jkacy,tso-tso,3 +lre22_dev_jkmin,orm-orm,15 +lre22_dev_jkobe,xho-xho,7 +lre22_dev_jkosd,zul-zul,10 +lre22_dev_jkovc,tso-tso,3 +lre22_dev_jktcq,zul-zul,7 +lre22_dev_jlodp,eng-ens,9 +lre22_dev_jmbjo,nbl-nbl,9 +lre22_dev_jmccw,ara-arq,3 +lre22_dev_jminj,fra-ntf,5 +lre22_dev_jmmyw,afr-afr,3 +lre22_dev_jobae,fra-ntf,13 +lre22_dev_jobsv,nbl-nbl,14 +lre22_dev_jobxi,ara-arq,5 +lre22_dev_joghi,ara-arq,6 +lre22_dev_johkj,xho-xho,7 +lre22_dev_jolqw,ara-ayl,5 +lre22_dev_jplye,fra-ntf,11 +lre22_dev_jpsmt,ara-arq,9 +lre22_dev_jqdnf,eng-iaf,13 +lre22_dev_jqqpg,orm-orm,5 +lre22_dev_jqqrs,nbl-nbl,11 +lre22_dev_jrmnp,tir-tir,9 +lre22_dev_jsahe,fra-ntf,12 +lre22_dev_jsciw,eng-ens,5 +lre22_dev_jsisu,eng-iaf,4 +lre22_dev_jstjq,zul-zul,4 +lre22_dev_jsxuw,eng-iaf,8 +lre22_dev_jtaxh,ven-ven,4 +lre22_dev_jtgjo,ara-arq,9 +lre22_dev_jtxor,orm-orm,3 +lre22_dev_junyj,orm-orm,5 +lre22_dev_juykt,ara-ayl,7 +lre22_dev_jvqzf,fra-ntf,9 +lre22_dev_jvvxl,afr-afr,7 +lre22_dev_jvxpt,nbl-nbl,1 +lre22_dev_jwfeb,eng-iaf,4 +lre22_dev_jwmmp,eng-ens,3 +lre22_dev_jwyiq,tso-tso,10 +lre22_dev_jxcmp,ara-aeb,10 +lre22_dev_jxfsy,ara-ayl,9 +lre22_dev_jxjar,tso-tso,10 +lre22_dev_jylrr,ara-aeb,9 +lre22_dev_jzciw,orm-orm,5 +lre22_dev_jzcyt,tso-tso,5 +lre22_dev_jzhpf,tso-tso,4 +lre22_dev_jzidh,afr-afr,11 +lre22_dev_jznzw,eng-iaf,6 +lre22_dev_jzoqd,afr-afr,7 +lre22_dev_jzwnu,ven-ven,11 +lre22_dev_kaoyk,afr-afr,6 +lre22_dev_kasoe,zul-zul,12 +lre22_dev_kaygq,eng-ens,9 +lre22_dev_kayqh,fra-ntf,8 +lre22_dev_kbpcw,eng-iaf,3 +lre22_dev_kbtrx,orm-orm,10 +lre22_dev_kcebk,ven-ven,7 +lre22_dev_kdbil,orm-orm,15 +lre22_dev_kddhf,ara-arq,10 +lre22_dev_kdeij,ara-ayl,3 +lre22_dev_kdiak,zul-zul,12 +lre22_dev_kedwl,nbl-nbl,12 +lre22_dev_keouf,fra-ntf,9 +lre22_dev_keozw,ara-aeb,10 +lre22_dev_kervm,eng-ens,7 +lre22_dev_kflpm,xho-xho,1 +lre22_dev_kfqpd,ara-arq,8 +lre22_dev_kgaqj,ara-aeb,8 +lre22_dev_kghnx,fra-ntf,3 +lre22_dev_kgoze,zul-zul,4 +lre22_dev_kgrxe,fra-ntf,9 +lre22_dev_kgsdu,ara-arq,5 +lre22_dev_kheef,xho-xho,15 +lre22_dev_khgyl,xho-xho,8 +lre22_dev_khsgr,tso-tso,7 +lre22_dev_khxvm,nbl-nbl,9 +lre22_dev_kijjo,ara-aeb,3 +lre22_dev_kiush,xho-xho,2 +lre22_dev_kiyso,ara-arq,1 +lre22_dev_kjewo,ven-ven,6 +lre22_dev_kjgkg,ara-ayl,5 +lre22_dev_kjksh,ven-ven,3 +lre22_dev_kjomd,afr-afr,4 +lre22_dev_kjrcy,afr-afr,11 +lre22_dev_kkauw,fra-ntf,10 +lre22_dev_kkiew,orm-orm,15 +lre22_dev_kkyyu,zul-zul,8 +lre22_dev_klafc,ara-ayl,4 +lre22_dev_klalo,eng-ens,5 +lre22_dev_kliip,afr-afr,1 +lre22_dev_klkxg,tso-tso,8 +lre22_dev_klqwc,ara-arq,7 +lre22_dev_kmbgg,tir-tir,12 +lre22_dev_kmgoo,tir-tir,8 +lre22_dev_kmnko,zul-zul,3 +lre22_dev_kmtyc,ara-aeb,8 +lre22_dev_kmxqj,xho-xho,8 +lre22_dev_kmzdw,fra-ntf,3 +lre22_dev_knxsi,ara-arq,9 +lre22_dev_kofob,orm-orm,7 +lre22_dev_kokfk,fra-ntf,14 +lre22_dev_kokir,nbl-nbl,12 
+lre22_dev_kooxu,ara-arq,9 +lre22_dev_korip,tso-tso,7 +lre22_dev_kpbnd,zul-zul,4 +lre22_dev_kpnyf,eng-iaf,3 +lre22_dev_kpwts,ara-ayl,8 +lre22_dev_kpxne,orm-orm,6 +lre22_dev_kpzbl,ven-ven,12 +lre22_dev_kqact,zul-zul,0 +lre22_dev_kqfbl,eng-iaf,12 +lre22_dev_kqfsm,zul-zul,5 +lre22_dev_kqfyp,ara-arq,1 +lre22_dev_kqkqj,ara-ayl,7 +lre22_dev_kqvwr,xho-xho,13 +lre22_dev_kragl,zul-zul,13 +lre22_dev_krbdn,xho-xho,14 +lre22_dev_ksake,ara-aeb,8 +lre22_dev_ksoly,nbl-nbl,11 +lre22_dev_kttyt,orm-orm,5 +lre22_dev_kttzq,tso-tso,9 +lre22_dev_ktwaf,zul-zul,3 +lre22_dev_ktwqf,ven-ven,6 +lre22_dev_ktxef,zul-zul,0 +lre22_dev_ktztb,orm-orm,12 +lre22_dev_kufkm,nbl-nbl,15 +lre22_dev_kuqsu,afr-afr,9 +lre22_dev_kuyka,tir-tir,4 +lre22_dev_kvcpn,ara-ayl,3 +lre22_dev_kvghz,eng-iaf,10 +lre22_dev_kvswv,ven-ven,11 +lre22_dev_kxkos,orm-orm,10 +lre22_dev_kxkzg,ara-ayl,9 +lre22_dev_kxqef,ven-ven,12 +lre22_dev_kyjpf,ven-ven,7 +lre22_dev_kynap,ara-ayl,9 +lre22_dev_kyptg,ven-ven,8 +lre22_dev_kytyr,nbl-nbl,11 +lre22_dev_kywmf,orm-orm,4 +lre22_dev_kzibn,zul-zul,3 +lre22_dev_kzqxx,fra-ntf,1 +lre22_dev_lacgv,tso-tso,7 +lre22_dev_lagpe,tso-tso,6 +lre22_dev_lanuu,tso-tso,9 +lre22_dev_lapag,afr-afr,6 +lre22_dev_larnq,zul-zul,4 +lre22_dev_lbbvq,xho-xho,8 +lre22_dev_lbfca,ara-arq,8 +lre22_dev_lbhoj,orm-orm,11 +lre22_dev_lbiin,ara-ayl,4 +lre22_dev_lcdyj,ara-arq,9 +lre22_dev_ldasz,fra-ntf,9 +lre22_dev_ldbur,tso-tso,1 +lre22_dev_lddhs,orm-orm,12 +lre22_dev_ldedw,ara-aeb,5 +lre22_dev_ldmbr,ara-ayl,5 +lre22_dev_ldmqc,tir-tir,7 +lre22_dev_leadw,eng-iaf,3 +lre22_dev_leaqq,tso-tso,10 +lre22_dev_ledsh,afr-afr,11 +lre22_dev_leovk,afr-afr,6 +lre22_dev_lexlh,ara-aeb,2 +lre22_dev_lfilk,eng-ens,10 +lre22_dev_lfyll,zul-zul,10 +lre22_dev_lgada,zul-zul,6 +lre22_dev_lgcjy,afr-afr,9 +lre22_dev_lgfri,ara-aeb,5 +lre22_dev_lgkbt,xho-xho,4 +lre22_dev_lhbjq,ara-arq,0 +lre22_dev_lhemi,xho-xho,9 +lre22_dev_lhfne,ara-arq,6 +lre22_dev_lhmtg,ara-arq,9 +lre22_dev_lieso,ara-aeb,8 +lre22_dev_likcy,afr-afr,13 +lre22_dev_lipyu,zul-zul,12 +lre22_dev_lisum,ven-ven,4 +lre22_dev_ljevp,ara-ayl,3 +lre22_dev_ljijh,orm-orm,3 +lre22_dev_ljylg,nbl-nbl,13 +lre22_dev_lkfig,ara-ayl,2 +lre22_dev_lklnc,ara-arq,3 +lre22_dev_lkopy,tir-tir,9 +lre22_dev_lllwi,eng-iaf,5 +lre22_dev_llstb,nbl-nbl,10 +lre22_dev_lmeax,eng-iaf,10 +lre22_dev_lmkui,ara-arq,7 +lre22_dev_lmrbp,tir-tir,9 +lre22_dev_lnejh,eng-ens,10 +lre22_dev_lnttv,ven-ven,10 +lre22_dev_loxqz,eng-iaf,8 +lre22_dev_loybq,ara-aeb,10 +lre22_dev_lpadb,fra-ntf,4 +lre22_dev_lpahk,nbl-nbl,11 +lre22_dev_lphgs,tir-tir,7 +lre22_dev_lphoa,eng-ens,2 +lre22_dev_lpkie,eng-iaf,5 +lre22_dev_lpkpc,zul-zul,6 +lre22_dev_lptpx,eng-iaf,4 +lre22_dev_lqwcv,xho-xho,13 +lre22_dev_lrgwx,orm-orm,10 +lre22_dev_lruoj,orm-orm,2 +lre22_dev_lrwee,fra-ntf,10 +lre22_dev_lsess,ven-ven,1 +lre22_dev_lsycj,tir-tir,9 +lre22_dev_ltaoe,eng-ens,8 +lre22_dev_ltish,ara-aeb,5 +lre22_dev_ltqeb,eng-ens,8 +lre22_dev_ltzfg,ven-ven,10 +lre22_dev_luuhd,ara-arq,2 +lre22_dev_lvejl,zul-zul,11 +lre22_dev_lvgsm,tir-tir,10 +lre22_dev_lvwle,xho-xho,7 +lre22_dev_lvxea,tir-tir,8 +lre22_dev_lwsmk,eng-ens,10 +lre22_dev_lwzhq,ara-ayl,3 +lre22_dev_lxbdd,ara-ayl,8 +lre22_dev_lxdgx,nbl-nbl,1 +lre22_dev_lxjij,ara-ayl,7 +lre22_dev_lxldm,tso-tso,8 +lre22_dev_lxmsa,zul-zul,11 +lre22_dev_lxugv,zul-zul,13 +lre22_dev_lxwig,tso-tso,4 +lre22_dev_lyigi,xho-xho,4 +lre22_dev_lymzv,ara-arq,6 +lre22_dev_lyuls,ara-arq,4 +lre22_dev_lyyzw,ara-ayl,5 +lre22_dev_lzhrm,ara-arq,8 +lre22_dev_lzjgb,xho-xho,12 +lre22_dev_lzrpe,xho-xho,8 +lre22_dev_lzvmq,fra-ntf,13 +lre22_dev_maagy,ven-ven,6 +lre22_dev_mabmx,ara-arq,4 
+lre22_dev_macre,zul-zul,7 +lre22_dev_maggb,nbl-nbl,7 +lre22_dev_margf,ara-ayl,6 +lre22_dev_maydg,eng-iaf,4 +lre22_dev_mbsgm,zul-zul,7 +lre22_dev_mbttd,fra-ntf,14 +lre22_dev_mcebh,tso-tso,8 +lre22_dev_mcfve,ara-ayl,3 +lre22_dev_mclrc,zul-zul,12 +lre22_dev_mcvgl,ara-ayl,5 +lre22_dev_mdgok,ara-aeb,5 +lre22_dev_mdilb,ven-ven,3 +lre22_dev_mdzqr,nbl-nbl,11 +lre22_dev_mehfu,ara-arq,3 +lre22_dev_meiyg,eng-ens,11 +lre22_dev_merbq,orm-orm,9 +lre22_dev_mfoys,afr-afr,8 +lre22_dev_mgpfx,xho-xho,8 +lre22_dev_mgtzj,zul-zul,12 +lre22_dev_mgxxc,ven-ven,11 +lre22_dev_mhldj,nbl-nbl,14 +lre22_dev_mhvio,eng-iaf,6 +lre22_dev_mhxgi,tir-tir,9 +lre22_dev_miegc,fra-ntf,6 +lre22_dev_miwyu,ara-aeb,8 +lre22_dev_mjocm,ara-aeb,2 +lre22_dev_mjqij,orm-orm,12 +lre22_dev_mjxgy,afr-afr,8 +lre22_dev_mkeyt,tir-tir,12 +lre22_dev_mklub,ven-ven,4 +lre22_dev_mknzf,ara-aeb,10 +lre22_dev_mlhes,ara-arq,9 +lre22_dev_mlhse,tso-tso,3 +lre22_dev_mlhtc,orm-orm,8 +lre22_dev_mlpuq,ven-ven,10 +lre22_dev_mluow,orm-orm,2 +lre22_dev_mmwtu,ara-arq,4 +lre22_dev_mmwzf,tso-tso,7 +lre22_dev_mnjdq,tir-tir,10 +lre22_dev_mnkfe,nbl-nbl,4 +lre22_dev_mnmcm,ara-arq,3 +lre22_dev_mocss,xho-xho,9 +lre22_dev_mohxo,zul-zul,12 +lre22_dev_mojui,fra-ntf,1 +lre22_dev_mojvy,xho-xho,7 +lre22_dev_molqa,fra-ntf,14 +lre22_dev_mopiq,nbl-nbl,14 +lre22_dev_moqto,tir-tir,12 +lre22_dev_morri,ara-aeb,8 +lre22_dev_mpxyg,eng-ens,4 +lre22_dev_mqiap,xho-xho,14 +lre22_dev_mqxep,ara-ayl,2 +lre22_dev_mrcoe,ara-ayl,7 +lre22_dev_mriiq,tso-tso,4 +lre22_dev_mryoy,eng-ens,11 +lre22_dev_mryzh,ara-arq,4 +lre22_dev_msadm,ven-ven,2 +lre22_dev_msghz,nbl-nbl,11 +lre22_dev_mtpfp,ara-aeb,9 +lre22_dev_mtqft,orm-orm,14 +lre22_dev_mtzvt,ara-aeb,10 +lre22_dev_munim,xho-xho,15 +lre22_dev_murhb,nbl-nbl,1 +lre22_dev_mvbra,xho-xho,4 +lre22_dev_mvhza,afr-afr,13 +lre22_dev_mviud,xho-xho,12 +lre22_dev_mvxjk,afr-afr,9 +lre22_dev_mwnkm,orm-orm,8 +lre22_dev_mwoml,xho-xho,9 +lre22_dev_mxhup,eng-ens,8 +lre22_dev_mykuh,ara-ayl,5 +lre22_dev_myqfn,eng-iaf,4 +lre22_dev_mywmj,ven-ven,9 +lre22_dev_mzbrr,ara-arq,10 +lre22_dev_mzsiq,afr-afr,9 +lre22_dev_mztms,eng-ens,3 +lre22_dev_mzuxc,ara-arq,9 +lre22_dev_nbdbe,ara-ayl,7 +lre22_dev_nbjqz,ara-aeb,9 +lre22_dev_nbyhp,afr-afr,3 +lre22_dev_ncnyb,ven-ven,8 +lre22_dev_ncocl,nbl-nbl,6 +lre22_dev_ndecq,ara-ayl,8 +lre22_dev_ndjsl,nbl-nbl,6 +lre22_dev_nelsk,orm-orm,0 +lre22_dev_nenly,eng-iaf,11 +lre22_dev_neqkb,ven-ven,2 +lre22_dev_nfjid,orm-orm,12 +lre22_dev_nfkqr,orm-orm,8 +lre22_dev_nfoas,orm-orm,15 +lre22_dev_ngjbm,eng-ens,10 +lre22_dev_ngmbz,eng-iaf,9 +lre22_dev_ngnua,fra-ntf,10 +lre22_dev_nguuu,fra-ntf,13 +lre22_dev_ngyse,ven-ven,7 +lre22_dev_nhfso,fra-ntf,14 +lre22_dev_nhuue,zul-zul,1 +lre22_dev_niack,ara-ayl,8 +lre22_dev_niari,ven-ven,7 +lre22_dev_nibme,ara-arq,9 +lre22_dev_nikby,tso-tso,10 +lre22_dev_nimex,ara-ayl,8 +lre22_dev_nivmv,xho-xho,11 +lre22_dev_nkebu,eng-ens,5 +lre22_dev_nkgml,eng-ens,10 +lre22_dev_nkofi,fra-ntf,11 +lre22_dev_nkrez,xho-xho,5 +lre22_dev_nkscn,tso-tso,5 +lre22_dev_nkwrs,ara-aeb,2 +lre22_dev_nkxcy,afr-afr,4 +lre22_dev_nlast,xho-xho,12 +lre22_dev_nlcun,eng-ens,0 +lre22_dev_nljyr,afr-afr,5 +lre22_dev_nlkdv,eng-iaf,12 +lre22_dev_nlpcs,ara-ayl,7 +lre22_dev_nlrcn,ara-ayl,4 +lre22_dev_nlxla,xho-xho,0 +lre22_dev_nmmij,ara-ayl,4 +lre22_dev_nmrkv,fra-ntf,12 +lre22_dev_nmufp,tso-tso,10 +lre22_dev_nnbmo,tso-tso,10 +lre22_dev_nnnpi,afr-afr,4 +lre22_dev_nnzok,tir-tir,5 +lre22_dev_noqch,fra-ntf,12 +lre22_dev_nownd,xho-xho,2 +lre22_dev_npabl,nbl-nbl,5 +lre22_dev_npjhu,afr-afr,6 +lre22_dev_nqbks,afr-afr,11 +lre22_dev_nqijo,orm-orm,7 +lre22_dev_nqljj,ara-arq,6 
+lre22_dev_nqvfr,tir-tir,7 +lre22_dev_nrtej,tir-tir,11 +lre22_dev_nshvj,nbl-nbl,7 +lre22_dev_nsmyy,tir-tir,12 +lre22_dev_nsqcm,fra-ntf,13 +lre22_dev_nstrj,nbl-nbl,9 +lre22_dev_nsvla,nbl-nbl,10 +lre22_dev_nthbx,eng-ens,0 +lre22_dev_nvwkf,ven-ven,0 +lre22_dev_nvwzy,tso-tso,11 +lre22_dev_nvyyg,orm-orm,7 +lre22_dev_nxdml,eng-ens,1 +lre22_dev_nxmxb,zul-zul,12 +lre22_dev_nxqpl,nbl-nbl,13 +lre22_dev_nxslf,fra-ntf,9 +lre22_dev_nyaof,nbl-nbl,5 +lre22_dev_nzeot,zul-zul,12 +lre22_dev_nzhhf,ara-ayl,7 +lre22_dev_nzpbh,fra-ntf,14 +lre22_dev_nzyjp,orm-orm,4 +lre22_dev_nzzyd,xho-xho,11 +lre22_dev_oaiij,ven-ven,7 +lre22_dev_oaimr,orm-orm,14 +lre22_dev_oatzl,fra-ntf,13 +lre22_dev_oaycx,ara-ayl,8 +lre22_dev_objwd,eng-ens,1 +lre22_dev_oboem,tir-tir,9 +lre22_dev_obzyj,xho-xho,5 +lre22_dev_occhn,fra-ntf,9 +lre22_dev_ocfcr,ven-ven,7 +lre22_dev_ochni,ven-ven,13 +lre22_dev_ociva,tir-tir,5 +lre22_dev_odofq,xho-xho,5 +lre22_dev_odtjr,eng-ens,11 +lre22_dev_oejjy,fra-ntf,4 +lre22_dev_offnw,afr-afr,8 +lre22_dev_ofgqs,ara-ayl,6 +lre22_dev_ofkvj,xho-xho,15 +lre22_dev_ofzhh,orm-orm,11 +lre22_dev_ogilp,afr-afr,6 +lre22_dev_oglxd,ara-ayl,4 +lre22_dev_ogoyt,tso-tso,8 +lre22_dev_ogpou,ven-ven,3 +lre22_dev_ohatz,eng-ens,10 +lre22_dev_ohlzs,nbl-nbl,15 +lre22_dev_ohpzj,tir-tir,4 +lre22_dev_ohzdt,ara-aeb,5 +lre22_dev_oicrh,eng-ens,9 +lre22_dev_oigem,orm-orm,14 +lre22_dev_ojbnw,ara-arq,4 +lre22_dev_ojebm,ven-ven,7 +lre22_dev_ojila,ara-arq,4 +lre22_dev_ojiso,fra-ntf,5 +lre22_dev_ojpdy,tso-tso,9 +lre22_dev_ojtki,tir-tir,11 +lre22_dev_ojxso,nbl-nbl,4 +lre22_dev_okdqa,fra-ntf,14 +lre22_dev_oktvp,ara-ayl,7 +lre22_dev_okvsg,zul-zul,10 +lre22_dev_okyah,tso-tso,11 +lre22_dev_olabw,ara-arq,4 +lre22_dev_omhry,tir-tir,4 +lre22_dev_omnrf,eng-iaf,13 +lre22_dev_omptm,ven-ven,6 +lre22_dev_omqfq,fra-ntf,4 +lre22_dev_onqdn,fra-ntf,13 +lre22_dev_onsyx,tso-tso,9 +lre22_dev_onvgj,tir-tir,6 +lre22_dev_onzha,zul-zul,10 +lre22_dev_ooptw,nbl-nbl,5 +lre22_dev_oowvo,eng-ens,11 +lre22_dev_ooyea,tso-tso,2 +lre22_dev_oozri,ven-ven,0 +lre22_dev_opazz,ara-ayl,1 +lre22_dev_opqkl,nbl-nbl,11 +lre22_dev_oqsva,ara-ayl,2 +lre22_dev_oquxw,nbl-nbl,15 +lre22_dev_orktv,afr-afr,5 +lre22_dev_ornjf,ara-ayl,6 +lre22_dev_ortbp,ara-arq,0 +lre22_dev_osauy,fra-ntf,12 +lre22_dev_osnch,afr-afr,1 +lre22_dev_otelo,eng-iaf,7 +lre22_dev_otewx,tso-tso,10 +lre22_dev_otnwj,eng-ens,3 +lre22_dev_ouecw,ara-aeb,10 +lre22_dev_ouzui,ara-arq,3 +lre22_dev_ovdtj,ara-ayl,6 +lre22_dev_ovjny,tso-tso,1 +lre22_dev_ovqwp,ara-ayl,7 +lre22_dev_ovvkn,afr-afr,11 +lre22_dev_ovvmi,tso-tso,2 +lre22_dev_owyeq,ara-arq,6 +lre22_dev_oxlrt,ara-aeb,10 +lre22_dev_oybst,zul-zul,9 +lre22_dev_oybua,nbl-nbl,2 +lre22_dev_oykjs,tso-tso,4 +lre22_dev_oyswm,ara-arq,8 +lre22_dev_oyxbj,ven-ven,8 +lre22_dev_oyxtq,eng-ens,11 +lre22_dev_oyyxh,ara-arq,8 +lre22_dev_ozbct,tir-tir,12 +lre22_dev_ozcvt,ara-aeb,10 +lre22_dev_ozjel,ara-arq,10 +lre22_dev_ozmuj,zul-zul,3 +lre22_dev_ozuvk,tir-tir,10 +lre22_dev_paguh,fra-ntf,1 +lre22_dev_paspj,tir-tir,6 +lre22_dev_pbmai,fra-ntf,6 +lre22_dev_pbpug,zul-zul,10 +lre22_dev_pbsbs,tso-tso,10 +lre22_dev_pbszl,tso-tso,1 +lre22_dev_pbxxf,eng-iaf,2 +lre22_dev_pcgvn,eng-iaf,3 +lre22_dev_pcmbn,eng-ens,1 +lre22_dev_pcqce,ara-arq,8 +lre22_dev_pdlnr,tso-tso,2 +lre22_dev_pdrus,orm-orm,1 +lre22_dev_pedyx,eng-iaf,12 +lre22_dev_pegyr,nbl-nbl,11 +lre22_dev_pesej,ara-arq,4 +lre22_dev_pevhh,tir-tir,12 +lre22_dev_peykl,xho-xho,13 +lre22_dev_pezwc,tso-tso,4 +lre22_dev_pfemh,eng-iaf,4 +lre22_dev_pfrfc,ven-ven,8 +lre22_dev_pfsoa,nbl-nbl,15 +lre22_dev_pgeoo,tso-tso,9 +lre22_dev_pgwei,orm-orm,2 
+lre22_dev_pgxyv,tso-tso,4 +lre22_dev_phofb,ara-ayl,8 +lre22_dev_phula,nbl-nbl,14 +lre22_dev_phwnf,tso-tso,9 +lre22_dev_pifyx,orm-orm,9 +lre22_dev_pilvp,tso-tso,11 +lre22_dev_pinzj,nbl-nbl,11 +lre22_dev_piocw,ara-aeb,8 +lre22_dev_pipas,zul-zul,13 +lre22_dev_pipgo,afr-afr,3 +lre22_dev_pitmn,ara-arq,10 +lre22_dev_pizdz,ara-aeb,2 +lre22_dev_pizlx,ara-ayl,6 +lre22_dev_pjatg,ven-ven,9 +lre22_dev_pjavt,orm-orm,11 +lre22_dev_pjcec,eng-iaf,12 +lre22_dev_pjdwy,afr-afr,1 +lre22_dev_pjlmw,ara-ayl,7 +lre22_dev_pjsqe,eng-ens,7 +lre22_dev_pkdij,ara-ayl,3 +lre22_dev_pkekq,ara-aeb,3 +lre22_dev_pkpst,eng-iaf,9 +lre22_dev_plhqb,nbl-nbl,13 +lre22_dev_plowv,nbl-nbl,5 +lre22_dev_plrjb,xho-xho,12 +lre22_dev_pmove,eng-iaf,4 +lre22_dev_pneax,eng-ens,11 +lre22_dev_pnexr,nbl-nbl,9 +lre22_dev_pngea,nbl-nbl,11 +lre22_dev_pnipe,eng-ens,9 +lre22_dev_pnmlr,ara-arq,5 +lre22_dev_pnsuk,xho-xho,2 +lre22_dev_pnuct,tir-tir,10 +lre22_dev_pocev,ara-arq,4 +lre22_dev_powkd,eng-ens,9 +lre22_dev_pprvm,ara-ayl,7 +lre22_dev_ppyle,ara-aeb,7 +lre22_dev_pqfda,fra-ntf,5 +lre22_dev_pqryo,afr-afr,4 +lre22_dev_prrzc,afr-afr,9 +lre22_dev_psjuf,afr-afr,13 +lre22_dev_psngm,zul-zul,13 +lre22_dev_psroz,fra-ntf,13 +lre22_dev_pssqo,orm-orm,10 +lre22_dev_psvlh,fra-ntf,13 +lre22_dev_pswld,tir-tir,10 +lre22_dev_ptcns,nbl-nbl,11 +lre22_dev_ptobm,afr-afr,6 +lre22_dev_ptowg,tir-tir,8 +lre22_dev_ptreu,xho-xho,15 +lre22_dev_ptwru,fra-ntf,14 +lre22_dev_ptyff,ara-ayl,1 +lre22_dev_ptygm,tir-tir,3 +lre22_dev_pudne,ara-arq,4 +lre22_dev_puelp,zul-zul,9 +lre22_dev_purej,nbl-nbl,9 +lre22_dev_puyvb,ara-ayl,3 +lre22_dev_pvrdh,ara-aeb,9 +lre22_dev_pvryr,eng-ens,11 +lre22_dev_pwets,tir-tir,9 +lre22_dev_pwgnk,tir-tir,10 +lre22_dev_pwhyy,tir-tir,11 +lre22_dev_pwkgs,zul-zul,2 +lre22_dev_pwtdp,eng-iaf,0 +lre22_dev_pxccc,ara-ayl,5 +lre22_dev_pxpdo,xho-xho,14 +lre22_dev_pxsot,xho-xho,14 +lre22_dev_pxuhy,ara-aeb,6 +lre22_dev_pybxn,eng-iaf,11 +lre22_dev_pyoft,eng-iaf,12 +lre22_dev_pyvql,eng-iaf,7 +lre22_dev_pzcnz,nbl-nbl,2 +lre22_dev_pzhrk,ara-aeb,4 +lre22_dev_qadjy,ven-ven,7 +lre22_dev_qaeek,ven-ven,7 +lre22_dev_qafse,eng-iaf,11 +lre22_dev_qahft,ven-ven,13 +lre22_dev_qakoa,zul-zul,9 +lre22_dev_qalhd,ara-ayl,2 +lre22_dev_qazjh,ven-ven,11 +lre22_dev_qbfkw,eng-iaf,6 +lre22_dev_qbgcd,fra-ntf,14 +lre22_dev_qbisr,ara-ayl,3 +lre22_dev_qcnbm,ven-ven,3 +lre22_dev_qdcbb,tir-tir,5 +lre22_dev_qdfgi,zul-zul,12 +lre22_dev_qdmbj,eng-ens,4 +lre22_dev_qdwtg,fra-ntf,11 +lre22_dev_qefvt,ara-ayl,7 +lre22_dev_qffki,orm-orm,13 +lre22_dev_qfplk,tir-tir,8 +lre22_dev_qgxdl,xho-xho,14 +lre22_dev_qhadd,afr-afr,2 +lre22_dev_qhgaf,ara-ayl,7 +lre22_dev_qhinf,tir-tir,6 +lre22_dev_qhkjz,ara-aeb,6 +lre22_dev_qhlwj,ara-arq,8 +lre22_dev_qiarf,ara-arq,4 +lre22_dev_qidwl,ara-arq,5 +lre22_dev_qivzc,orm-orm,12 +lre22_dev_qizyt,ara-ayl,2 +lre22_dev_qjeue,ara-arq,9 +lre22_dev_qjgxh,ara-arq,1 +lre22_dev_qkdhb,afr-afr,1 +lre22_dev_qkiqi,orm-orm,4 +lre22_dev_qkoth,tir-tir,5 +lre22_dev_qkucq,fra-ntf,3 +lre22_dev_qltea,nbl-nbl,2 +lre22_dev_qlube,ara-aeb,5 +lre22_dev_qmcji,nbl-nbl,15 +lre22_dev_qmpzc,nbl-nbl,11 +lre22_dev_qmsog,tir-tir,3 +lre22_dev_qoech,eng-iaf,7 +lre22_dev_qovfg,ara-arq,10 +lre22_dev_qozzv,tir-tir,2 +lre22_dev_qpasx,tir-tir,3 +lre22_dev_qpauj,ara-aeb,4 +lre22_dev_qpfch,orm-orm,6 +lre22_dev_qpvea,orm-orm,9 +lre22_dev_qrgka,ara-arq,8 +lre22_dev_qrqmm,ara-ayl,7 +lre22_dev_qsaol,xho-xho,14 +lre22_dev_qsgpx,ara-arq,10 +lre22_dev_qspeg,eng-ens,7 +lre22_dev_qsvbe,fra-ntf,3 +lre22_dev_qsxoh,fra-ntf,5 +lre22_dev_qtbnc,xho-xho,7 +lre22_dev_qthzi,afr-afr,12 +lre22_dev_qtmaw,fra-ntf,13 +lre22_dev_qtnqh,eng-iaf,13 
+lre22_dev_qtpsb,tso-tso,8 +lre22_dev_qtqpc,eng-iaf,12 +lre22_dev_qtwfv,eng-iaf,4 +lre22_dev_qvamq,fra-ntf,9 +lre22_dev_qveuq,tir-tir,9 +lre22_dev_qvffg,orm-orm,0 +lre22_dev_qvplf,xho-xho,6 +lre22_dev_qvqvi,ven-ven,7 +lre22_dev_qwhsh,afr-afr,7 +lre22_dev_qwiwm,eng-ens,9 +lre22_dev_qxbch,ara-aeb,9 +lre22_dev_qxlca,nbl-nbl,2 +lre22_dev_qxscb,afr-afr,2 +lre22_dev_qyoqn,fra-ntf,9 +lre22_dev_qyrgs,nbl-nbl,3 +lre22_dev_qytdl,fra-ntf,9 +lre22_dev_qyyeb,eng-iaf,12 +lre22_dev_qyzqb,tso-tso,8 +lre22_dev_qzayi,orm-orm,12 +lre22_dev_qzexr,eng-iaf,5 +lre22_dev_qzrfi,ara-arq,10 +lre22_dev_qztjh,orm-orm,3 +lre22_dev_qztze,eng-iaf,12 +lre22_dev_raent,eng-iaf,2 +lre22_dev_ragjh,orm-orm,14 +lre22_dev_ramzu,ara-ayl,6 +lre22_dev_ratmr,ven-ven,7 +lre22_dev_rawak,ara-arq,9 +lre22_dev_rbbne,ven-ven,7 +lre22_dev_rbcul,eng-iaf,10 +lre22_dev_rbsoy,eng-iaf,12 +lre22_dev_rbxqy,tso-tso,9 +lre22_dev_rcejf,xho-xho,7 +lre22_dev_rdbzt,zul-zul,7 +lre22_dev_rdhpu,ara-aeb,8 +lre22_dev_rdsew,ven-ven,2 +lre22_dev_rdtkf,ven-ven,11 +lre22_dev_reeba,ara-ayl,6 +lre22_dev_relip,eng-iaf,11 +lre22_dev_rfdoh,ara-aeb,9 +lre22_dev_rfkja,xho-xho,11 +lre22_dev_rflev,ven-ven,3 +lre22_dev_rfqcx,nbl-nbl,14 +lre22_dev_rfwuv,eng-ens,1 +lre22_dev_rgsil,fra-ntf,6 +lre22_dev_rhcuj,ara-aeb,8 +lre22_dev_rhdgz,eng-iaf,12 +lre22_dev_rhpmn,ven-ven,7 +lre22_dev_rhtoe,eng-iaf,11 +lre22_dev_rhyqq,ara-aeb,2 +lre22_dev_riltn,ara-aeb,10 +lre22_dev_rinti,xho-xho,12 +lre22_dev_rioxh,xho-xho,12 +lre22_dev_ripix,tir-tir,10 +lre22_dev_rjbji,ven-ven,10 +lre22_dev_rjqbz,eng-iaf,0 +lre22_dev_rkemd,tir-tir,8 +lre22_dev_rktzl,nbl-nbl,13 +lre22_dev_rkuni,xho-xho,15 +lre22_dev_rlsgd,fra-ntf,5 +lre22_dev_rlypa,afr-afr,7 +lre22_dev_rmeav,ven-ven,8 +lre22_dev_rmejy,fra-ntf,12 +lre22_dev_rmeuz,zul-zul,6 +lre22_dev_rmjsj,nbl-nbl,5 +lre22_dev_rmtxj,eng-iaf,13 +lre22_dev_rnpyc,ara-ayl,2 +lre22_dev_rnunw,orm-orm,9 +lre22_dev_rnvvw,tso-tso,9 +lre22_dev_roavh,fra-ntf,6 +lre22_dev_rodbi,xho-xho,15 +lre22_dev_roeph,xho-xho,13 +lre22_dev_rolun,ara-ayl,3 +lre22_dev_roydh,xho-xho,7 +lre22_dev_rpajy,ara-aeb,8 +lre22_dev_rpdsm,ara-ayl,5 +lre22_dev_rpfae,afr-afr,9 +lre22_dev_rpvyc,eng-iaf,9 +lre22_dev_rqxot,tso-tso,9 +lre22_dev_rumiv,ara-aeb,9 +lre22_dev_runhh,afr-afr,6 +lre22_dev_ruvpd,eng-iaf,4 +lre22_dev_rvpkd,fra-ntf,1 +lre22_dev_rvqxq,orm-orm,12 +lre22_dev_rvstc,ara-arq,7 +lre22_dev_rwbea,tir-tir,9 +lre22_dev_rweyk,nbl-nbl,2 +lre22_dev_rwnfb,eng-ens,8 +lre22_dev_rwrhn,afr-afr,11 +lre22_dev_rxhkp,ara-arq,3 +lre22_dev_rxixz,nbl-nbl,15 +lre22_dev_rxmft,zul-zul,7 +lre22_dev_ryknh,ara-ayl,5 +lre22_dev_rytyf,zul-zul,12 +lre22_dev_rywss,tso-tso,1 +lre22_dev_rzjrd,nbl-nbl,7 +lre22_dev_rzpyx,tso-tso,2 +lre22_dev_satbk,ven-ven,7 +lre22_dev_sbfhc,fra-ntf,6 +lre22_dev_sboxi,xho-xho,15 +lre22_dev_scxxn,eng-iaf,5 +lre22_dev_scyvp,ara-aeb,6 +lre22_dev_sdbou,tir-tir,10 +lre22_dev_sddua,tir-tir,11 +lre22_dev_seasj,afr-afr,7 +lre22_dev_sevcw,tir-tir,12 +lre22_dev_sfevx,tso-tso,4 +lre22_dev_sfqgm,fra-ntf,1 +lre22_dev_sgaza,ara-aeb,8 +lre22_dev_sgkrh,afr-afr,9 +lre22_dev_sgmjh,nbl-nbl,14 +lre22_dev_shafn,ven-ven,8 +lre22_dev_shaob,orm-orm,10 +lre22_dev_shnns,afr-afr,6 +lre22_dev_siprc,ven-ven,7 +lre22_dev_sisge,afr-afr,13 +lre22_dev_siuwu,ara-arq,10 +lre22_dev_sivik,fra-ntf,2 +lre22_dev_sjyoo,afr-afr,1 +lre22_dev_skacz,fra-ntf,13 +lre22_dev_skcai,orm-orm,12 +lre22_dev_skctw,nbl-nbl,0 +lre22_dev_skygk,afr-afr,13 +lre22_dev_slraf,ara-aeb,6 +lre22_dev_slrzl,eng-ens,11 +lre22_dev_sltzh,xho-xho,6 +lre22_dev_sluki,ven-ven,1 +lre22_dev_slyez,tso-tso,8 +lre22_dev_slzuh,xho-xho,15 +lre22_dev_smdsm,nbl-nbl,7 
+lre22_dev_smhae,ara-ayl,3 +lre22_dev_smxhe,ara-aeb,10 +lre22_dev_snayr,afr-afr,2 +lre22_dev_snbxs,eng-ens,8 +lre22_dev_sngol,tso-tso,9 +lre22_dev_snhun,fra-ntf,13 +lre22_dev_snkib,ven-ven,8 +lre22_dev_snqld,eng-iaf,2 +lre22_dev_sntvb,eng-ens,11 +lre22_dev_snzbl,tir-tir,12 +lre22_dev_sobid,afr-afr,3 +lre22_dev_soknx,orm-orm,15 +lre22_dev_spesw,ven-ven,13 +lre22_dev_sphuq,eng-iaf,12 +lre22_dev_spqcy,xho-xho,11 +lre22_dev_sqcyu,zul-zul,9 +lre22_dev_sqdkr,eng-iaf,13 +lre22_dev_sqfnt,ara-aeb,9 +lre22_dev_sqhrr,eng-ens,11 +lre22_dev_sqyiu,ara-ayl,4 +lre22_dev_srbwp,ara-aeb,10 +lre22_dev_srokn,afr-afr,6 +lre22_dev_srzck,ara-ayl,3 +lre22_dev_ssbei,tso-tso,10 +lre22_dev_ssfmz,eng-iaf,12 +lre22_dev_ssmgk,xho-xho,10 +lre22_dev_ssmsy,xho-xho,4 +lre22_dev_stgcb,afr-afr,10 +lre22_dev_stihb,afr-afr,0 +lre22_dev_stkav,ara-aeb,9 +lre22_dev_stkrw,xho-xho,3 +lre22_dev_sttnk,fra-ntf,8 +lre22_dev_stwkk,eng-iaf,12 +lre22_dev_stwrt,nbl-nbl,1 +lre22_dev_subio,afr-afr,1 +lre22_dev_sumjk,ara-arq,6 +lre22_dev_suocb,nbl-nbl,6 +lre22_dev_svcbx,tso-tso,9 +lre22_dev_svllg,fra-ntf,14 +lre22_dev_svvqs,afr-afr,3 +lre22_dev_svxyz,ara-ayl,1 +lre22_dev_swhlf,ara-aeb,10 +lre22_dev_swhnk,fra-ntf,12 +lre22_dev_swnrg,ven-ven,12 +lre22_dev_swofz,zul-zul,4 +lre22_dev_swuls,tso-tso,8 +lre22_dev_sxfkn,ara-aeb,2 +lre22_dev_sycoz,tir-tir,10 +lre22_dev_syoek,fra-ntf,5 +lre22_dev_sypnb,ven-ven,13 +lre22_dev_syvrt,eng-iaf,8 +lre22_dev_szmoc,ven-ven,6 +lre22_dev_szmwp,eng-ens,8 +lre22_dev_talec,ven-ven,11 +lre22_dev_tasfs,ven-ven,7 +lre22_dev_tbbrr,xho-xho,5 +lre22_dev_tbcun,ara-aeb,3 +lre22_dev_tbhnw,nbl-nbl,15 +lre22_dev_tblhf,ven-ven,12 +lre22_dev_tbozq,xho-xho,1 +lre22_dev_tcckd,ara-ayl,3 +lre22_dev_tcele,tso-tso,11 +lre22_dev_tciob,tso-tso,10 +lre22_dev_tcpxj,tir-tir,9 +lre22_dev_tdejo,tir-tir,6 +lre22_dev_tdfqo,tso-tso,0 +lre22_dev_tdhhf,zul-zul,10 +lre22_dev_tdjje,ven-ven,10 +lre22_dev_tdkrp,orm-orm,6 +lre22_dev_tebop,tso-tso,10 +lre22_dev_teeqm,ven-ven,6 +lre22_dev_tejsn,tir-tir,12 +lre22_dev_teptc,ara-arq,10 +lre22_dev_tetmt,orm-orm,9 +lre22_dev_tfkij,ara-aeb,2 +lre22_dev_tfnin,tir-tir,3 +lre22_dev_tfyqz,tir-tir,3 +lre22_dev_tgbui,ara-aeb,5 +lre22_dev_tgixi,xho-xho,13 +lre22_dev_tgmud,eng-iaf,6 +lre22_dev_tgult,eng-ens,2 +lre22_dev_thcjv,tso-tso,5 +lre22_dev_thzir,eng-ens,11 +lre22_dev_tisfm,fra-ntf,9 +lre22_dev_tixou,xho-xho,2 +lre22_dev_tiyuw,afr-afr,5 +lre22_dev_tjdcc,afr-afr,13 +lre22_dev_tjikt,zul-zul,12 +lre22_dev_tjpdw,ara-arq,8 +lre22_dev_tkadi,ven-ven,12 +lre22_dev_tkcbm,afr-afr,6 +lre22_dev_tkgfw,eng-ens,11 +lre22_dev_tkiks,ara-aeb,6 +lre22_dev_tlgzi,xho-xho,1 +lre22_dev_tlhlw,tir-tir,6 +lre22_dev_tloqn,afr-afr,6 +lre22_dev_tmcje,eng-ens,4 +lre22_dev_tmjpw,eng-iaf,2 +lre22_dev_tmxtu,ven-ven,2 +lre22_dev_tngwh,tir-tir,8 +lre22_dev_tnqdv,ara-aeb,9 +lre22_dev_tnqro,xho-xho,15 +lre22_dev_tnqzy,orm-orm,7 +lre22_dev_tnskm,xho-xho,12 +lre22_dev_tnvhc,ven-ven,12 +lre22_dev_tofhy,zul-zul,6 +lre22_dev_tohkd,zul-zul,9 +lre22_dev_tonqb,ven-ven,6 +lre22_dev_tpbib,tso-tso,1 +lre22_dev_tpejq,ara-arq,3 +lre22_dev_tpfir,eng-ens,11 +lre22_dev_tphgn,zul-zul,12 +lre22_dev_tpidd,ara-arq,6 +lre22_dev_tpkce,eng-ens,11 +lre22_dev_tpszi,orm-orm,15 +lre22_dev_tpwcn,eng-iaf,6 +lre22_dev_trdfy,ara-ayl,3 +lre22_dev_tsbms,ara-ayl,4 +lre22_dev_tslui,tso-tso,6 +lre22_dev_tsvvy,zul-zul,10 +lre22_dev_tsyey,xho-xho,10 +lre22_dev_ttlco,eng-iaf,12 +lre22_dev_tubpr,orm-orm,13 +lre22_dev_tugpl,eng-ens,9 +lre22_dev_tuoiq,tir-tir,4 +lre22_dev_tuxfx,zul-zul,3 +lre22_dev_tvahj,tir-tir,9 +lre22_dev_tvewc,eng-iaf,3 +lre22_dev_tvfvc,ara-ayl,8 
+lre22_dev_tvkod,xho-xho,5 +lre22_dev_tvkwe,zul-zul,9 +lre22_dev_tvopo,xho-xho,12 +lre22_dev_tvqui,eng-ens,7 +lre22_dev_tvsbw,ara-arq,6 +lre22_dev_tvxvk,ven-ven,8 +lre22_dev_twbkf,nbl-nbl,9 +lre22_dev_twfot,ara-arq,6 +lre22_dev_twkns,ara-ayl,4 +lre22_dev_twuvf,eng-ens,10 +lre22_dev_txahv,eng-ens,8 +lre22_dev_txcob,ara-aeb,6 +lre22_dev_txnvi,zul-zul,3 +lre22_dev_txurh,afr-afr,7 +lre22_dev_txzkl,ara-arq,5 +lre22_dev_tyfad,tso-tso,7 +lre22_dev_tyhwp,ara-aeb,8 +lre22_dev_tzism,tir-tir,12 +lre22_dev_tzsfj,tir-tir,12 +lre22_dev_tzwof,eng-iaf,9 +lre22_dev_uahzm,afr-afr,5 +lre22_dev_uajwt,tso-tso,7 +lre22_dev_uanlr,zul-zul,13 +lre22_dev_uaoju,zul-zul,8 +lre22_dev_uaryk,xho-xho,15 +lre22_dev_ubfaf,ven-ven,12 +lre22_dev_ucbje,ara-aeb,8 +lre22_dev_ucrpa,ara-arq,3 +lre22_dev_udtzx,eng-iaf,7 +lre22_dev_uduja,fra-ntf,6 +lre22_dev_udxpl,tso-tso,2 +lre22_dev_uesmx,eng-iaf,5 +lre22_dev_ufewk,eng-iaf,8 +lre22_dev_ugjxy,tir-tir,4 +lre22_dev_ugsxl,eng-ens,3 +lre22_dev_ugvov,tso-tso,8 +lre22_dev_uhmdw,tso-tso,10 +lre22_dev_uhqng,nbl-nbl,12 +lre22_dev_uhymw,tir-tir,8 +lre22_dev_uhzmr,eng-ens,2 +lre22_dev_uimtg,ara-ayl,4 +lre22_dev_uirdr,nbl-nbl,13 +lre22_dev_uiszj,ara-aeb,8 +lre22_dev_ujada,ara-ayl,9 +lre22_dev_ujmqw,ven-ven,4 +lre22_dev_ujswr,afr-afr,11 +lre22_dev_ujvve,xho-xho,10 +lre22_dev_ukfha,ara-ayl,6 +lre22_dev_ukkpr,eng-ens,10 +lre22_dev_ukpdg,fra-ntf,13 +lre22_dev_ukpoy,nbl-nbl,15 +lre22_dev_uktod,ara-ayl,4 +lre22_dev_uktvh,zul-zul,13 +lre22_dev_ukuwo,ara-ayl,5 +lre22_dev_ukynv,zul-zul,12 +lre22_dev_ulepv,ara-ayl,5 +lre22_dev_ulgtj,zul-zul,7 +lre22_dev_ulofk,eng-iaf,11 +lre22_dev_uluog,ara-arq,3 +lre22_dev_umbpy,zul-zul,13 +lre22_dev_umjzo,tso-tso,5 +lre22_dev_uncdb,ara-arq,9 +lre22_dev_unffr,ara-ayl,8 +lre22_dev_unpif,eng-ens,9 +lre22_dev_uoikj,eng-iaf,13 +lre22_dev_uopfp,nbl-nbl,7 +lre22_dev_upenl,eng-iaf,13 +lre22_dev_uphuw,xho-xho,11 +lre22_dev_upkbw,ara-ayl,4 +lre22_dev_uplen,xho-xho,9 +lre22_dev_upqod,orm-orm,6 +lre22_dev_upspe,afr-afr,12 +lre22_dev_uqnkk,tir-tir,12 +lre22_dev_uqvxc,eng-ens,0 +lre22_dev_urgqx,ara-ayl,8 +lre22_dev_urkgk,tir-tir,12 +lre22_dev_uscky,xho-xho,3 +lre22_dev_usiwx,tir-tir,9 +lre22_dev_usnzj,zul-zul,5 +lre22_dev_usopt,xho-xho,8 +lre22_dev_uswgv,nbl-nbl,11 +lre22_dev_uszcb,ara-arq,4 +lre22_dev_utahf,ara-ayl,7 +lre22_dev_utaxq,tso-tso,9 +lre22_dev_utcwb,afr-afr,10 +lre22_dev_uuhry,tir-tir,9 +lre22_dev_uuprr,eng-ens,7 +lre22_dev_uuvqh,zul-zul,2 +lre22_dev_uwcmh,orm-orm,4 +lre22_dev_uwiev,zul-zul,13 +lre22_dev_uwjzb,ven-ven,10 +lre22_dev_uwony,orm-orm,1 +lre22_dev_uwqeq,orm-orm,2 +lre22_dev_uwvfl,nbl-nbl,5 +lre22_dev_uxdjn,xho-xho,12 +lre22_dev_uxqte,zul-zul,13 +lre22_dev_uxryh,ven-ven,11 +lre22_dev_uyhzp,orm-orm,15 +lre22_dev_uyrjl,tso-tso,10 +lre22_dev_uyzcl,eng-ens,11 +lre22_dev_uzbqz,fra-ntf,4 +lre22_dev_uzoxq,ara-aeb,9 +lre22_dev_vabxl,nbl-nbl,11 +lre22_dev_vafyo,nbl-nbl,15 +lre22_dev_vascl,nbl-nbl,0 +lre22_dev_vauqx,ara-arq,10 +lre22_dev_vbscm,xho-xho,3 +lre22_dev_vbulh,xho-xho,12 +lre22_dev_vbwwp,xho-xho,15 +lre22_dev_vbznk,ara-arq,6 +lre22_dev_vcibu,nbl-nbl,9 +lre22_dev_vcjun,zul-zul,12 +lre22_dev_vckxt,xho-xho,7 +lre22_dev_vdkjy,fra-ntf,14 +lre22_dev_vdmyt,ara-ayl,0 +lre22_dev_vdoif,ven-ven,13 +lre22_dev_vdvjv,orm-orm,12 +lre22_dev_vebet,ara-aeb,1 +lre22_dev_velkr,ara-aeb,1 +lre22_dev_vgbmm,tir-tir,9 +lre22_dev_vgucw,nbl-nbl,7 +lre22_dev_vhiyb,afr-afr,9 +lre22_dev_vhoej,tir-tir,5 +lre22_dev_vhryd,orm-orm,13 +lre22_dev_vhzdh,tso-tso,10 +lre22_dev_viapx,tso-tso,3 +lre22_dev_vifdj,ara-ayl,4 +lre22_dev_vijbo,zul-zul,12 +lre22_dev_virnr,eng-ens,6 
+lre22_dev_vjhbd,orm-orm,6 +lre22_dev_vjoca,ara-aeb,10 +lre22_dev_vjtou,eng-ens,5 +lre22_dev_vjxpv,ara-aeb,10 +lre22_dev_vkmab,fra-ntf,2 +lre22_dev_vkrvz,tir-tir,8 +lre22_dev_vkwwf,tso-tso,9 +lre22_dev_vlbdk,zul-zul,6 +lre22_dev_vliie,orm-orm,9 +lre22_dev_vlrve,eng-iaf,2 +lre22_dev_vmaet,tir-tir,3 +lre22_dev_vmdhi,eng-ens,10 +lre22_dev_vmdjw,nbl-nbl,13 +lre22_dev_vmjut,fra-ntf,9 +lre22_dev_vmrrg,eng-ens,3 +lre22_dev_vnjxn,nbl-nbl,7 +lre22_dev_vnmxm,ven-ven,12 +lre22_dev_vnykj,zul-zul,10 +lre22_dev_vovab,zul-zul,11 +lre22_dev_vovvl,zul-zul,11 +lre22_dev_vpcey,tir-tir,6 +lre22_dev_vpodd,nbl-nbl,11 +lre22_dev_vptke,eng-ens,4 +lre22_dev_vpulr,xho-xho,15 +lre22_dev_vpuve,tir-tir,8 +lre22_dev_vqttr,eng-iaf,12 +lre22_dev_vqzae,eng-iaf,11 +lre22_dev_vrnsg,tso-tso,8 +lre22_dev_vshpc,ara-aeb,6 +lre22_dev_vslbh,ara-arq,9 +lre22_dev_vsmaz,tir-tir,5 +lre22_dev_vsnez,tso-tso,8 +lre22_dev_vsnjp,fra-ntf,14 +lre22_dev_vsocn,ven-ven,7 +lre22_dev_vsvom,afr-afr,8 +lre22_dev_vtnfc,tir-tir,4 +lre22_dev_vtnlb,eng-ens,4 +lre22_dev_vubwb,eng-ens,8 +lre22_dev_vufsn,ara-aeb,3 +lre22_dev_vuiqu,tir-tir,8 +lre22_dev_vumeq,xho-xho,0 +lre22_dev_vupse,ven-ven,6 +lre22_dev_vvauz,xho-xho,14 +lre22_dev_vvfze,eng-ens,11 +lre22_dev_vviyr,zul-zul,12 +lre22_dev_vvwiq,fra-ntf,5 +lre22_dev_vwnkj,zul-zul,5 +lre22_dev_vwoww,orm-orm,7 +lre22_dev_vwtne,afr-afr,5 +lre22_dev_vwxgt,ara-arq,10 +lre22_dev_vxabl,eng-ens,8 +lre22_dev_vxnsl,afr-afr,7 +lre22_dev_vxslj,tir-tir,10 +lre22_dev_vxsvc,tir-tir,11 +lre22_dev_vxuiz,ara-aeb,10 +lre22_dev_vzarl,ara-ayl,7 +lre22_dev_vzeew,ven-ven,6 +lre22_dev_vzjtc,ara-arq,0 +lre22_dev_vzkdb,tso-tso,10 +lre22_dev_vzvpq,ara-arq,9 +lre22_dev_waqyh,xho-xho,15 +lre22_dev_wawwu,xho-xho,14 +lre22_dev_wbgqi,tso-tso,11 +lre22_dev_wcctp,eng-ens,10 +lre22_dev_wdcer,afr-afr,3 +lre22_dev_wdeor,fra-ntf,14 +lre22_dev_wdfdd,eng-iaf,2 +lre22_dev_wdkvb,eng-ens,11 +lre22_dev_wdogx,ara-aeb,7 +lre22_dev_wdqdq,ara-arq,10 +lre22_dev_wdxwu,tir-tir,5 +lre22_dev_weaek,ara-arq,4 +lre22_dev_wefui,tso-tso,10 +lre22_dev_wehjh,tir-tir,10 +lre22_dev_weypz,nbl-nbl,12 +lre22_dev_wffdy,zul-zul,12 +lre22_dev_wffgq,tso-tso,8 +lre22_dev_wfvlh,ven-ven,8 +lre22_dev_wgago,eng-ens,5 +lre22_dev_wglzd,afr-afr,11 +lre22_dev_wgsbu,afr-afr,5 +lre22_dev_whdhw,nbl-nbl,7 +lre22_dev_whogu,eng-iaf,13 +lre22_dev_whpee,tso-tso,9 +lre22_dev_whqpd,ara-aeb,9 +lre22_dev_wikrr,ven-ven,11 +lre22_dev_witju,fra-ntf,11 +lre22_dev_wjcme,orm-orm,10 +lre22_dev_wkare,ara-arq,2 +lre22_dev_wkbfe,afr-afr,9 +lre22_dev_wkecn,xho-xho,13 +lre22_dev_wkhxo,afr-afr,9 +lre22_dev_wlgae,ara-arq,6 +lre22_dev_wlnls,eng-iaf,7 +lre22_dev_wlsxb,eng-ens,1 +lre22_dev_wlwuc,nbl-nbl,8 +lre22_dev_wnaqr,nbl-nbl,9 +lre22_dev_wndpq,fra-ntf,13 +lre22_dev_wnkdc,ara-ayl,2 +lre22_dev_wnknc,nbl-nbl,9 +lre22_dev_wnppz,orm-orm,15 +lre22_dev_wpzgm,afr-afr,13 +lre22_dev_wqhqj,ara-ayl,9 +lre22_dev_wqreb,afr-afr,11 +lre22_dev_wqrez,eng-ens,4 +lre22_dev_wqtsf,ara-arq,8 +lre22_dev_wqwtc,orm-orm,3 +lre22_dev_wrfwf,ven-ven,7 +lre22_dev_wrqqt,orm-orm,15 +lre22_dev_wrutf,afr-afr,7 +lre22_dev_wrvzk,nbl-nbl,1 +lre22_dev_wrxly,fra-ntf,13 +lre22_dev_wsbiw,ara-aeb,8 +lre22_dev_wshay,zul-zul,8 +lre22_dev_wsous,tso-tso,5 +lre22_dev_wszpj,ven-ven,7 +lre22_dev_wtksi,afr-afr,8 +lre22_dev_wugbw,xho-xho,6 +lre22_dev_wujfv,afr-afr,11 +lre22_dev_wuwek,xho-xho,12 +lre22_dev_wvhhk,fra-ntf,2 +lre22_dev_wvosz,nbl-nbl,3 +lre22_dev_wwagu,xho-xho,14 +lre22_dev_wwbuj,eng-iaf,2 +lre22_dev_wwgnr,afr-afr,10 +lre22_dev_wwjev,afr-afr,12 +lre22_dev_wwmsu,ara-arq,4 +lre22_dev_wwrmy,ven-ven,7 +lre22_dev_wwvhd,ara-arq,9 
+lre22_dev_wxdjv,ara-ayl,6 +lre22_dev_wygox,tir-tir,6 +lre22_dev_wyhuq,zul-zul,13 +lre22_dev_wzoir,xho-xho,15 +lre22_dev_wzvwa,orm-orm,6 +lre22_dev_xapvn,tso-tso,8 +lre22_dev_xarkl,eng-ens,5 +lre22_dev_xavhh,nbl-nbl,10 +lre22_dev_xazuy,orm-orm,3 +lre22_dev_xbnft,eng-iaf,0 +lre22_dev_xbqbc,fra-ntf,7 +lre22_dev_xbzfw,tir-tir,11 +lre22_dev_xccde,ara-arq,3 +lre22_dev_xcdty,zul-zul,8 +lre22_dev_xcjkb,ara-ayl,7 +lre22_dev_xcmty,ara-arq,10 +lre22_dev_xcsbc,tso-tso,1 +lre22_dev_xdkjb,nbl-nbl,11 +lre22_dev_xdknq,nbl-nbl,11 +lre22_dev_xdoik,eng-ens,10 +lre22_dev_xdtyd,nbl-nbl,4 +lre22_dev_xearl,eng-iaf,3 +lre22_dev_xedqa,nbl-nbl,11 +lre22_dev_xefnx,eng-ens,11 +lre22_dev_xeipr,tir-tir,11 +lre22_dev_xekhs,zul-zul,9 +lre22_dev_xelzr,ara-aeb,9 +lre22_dev_xenhb,ara-aeb,3 +lre22_dev_xfdsx,xho-xho,12 +lre22_dev_xfggl,xho-xho,9 +lre22_dev_xgspz,eng-iaf,13 +lre22_dev_xgwmu,tso-tso,8 +lre22_dev_xhbmk,orm-orm,15 +lre22_dev_xhdtl,orm-orm,3 +lre22_dev_xisjn,ara-arq,8 +lre22_dev_xitdz,nbl-nbl,10 +lre22_dev_xizbg,xho-xho,14 +lre22_dev_xjcph,xho-xho,10 +lre22_dev_xjcvd,zul-zul,7 +lre22_dev_xjlgm,ara-aeb,3 +lre22_dev_xjxzy,eng-ens,2 +lre22_dev_xkfsd,ven-ven,12 +lre22_dev_xkktj,eng-iaf,12 +lre22_dev_xkmmy,ara-aeb,10 +lre22_dev_xltgz,ara-ayl,5 +lre22_dev_xmbby,orm-orm,3 +lre22_dev_xmcmv,xho-xho,14 +lre22_dev_xngam,fra-ntf,14 +lre22_dev_xnsev,ara-ayl,8 +lre22_dev_xnwsq,ara-arq,8 +lre22_dev_xnwwh,zul-zul,13 +lre22_dev_xobeh,tir-tir,11 +lre22_dev_xolau,ven-ven,13 +lre22_dev_xoqtn,eng-iaf,10 +lre22_dev_xovpd,eng-iaf,10 +lre22_dev_xpaff,eng-ens,9 +lre22_dev_xpahm,ara-arq,4 +lre22_dev_xpcrs,tso-tso,5 +lre22_dev_xpdsg,eng-iaf,5 +lre22_dev_xpjqj,nbl-nbl,6 +lre22_dev_xqwtk,ara-arq,10 +lre22_dev_xrfge,ara-arq,8 +lre22_dev_xrhka,orm-orm,9 +lre22_dev_xrpup,zul-zul,8 +lre22_dev_xsbff,ara-aeb,9 +lre22_dev_xsffv,tso-tso,1 +lre22_dev_xstnu,eng-ens,5 +lre22_dev_xthfd,ara-aeb,8 +lre22_dev_xthzz,ven-ven,4 +lre22_dev_xtmgg,eng-iaf,13 +lre22_dev_xtyic,nbl-nbl,14 +lre22_dev_xucyl,eng-ens,7 +lre22_dev_xudii,ara-ayl,3 +lre22_dev_xugux,afr-afr,0 +lre22_dev_xuqnj,ara-ayl,4 +lre22_dev_xvaoh,nbl-nbl,9 +lre22_dev_xvclh,afr-afr,9 +lre22_dev_xveae,xho-xho,4 +lre22_dev_xxpqz,ara-arq,9 +lre22_dev_xxqad,tso-tso,10 +lre22_dev_xybed,tir-tir,9 +lre22_dev_xyrex,eng-ens,11 +lre22_dev_xzlas,eng-iaf,9 +lre22_dev_xztyr,orm-orm,9 +lre22_dev_yaxkb,zul-zul,12 +lre22_dev_ybcvu,xho-xho,13 +lre22_dev_ybjon,orm-orm,2 +lre22_dev_ybubm,ven-ven,5 +lre22_dev_ycarc,eng-ens,6 +lre22_dev_ychjj,orm-orm,2 +lre22_dev_ycnyc,tir-tir,7 +lre22_dev_ycsvt,afr-afr,12 +lre22_dev_ydaxa,nbl-nbl,8 +lre22_dev_ydrxu,nbl-nbl,1 +lre22_dev_yeekw,fra-ntf,13 +lre22_dev_yevan,tir-tir,11 +lre22_dev_yfaan,tir-tir,10 +lre22_dev_yfayx,afr-afr,6 +lre22_dev_yfpsd,fra-ntf,1 +lre22_dev_yfxkm,ven-ven,7 +lre22_dev_yguqk,ven-ven,3 +lre22_dev_yhrgj,afr-afr,8 +lre22_dev_yhzyq,ara-ayl,5 +lre22_dev_yiqui,eng-iaf,12 +lre22_dev_yjens,ara-ayl,7 +lre22_dev_yjkxx,eng-ens,8 +lre22_dev_yjypk,ara-ayl,9 +lre22_dev_ykchd,ven-ven,8 +lre22_dev_ykktl,xho-xho,0 +lre22_dev_ylhwh,orm-orm,9 +lre22_dev_ylnms,tso-tso,2 +lre22_dev_ylsdz,ven-ven,7 +lre22_dev_ymcmp,eng-iaf,8 +lre22_dev_ymfzx,tso-tso,7 +lre22_dev_ymizm,fra-ntf,0 +lre22_dev_ympvj,tir-tir,9 +lre22_dev_ymslh,tir-tir,12 +lre22_dev_ynavg,zul-zul,9 +lre22_dev_ynhlk,tir-tir,9 +lre22_dev_ynnkb,eng-ens,10 +lre22_dev_yogkc,fra-ntf,7 +lre22_dev_yokld,eng-ens,4 +lre22_dev_yokve,tir-tir,6 +lre22_dev_yomdz,ara-ayl,6 +lre22_dev_yomuu,xho-xho,12 +lre22_dev_yoobm,ara-ayl,8 +lre22_dev_yoocz,eng-ens,10 +lre22_dev_yopyf,eng-iaf,5 +lre22_dev_yoxoc,tir-tir,8 +lre22_dev_ypaem,afr-afr,5 
+lre22_dev_ypamp,afr-afr,7 +lre22_dev_ypjpq,tir-tir,8 +lre22_dev_yplba,ara-arq,9 +lre22_dev_ypnrh,fra-ntf,1 +lre22_dev_ypqfg,eng-ens,7 +lre22_dev_yrdsl,eng-ens,2 +lre22_dev_yrtkv,afr-afr,7 +lre22_dev_yrwrb,nbl-nbl,9 +lre22_dev_ysmlk,eng-ens,11 +lre22_dev_yspja,orm-orm,5 +lre22_dev_ytfnn,fra-ntf,14 +lre22_dev_yturp,ara-aeb,6 +lre22_dev_ytvbd,afr-afr,4 +lre22_dev_yuhvo,tso-tso,8 +lre22_dev_yundi,ara-arq,3 +lre22_dev_yvmnx,ara-arq,10 +lre22_dev_yvqud,xho-xho,15 +lre22_dev_yvxdd,ara-ayl,4 +lre22_dev_ywjtq,xho-xho,5 +lre22_dev_ywnza,fra-ntf,12 +lre22_dev_yxnno,tso-tso,10 +lre22_dev_yxoww,tir-tir,7 +lre22_dev_yxpgi,ara-arq,5 +lre22_dev_yxsta,eng-ens,7 +lre22_dev_yyltz,xho-xho,8 +lre22_dev_yyqqx,fra-ntf,12 +lre22_dev_yzloh,ara-ayl,7 +lre22_dev_zacdy,ara-ayl,3 +lre22_dev_zadkk,tir-tir,9 +lre22_dev_zalpc,afr-afr,6 +lre22_dev_zarod,orm-orm,8 +lre22_dev_zasvb,afr-afr,11 +lre22_dev_zazom,ara-arq,9 +lre22_dev_zbfqk,afr-afr,13 +lre22_dev_zbqew,tso-tso,2 +lre22_dev_zbrkn,eng-ens,7 +lre22_dev_zbubp,zul-zul,9 +lre22_dev_zbytc,ara-arq,8 +lre22_dev_zcfns,tir-tir,6 +lre22_dev_zcfzk,afr-afr,7 +lre22_dev_zcrgv,ara-arq,10 +lre22_dev_zdxdn,ara-ayl,7 +lre22_dev_zdydi,eng-ens,1 +lre22_dev_zebzq,ven-ven,4 +lre22_dev_zedlk,xho-xho,14 +lre22_dev_zeqpp,tir-tir,12 +lre22_dev_zfjbm,ara-arq,10 +lre22_dev_zfkne,nbl-nbl,13 +lre22_dev_zflnr,ven-ven,13 +lre22_dev_zfoyd,xho-xho,4 +lre22_dev_zgdyu,eng-iaf,8 +lre22_dev_zgmja,zul-zul,9 +lre22_dev_zgvfs,ara-arq,6 +lre22_dev_zhmud,orm-orm,14 +lre22_dev_zhoml,tso-tso,9 +lre22_dev_zijcb,xho-xho,10 +lre22_dev_ziktm,ara-aeb,10 +lre22_dev_zipxy,ara-arq,9 +lre22_dev_ziqxc,eng-iaf,1 +lre22_dev_zjhir,ven-ven,7 +lre22_dev_zjmqp,orm-orm,13 +lre22_dev_zjrrk,tso-tso,11 +lre22_dev_zjtwd,ara-aeb,3 +lre22_dev_zkfcf,xho-xho,6 +lre22_dev_zkftc,nbl-nbl,4 +lre22_dev_zkqei,ara-ayl,7 +lre22_dev_zkwqo,zul-zul,11 +lre22_dev_zlamn,nbl-nbl,6 +lre22_dev_zlbor,xho-xho,14 +lre22_dev_zloet,ven-ven,8 +lre22_dev_zlvhk,zul-zul,5 +lre22_dev_zlzqv,fra-ntf,12 +lre22_dev_zmobq,ara-ayl,7 +lre22_dev_zmuiv,zul-zul,9 +lre22_dev_znvqw,zul-zul,4 +lre22_dev_znzuu,tir-tir,0 +lre22_dev_zoava,eng-iaf,6 +lre22_dev_zodvu,tso-tso,0 +lre22_dev_zosdw,nbl-nbl,15 +lre22_dev_zpnvq,xho-xho,6 +lre22_dev_zqeby,eng-iaf,12 +lre22_dev_zqgdd,nbl-nbl,9 +lre22_dev_zqhaw,nbl-nbl,5 +lre22_dev_zqkau,orm-orm,8 +lre22_dev_zqkel,ara-ayl,9 +lre22_dev_zqlnd,ara-aeb,8 +lre22_dev_zrnpw,orm-orm,8 +lre22_dev_zrqvc,afr-afr,9 +lre22_dev_zrrgq,ven-ven,8 +lre22_dev_zryit,zul-zul,8 +lre22_dev_zsckt,zul-zul,4 +lre22_dev_zucqq,orm-orm,4 +lre22_dev_zusln,orm-orm,11 +lre22_dev_zuxzw,tir-tir,0 +lre22_dev_zvabs,tir-tir,11 +lre22_dev_zvlid,tso-tso,11 +lre22_dev_zvned,eng-iaf,5 +lre22_dev_zvtwr,xho-xho,11 +lre22_dev_zwmim,orm-orm,11 +lre22_dev_zwnsu,ara-arq,8 +lre22_dev_zwtxn,ara-arq,10 +lre22_dev_zxfcm,orm-orm,3 +lre22_dev_zxsgm,tir-tir,5 +lre22_dev_zybya,eng-iaf,10 +lre22_dev_zygak,zul-zul,1 +lre22_dev_zylqc,eng-ens,3 +lre22_dev_zyppc,fra-ntf,8 +lre22_dev_zywem,eng-ens,8 +lre22_dev_zzapx,ara-ayl,5 +lre22_dev_zzumc,ara-arq,2 +lre22_dev_zzvdl,fra-ntf,5 +lre22_dev_zzvjv,nbl-nbl,14 diff --git a/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_0/train_segments.csv b/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_0/train_segments.csv new file mode 100644 index 00000000..4d50b6a5 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_0/train_segments.csv @@ -0,0 +1,2088 @@ +id,class_id,subclass_idx +lre22_dev_aayck,ara-aeb,12 +lre22_dev_aayto,eng-iaf,14 +lre22_dev_abaha,zul-zul,17 +lre22_dev_abetm,fra-ntf,15 +lre22_dev_abnwz,zul-zul,19 
+lre22_dev_abvjt,zul-zul,19 +lre22_dev_abwgm,ara-ayl,13 +lre22_dev_acepd,eng-iaf,19 +lre22_dev_acspt,eng-ens,12 +lre22_dev_aczdh,eng-ens,13 +lre22_dev_adkkm,tso-tso,19 +lre22_dev_adpus,tso-tso,13 +lre22_dev_adwju,ara-aeb,14 +lre22_dev_afnfn,afr-afr,20 +lre22_dev_afohq,ara-aeb,13 +lre22_dev_agnnp,afr-afr,17 +lre22_dev_agquw,fra-ntf,20 +lre22_dev_ahoow,ara-ayl,15 +lre22_dev_ahqxq,fra-ntf,22 +lre22_dev_aieqr,eng-iaf,17 +lre22_dev_ainix,eng-iaf,16 +lre22_dev_aiojl,fra-ntf,18 +lre22_dev_aiypg,nbl-nbl,17 +lre22_dev_ajcpi,orm-orm,22 +lre22_dev_ajeqv,ara-aeb,11 +lre22_dev_ajlqy,xho-xho,16 +lre22_dev_ajlyw,orm-orm,21 +lre22_dev_ajmrs,ara-aeb,11 +lre22_dev_ajzjc,eng-iaf,16 +lre22_dev_ajzyq,ara-ayl,14 +lre22_dev_akmfp,orm-orm,19 +lre22_dev_aleeu,ara-arq,14 +lre22_dev_aliba,ara-aeb,15 +lre22_dev_alkwi,eng-iaf,14 +lre22_dev_aluwk,nbl-nbl,16 +lre22_dev_alvdl,ara-arq,14 +lre22_dev_amrca,ara-aeb,11 +lre22_dev_aoanh,ara-ayl,15 +lre22_dev_aoeql,eng-ens,16 +lre22_dev_apfpk,eng-iaf,14 +lre22_dev_apufs,tir-tir,17 +lre22_dev_apvko,orm-orm,20 +lre22_dev_arefe,orm-orm,23 +lre22_dev_arvyp,ara-ayl,11 +lre22_dev_arwsc,fra-ntf,20 +lre22_dev_asqwa,ara-aeb,14 +lre22_dev_asrng,fra-ntf,18 +lre22_dev_aswjo,afr-afr,18 +lre22_dev_aulzk,ven-ven,21 +lre22_dev_aupcr,zul-zul,18 +lre22_dev_auqcy,eng-ens,18 +lre22_dev_auxdy,nbl-nbl,16 +lre22_dev_auycg,ara-ayl,11 +lre22_dev_aviiv,tso-tso,14 +lre22_dev_avrwo,tso-tso,19 +lre22_dev_avwim,ara-arq,13 +lre22_dev_avzdv,zul-zul,18 +lre22_dev_awtna,ara-arq,13 +lre22_dev_awxbj,orm-orm,23 +lre22_dev_axejc,fra-ntf,17 +lre22_dev_axtso,eng-ens,16 +lre22_dev_axwoo,ara-aeb,15 +lre22_dev_axyma,ara-arq,15 +lre22_dev_aycai,ven-ven,17 +lre22_dev_ayfjz,orm-orm,20 +lre22_dev_aylrz,eng-iaf,16 +lre22_dev_aynwz,tso-tso,18 +lre22_dev_aypyt,ara-aeb,11 +lre22_dev_ayszn,zul-zul,18 +lre22_dev_ayvge,ara-aeb,11 +lre22_dev_ayvmo,afr-afr,23 +lre22_dev_ayzdz,xho-xho,20 +lre22_dev_azbmt,xho-xho,19 +lre22_dev_azjsr,tir-tir,19 +lre22_dev_azkdh,nbl-nbl,20 +lre22_dev_azwrd,fra-ntf,15 +lre22_dev_badwe,ara-aeb,13 +lre22_dev_baiaf,zul-zul,17 +lre22_dev_baiwb,ara-aeb,13 +lre22_dev_baxuo,zul-zul,18 +lre22_dev_bbbtf,eng-ens,18 +lre22_dev_bbdws,ara-ayl,12 +lre22_dev_bbitq,eng-ens,16 +lre22_dev_bbnvu,ara-arq,13 +lre22_dev_bbunq,eng-iaf,14 +lre22_dev_bcinm,ara-aeb,14 +lre22_dev_bcrhs,zul-zul,17 +lre22_dev_bcwpu,ara-aeb,13 +lre22_dev_bcxdq,fra-ntf,21 +lre22_dev_bdgbr,ara-aeb,12 +lre22_dev_bdgrw,orm-orm,17 +lre22_dev_bdiml,ara-aeb,11 +lre22_dev_bdyue,xho-xho,21 +lre22_dev_bdzsj,tir-tir,13 +lre22_dev_beanp,tso-tso,12 +lre22_dev_beigo,ara-aeb,14 +lre22_dev_belhi,orm-orm,23 +lre22_dev_bfoej,ven-ven,20 +lre22_dev_bfznf,ara-ayl,11 +lre22_dev_bgeiq,ven-ven,15 +lre22_dev_bgeyp,ara-aeb,11 +lre22_dev_bgomt,afr-afr,14 +lre22_dev_bgrfd,nbl-nbl,19 +lre22_dev_bgwlu,tir-tir,17 +lre22_dev_bifkp,nbl-nbl,18 +lre22_dev_bipvh,nbl-nbl,17 +lre22_dev_biuyu,eng-ens,12 +lre22_dev_bixnf,ara-ayl,11 +lre22_dev_bjhdf,tso-tso,17 +lre22_dev_bjsmm,ara-ayl,10 +lre22_dev_bkhqg,eng-ens,17 +lre22_dev_bkpah,ven-ven,14 +lre22_dev_blaco,afr-afr,17 +lre22_dev_bleum,xho-xho,18 +lre22_dev_bnhvt,nbl-nbl,16 +lre22_dev_bowyn,ara-arq,14 +lre22_dev_bpeqb,xho-xho,21 +lre22_dev_bpgqs,tir-tir,13 +lre22_dev_bpzpv,afr-afr,16 +lre22_dev_bqenu,eng-ens,12 +lre22_dev_bqfxw,zul-zul,14 +lre22_dev_bqowg,tir-tir,19 +lre22_dev_bqxyq,tir-tir,19 +lre22_dev_brjud,xho-xho,21 +lre22_dev_bruwl,xho-xho,16 +lre22_dev_brzld,fra-ntf,20 +lre22_dev_bsgqz,eng-ens,13 +lre22_dev_bsocl,eng-ens,12 +lre22_dev_bszou,ara-arq,13 +lre22_dev_btapz,zul-zul,15 +lre22_dev_btjlk,ara-aeb,14 
+lre22_dev_btkry,xho-xho,19 +lre22_dev_btyeu,ara-ayl,15 +lre22_dev_bvnuu,fra-ntf,19 +lre22_dev_bvqag,eng-iaf,20 +lre22_dev_bvvho,eng-ens,16 +lre22_dev_bvwaj,tir-tir,14 +lre22_dev_bvymi,eng-ens,15 +lre22_dev_bwgmj,eng-iaf,20 +lre22_dev_bwqpz,ara-arq,14 +lre22_dev_bwyrh,ara-aeb,12 +lre22_dev_bxkrj,ven-ven,18 +lre22_dev_bxkti,afr-afr,20 +lre22_dev_bxzms,nbl-nbl,17 +lre22_dev_bygrw,tso-tso,18 +lre22_dev_byjqr,ven-ven,18 +lre22_dev_bylkl,eng-iaf,16 +lre22_dev_bzmkn,fra-ntf,22 +lre22_dev_bzntz,ara-arq,13 +lre22_dev_bzwkf,eng-iaf,19 +lre22_dev_caijh,ven-ven,18 +lre22_dev_canou,tir-tir,19 +lre22_dev_caqxh,afr-afr,20 +lre22_dev_cayuc,eng-ens,12 +lre22_dev_cbruy,xho-xho,23 +lre22_dev_cbyyw,ara-arq,14 +lre22_dev_cbzbe,afr-afr,22 +lre22_dev_cclfh,ara-arq,15 +lre22_dev_ccovd,ara-arq,11 +lre22_dev_ccpns,eng-ens,17 +lre22_dev_ccsjt,eng-iaf,16 +lre22_dev_ccsql,fra-ntf,21 +lre22_dev_ccugm,eng-ens,18 +lre22_dev_ccyfn,afr-afr,23 +lre22_dev_cdmgw,tir-tir,16 +lre22_dev_cdshg,eng-iaf,17 +lre22_dev_ceccy,orm-orm,20 +lre22_dev_cecwt,fra-ntf,22 +lre22_dev_cegvk,ara-arq,11 +lre22_dev_cferi,zul-zul,15 +lre22_dev_cfojx,ara-arq,11 +lre22_dev_cfzoe,tir-tir,20 +lre22_dev_cgfna,zul-zul,18 +lre22_dev_cggzh,ara-ayl,13 +lre22_dev_cgims,tir-tir,20 +lre22_dev_cgixe,tir-tir,19 +lre22_dev_cgjov,zul-zul,14 +lre22_dev_chhio,ara-aeb,14 +lre22_dev_chnvd,tir-tir,13 +lre22_dev_chpww,nbl-nbl,21 +lre22_dev_churq,ara-ayl,13 +lre22_dev_cifqp,zul-zul,17 +lre22_dev_cijnx,xho-xho,22 +lre22_dev_ciozp,nbl-nbl,16 +lre22_dev_citpi,ara-aeb,12 +lre22_dev_cjrav,tir-tir,15 +lre22_dev_cksrw,ara-aeb,14 +lre22_dev_cktce,tir-tir,17 +lre22_dev_ckzhf,nbl-nbl,20 +lre22_dev_cleyn,ara-aeb,11 +lre22_dev_clhmt,fra-ntf,19 +lre22_dev_clrjd,orm-orm,21 +lre22_dev_clssx,eng-iaf,14 +lre22_dev_cluxm,ara-ayl,13 +lre22_dev_clzwe,ara-aeb,14 +lre22_dev_cminq,ara-aeb,11 +lre22_dev_cmmap,afr-afr,23 +lre22_dev_cmssr,orm-orm,20 +lre22_dev_cmufu,tso-tso,16 +lre22_dev_cnapz,orm-orm,19 +lre22_dev_cndba,tso-tso,12 +lre22_dev_cnkjh,tso-tso,15 +lre22_dev_cnvfe,orm-orm,18 +lre22_dev_cobbz,ara-arq,12 +lre22_dev_coppu,nbl-nbl,21 +lre22_dev_coqoj,eng-ens,17 +lre22_dev_cotun,ven-ven,16 +lre22_dev_cowrt,xho-xho,19 +lre22_dev_cppma,afr-afr,20 +lre22_dev_cpqkz,ara-arq,14 +lre22_dev_cpraw,afr-afr,17 +lre22_dev_cpsrb,fra-ntf,20 +lre22_dev_cpuax,zul-zul,16 +lre22_dev_cpudb,nbl-nbl,16 +lre22_dev_cqqds,afr-afr,22 +lre22_dev_cquib,ven-ven,21 +lre22_dev_cqwxe,nbl-nbl,16 +lre22_dev_cqyad,eng-iaf,15 +lre22_dev_crkut,eng-ens,17 +lre22_dev_crozj,fra-ntf,17 +lre22_dev_crrro,orm-orm,16 +lre22_dev_csavn,ara-aeb,15 +lre22_dev_cschy,afr-afr,16 +lre22_dev_csegr,tso-tso,14 +lre22_dev_csgvq,fra-ntf,17 +lre22_dev_csltj,ara-aeb,14 +lre22_dev_csmtr,ara-ayl,14 +lre22_dev_csqxl,ven-ven,20 +lre22_dev_ctjqw,nbl-nbl,16 +lre22_dev_ctxxt,nbl-nbl,17 +lre22_dev_cuaoy,ara-aeb,13 +lre22_dev_cudpj,ara-arq,13 +lre22_dev_cuhdf,afr-afr,21 +lre22_dev_cuoju,ven-ven,21 +lre22_dev_cupti,nbl-nbl,21 +lre22_dev_cusej,ara-aeb,14 +lre22_dev_cvfle,tir-tir,14 +lre22_dev_cvnqu,eng-ens,14 +lre22_dev_cvvjc,zul-zul,18 +lre22_dev_cvwht,fra-ntf,18 +lre22_dev_cvwtu,fra-ntf,21 +lre22_dev_cwlvk,tso-tso,16 +lre22_dev_cwnky,xho-xho,17 +lre22_dev_cxdlr,afr-afr,14 +lre22_dev_cxfii,ara-arq,13 +lre22_dev_cxpzt,zul-zul,16 +lre22_dev_cxqri,fra-ntf,21 +lre22_dev_cyaug,xho-xho,22 +lre22_dev_czdbd,fra-ntf,15 +lre22_dev_czvoy,ven-ven,16 +lre22_dev_czzrm,afr-afr,17 +lre22_dev_dahzr,ven-ven,17 +lre22_dev_dapny,ven-ven,17 +lre22_dev_dapug,nbl-nbl,19 +lre22_dev_dcbnz,xho-xho,16 +lre22_dev_dciaf,nbl-nbl,22 +lre22_dev_dcljn,afr-afr,19 
+lre22_dev_dcmrn,afr-afr,20 +lre22_dev_dcobq,xho-xho,16 +lre22_dev_dcohp,tir-tir,16 +lre22_dev_dcsep,tso-tso,12 +lre22_dev_dctlw,ara-arq,12 +lre22_dev_dctvv,ara-arq,12 +lre22_dev_dcyoy,eng-iaf,17 +lre22_dev_ddgeb,xho-xho,23 +lre22_dev_ddsab,eng-ens,18 +lre22_dev_ddtpk,eng-ens,18 +lre22_dev_debjr,xho-xho,16 +lre22_dev_defkv,eng-ens,15 +lre22_dev_dejub,ara-arq,11 +lre22_dev_delok,eng-ens,14 +lre22_dev_dezlg,nbl-nbl,17 +lre22_dev_dffbj,fra-ntf,21 +lre22_dev_dfkox,xho-xho,19 +lre22_dev_dfpcn,ara-ayl,13 +lre22_dev_dfqgl,afr-afr,18 +lre22_dev_dfras,eng-iaf,19 +lre22_dev_dftpm,eng-iaf,20 +lre22_dev_dfvta,tso-tso,17 +lre22_dev_dgarp,eng-ens,13 +lre22_dev_dgntq,zul-zul,17 +lre22_dev_dgssb,tir-tir,19 +lre22_dev_dgvtc,xho-xho,23 +lre22_dev_dhdvp,ara-ayl,10 +lre22_dev_dhmbl,fra-ntf,22 +lre22_dev_diiry,orm-orm,16 +lre22_dev_disrs,afr-afr,16 +lre22_dev_ditsk,xho-xho,21 +lre22_dev_djbbz,ara-arq,14 +lre22_dev_djevu,tso-tso,16 +lre22_dev_djlaf,tir-tir,20 +lre22_dev_djoim,zul-zul,15 +lre22_dev_djvvp,zul-zul,17 +lre22_dev_djwyo,ven-ven,18 +lre22_dev_dkbfm,ara-ayl,12 +lre22_dev_dkpcy,ara-aeb,12 +lre22_dev_dlxzj,orm-orm,19 +lre22_dev_dmnjo,ven-ven,14 +lre22_dev_dmtsm,zul-zul,16 +lre22_dev_dnaql,orm-orm,23 +lre22_dev_dnkpf,ara-aeb,15 +lre22_dev_dnscr,tso-tso,12 +lre22_dev_dnygt,eng-ens,15 +lre22_dev_dobre,xho-xho,19 +lre22_dev_dohlp,xho-xho,23 +lre22_dev_doioo,orm-orm,19 +lre22_dev_donaq,ara-aeb,13 +lre22_dev_dooht,ara-arq,11 +lre22_dev_dpmbt,zul-zul,14 +lre22_dev_dptyy,xho-xho,17 +lre22_dev_dqmud,eng-iaf,15 +lre22_dev_dqmxb,xho-xho,20 +lre22_dev_dqopt,eng-ens,14 +lre22_dev_dqpgr,ara-aeb,14 +lre22_dev_drkux,eng-ens,14 +lre22_dev_dsfha,ven-ven,18 +lre22_dev_dsftc,tso-tso,16 +lre22_dev_dskaq,ven-ven,15 +lre22_dev_dtdmp,zul-zul,18 +lre22_dev_dtdux,afr-afr,14 +lre22_dev_dtyki,ara-arq,11 +lre22_dev_durlr,orm-orm,18 +lre22_dev_dutdz,tso-tso,12 +lre22_dev_dvbol,ara-ayl,15 +lre22_dev_dwesk,nbl-nbl,22 +lre22_dev_dwtjw,ven-ven,14 +lre22_dev_dxckb,tso-tso,12 +lre22_dev_dxizq,eng-iaf,14 +lre22_dev_dxtnq,fra-ntf,18 +lre22_dev_dxvib,zul-zul,14 +lre22_dev_dyago,eng-iaf,16 +lre22_dev_dyipl,eng-iaf,18 +lre22_dev_dyqlo,ara-arq,13 +lre22_dev_dyvml,eng-iaf,15 +lre22_dev_dzkui,tso-tso,12 +lre22_dev_dzqta,ven-ven,20 +lre22_dev_dzxio,eng-ens,18 +lre22_dev_eachn,tir-tir,16 +lre22_dev_eapvu,eng-iaf,20 +lre22_dev_ebfdv,ara-ayl,10 +lre22_dev_ebgbd,eng-ens,17 +lre22_dev_eblhy,eng-iaf,20 +lre22_dev_ebtrq,ara-aeb,13 +lre22_dev_ebymv,tir-tir,14 +lre22_dev_ebzhg,nbl-nbl,21 +lre22_dev_ecbwo,ven-ven,21 +lre22_dev_ecllm,fra-ntf,21 +lre22_dev_eclpf,ven-ven,16 +lre22_dev_ecmhd,ara-aeb,14 +lre22_dev_ecnqi,eng-ens,14 +lre22_dev_ecpdc,ara-ayl,10 +lre22_dev_ecslx,afr-afr,22 +lre22_dev_ecuyo,xho-xho,23 +lre22_dev_edgur,tso-tso,16 +lre22_dev_edjtb,nbl-nbl,22 +lre22_dev_edsls,tso-tso,16 +lre22_dev_edssc,orm-orm,23 +lre22_dev_edvab,zul-zul,19 +lre22_dev_eehzu,zul-zul,18 +lre22_dev_eekci,afr-afr,15 +lre22_dev_eekcw,zul-zul,17 +lre22_dev_efihg,nbl-nbl,16 +lre22_dev_efsxw,tso-tso,16 +lre22_dev_efxjv,ara-aeb,14 +lre22_dev_efymf,ara-aeb,14 +lre22_dev_ehcvr,tir-tir,19 +lre22_dev_ehehw,xho-xho,20 +lre22_dev_ehewh,eng-ens,18 +lre22_dev_ehvyp,zul-zul,14 +lre22_dev_eifqv,zul-zul,19 +lre22_dev_eifxu,ara-ayl,10 +lre22_dev_ejcvy,fra-ntf,18 +lre22_dev_ejeek,eng-ens,16 +lre22_dev_ejfyn,fra-ntf,22 +lre22_dev_ejjqg,tso-tso,12 +lre22_dev_ejtox,ven-ven,19 +lre22_dev_ejwch,fra-ntf,21 +lre22_dev_ejzhx,xho-xho,17 +lre22_dev_ekbkm,afr-afr,21 +lre22_dev_ekzhk,ara-ayl,10 +lre22_dev_elanj,tso-tso,18 +lre22_dev_elvvn,tir-tir,16 +lre22_dev_emadg,xho-xho,22 
+lre22_dev_emkzr,afr-afr,21 +lre22_dev_emmck,ara-arq,15 +lre22_dev_enwfu,afr-afr,15 +lre22_dev_eodro,ara-arq,15 +lre22_dev_eoisu,ven-ven,18 +lre22_dev_eomzr,xho-xho,23 +lre22_dev_eorva,xho-xho,21 +lre22_dev_epbwh,nbl-nbl,17 +lre22_dev_epeou,xho-xho,20 +lre22_dev_epifq,nbl-nbl,22 +lre22_dev_epqqo,ara-ayl,14 +lre22_dev_epsld,tso-tso,12 +lre22_dev_epsza,ara-ayl,12 +lre22_dev_eqmgm,ara-aeb,12 +lre22_dev_eqrhr,afr-afr,22 +lre22_dev_eqvan,ara-ayl,13 +lre22_dev_ersgd,orm-orm,22 +lre22_dev_erxig,zul-zul,15 +lre22_dev_esbrw,fra-ntf,19 +lre22_dev_esuug,nbl-nbl,20 +lre22_dev_etczk,tir-tir,14 +lre22_dev_etelz,fra-ntf,21 +lre22_dev_ettsh,fra-ntf,20 +lre22_dev_etuwp,ven-ven,19 +lre22_dev_eubgy,fra-ntf,18 +lre22_dev_euewj,orm-orm,18 +lre22_dev_euzyb,ara-aeb,13 +lre22_dev_ewatn,zul-zul,18 +lre22_dev_ewehs,orm-orm,17 +lre22_dev_ewexz,fra-ntf,18 +lre22_dev_ewgop,tir-tir,20 +lre22_dev_ewmgd,fra-ntf,21 +lre22_dev_ewzma,orm-orm,18 +lre22_dev_expvn,xho-xho,17 +lre22_dev_eyoqu,tir-tir,16 +lre22_dev_eyylz,nbl-nbl,16 +lre22_dev_eyzqu,tir-tir,18 +lre22_dev_ezdty,afr-afr,18 +lre22_dev_ezgcl,ara-aeb,13 +lre22_dev_eznzd,zul-zul,19 +lre22_dev_ezzwj,eng-iaf,18 +lre22_dev_facyr,zul-zul,18 +lre22_dev_faejb,tso-tso,16 +lre22_dev_famjw,orm-orm,18 +lre22_dev_favzh,ara-arq,11 +lre22_dev_fbsre,orm-orm,23 +lre22_dev_fbtkl,fra-ntf,22 +lre22_dev_fbvxh,ara-ayl,14 +lre22_dev_fbyhp,nbl-nbl,20 +lre22_dev_fbysf,nbl-nbl,17 +lre22_dev_fcckx,ara-arq,12 +lre22_dev_fczba,eng-iaf,17 +lre22_dev_fdouw,eng-ens,14 +lre22_dev_fdtmf,tso-tso,13 +lre22_dev_fdtnc,fra-ntf,20 +lre22_dev_fdwme,afr-afr,19 +lre22_dev_fdyhr,eng-ens,18 +lre22_dev_feanh,fra-ntf,22 +lre22_dev_femmc,ara-arq,12 +lre22_dev_fevab,orm-orm,19 +lre22_dev_fexsi,orm-orm,17 +lre22_dev_fflai,ara-aeb,14 +lre22_dev_fgblw,tso-tso,14 +lre22_dev_fglhf,nbl-nbl,22 +lre22_dev_fhucm,ara-ayl,14 +lre22_dev_fhzwp,nbl-nbl,17 +lre22_dev_fifon,eng-iaf,14 +lre22_dev_fipff,orm-orm,19 +lre22_dev_fipyx,zul-zul,14 +lre22_dev_firtn,zul-zul,18 +lre22_dev_fjdqb,nbl-nbl,16 +lre22_dev_fjdxl,tir-tir,14 +lre22_dev_fjocp,ara-ayl,12 +lre22_dev_fjudb,ara-aeb,15 +lre22_dev_fkbjz,afr-afr,22 +lre22_dev_fkwaq,afr-afr,19 +lre22_dev_flbgp,afr-afr,16 +lre22_dev_flgxs,tir-tir,13 +lre22_dev_fljfm,tir-tir,19 +lre22_dev_fmauu,tso-tso,18 +lre22_dev_fmbvf,fra-ntf,19 +lre22_dev_fmhfa,ara-arq,12 +lre22_dev_fmije,ara-ayl,13 +lre22_dev_fnafq,tir-tir,20 +lre22_dev_fofmo,eng-ens,15 +lre22_dev_foikm,tir-tir,16 +lre22_dev_fosfi,eng-iaf,19 +lre22_dev_fotti,eng-ens,13 +lre22_dev_fozzx,zul-zul,15 +lre22_dev_fpehr,ara-aeb,12 +lre22_dev_fpiig,orm-orm,21 +lre22_dev_fqfag,ara-ayl,16 +lre22_dev_fqogo,tir-tir,13 +lre22_dev_frdqe,ara-arq,11 +lre22_dev_fremq,afr-afr,22 +lre22_dev_frjdx,zul-zul,18 +lre22_dev_fruha,ara-ayl,12 +lre22_dev_frxmu,eng-iaf,18 +lre22_dev_fsbeo,tso-tso,13 +lre22_dev_fsijy,fra-ntf,22 +lre22_dev_fsjwh,nbl-nbl,18 +lre22_dev_fspmb,tso-tso,19 +lre22_dev_ftbak,tir-tir,13 +lre22_dev_ftxuo,eng-iaf,20 +lre22_dev_fupee,ara-aeb,13 +lre22_dev_fupla,ara-aeb,11 +lre22_dev_fvmdq,fra-ntf,22 +lre22_dev_fvmjb,fra-ntf,20 +lre22_dev_fvubo,fra-ntf,22 +lre22_dev_fvwze,afr-afr,23 +lre22_dev_fvxxt,ara-arq,13 +lre22_dev_fwcye,ven-ven,21 +lre22_dev_fwkwv,orm-orm,18 +lre22_dev_fxezd,orm-orm,17 +lre22_dev_fxuir,nbl-nbl,19 +lre22_dev_fzgcm,zul-zul,14 +lre22_dev_fzncb,nbl-nbl,16 +lre22_dev_gaezu,ara-aeb,11 +lre22_dev_gawox,ara-aeb,13 +lre22_dev_gbcfq,zul-zul,14 +lre22_dev_gbdkv,orm-orm,17 +lre22_dev_gbevf,eng-iaf,20 +lre22_dev_gchke,ara-aeb,12 +lre22_dev_gcncr,ara-arq,13 +lre22_dev_gdeqd,ara-ayl,14 +lre22_dev_gdncj,eng-iaf,14 
+lre22_dev_gdobt,ven-ven,21 +lre22_dev_geeoy,xho-xho,22 +lre22_dev_geraa,afr-afr,20 +lre22_dev_gfigd,nbl-nbl,16 +lre22_dev_gfjzm,ara-ayl,12 +lre22_dev_gftlv,tir-tir,20 +lre22_dev_ggaux,xho-xho,16 +lre22_dev_ggbgc,zul-zul,15 +lre22_dev_gghhn,zul-zul,18 +lre22_dev_ggrwj,eng-iaf,17 +lre22_dev_ghdur,eng-ens,15 +lre22_dev_ghgbo,ara-ayl,14 +lre22_dev_ghhop,nbl-nbl,20 +lre22_dev_ghnwg,ara-ayl,14 +lre22_dev_ghpmd,ara-ayl,14 +lre22_dev_ghqbh,orm-orm,19 +lre22_dev_gihvo,eng-ens,16 +lre22_dev_giueq,tso-tso,19 +lre22_dev_giuix,ara-aeb,15 +lre22_dev_gjaqj,eng-iaf,20 +lre22_dev_gjgcw,xho-xho,18 +lre22_dev_gjirh,eng-iaf,16 +lre22_dev_gjvwy,nbl-nbl,22 +lre22_dev_gkeql,eng-iaf,16 +lre22_dev_gkhas,tso-tso,16 +lre22_dev_glmyp,nbl-nbl,16 +lre22_dev_glqft,eng-ens,18 +lre22_dev_glsnb,afr-afr,17 +lre22_dev_gmfcb,eng-iaf,16 +lre22_dev_gmlwo,afr-afr,16 +lre22_dev_gmpjq,tso-tso,12 +lre22_dev_gmrvk,ara-aeb,14 +lre22_dev_gmryq,ara-ayl,13 +lre22_dev_gmsds,eng-ens,16 +lre22_dev_gmztl,xho-xho,16 +lre22_dev_gnbyu,eng-iaf,15 +lre22_dev_gntym,zul-zul,17 +lre22_dev_gocpa,tso-tso,15 +lre22_dev_gpyxs,orm-orm,17 +lre22_dev_grgvb,afr-afr,16 +lre22_dev_grspj,orm-orm,19 +lre22_dev_grvjm,xho-xho,19 +lre22_dev_gsidj,eng-ens,18 +lre22_dev_gslzy,afr-afr,22 +lre22_dev_gtwcl,tir-tir,14 +lre22_dev_gulky,orm-orm,21 +lre22_dev_gvlhy,tir-tir,20 +lre22_dev_gvljx,tso-tso,15 +lre22_dev_gvmma,tso-tso,13 +lre22_dev_gvtvb,afr-afr,23 +lre22_dev_gweym,xho-xho,19 +lre22_dev_gwljh,ara-aeb,11 +lre22_dev_gwxtn,ara-ayl,14 +lre22_dev_gxdpw,fra-ntf,16 +lre22_dev_gxext,afr-afr,15 +lre22_dev_gxkqq,nbl-nbl,19 +lre22_dev_gxkxo,xho-xho,21 +lre22_dev_gxnkr,xho-xho,18 +lre22_dev_gxxbk,fra-ntf,21 +lre22_dev_gydvv,afr-afr,20 +lre22_dev_gytkt,ara-arq,12 +lre22_dev_gzmvp,afr-afr,18 +lre22_dev_gzoou,ven-ven,19 +lre22_dev_gzvza,tir-tir,15 +lre22_dev_gzwee,eng-iaf,17 +lre22_dev_haewp,tir-tir,19 +lre22_dev_haokb,fra-ntf,19 +lre22_dev_hazis,nbl-nbl,20 +lre22_dev_hbbbc,eng-ens,16 +lre22_dev_hblqa,nbl-nbl,17 +lre22_dev_hbmfy,zul-zul,15 +lre22_dev_hbndl,zul-zul,17 +lre22_dev_hcgfc,eng-ens,13 +lre22_dev_hcjnx,orm-orm,17 +lre22_dev_hcont,tir-tir,17 +lre22_dev_hcvik,tso-tso,13 +lre22_dev_hczom,zul-zul,19 +lre22_dev_hdaca,xho-xho,19 +lre22_dev_hdijt,fra-ntf,15 +lre22_dev_hdkyr,afr-afr,18 +lre22_dev_hdnoq,orm-orm,23 +lre22_dev_hdtlb,eng-iaf,16 +lre22_dev_hever,nbl-nbl,18 +lre22_dev_hfirj,nbl-nbl,17 +lre22_dev_hgbxp,xho-xho,21 +lre22_dev_hgcax,xho-xho,19 +lre22_dev_hgkwa,tso-tso,13 +lre22_dev_hgljd,ara-arq,15 +lre22_dev_hgvrh,nbl-nbl,21 +lre22_dev_hhovn,eng-iaf,16 +lre22_dev_hhpzm,fra-ntf,22 +lre22_dev_hhuab,ven-ven,20 +lre22_dev_hicev,ven-ven,18 +lre22_dev_hickz,ara-arq,12 +lre22_dev_hilii,orm-orm,23 +lre22_dev_hjenx,eng-iaf,19 +lre22_dev_hjiui,orm-orm,18 +lre22_dev_hkfts,eng-ens,18 +lre22_dev_hkhvl,zul-zul,19 +lre22_dev_hkobh,xho-xho,17 +lre22_dev_hkvay,ara-arq,13 +lre22_dev_hkvtj,orm-orm,21 +lre22_dev_hlevc,fra-ntf,17 +lre22_dev_hliut,ara-aeb,14 +lre22_dev_hlntc,zul-zul,18 +lre22_dev_hlprm,zul-zul,18 +lre22_dev_hmeav,ven-ven,17 +lre22_dev_hnelt,tir-tir,15 +lre22_dev_hniiy,ara-arq,15 +lre22_dev_hoepv,ara-aeb,13 +lre22_dev_hofkm,orm-orm,19 +lre22_dev_hoilz,tir-tir,19 +lre22_dev_hookr,ara-aeb,13 +lre22_dev_hpbhl,tir-tir,16 +lre22_dev_hpbzf,ara-aeb,11 +lre22_dev_hpizl,eng-ens,15 +lre22_dev_hplhi,ara-ayl,13 +lre22_dev_hplrq,xho-xho,20 +lre22_dev_hqdva,ven-ven,21 +lre22_dev_hqnus,xho-xho,16 +lre22_dev_hqoiz,orm-orm,18 +lre22_dev_hrerz,eng-ens,14 +lre22_dev_hrgjq,tir-tir,19 +lre22_dev_hrrhr,zul-zul,17 +lre22_dev_hsfbi,ara-ayl,14 +lre22_dev_hsjlg,tir-tir,17 
+lre22_dev_hskug,afr-afr,16 +lre22_dev_hszzt,tso-tso,19 +lre22_dev_htgrl,tso-tso,18 +lre22_dev_htxah,zul-zul,17 +lre22_dev_htxrs,xho-xho,23 +lre22_dev_hudwz,nbl-nbl,17 +lre22_dev_huuqj,fra-ntf,18 +lre22_dev_hvsds,afr-afr,21 +lre22_dev_hwbhz,orm-orm,23 +lre22_dev_hwbvs,tso-tso,13 +lre22_dev_hwdlb,tso-tso,19 +lre22_dev_hwyki,eng-iaf,16 +lre22_dev_hxcmj,eng-iaf,20 +lre22_dev_hxdly,ara-arq,11 +lre22_dev_hyeqm,xho-xho,19 +lre22_dev_hyofm,ara-arq,12 +lre22_dev_hyogg,ara-arq,13 +lre22_dev_hyouu,tso-tso,13 +lre22_dev_hzfpc,fra-ntf,16 +lre22_dev_hzkjt,ara-aeb,12 +lre22_dev_hzrgv,fra-ntf,20 +lre22_dev_hzuus,tir-tir,19 +lre22_dev_hzzbp,xho-xho,19 +lre22_dev_iautt,afr-afr,20 +lre22_dev_ibdnu,tir-tir,13 +lre22_dev_ibuww,ara-aeb,13 +lre22_dev_icbuo,ven-ven,21 +lre22_dev_icqmr,tso-tso,14 +lre22_dev_ictwj,tir-tir,14 +lre22_dev_ifumz,ven-ven,14 +lre22_dev_igcgi,tso-tso,19 +lre22_dev_igder,tir-tir,19 +lre22_dev_igexm,xho-xho,21 +lre22_dev_igfxi,fra-ntf,20 +lre22_dev_igoxr,afr-afr,15 +lre22_dev_igxyt,ven-ven,21 +lre22_dev_ihqtn,ara-aeb,11 +lre22_dev_ihxfl,tir-tir,13 +lre22_dev_ihyrb,nbl-nbl,18 +lre22_dev_iifuu,tir-tir,15 +lre22_dev_iiien,xho-xho,20 +lre22_dev_ijccu,eng-iaf,16 +lre22_dev_ijrun,afr-afr,18 +lre22_dev_ijwlx,ara-arq,14 +lre22_dev_ijydw,xho-xho,21 +lre22_dev_ikdjt,xho-xho,23 +lre22_dev_iklbv,ara-arq,13 +lre22_dev_ikyai,fra-ntf,18 +lre22_dev_ildmr,orm-orm,21 +lre22_dev_ilebo,orm-orm,19 +lre22_dev_ilptc,eng-ens,18 +lre22_dev_ilsku,fra-ntf,16 +lre22_dev_ilyti,ara-arq,11 +lre22_dev_imnqh,zul-zul,17 +lre22_dev_imxdr,eng-ens,16 +lre22_dev_indww,fra-ntf,19 +lre22_dev_iokar,eng-iaf,15 +lre22_dev_iomtu,eng-iaf,15 +lre22_dev_ioobz,tir-tir,14 +lre22_dev_iosom,zul-zul,17 +lre22_dev_iowyd,ara-arq,14 +lre22_dev_iphzy,nbl-nbl,18 +lre22_dev_ipmrc,nbl-nbl,16 +lre22_dev_ipomi,ara-aeb,12 +lre22_dev_ipour,afr-afr,15 +lre22_dev_ippjq,ara-ayl,16 +lre22_dev_ipvjc,ara-aeb,13 +lre22_dev_iqfdc,ven-ven,19 +lre22_dev_iqppw,tso-tso,15 +lre22_dev_iqtde,tso-tso,14 +lre22_dev_irlee,eng-iaf,14 +lre22_dev_irxuq,ara-aeb,14 +lre22_dev_isjzo,ara-arq,14 +lre22_dev_isnwz,ara-ayl,14 +lre22_dev_isqvk,afr-afr,15 +lre22_dev_isqww,orm-orm,19 +lre22_dev_istdz,tir-tir,18 +lre22_dev_iszhe,fra-ntf,20 +lre22_dev_itblz,ven-ven,18 +lre22_dev_itfez,ara-arq,13 +lre22_dev_itjqm,zul-zul,18 +lre22_dev_itnap,nbl-nbl,21 +lre22_dev_itrms,xho-xho,21 +lre22_dev_itroi,fra-ntf,17 +lre22_dev_ittds,zul-zul,16 +lre22_dev_iuknz,tso-tso,16 +lre22_dev_iumnm,ara-ayl,15 +lre22_dev_iunul,afr-afr,23 +lre22_dev_iverq,ven-ven,16 +lre22_dev_ivwzd,ara-ayl,14 +lre22_dev_ivzjf,tso-tso,12 +lre22_dev_iwbta,nbl-nbl,16 +lre22_dev_iwdeh,orm-orm,21 +lre22_dev_iwgel,ara-aeb,11 +lre22_dev_ixbhj,ara-aeb,11 +lre22_dev_ixbnl,fra-ntf,16 +lre22_dev_ixcef,ven-ven,20 +lre22_dev_ixfdf,orm-orm,18 +lre22_dev_ixjey,orm-orm,19 +lre22_dev_ixlve,tir-tir,17 +lre22_dev_ixutu,ara-ayl,12 +lre22_dev_ixxoj,xho-xho,23 +lre22_dev_ixyko,afr-afr,22 +lre22_dev_iylls,eng-iaf,19 +lre22_dev_izegw,orm-orm,23 +lre22_dev_izglb,ara-ayl,13 +lre22_dev_iziar,ara-arq,13 +lre22_dev_jadvz,afr-afr,18 +lre22_dev_jajtw,ara-aeb,14 +lre22_dev_janvu,tso-tso,16 +lre22_dev_japrb,xho-xho,21 +lre22_dev_jarvz,ara-aeb,12 +lre22_dev_jazcn,tso-tso,13 +lre22_dev_jbfxj,tso-tso,12 +lre22_dev_jbnfg,fra-ntf,15 +lre22_dev_jbwgd,afr-afr,20 +lre22_dev_jceug,tso-tso,15 +lre22_dev_jcqtd,eng-ens,14 +lre22_dev_jcxry,ven-ven,20 +lre22_dev_jdbli,tir-tir,20 +lre22_dev_jegmb,orm-orm,18 +lre22_dev_jegqj,ara-ayl,12 +lre22_dev_jenns,xho-xho,22 +lre22_dev_jfarf,ven-ven,14 +lre22_dev_jfcve,zul-zul,17 +lre22_dev_jfgyq,xho-xho,23 
+lre22_dev_jftnz,afr-afr,14 +lre22_dev_jftsj,afr-afr,22 +lre22_dev_jgnid,nbl-nbl,16 +lre22_dev_jgsju,eng-ens,13 +lre22_dev_jifal,orm-orm,19 +lre22_dev_jihsd,orm-orm,21 +lre22_dev_jihwf,ara-ayl,11 +lre22_dev_jiptp,eng-iaf,15 +lre22_dev_jizij,tir-tir,14 +lre22_dev_jjpzg,orm-orm,23 +lre22_dev_jkezw,fra-ntf,18 +lre22_dev_jkmux,fra-ntf,20 +lre22_dev_jkpnt,orm-orm,22 +lre22_dev_jlkfj,eng-ens,18 +lre22_dev_jlmtf,ven-ven,19 +lre22_dev_jlrfm,ara-arq,12 +lre22_dev_jmojg,orm-orm,19 +lre22_dev_jmrcv,ara-aeb,13 +lre22_dev_jmsxc,eng-iaf,16 +lre22_dev_jnjpw,tir-tir,14 +lre22_dev_jnzvu,ara-aeb,14 +lre22_dev_jocyh,xho-xho,19 +lre22_dev_joezr,tso-tso,16 +lre22_dev_jofqy,ara-arq,11 +lre22_dev_jpbyf,eng-ens,15 +lre22_dev_jppuy,ara-arq,13 +lre22_dev_jptts,ara-aeb,12 +lre22_dev_jqdyx,fra-ntf,22 +lre22_dev_jqjbq,zul-zul,17 +lre22_dev_jqpnb,ven-ven,21 +lre22_dev_jqqin,zul-zul,17 +lre22_dev_jqzkq,ara-ayl,13 +lre22_dev_jrroq,orm-orm,21 +lre22_dev_jruru,eng-ens,16 +lre22_dev_jskbr,ara-arq,11 +lre22_dev_jskdd,nbl-nbl,19 +lre22_dev_jslnc,eng-ens,12 +lre22_dev_jsmat,orm-orm,17 +lre22_dev_jsmdw,ara-aeb,11 +lre22_dev_jsvaz,afr-afr,19 +lre22_dev_jsxcy,afr-afr,21 +lre22_dev_jszgk,eng-iaf,19 +lre22_dev_jthui,ven-ven,20 +lre22_dev_jtpvz,ven-ven,17 +lre22_dev_jtwdi,ven-ven,14 +lre22_dev_jtwfh,ven-ven,18 +lre22_dev_juwid,tir-tir,20 +lre22_dev_jvdww,fra-ntf,21 +lre22_dev_jweyx,tir-tir,19 +lre22_dev_jwuto,afr-afr,19 +lre22_dev_jwwgs,afr-afr,19 +lre22_dev_jxhxf,nbl-nbl,17 +lre22_dev_jxtxk,orm-orm,20 +lre22_dev_jxzvy,eng-ens,15 +lre22_dev_jyjlm,nbl-nbl,19 +lre22_dev_jynvf,ara-ayl,13 +lre22_dev_jyzmh,nbl-nbl,19 +lre22_dev_jzivf,eng-ens,14 +lre22_dev_jzpns,tso-tso,14 +lre22_dev_kadwu,fra-ntf,18 +lre22_dev_kbnbi,tir-tir,13 +lre22_dev_kbqbd,fra-ntf,16 +lre22_dev_kbscm,tso-tso,15 +lre22_dev_kbxko,ara-aeb,12 +lre22_dev_kcegv,tso-tso,15 +lre22_dev_kcibo,afr-afr,17 +lre22_dev_kcmky,ara-ayl,14 +lre22_dev_kctrd,nbl-nbl,22 +lre22_dev_kcvbf,fra-ntf,16 +lre22_dev_kdbqy,zul-zul,15 +lre22_dev_kdgpz,ara-arq,14 +lre22_dev_kdhgq,nbl-nbl,22 +lre22_dev_kdvtu,eng-iaf,16 +lre22_dev_kdyhm,tso-tso,12 +lre22_dev_keeyz,zul-zul,18 +lre22_dev_kejvy,ven-ven,18 +lre22_dev_kerpr,ven-ven,21 +lre22_dev_keweh,ara-aeb,13 +lre22_dev_keysx,orm-orm,23 +lre22_dev_kezyv,ara-ayl,13 +lre22_dev_kgbiq,ven-ven,18 +lre22_dev_kgovz,tso-tso,15 +lre22_dev_kgxka,eng-ens,16 +lre22_dev_khkcx,fra-ntf,20 +lre22_dev_khobl,orm-orm,19 +lre22_dev_khttn,afr-afr,17 +lre22_dev_khvss,tir-tir,15 +lre22_dev_kiezl,tso-tso,16 +lre22_dev_kihlw,eng-ens,14 +lre22_dev_kipuq,ara-arq,14 +lre22_dev_kiqcx,tir-tir,16 +lre22_dev_kjiks,xho-xho,19 +lre22_dev_kjmpa,zul-zul,18 +lre22_dev_kjocf,eng-iaf,16 +lre22_dev_kkbur,ven-ven,16 +lre22_dev_kksdi,xho-xho,22 +lre22_dev_kkytv,ara-aeb,11 +lre22_dev_kmkgx,nbl-nbl,17 +lre22_dev_kmpkm,zul-zul,19 +lre22_dev_kmyzy,ara-ayl,13 +lre22_dev_knfsj,afr-afr,15 +lre22_dev_knyuq,orm-orm,19 +lre22_dev_koacp,orm-orm,19 +lre22_dev_koket,eng-ens,18 +lre22_dev_kovdn,zul-zul,15 +lre22_dev_kowqf,ven-ven,19 +lre22_dev_kozfr,nbl-nbl,21 +lre22_dev_kpmyz,orm-orm,19 +lre22_dev_kqfdc,eng-ens,17 +lre22_dev_kqumw,fra-ntf,22 +lre22_dev_kqwdi,nbl-nbl,16 +lre22_dev_krczb,ven-ven,19 +lre22_dev_kremz,nbl-nbl,16 +lre22_dev_ksruw,ven-ven,18 +lre22_dev_kszdw,eng-iaf,20 +lre22_dev_ktgvi,ara-arq,11 +lre22_dev_ktjax,fra-ntf,20 +lre22_dev_ktlvc,orm-orm,19 +lre22_dev_kvqgp,afr-afr,21 +lre22_dev_kvyoz,afr-afr,20 +lre22_dev_kvzim,afr-afr,14 +lre22_dev_kvzwc,eng-iaf,14 +lre22_dev_kwcwa,ara-arq,14 +lre22_dev_kwomo,zul-zul,19 +lre22_dev_kwxau,xho-xho,18 +lre22_dev_kxawf,tir-tir,19 
+lre22_dev_kxjhn,ara-aeb,11 +lre22_dev_kxklh,tir-tir,19 +lre22_dev_kxlgg,tir-tir,16 +lre22_dev_kyqbp,fra-ntf,21 +lre22_dev_kyzio,ven-ven,20 +lre22_dev_kzcgh,ara-ayl,13 +lre22_dev_kzeyf,ven-ven,18 +lre22_dev_kzfwf,fra-ntf,19 +lre22_dev_kzjuz,orm-orm,21 +lre22_dev_kzjwx,ara-ayl,11 +lre22_dev_lamjl,tso-tso,17 +lre22_dev_laowh,xho-xho,16 +lre22_dev_larex,ara-ayl,11 +lre22_dev_laycs,tso-tso,12 +lre22_dev_lbxfn,eng-iaf,20 +lre22_dev_lcrog,zul-zul,18 +lre22_dev_ldczz,xho-xho,17 +lre22_dev_ldkgv,ara-aeb,13 +lre22_dev_ldkst,fra-ntf,20 +lre22_dev_ldkwr,orm-orm,22 +lre22_dev_lenxf,ven-ven,14 +lre22_dev_lfbey,ara-ayl,12 +lre22_dev_lfmml,fra-ntf,18 +lre22_dev_lfmxu,ven-ven,18 +lre22_dev_lfqfj,afr-afr,17 +lre22_dev_lgetu,ara-aeb,14 +lre22_dev_lgleu,ara-ayl,11 +lre22_dev_lgoat,eng-iaf,16 +lre22_dev_lhgaj,tso-tso,15 +lre22_dev_lhqyw,nbl-nbl,17 +lre22_dev_lhrmr,eng-iaf,17 +lre22_dev_lhtsd,tir-tir,19 +lre22_dev_lhydp,fra-ntf,22 +lre22_dev_livbf,tir-tir,15 +lre22_dev_ljdrg,ara-arq,13 +lre22_dev_ljniw,tso-tso,16 +lre22_dev_ljpmq,tso-tso,12 +lre22_dev_lkjon,tso-tso,15 +lre22_dev_lkszp,nbl-nbl,19 +lre22_dev_llbim,ara-ayl,15 +lre22_dev_llkkt,fra-ntf,15 +lre22_dev_llvcc,orm-orm,22 +lre22_dev_lmbug,ara-arq,12 +lre22_dev_lmmmw,nbl-nbl,19 +lre22_dev_lmsek,ven-ven,16 +lre22_dev_lmudp,ara-ayl,10 +lre22_dev_lmzmv,eng-iaf,19 +lre22_dev_lnlae,ara-arq,14 +lre22_dev_lnlvt,zul-zul,17 +lre22_dev_lnppu,ara-ayl,13 +lre22_dev_lnpyc,tso-tso,19 +lre22_dev_lolkv,xho-xho,19 +lre22_dev_lorcx,nbl-nbl,20 +lre22_dev_lparq,xho-xho,16 +lre22_dev_lqlft,ara-arq,11 +lre22_dev_lqlyq,ara-arq,12 +lre22_dev_lqoeu,tso-tso,14 +lre22_dev_lqueh,ara-ayl,11 +lre22_dev_lquzk,ara-arq,12 +lre22_dev_lqvav,zul-zul,18 +lre22_dev_lrgpy,eng-iaf,16 +lre22_dev_lrjbn,ven-ven,21 +lre22_dev_lrtad,ara-arq,14 +lre22_dev_lrtxd,ara-aeb,11 +lre22_dev_lrvkn,ven-ven,16 +lre22_dev_lrzwy,ara-ayl,13 +lre22_dev_lsefk,ara-arq,13 +lre22_dev_ltmmt,orm-orm,22 +lre22_dev_lutgh,ara-aeb,15 +lre22_dev_lvhmd,tso-tso,14 +lre22_dev_lvqim,ara-aeb,14 +lre22_dev_lvuuo,fra-ntf,17 +lre22_dev_lvzri,ven-ven,16 +lre22_dev_lweml,ara-arq,14 +lre22_dev_lwstj,eng-iaf,16 +lre22_dev_lwzdj,afr-afr,18 +lre22_dev_lxdsk,eng-ens,16 +lre22_dev_lxlcr,ara-aeb,13 +lre22_dev_lxshv,eng-iaf,20 +lre22_dev_lxxvv,eng-ens,16 +lre22_dev_lyfhc,ven-ven,18 +lre22_dev_lyikp,zul-zul,19 +lre22_dev_lyjix,tso-tso,14 +lre22_dev_lyxyh,eng-iaf,19 +lre22_dev_lyzxd,tir-tir,17 +lre22_dev_lzguf,orm-orm,21 +lre22_dev_lzpmk,tir-tir,16 +lre22_dev_lzugv,xho-xho,19 +lre22_dev_maeeb,tir-tir,15 +lre22_dev_maemn,zul-zul,16 +lre22_dev_manpw,orm-orm,19 +lre22_dev_mavli,ara-aeb,12 +lre22_dev_mbywd,orm-orm,19 +lre22_dev_mcath,nbl-nbl,22 +lre22_dev_mcjtw,xho-xho,16 +lre22_dev_mcndd,ven-ven,15 +lre22_dev_mcxqb,tir-tir,13 +lre22_dev_mdlia,fra-ntf,16 +lre22_dev_mdxsp,eng-ens,18 +lre22_dev_menex,eng-iaf,16 +lre22_dev_merfk,orm-orm,21 +lre22_dev_mfipk,zul-zul,16 +lre22_dev_mfuqh,ara-arq,14 +lre22_dev_mgcvo,xho-xho,19 +lre22_dev_mggbx,zul-zul,18 +lre22_dev_mgghl,tso-tso,12 +lre22_dev_mgwqd,ara-arq,14 +lre22_dev_mhswt,ara-ayl,15 +lre22_dev_mhwmt,tso-tso,16 +lre22_dev_miayn,ara-aeb,12 +lre22_dev_miley,tso-tso,16 +lre22_dev_mjfmb,nbl-nbl,21 +lre22_dev_mkbyx,tir-tir,19 +lre22_dev_mlbzi,xho-xho,23 +lre22_dev_mlduq,xho-xho,16 +lre22_dev_mljnp,ara-arq,14 +lre22_dev_mljpb,orm-orm,22 +lre22_dev_mlrsm,xho-xho,17 +lre22_dev_mlwzr,eng-ens,13 +lre22_dev_mlyeo,ven-ven,15 +lre22_dev_mmaed,ara-ayl,14 +lre22_dev_mmbns,eng-ens,12 +lre22_dev_mneyt,xho-xho,17 +lre22_dev_mnhsk,ven-ven,14 +lre22_dev_mnnvk,eng-ens,15 +lre22_dev_mnswo,tso-tso,16 
+lre22_dev_mntdk,eng-ens,18 +lre22_dev_mogwl,orm-orm,22 +lre22_dev_mpbun,nbl-nbl,21 +lre22_dev_mpmuf,ara-aeb,14 +lre22_dev_mpoet,nbl-nbl,16 +lre22_dev_mptyi,afr-afr,18 +lre22_dev_mpzxy,orm-orm,18 +lre22_dev_mqxni,ara-arq,11 +lre22_dev_mqzga,tso-tso,19 +lre22_dev_mrgdh,xho-xho,17 +lre22_dev_mrgko,afr-afr,18 +lre22_dev_mrksc,tir-tir,19 +lre22_dev_mrogp,eng-iaf,15 +lre22_dev_mscwd,fra-ntf,16 +lre22_dev_mshco,ara-ayl,12 +lre22_dev_msptn,ara-ayl,16 +lre22_dev_msslk,ara-aeb,14 +lre22_dev_mtaus,fra-ntf,19 +lre22_dev_mtpgl,tso-tso,13 +lre22_dev_mttly,tir-tir,19 +lre22_dev_mubqn,fra-ntf,15 +lre22_dev_muskv,tso-tso,12 +lre22_dev_muzkp,ara-arq,14 +lre22_dev_mvdus,ven-ven,19 +lre22_dev_mvngl,xho-xho,19 +lre22_dev_mvrpq,tso-tso,12 +lre22_dev_mvtcj,afr-afr,22 +lre22_dev_mwhsu,xho-xho,21 +lre22_dev_mwkyp,nbl-nbl,20 +lre22_dev_mxcey,ara-ayl,12 +lre22_dev_mxcub,ara-aeb,12 +lre22_dev_myekh,ara-aeb,11 +lre22_dev_mzxhf,zul-zul,17 +lre22_dev_mzyru,ara-arq,12 +lre22_dev_nakax,eng-iaf,15 +lre22_dev_naymc,ara-ayl,13 +lre22_dev_nbgid,orm-orm,19 +lre22_dev_nbmnl,xho-xho,16 +lre22_dev_ncffi,zul-zul,14 +lre22_dev_ncjtj,fra-ntf,22 +lre22_dev_ncpix,ara-ayl,11 +lre22_dev_nctqc,xho-xho,16 +lre22_dev_ndkuo,orm-orm,20 +lre22_dev_ndqfw,nbl-nbl,17 +lre22_dev_nedes,ven-ven,15 +lre22_dev_neomw,zul-zul,18 +lre22_dev_neziz,tir-tir,19 +lre22_dev_nfcvg,eng-iaf,17 +lre22_dev_nfdfc,afr-afr,17 +lre22_dev_ngijv,xho-xho,21 +lre22_dev_ngrxk,ara-ayl,13 +lre22_dev_ngzja,ara-aeb,13 +lre22_dev_nhaub,tso-tso,13 +lre22_dev_nhkro,xho-xho,23 +lre22_dev_nhlvt,ara-arq,14 +lre22_dev_nhlxm,eng-ens,14 +lre22_dev_nhyjy,afr-afr,17 +lre22_dev_nifei,zul-zul,19 +lre22_dev_nikpx,ven-ven,18 +lre22_dev_njceq,afr-afr,18 +lre22_dev_njmlt,eng-ens,17 +lre22_dev_njqfj,orm-orm,18 +lre22_dev_nkdje,eng-iaf,19 +lre22_dev_nkkqo,nbl-nbl,22 +lre22_dev_nknrw,orm-orm,21 +lre22_dev_nkogd,fra-ntf,19 +lre22_dev_nksfc,tir-tir,19 +lre22_dev_nkwmm,orm-orm,22 +lre22_dev_nmhdg,ara-ayl,10 +lre22_dev_nmoux,ven-ven,20 +lre22_dev_nmrsq,ven-ven,21 +lre22_dev_nnbhc,fra-ntf,20 +lre22_dev_nnbpy,tir-tir,18 +lre22_dev_nnpwd,ara-aeb,13 +lre22_dev_nodin,ara-ayl,14 +lre22_dev_nogji,nbl-nbl,20 +lre22_dev_nonvr,afr-afr,15 +lre22_dev_notcl,eng-iaf,19 +lre22_dev_noufn,ara-aeb,11 +lre22_dev_noveb,ara-ayl,11 +lre22_dev_npajm,nbl-nbl,19 +lre22_dev_npehj,ara-ayl,14 +lre22_dev_nqdaj,tso-tso,12 +lre22_dev_nqkon,xho-xho,18 +lre22_dev_nqlhw,ara-aeb,13 +lre22_dev_nraqr,eng-ens,14 +lre22_dev_nrino,tso-tso,14 +lre22_dev_nrzgt,xho-xho,16 +lre22_dev_nscrg,orm-orm,18 +lre22_dev_nstgp,orm-orm,23 +lre22_dev_ntgqz,afr-afr,23 +lre22_dev_nthzr,eng-iaf,18 +lre22_dev_ntwzb,afr-afr,16 +lre22_dev_nudwv,eng-ens,14 +lre22_dev_nuerz,eng-iaf,18 +lre22_dev_nujfy,xho-xho,21 +lre22_dev_nurlx,eng-ens,13 +lre22_dev_nvakd,zul-zul,17 +lre22_dev_nvgkj,eng-ens,17 +lre22_dev_nvhvv,fra-ntf,20 +lre22_dev_nwbnz,ara-arq,14 +lre22_dev_nwjed,nbl-nbl,19 +lre22_dev_nwrto,ara-aeb,11 +lre22_dev_nwunl,zul-zul,14 +lre22_dev_nwvyy,tir-tir,19 +lre22_dev_nxwlo,nbl-nbl,17 +lre22_dev_nxxzy,zul-zul,16 +lre22_dev_nxzpp,nbl-nbl,20 +lre22_dev_nyhwg,ara-arq,14 +lre22_dev_nykvr,eng-ens,17 +lre22_dev_nyvkc,tir-tir,15 +lre22_dev_nyyui,ara-arq,11 +lre22_dev_nzbfh,zul-zul,19 +lre22_dev_nzxsk,xho-xho,21 +lre22_dev_oasrh,ara-arq,11 +lre22_dev_oavaf,xho-xho,21 +lre22_dev_obfrf,orm-orm,20 +lre22_dev_obocn,ara-arq,14 +lre22_dev_obumo,eng-ens,15 +lre22_dev_ocbuj,eng-ens,12 +lre22_dev_ocbxu,nbl-nbl,21 +lre22_dev_ocdvw,ara-ayl,13 +lre22_dev_ocdzj,xho-xho,19 +lre22_dev_ocveq,fra-ntf,22 +lre22_dev_odest,ara-ayl,11 +lre22_dev_odjlq,ven-ven,18 
+lre22_dev_odpoq,ara-ayl,12 +lre22_dev_odrcm,fra-ntf,21 +lre22_dev_oeavx,ara-arq,12 +lre22_dev_oefoy,ara-aeb,12 +lre22_dev_oefqy,ven-ven,16 +lre22_dev_oehxk,ara-ayl,12 +lre22_dev_oeqbo,ara-aeb,14 +lre22_dev_oeqjq,fra-ntf,20 +lre22_dev_ofdgy,ara-ayl,15 +lre22_dev_ofgkq,fra-ntf,21 +lre22_dev_ofpva,ara-arq,11 +lre22_dev_ofufy,eng-iaf,17 +lre22_dev_ogglz,ara-aeb,13 +lre22_dev_oggtr,nbl-nbl,19 +lre22_dev_ogpxk,ara-aeb,11 +lre22_dev_ogsay,tso-tso,19 +lre22_dev_ogtvj,zul-zul,19 +lre22_dev_ohqwz,ara-arq,13 +lre22_dev_ohuxo,afr-afr,20 +lre22_dev_ohweb,ven-ven,16 +lre22_dev_ohzpg,fra-ntf,21 +lre22_dev_oijcy,xho-xho,19 +lre22_dev_oijgv,tir-tir,16 +lre22_dev_oikqj,eng-iaf,17 +lre22_dev_oinvl,ven-ven,15 +lre22_dev_oiofr,fra-ntf,19 +lre22_dev_oipks,eng-ens,17 +lre22_dev_ojzos,ara-arq,14 +lre22_dev_okbnu,ara-ayl,10 +lre22_dev_okpcp,eng-iaf,18 +lre22_dev_okwpq,tso-tso,16 +lre22_dev_oleie,ara-arq,12 +lre22_dev_oljep,ven-ven,21 +lre22_dev_oljsa,fra-ntf,16 +lre22_dev_olkup,nbl-nbl,16 +lre22_dev_olqbh,ara-ayl,14 +lre22_dev_omjqo,ara-aeb,14 +lre22_dev_omwiy,ara-ayl,12 +lre22_dev_omxnk,ara-arq,13 +lre22_dev_onqke,eng-iaf,16 +lre22_dev_onzje,tir-tir,13 +lre22_dev_ooktw,afr-afr,18 +lre22_dev_oosff,ara-aeb,12 +lre22_dev_ootbi,xho-xho,21 +lre22_dev_opciz,orm-orm,23 +lre22_dev_opgny,xho-xho,19 +lre22_dev_opifd,ara-arq,12 +lre22_dev_oporo,eng-iaf,19 +lre22_dev_opryj,nbl-nbl,16 +lre22_dev_opuzh,eng-ens,12 +lre22_dev_oqbaw,ven-ven,18 +lre22_dev_oqeuj,tir-tir,14 +lre22_dev_oqmhb,xho-xho,21 +lre22_dev_oqmrs,ara-arq,14 +lre22_dev_oqqwq,tso-tso,12 +lre22_dev_oquaq,xho-xho,17 +lre22_dev_oriap,fra-ntf,20 +lre22_dev_orsjj,tir-tir,20 +lre22_dev_orvna,fra-ntf,21 +lre22_dev_oskoe,orm-orm,20 +lre22_dev_otlyk,nbl-nbl,18 +lre22_dev_oujnj,nbl-nbl,17 +lre22_dev_oumka,ven-ven,14 +lre22_dev_ouqsx,ara-arq,13 +lre22_dev_outyl,zul-zul,16 +lre22_dev_owlwt,ara-ayl,14 +lre22_dev_owvfd,orm-orm,18 +lre22_dev_oxizc,tir-tir,15 +lre22_dev_oxpht,eng-ens,18 +lre22_dev_oxqlz,afr-afr,15 +lre22_dev_oydiw,nbl-nbl,16 +lre22_dev_oyfcl,fra-ntf,22 +lre22_dev_oyhba,eng-ens,18 +lre22_dev_oyiif,afr-afr,17 +lre22_dev_oyslg,afr-afr,21 +lre22_dev_ozfpi,tir-tir,15 +lre22_dev_ozlww,ven-ven,19 +lre22_dev_paxnc,eng-ens,17 +lre22_dev_pbbgx,eng-iaf,14 +lre22_dev_pcfmw,nbl-nbl,21 +lre22_dev_pclpc,fra-ntf,15 +lre22_dev_pcmmj,afr-afr,16 +lre22_dev_pcsqz,tso-tso,18 +lre22_dev_pdcfm,ara-ayl,10 +lre22_dev_pdtuf,eng-ens,18 +lre22_dev_pdzuj,zul-zul,17 +lre22_dev_pehfu,fra-ntf,15 +lre22_dev_pewpj,orm-orm,22 +lre22_dev_pexjz,orm-orm,17 +lre22_dev_pfioj,eng-iaf,15 +lre22_dev_pfkcf,eng-iaf,16 +lre22_dev_pfknl,ara-arq,14 +lre22_dev_pfucv,ara-ayl,12 +lre22_dev_pfyha,fra-ntf,21 +lre22_dev_pgavf,ara-ayl,13 +lre22_dev_phket,nbl-nbl,22 +lre22_dev_piabk,afr-afr,19 +lre22_dev_picvg,orm-orm,17 +lre22_dev_piina,eng-ens,14 +lre22_dev_pjahm,afr-afr,20 +lre22_dev_pjcso,nbl-nbl,17 +lre22_dev_pjggp,ven-ven,16 +lre22_dev_pjohw,xho-xho,19 +lre22_dev_pkpxo,ara-ayl,11 +lre22_dev_pktgk,nbl-nbl,22 +lre22_dev_plojq,eng-ens,12 +lre22_dev_pmayg,ven-ven,21 +lre22_dev_pmjyi,xho-xho,20 +lre22_dev_pmkcp,nbl-nbl,20 +lre22_dev_pnfhk,fra-ntf,18 +lre22_dev_pnust,nbl-nbl,20 +lre22_dev_pnwey,eng-iaf,15 +lre22_dev_pnwti,ara-aeb,13 +lre22_dev_pohmm,afr-afr,14 +lre22_dev_pojvr,nbl-nbl,22 +lre22_dev_poxsw,ara-aeb,13 +lre22_dev_ppjvq,tir-tir,16 +lre22_dev_ppkfc,fra-ntf,19 +lre22_dev_ppmnu,tso-tso,12 +lre22_dev_ppzno,tso-tso,12 +lre22_dev_pqksl,afr-afr,14 +lre22_dev_pqnvh,zul-zul,19 +lre22_dev_prcus,tso-tso,15 +lre22_dev_prhoh,tir-tir,19 +lre22_dev_prkth,ara-arq,12 +lre22_dev_prnhd,xho-xho,18 
+lre22_dev_psjma,fra-ntf,18 +lre22_dev_psldq,tir-tir,19 +lre22_dev_psnvo,afr-afr,15 +lre22_dev_psnzj,zul-zul,19 +lre22_dev_pudqr,eng-ens,17 +lre22_dev_pufnl,orm-orm,19 +lre22_dev_pusxa,nbl-nbl,22 +lre22_dev_pvsqi,ara-arq,11 +lre22_dev_pvteg,fra-ntf,17 +lre22_dev_pvvay,tir-tir,14 +lre22_dev_pvxcv,ara-aeb,15 +lre22_dev_pvygc,ara-aeb,11 +lre22_dev_pwcxu,tir-tir,13 +lre22_dev_pwhdm,nbl-nbl,17 +lre22_dev_pwnkz,ven-ven,20 +lre22_dev_pwrqe,ara-aeb,14 +lre22_dev_pxbhi,afr-afr,16 +lre22_dev_pxeyk,zul-zul,18 +lre22_dev_pxkzd,ara-arq,14 +lre22_dev_pydgm,afr-afr,19 +lre22_dev_pyiju,ven-ven,20 +lre22_dev_pzhrc,tso-tso,13 +lre22_dev_pzkea,ven-ven,14 +lre22_dev_pzqka,ara-arq,11 +lre22_dev_pzuis,ara-arq,13 +lre22_dev_qabac,ven-ven,19 +lre22_dev_qahym,ara-ayl,11 +lre22_dev_qaxfr,xho-xho,17 +lre22_dev_qazyc,ara-ayl,14 +lre22_dev_qbcoz,nbl-nbl,22 +lre22_dev_qcavr,eng-iaf,20 +lre22_dev_qcbkh,fra-ntf,18 +lre22_dev_qcbtt,afr-afr,18 +lre22_dev_qclly,xho-xho,22 +lre22_dev_qcqdt,eng-iaf,18 +lre22_dev_qdqzp,zul-zul,17 +lre22_dev_qdwut,eng-ens,16 +lre22_dev_qehxr,afr-afr,22 +lre22_dev_qeqah,tir-tir,16 +lre22_dev_qeyjd,afr-afr,17 +lre22_dev_qfprv,ara-ayl,13 +lre22_dev_qfqhi,ara-ayl,15 +lre22_dev_qgoge,tso-tso,13 +lre22_dev_qgrlb,eng-iaf,16 +lre22_dev_qgrsu,zul-zul,14 +lre22_dev_qheor,xho-xho,23 +lre22_dev_qhfdz,tso-tso,14 +lre22_dev_qhlol,ven-ven,21 +lre22_dev_qhnfr,zul-zul,15 +lre22_dev_qhvuq,tso-tso,14 +lre22_dev_qibby,afr-afr,23 +lre22_dev_qicen,orm-orm,16 +lre22_dev_qiehd,eng-iaf,14 +lre22_dev_qjbfh,eng-iaf,15 +lre22_dev_qjdln,afr-afr,19 +lre22_dev_qjmro,ara-ayl,11 +lre22_dev_qkgor,zul-zul,16 +lre22_dev_qlgvf,ara-aeb,12 +lre22_dev_qlpjn,eng-iaf,16 +lre22_dev_qmoop,nbl-nbl,16 +lre22_dev_qmqhy,afr-afr,20 +lre22_dev_qmreh,ara-ayl,10 +lre22_dev_qmucf,ven-ven,18 +lre22_dev_qmvnu,fra-ntf,15 +lre22_dev_qmzke,ara-ayl,13 +lre22_dev_qmzxw,orm-orm,21 +lre22_dev_qnams,ven-ven,20 +lre22_dev_qnefv,xho-xho,23 +lre22_dev_qodht,zul-zul,19 +lre22_dev_qoqtk,eng-ens,16 +lre22_dev_qotto,fra-ntf,18 +lre22_dev_qoudd,tso-tso,18 +lre22_dev_qpego,ara-ayl,14 +lre22_dev_qphcb,fra-ntf,22 +lre22_dev_qqkiv,ara-arq,13 +lre22_dev_qqmeu,eng-ens,17 +lre22_dev_qqudk,orm-orm,21 +lre22_dev_qqvdr,orm-orm,23 +lre22_dev_qrbmq,ara-arq,12 +lre22_dev_qrfvx,fra-ntf,22 +lre22_dev_qrsqg,zul-zul,19 +lre22_dev_qrylo,eng-ens,18 +lre22_dev_qsbdh,nbl-nbl,16 +lre22_dev_qsqzo,afr-afr,14 +lre22_dev_qsudg,nbl-nbl,22 +lre22_dev_qszwt,fra-ntf,21 +lre22_dev_qtcmx,nbl-nbl,21 +lre22_dev_qtfpf,zul-zul,16 +lre22_dev_qtkhk,afr-afr,22 +lre22_dev_qtydg,afr-afr,22 +lre22_dev_qujmp,zul-zul,19 +lre22_dev_qulse,eng-ens,17 +lre22_dev_qutbz,eng-ens,18 +lre22_dev_quvqg,ara-aeb,13 +lre22_dev_qvpjs,eng-iaf,19 +lre22_dev_qvtdy,tso-tso,12 +lre22_dev_qvzol,orm-orm,19 +lre22_dev_qwvgm,ara-ayl,13 +lre22_dev_qwzxt,zul-zul,19 +lre22_dev_qxigw,tir-tir,19 +lre22_dev_qxkuu,tso-tso,13 +lre22_dev_qxtss,afr-afr,15 +lre22_dev_qxvbe,nbl-nbl,17 +lre22_dev_qxysh,afr-afr,22 +lre22_dev_qyfba,zul-zul,14 +lre22_dev_qyfov,fra-ntf,19 +lre22_dev_qyjgj,afr-afr,22 +lre22_dev_qyuwy,ara-aeb,15 +lre22_dev_qzfdr,nbl-nbl,18 +lre22_dev_qzldb,eng-iaf,19 +lre22_dev_ranrd,nbl-nbl,22 +lre22_dev_raurj,eng-ens,12 +lre22_dev_rbntq,ara-arq,11 +lre22_dev_rbssw,ara-aeb,11 +lre22_dev_rbwgx,ara-ayl,16 +lre22_dev_rcooi,fra-ntf,18 +lre22_dev_rcyom,ara-ayl,11 +lre22_dev_rdcns,zul-zul,18 +lre22_dev_rdrhv,ara-arq,11 +lre22_dev_rdyxn,eng-iaf,19 +lre22_dev_repec,tir-tir,19 +lre22_dev_rgbby,tso-tso,19 +lre22_dev_rgdvt,fra-ntf,20 +lre22_dev_rguqm,tso-tso,14 +lre22_dev_rgwjy,afr-afr,19 +lre22_dev_rijeq,orm-orm,19 
+lre22_dev_rincv,tir-tir,16 +lre22_dev_rindo,zul-zul,17 +lre22_dev_rirhy,ara-arq,11 +lre22_dev_rjikw,fra-ntf,20 +lre22_dev_rjsik,tso-tso,16 +lre22_dev_rjvvj,tso-tso,19 +lre22_dev_rksid,nbl-nbl,22 +lre22_dev_rkycg,ven-ven,21 +lre22_dev_rlamm,zul-zul,15 +lre22_dev_rllya,tso-tso,15 +lre22_dev_rlzrk,eng-ens,14 +lre22_dev_rmxbg,tir-tir,14 +lre22_dev_rnrsy,tir-tir,19 +lre22_dev_rokej,xho-xho,17 +lre22_dev_rooaf,fra-ntf,17 +lre22_dev_rorob,ven-ven,15 +lre22_dev_rowwe,nbl-nbl,17 +lre22_dev_rqcuw,ara-ayl,11 +lre22_dev_rqdte,ara-ayl,10 +lre22_dev_rqpau,tso-tso,15 +lre22_dev_rquba,ven-ven,19 +lre22_dev_rrbgv,afr-afr,20 +lre22_dev_rsvjn,fra-ntf,16 +lre22_dev_rsynm,tir-tir,19 +lre22_dev_rtezn,tir-tir,19 +lre22_dev_rtkum,orm-orm,21 +lre22_dev_rturg,zul-zul,17 +lre22_dev_runwu,tir-tir,16 +lre22_dev_rvbmf,tso-tso,12 +lre22_dev_rvfls,tso-tso,16 +lre22_dev_rvhxb,ara-aeb,11 +lre22_dev_rvufk,orm-orm,20 +lre22_dev_rvzbo,ara-ayl,14 +lre22_dev_rwhfu,xho-xho,16 +lre22_dev_rwhiz,ara-ayl,10 +lre22_dev_rwimz,ven-ven,16 +lre22_dev_rwish,eng-ens,16 +lre22_dev_rwpzp,xho-xho,19 +lre22_dev_rwqlq,tir-tir,19 +lre22_dev_rwsnw,afr-afr,15 +lre22_dev_rwzwb,tso-tso,19 +lre22_dev_rxcjq,ara-arq,13 +lre22_dev_rxcka,ara-arq,14 +lre22_dev_rxgxu,tir-tir,19 +lre22_dev_rxqxn,nbl-nbl,20 +lre22_dev_rxwip,ara-ayl,10 +lre22_dev_rycca,ven-ven,14 +lre22_dev_rydpu,eng-ens,17 +lre22_dev_ryksb,ven-ven,14 +lre22_dev_rysmu,afr-afr,23 +lre22_dev_rzisy,ara-aeb,13 +lre22_dev_rzpus,ara-arq,15 +lre22_dev_rzqyn,ara-ayl,11 +lre22_dev_rzzca,orm-orm,21 +lre22_dev_sazdy,tso-tso,15 +lre22_dev_sbkip,afr-afr,14 +lre22_dev_sbyek,ara-arq,11 +lre22_dev_scjzn,xho-xho,21 +lre22_dev_scobo,ven-ven,17 +lre22_dev_scqui,orm-orm,16 +lre22_dev_sdccf,ara-arq,14 +lre22_dev_sdcty,tso-tso,19 +lre22_dev_sdebh,ara-ayl,12 +lre22_dev_sedif,orm-orm,21 +lre22_dev_sedug,xho-xho,18 +lre22_dev_seynu,tso-tso,13 +lre22_dev_seyxt,ara-aeb,13 +lre22_dev_sezun,ara-aeb,14 +lre22_dev_sfeyl,ara-aeb,12 +lre22_dev_sfnux,afr-afr,18 +lre22_dev_sfqnk,zul-zul,15 +lre22_dev_sftvb,ara-ayl,11 +lre22_dev_sfwkd,ven-ven,17 +lre22_dev_shgbp,fra-ntf,22 +lre22_dev_shikk,tir-tir,19 +lre22_dev_shpve,afr-afr,21 +lre22_dev_sidjm,ara-ayl,10 +lre22_dev_sihvc,orm-orm,17 +lre22_dev_siiaw,ven-ven,16 +lre22_dev_sinfr,xho-xho,19 +lre22_dev_sipnk,eng-iaf,16 +lre22_dev_sjbcr,tir-tir,19 +lre22_dev_sjdzp,eng-iaf,16 +lre22_dev_sjmsx,ven-ven,19 +lre22_dev_sjsnf,afr-afr,16 +lre22_dev_sjwmd,tir-tir,19 +lre22_dev_sjxce,nbl-nbl,16 +lre22_dev_sjzcc,eng-ens,13 +lre22_dev_sjzsv,fra-ntf,22 +lre22_dev_skegk,afr-afr,18 +lre22_dev_skpib,ven-ven,14 +lre22_dev_slgub,orm-orm,18 +lre22_dev_slryu,nbl-nbl,17 +lre22_dev_slupt,ara-ayl,13 +lre22_dev_smfbl,ara-aeb,14 +lre22_dev_smfon,xho-xho,20 +lre22_dev_smvms,afr-afr,18 +lre22_dev_snegl,xho-xho,18 +lre22_dev_snvvg,tso-tso,14 +lre22_dev_sobpf,orm-orm,19 +lre22_dev_soely,eng-iaf,14 +lre22_dev_sorzd,tir-tir,19 +lre22_dev_spixz,nbl-nbl,18 +lre22_dev_spjcl,fra-ntf,17 +lre22_dev_spzra,tso-tso,17 +lre22_dev_sqaei,xho-xho,23 +lre22_dev_sqime,ven-ven,14 +lre22_dev_srgaw,eng-iaf,15 +lre22_dev_srnhq,ven-ven,16 +lre22_dev_srsng,orm-orm,21 +lre22_dev_srysc,nbl-nbl,17 +lre22_dev_srzgk,eng-ens,16 +lre22_dev_srzsi,ara-aeb,14 +lre22_dev_ssjtt,nbl-nbl,16 +lre22_dev_stajf,xho-xho,21 +lre22_dev_sttfd,ara-aeb,15 +lre22_dev_suevr,ara-aeb,15 +lre22_dev_sumum,afr-afr,18 +lre22_dev_svukm,fra-ntf,20 +lre22_dev_swkzf,tir-tir,17 +lre22_dev_sxqmv,ara-aeb,11 +lre22_dev_sxvuf,ara-aeb,11 +lre22_dev_sydqt,eng-ens,18 +lre22_dev_syooe,eng-ens,14 +lre22_dev_szpip,tir-tir,17 +lre22_dev_szsgp,fra-ntf,19 
+lre22_dev_szzuj,ara-ayl,11 +lre22_dev_tabof,orm-orm,19 +lre22_dev_tavcw,ven-ven,19 +lre22_dev_tbjal,xho-xho,22 +lre22_dev_tbxzb,fra-ntf,21 +lre22_dev_tdalr,nbl-nbl,18 +lre22_dev_tdfzf,eng-iaf,17 +lre22_dev_tdlyk,tir-tir,15 +lre22_dev_tefms,fra-ntf,15 +lre22_dev_telgo,xho-xho,19 +lre22_dev_teric,eng-ens,14 +lre22_dev_tfcgx,orm-orm,21 +lre22_dev_tgiid,xho-xho,19 +lre22_dev_tgoea,ara-ayl,13 +lre22_dev_tgrrk,eng-iaf,18 +lre22_dev_tgtyv,tso-tso,12 +lre22_dev_tgzex,tso-tso,12 +lre22_dev_thone,nbl-nbl,17 +lre22_dev_thpnk,afr-afr,18 +lre22_dev_thwls,ven-ven,17 +lre22_dev_tibov,tir-tir,14 +lre22_dev_tidld,tso-tso,16 +lre22_dev_tiezu,eng-ens,17 +lre22_dev_tioqa,nbl-nbl,16 +lre22_dev_tiuym,zul-zul,15 +lre22_dev_tjivp,afr-afr,22 +lre22_dev_tjltd,orm-orm,20 +lre22_dev_tkcqj,ara-aeb,12 +lre22_dev_tkpij,tir-tir,19 +lre22_dev_tkpwp,orm-orm,19 +lre22_dev_tkyuh,tso-tso,12 +lre22_dev_tlkrm,zul-zul,19 +lre22_dev_tlspo,zul-zul,18 +lre22_dev_tmdvx,zul-zul,17 +lre22_dev_tmynp,afr-afr,20 +lre22_dev_tntmu,xho-xho,22 +lre22_dev_tnwok,orm-orm,21 +lre22_dev_toccu,eng-iaf,16 +lre22_dev_tofur,tir-tir,14 +lre22_dev_tokhl,ven-ven,21 +lre22_dev_tonkq,zul-zul,15 +lre22_dev_topxu,zul-zul,14 +lre22_dev_touna,ara-arq,15 +lre22_dev_towvr,tso-tso,12 +lre22_dev_tpasn,tir-tir,15 +lre22_dev_tpmen,ara-ayl,10 +lre22_dev_tpuws,tir-tir,19 +lre22_dev_tqbqi,xho-xho,17 +lre22_dev_tqtfo,tso-tso,17 +lre22_dev_traqh,fra-ntf,21 +lre22_dev_trdfp,ara-ayl,15 +lre22_dev_trdml,xho-xho,23 +lre22_dev_trmpg,nbl-nbl,19 +lre22_dev_tsdyg,tso-tso,19 +lre22_dev_tsvmo,ara-ayl,11 +lre22_dev_ttcul,afr-afr,19 +lre22_dev_ttrfr,ara-arq,12 +lre22_dev_tuhrp,ven-ven,14 +lre22_dev_twaba,afr-afr,15 +lre22_dev_twcnd,tir-tir,13 +lre22_dev_twtog,ven-ven,15 +lre22_dev_twvne,tir-tir,19 +lre22_dev_txcqg,orm-orm,19 +lre22_dev_txjsy,eng-ens,18 +lre22_dev_txmpu,afr-afr,19 +lre22_dev_txqde,eng-iaf,16 +lre22_dev_tyaup,eng-ens,17 +lre22_dev_tyaym,afr-afr,17 +lre22_dev_tybrl,nbl-nbl,16 +lre22_dev_tyduc,eng-ens,17 +lre22_dev_tyhsa,fra-ntf,21 +lre22_dev_tyigo,ara-ayl,11 +lre22_dev_tykte,zul-zul,18 +lre22_dev_tymil,tir-tir,16 +lre22_dev_tyofb,ven-ven,20 +lre22_dev_tysph,fra-ntf,16 +lre22_dev_tzamn,ara-aeb,11 +lre22_dev_tzrpp,ven-ven,15 +lre22_dev_tzukm,ara-aeb,12 +lre22_dev_uabum,xho-xho,19 +lre22_dev_uankd,nbl-nbl,18 +lre22_dev_uazyk,ara-ayl,14 +lre22_dev_ubdfa,eng-iaf,15 +lre22_dev_ubugi,orm-orm,22 +lre22_dev_ucetp,ven-ven,21 +lre22_dev_ucsxt,eng-ens,12 +lre22_dev_uczke,zul-zul,14 +lre22_dev_udldh,ara-arq,11 +lre22_dev_uejdk,orm-orm,17 +lre22_dev_uekog,zul-zul,17 +lre22_dev_uemql,xho-xho,16 +lre22_dev_ueovt,eng-ens,14 +lre22_dev_uesao,zul-zul,19 +lre22_dev_ueyxm,ara-ayl,13 +lre22_dev_ufafi,tir-tir,17 +lre22_dev_ufaig,tso-tso,12 +lre22_dev_uffpc,ara-arq,14 +lre22_dev_ufrmg,ven-ven,20 +lre22_dev_ugieb,ara-aeb,12 +lre22_dev_ugoiy,ara-ayl,10 +lre22_dev_ugzkq,ara-aeb,12 +lre22_dev_uhdrj,xho-xho,18 +lre22_dev_uhjdn,ara-ayl,16 +lre22_dev_uhkcq,ara-ayl,11 +lre22_dev_uhrjo,ara-aeb,13 +lre22_dev_uhrow,afr-afr,16 +lre22_dev_uikqm,ara-arq,12 +lre22_dev_uitct,eng-ens,13 +lre22_dev_uitqu,ara-ayl,12 +lre22_dev_ujiby,eng-ens,18 +lre22_dev_ujmtl,orm-orm,22 +lre22_dev_ukdpu,ven-ven,17 +lre22_dev_ukfpb,xho-xho,19 +lre22_dev_ukklw,fra-ntf,22 +lre22_dev_ukwjy,xho-xho,17 +lre22_dev_uljbx,fra-ntf,20 +lre22_dev_uljgh,tir-tir,13 +lre22_dev_uljvo,fra-ntf,21 +lre22_dev_undfd,orm-orm,20 +lre22_dev_unmiu,ara-arq,14 +lre22_dev_updar,nbl-nbl,17 +lre22_dev_uprkv,eng-iaf,16 +lre22_dev_urkok,ara-ayl,11 +lre22_dev_urolj,orm-orm,22 +lre22_dev_uscpv,eng-ens,14 +lre22_dev_ushtk,fra-ntf,20 
+lre22_dev_usiey,ven-ven,19 +lre22_dev_usitw,ara-arq,14 +lre22_dev_utkxp,nbl-nbl,19 +lre22_dev_utnvo,tir-tir,16 +lre22_dev_utyjg,tso-tso,18 +lre22_dev_uuwaa,ara-arq,12 +lre22_dev_uuxla,eng-iaf,15 +lre22_dev_uuzuj,ara-arq,14 +lre22_dev_uvcxs,eng-ens,12 +lre22_dev_uveah,ven-ven,17 +lre22_dev_uvfqy,ara-arq,13 +lre22_dev_uvnhb,fra-ntf,20 +lre22_dev_uvqbm,afr-afr,19 +lre22_dev_uvsus,zul-zul,15 +lre22_dev_uvyev,fra-ntf,20 +lre22_dev_uwicd,tso-tso,12 +lre22_dev_uwnlz,zul-zul,18 +lre22_dev_uwwyj,afr-afr,20 +lre22_dev_uwyxc,eng-iaf,17 +lre22_dev_uxjzh,xho-xho,21 +lre22_dev_uxpyg,tso-tso,15 +lre22_dev_uxrxr,tso-tso,12 +lre22_dev_uyciz,eng-ens,14 +lre22_dev_uycza,xho-xho,17 +lre22_dev_uyvyb,eng-ens,17 +lre22_dev_uziar,zul-zul,15 +lre22_dev_uzlxd,fra-ntf,22 +lre22_dev_uznjr,tir-tir,13 +lre22_dev_vagda,ara-ayl,12 +lre22_dev_vanjm,ven-ven,18 +lre22_dev_vaqia,tir-tir,19 +lre22_dev_vasjz,ara-arq,11 +lre22_dev_vcexs,tir-tir,17 +lre22_dev_vchpm,fra-ntf,21 +lre22_dev_vctsa,nbl-nbl,19 +lre22_dev_vcxit,ven-ven,15 +lre22_dev_vcyqv,xho-xho,19 +lre22_dev_vdjlh,afr-afr,22 +lre22_dev_vdogx,ven-ven,15 +lre22_dev_veutb,eng-ens,16 +lre22_dev_vezrd,tso-tso,12 +lre22_dev_vfbfg,tso-tso,12 +lre22_dev_vffqd,orm-orm,21 +lre22_dev_vfhum,afr-afr,16 +lre22_dev_vfjtw,ara-arq,11 +lre22_dev_vfnjb,eng-ens,15 +lre22_dev_vgbbh,ara-arq,13 +lre22_dev_vgcao,eng-iaf,20 +lre22_dev_vgpnk,xho-xho,19 +lre22_dev_vityk,zul-zul,18 +lre22_dev_vjeuy,tir-tir,19 +lre22_dev_vjltt,zul-zul,17 +lre22_dev_vjqrm,tir-tir,13 +lre22_dev_vjvbs,tso-tso,18 +lre22_dev_vlcbq,tso-tso,16 +lre22_dev_vlnlb,tso-tso,13 +lre22_dev_vlscu,ara-ayl,15 +lre22_dev_vlwhz,fra-ntf,22 +lre22_dev_vlyeh,tso-tso,16 +lre22_dev_vmnps,zul-zul,14 +lre22_dev_vmqxk,tso-tso,18 +lre22_dev_vmrez,ven-ven,18 +lre22_dev_vmsnh,ara-aeb,11 +lre22_dev_vmuti,ara-aeb,14 +lre22_dev_vncre,afr-afr,22 +lre22_dev_vnkqv,afr-afr,15 +lre22_dev_vnmlt,zul-zul,18 +lre22_dev_vpkra,ara-ayl,11 +lre22_dev_vpoit,ara-arq,14 +lre22_dev_vpruu,orm-orm,23 +lre22_dev_vptiv,tir-tir,18 +lre22_dev_vqhcn,tso-tso,16 +lre22_dev_vqura,tir-tir,16 +lre22_dev_vrqfs,xho-xho,23 +lre22_dev_vrvtr,zul-zul,15 +lre22_dev_vrxvj,fra-ntf,17 +lre22_dev_vsbay,eng-iaf,19 +lre22_dev_vsbvi,fra-ntf,19 +lre22_dev_vslkb,eng-ens,12 +lre22_dev_vsrdg,tso-tso,12 +lre22_dev_vsrnz,zul-zul,14 +lre22_dev_vsryb,nbl-nbl,19 +lre22_dev_vtlab,zul-zul,19 +lre22_dev_vtrff,eng-iaf,17 +lre22_dev_vtztf,ara-aeb,11 +lre22_dev_vucth,eng-ens,14 +lre22_dev_vucug,orm-orm,21 +lre22_dev_vufuu,eng-ens,18 +lre22_dev_vujbs,zul-zul,19 +lre22_dev_vuufm,afr-afr,19 +lre22_dev_vvgdf,eng-ens,18 +lre22_dev_vvlcx,ara-aeb,12 +lre22_dev_vvvho,tir-tir,18 +lre22_dev_vwait,eng-iaf,14 +lre22_dev_vwdcw,ara-arq,14 +lre22_dev_vwyzq,ara-arq,14 +lre22_dev_vwzon,eng-ens,12 +lre22_dev_vxhoc,ara-aeb,11 +lre22_dev_vxkgz,ven-ven,18 +lre22_dev_vxlgl,tir-tir,18 +lre22_dev_vxsqt,eng-ens,15 +lre22_dev_vyqsd,nbl-nbl,17 +lre22_dev_vzcai,zul-zul,19 +lre22_dev_vzgoj,eng-iaf,14 +lre22_dev_vzlon,zul-zul,16 +lre22_dev_vznrg,nbl-nbl,16 +lre22_dev_vzqme,xho-xho,19 +lre22_dev_wabqx,ven-ven,18 +lre22_dev_wafdh,fra-ntf,21 +lre22_dev_wagmt,eng-iaf,18 +lre22_dev_waocz,ven-ven,20 +lre22_dev_wavrh,zul-zul,16 +lre22_dev_wawqg,ara-ayl,13 +lre22_dev_waznj,nbl-nbl,22 +lre22_dev_wbepu,fra-ntf,19 +lre22_dev_wbygw,eng-ens,16 +lre22_dev_wccgz,tso-tso,17 +lre22_dev_wcpwx,tir-tir,18 +lre22_dev_wczkn,eng-iaf,17 +lre22_dev_wdfmt,tir-tir,17 +lre22_dev_wdgbh,ara-arq,12 +lre22_dev_wdind,tso-tso,19 +lre22_dev_wdkit,nbl-nbl,16 +lre22_dev_wdmpt,eng-ens,17 +lre22_dev_wdpya,nbl-nbl,16 +lre22_dev_wdrxo,orm-orm,21 
+lre22_dev_wdyiy,ara-ayl,13 +lre22_dev_weccy,afr-afr,15 +lre22_dev_wfmco,ara-arq,14 +lre22_dev_wfnon,nbl-nbl,17 +lre22_dev_wgdui,eng-iaf,14 +lre22_dev_wgkmr,eng-iaf,17 +lre22_dev_wgnex,tir-tir,19 +lre22_dev_wgucy,eng-iaf,18 +lre22_dev_wgwdn,eng-iaf,17 +lre22_dev_whqhx,eng-iaf,15 +lre22_dev_whxwv,eng-ens,14 +lre22_dev_witnq,fra-ntf,17 +lre22_dev_wixzu,tso-tso,16 +lre22_dev_wjhbw,eng-iaf,16 +lre22_dev_wjist,orm-orm,16 +lre22_dev_wjnhh,zul-zul,19 +lre22_dev_wjnyo,ven-ven,20 +lre22_dev_wjtnm,orm-orm,19 +lre22_dev_wjzhz,ara-aeb,13 +lre22_dev_wkacx,eng-iaf,15 +lre22_dev_wkqey,fra-ntf,16 +lre22_dev_wldli,zul-zul,14 +lre22_dev_wlnst,nbl-nbl,16 +lre22_dev_wltvq,zul-zul,17 +lre22_dev_wlwhq,orm-orm,19 +lre22_dev_wmdan,xho-xho,21 +lre22_dev_wmfce,nbl-nbl,20 +lre22_dev_wmigl,ven-ven,20 +lre22_dev_wmwmc,eng-iaf,19 +lre22_dev_wmypk,xho-xho,19 +lre22_dev_wmzpv,eng-ens,17 +lre22_dev_wnjpz,ven-ven,19 +lre22_dev_wnmkt,orm-orm,23 +lre22_dev_wnpep,nbl-nbl,16 +lre22_dev_wnqhz,nbl-nbl,16 +lre22_dev_wnxpz,ven-ven,15 +lre22_dev_wnxrw,ven-ven,18 +lre22_dev_woawg,ven-ven,18 +lre22_dev_wobzv,eng-ens,14 +lre22_dev_wocbv,tso-tso,18 +lre22_dev_woerb,fra-ntf,21 +lre22_dev_wojrt,orm-orm,19 +lre22_dev_wosus,tir-tir,17 +lre22_dev_wozuc,xho-xho,19 +lre22_dev_wqcyu,tso-tso,15 +lre22_dev_wqfuv,eng-ens,17 +lre22_dev_wqhag,zul-zul,19 +lre22_dev_wqmsd,tir-tir,13 +lre22_dev_wqthl,ara-aeb,12 +lre22_dev_wqtvm,eng-ens,15 +lre22_dev_wrmnw,zul-zul,18 +lre22_dev_wrtec,zul-zul,17 +lre22_dev_wrvls,zul-zul,14 +lre22_dev_wscfs,nbl-nbl,16 +lre22_dev_wssqw,eng-ens,15 +lre22_dev_wtbdf,tir-tir,14 +lre22_dev_wtcpe,ara-aeb,11 +lre22_dev_wthrk,orm-orm,18 +lre22_dev_wtofd,eng-iaf,20 +lre22_dev_wtuol,tso-tso,18 +lre22_dev_wuqez,ara-aeb,11 +lre22_dev_wuquc,tir-tir,18 +lre22_dev_wvlde,tso-tso,13 +lre22_dev_wwbmg,ara-aeb,11 +lre22_dev_wwduf,fra-ntf,18 +lre22_dev_wwvuw,ara-arq,13 +lre22_dev_wxaev,orm-orm,17 +lre22_dev_wycsj,ven-ven,18 +lre22_dev_wypwj,ara-ayl,10 +lre22_dev_wytpq,fra-ntf,17 +lre22_dev_wzhqk,xho-xho,22 +lre22_dev_wzpmq,eng-ens,12 +lre22_dev_wztdj,zul-zul,19 +lre22_dev_wzxgv,ven-ven,18 +lre22_dev_xacjk,fra-ntf,18 +lre22_dev_xaevp,tir-tir,14 +lre22_dev_xaldr,eng-iaf,14 +lre22_dev_xapdy,ara-aeb,12 +lre22_dev_xaurw,nbl-nbl,16 +lre22_dev_xawdd,tir-tir,20 +lre22_dev_xbcpb,ara-arq,12 +lre22_dev_xbfrs,ven-ven,17 +lre22_dev_xbqsr,nbl-nbl,22 +lre22_dev_xbvcc,nbl-nbl,17 +lre22_dev_xbvqw,orm-orm,23 +lre22_dev_xcame,xho-xho,16 +lre22_dev_xcrnp,ara-aeb,13 +lre22_dev_xcswu,ven-ven,18 +lre22_dev_xcuok,orm-orm,21 +lre22_dev_xcvkj,tso-tso,16 +lre22_dev_xdtdp,fra-ntf,17 +lre22_dev_xdyea,ara-ayl,10 +lre22_dev_xerqi,fra-ntf,17 +lre22_dev_xetdb,eng-ens,14 +lre22_dev_xfecy,nbl-nbl,16 +lre22_dev_xfgcu,eng-iaf,19 +lre22_dev_xfing,tir-tir,20 +lre22_dev_xgaig,ara-aeb,15 +lre22_dev_xgoyq,eng-ens,18 +lre22_dev_xhdtx,eng-iaf,14 +lre22_dev_xhvkx,orm-orm,19 +lre22_dev_xiblr,tir-tir,17 +lre22_dev_xifty,ara-aeb,12 +lre22_dev_xigtx,ara-arq,14 +lre22_dev_xijus,tso-tso,14 +lre22_dev_xipox,xho-xho,20 +lre22_dev_xittq,ara-aeb,13 +lre22_dev_xjpwq,ara-ayl,15 +lre22_dev_xjrla,afr-afr,20 +lre22_dev_xkdof,ara-ayl,13 +lre22_dev_xkiba,eng-ens,18 +lre22_dev_xlcxh,fra-ntf,18 +lre22_dev_xlsxb,tso-tso,16 +lre22_dev_xmhpj,ven-ven,20 +lre22_dev_xnqct,ara-arq,11 +lre22_dev_xoayi,eng-ens,13 +lre22_dev_xohps,ara-arq,11 +lre22_dev_xokpn,zul-zul,18 +lre22_dev_xonym,eng-ens,14 +lre22_dev_xozod,afr-afr,14 +lre22_dev_xpenp,ara-arq,11 +lre22_dev_xpnti,ara-aeb,11 +lre22_dev_xpqyr,orm-orm,22 +lre22_dev_xpswt,orm-orm,23 +lre22_dev_xpumn,ven-ven,14 +lre22_dev_xpvcf,orm-orm,20 
+lre22_dev_xqhoa,ara-ayl,13 +lre22_dev_xqnpt,orm-orm,22 +lre22_dev_xqooi,xho-xho,20 +lre22_dev_xqupu,fra-ntf,21 +lre22_dev_xresy,eng-iaf,17 +lre22_dev_xrouj,ara-ayl,16 +lre22_dev_xsnxu,ara-aeb,12 +lre22_dev_xtaof,ara-ayl,13 +lre22_dev_xtbxk,orm-orm,20 +lre22_dev_xtgak,nbl-nbl,20 +lre22_dev_xuauh,ara-aeb,13 +lre22_dev_xubei,eng-iaf,17 +lre22_dev_xubol,ara-aeb,11 +lre22_dev_xuieb,orm-orm,19 +lre22_dev_xunxs,ara-ayl,14 +lre22_dev_xutjo,nbl-nbl,20 +lre22_dev_xvbos,afr-afr,22 +lre22_dev_xvcfn,eng-ens,16 +lre22_dev_xvgqo,eng-ens,12 +lre22_dev_xwemk,zul-zul,18 +lre22_dev_xwsyq,ara-ayl,14 +lre22_dev_xxdbg,tso-tso,18 +lre22_dev_xyoua,fra-ntf,22 +lre22_dev_xzoej,ara-aeb,13 +lre22_dev_xzrdl,ara-arq,13 +lre22_dev_xztsz,tso-tso,16 +lre22_dev_xzxbd,zul-zul,15 +lre22_dev_yagvv,tso-tso,13 +lre22_dev_ybqju,tso-tso,13 +lre22_dev_ybrji,ara-arq,11 +lre22_dev_ybsmy,ven-ven,21 +lre22_dev_ycbaf,ara-aeb,14 +lre22_dev_ychsm,ven-ven,14 +lre22_dev_ycrlj,xho-xho,17 +lre22_dev_ycuhc,orm-orm,21 +lre22_dev_ydhqc,ara-arq,13 +lre22_dev_ydmnb,nbl-nbl,17 +lre22_dev_yduem,xho-xho,21 +lre22_dev_yemzu,ara-aeb,11 +lre22_dev_yeoyx,eng-ens,18 +lre22_dev_yersp,ara-ayl,13 +lre22_dev_yeshv,eng-iaf,17 +lre22_dev_yexec,ven-ven,20 +lre22_dev_yeyna,ara-ayl,14 +lre22_dev_yfxmd,ara-arq,14 +lre22_dev_yfzah,ara-arq,14 +lre22_dev_ygkvo,ara-arq,11 +lre22_dev_yhgvr,ara-arq,15 +lre22_dev_yhwin,ara-arq,12 +lre22_dev_yirig,ara-ayl,16 +lre22_dev_yixgu,xho-xho,16 +lre22_dev_yjbfl,xho-xho,19 +lre22_dev_yjodc,eng-ens,14 +lre22_dev_yjoht,ara-aeb,12 +lre22_dev_yjqkb,ara-arq,14 +lre22_dev_yjrkq,ara-arq,15 +lre22_dev_yjrng,afr-afr,16 +lre22_dev_ykpzq,afr-afr,21 +lre22_dev_yktop,eng-iaf,20 +lre22_dev_ylfah,zul-zul,15 +lre22_dev_ylgex,tso-tso,14 +lre22_dev_ylkds,nbl-nbl,17 +lre22_dev_ylvyc,xho-xho,20 +lre22_dev_ylzic,eng-iaf,20 +lre22_dev_ymoon,afr-afr,17 +lre22_dev_yncqr,ara-arq,13 +lre22_dev_ynjtn,ven-ven,18 +lre22_dev_ynmzy,tso-tso,16 +lre22_dev_ynozi,fra-ntf,21 +lre22_dev_yntec,orm-orm,19 +lre22_dev_ynurl,tso-tso,14 +lre22_dev_ypdtt,ara-aeb,11 +lre22_dev_yprom,tso-tso,13 +lre22_dev_yptsk,xho-xho,23 +lre22_dev_ypyft,eng-iaf,14 +lre22_dev_yqhwt,orm-orm,23 +lre22_dev_yqtxe,eng-iaf,19 +lre22_dev_yquja,ara-ayl,10 +lre22_dev_yqxhl,eng-ens,14 +lre22_dev_yqyby,nbl-nbl,18 +lre22_dev_yqzua,fra-ntf,16 +lre22_dev_yrfxo,ven-ven,21 +lre22_dev_yrgzf,ara-aeb,13 +lre22_dev_yruqe,tso-tso,17 +lre22_dev_yrwgb,zul-zul,18 +lre22_dev_yrxsi,orm-orm,21 +lre22_dev_ysdkl,tso-tso,15 +lre22_dev_ytgav,xho-xho,16 +lre22_dev_ytoet,ara-arq,14 +lre22_dev_yuabg,eng-ens,16 +lre22_dev_yundm,tso-tso,14 +lre22_dev_yuvux,ara-ayl,13 +lre22_dev_yvdcv,fra-ntf,21 +lre22_dev_yvoli,orm-orm,23 +lre22_dev_yweox,orm-orm,21 +lre22_dev_ywgoc,eng-iaf,19 +lre22_dev_ywoyx,ven-ven,18 +lre22_dev_ywxql,zul-zul,19 +lre22_dev_yxkyl,eng-iaf,15 +lre22_dev_yxtmn,ara-aeb,14 +lre22_dev_yycsn,ara-ayl,12 +lre22_dev_yyswd,eng-iaf,16 +lre22_dev_yyugr,ven-ven,21 +lre22_dev_yzitu,orm-orm,20 +lre22_dev_yzwmi,eng-ens,16 +lre22_dev_yzzww,zul-zul,17 +lre22_dev_zabub,ara-ayl,16 +lre22_dev_zabuv,eng-iaf,14 +lre22_dev_zacuc,zul-zul,19 +lre22_dev_zavru,zul-zul,19 +lre22_dev_zbfgy,ara-arq,12 +lre22_dev_zbjez,nbl-nbl,17 +lre22_dev_zbtpo,ven-ven,18 +lre22_dev_zbzip,tso-tso,19 +lre22_dev_zcevz,nbl-nbl,16 +lre22_dev_zcnsv,afr-afr,21 +lre22_dev_zcqkl,eng-iaf,20 +lre22_dev_zczer,ven-ven,14 +lre22_dev_zdcdt,nbl-nbl,18 +lre22_dev_zddua,xho-xho,19 +lre22_dev_zdvsh,ara-arq,14 +lre22_dev_zdwxx,ara-ayl,14 +lre22_dev_zdyxi,tir-tir,14 +lre22_dev_zetju,eng-iaf,17 +lre22_dev_zfsek,ara-arq,11 +lre22_dev_zfvfa,eng-ens,18 
+lre22_dev_zggiu,zul-zul,19 +lre22_dev_zgndz,tso-tso,14 +lre22_dev_zgxth,eng-ens,16 +lre22_dev_zhlxa,ara-ayl,14 +lre22_dev_zhnsb,ara-ayl,15 +lre22_dev_zhsmo,ara-aeb,13 +lre22_dev_zhvbf,xho-xho,18 +lre22_dev_zhzrh,eng-iaf,15 +lre22_dev_ziigd,orm-orm,21 +lre22_dev_zilud,tir-tir,19 +lre22_dev_zjivp,zul-zul,19 +lre22_dev_zjleg,zul-zul,19 +lre22_dev_zjquq,orm-orm,16 +lre22_dev_zkgjo,nbl-nbl,22 +lre22_dev_zkhes,fra-ntf,16 +lre22_dev_zkioq,ara-aeb,12 +lre22_dev_zkwaw,afr-afr,21 +lre22_dev_zlapc,ara-ayl,13 +lre22_dev_zlntm,zul-zul,19 +lre22_dev_zmmyn,xho-xho,23 +lre22_dev_zmxld,ven-ven,17 +lre22_dev_znhcf,ven-ven,21 +lre22_dev_znwsk,afr-afr,22 +lre22_dev_znxvg,eng-ens,18 +lre22_dev_znycz,ara-aeb,13 +lre22_dev_zoayx,zul-zul,18 +lre22_dev_zogte,nbl-nbl,16 +lre22_dev_zoldl,ara-aeb,12 +lre22_dev_zoqzl,eng-ens,17 +lre22_dev_zorfv,eng-iaf,16 +lre22_dev_zoseh,ara-arq,12 +lre22_dev_zpotb,xho-xho,16 +lre22_dev_zptbg,tir-tir,14 +lre22_dev_zqjzi,ara-aeb,11 +lre22_dev_zqljj,ara-aeb,14 +lre22_dev_zqlri,orm-orm,18 +lre22_dev_zqoif,zul-zul,19 +lre22_dev_zqorv,ara-aeb,12 +lre22_dev_zqwgs,fra-ntf,18 +lre22_dev_zrhbt,tir-tir,19 +lre22_dev_zrqar,ara-aeb,13 +lre22_dev_zrqec,eng-iaf,17 +lre22_dev_ztdrx,fra-ntf,15 +lre22_dev_ztdwr,orm-orm,17 +lre22_dev_zthiv,ara-arq,15 +lre22_dev_ztknh,xho-xho,18 +lre22_dev_ztlcq,ara-aeb,13 +lre22_dev_ztufj,fra-ntf,19 +lre22_dev_zubjl,fra-ntf,20 +lre22_dev_zunuw,tso-tso,17 +lre22_dev_zutul,tir-tir,13 +lre22_dev_zutvv,eng-ens,12 +lre22_dev_zuugc,eng-iaf,17 +lre22_dev_zuvqx,eng-iaf,14 +lre22_dev_zvthu,orm-orm,20 +lre22_dev_zvvov,ara-aeb,11 +lre22_dev_zvyuh,ara-arq,14 +lre22_dev_zwfqq,eng-iaf,17 +lre22_dev_zwosr,xho-xho,16 +lre22_dev_zwvhw,tso-tso,12 +lre22_dev_zxihz,ven-ven,14 +lre22_dev_zydma,eng-ens,12 +lre22_dev_zyqlz,zul-zul,19 +lre22_dev_zyyie,orm-orm,23 +lre22_dev_zyywo,eng-iaf,14 +lre22_dev_zzyze,ara-ayl,12 diff --git a/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_1/test_segments.csv b/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_1/test_segments.csv new file mode 100644 index 00000000..4d50b6a5 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_1/test_segments.csv @@ -0,0 +1,2088 @@ +id,class_id,subclass_idx +lre22_dev_aayck,ara-aeb,12 +lre22_dev_aayto,eng-iaf,14 +lre22_dev_abaha,zul-zul,17 +lre22_dev_abetm,fra-ntf,15 +lre22_dev_abnwz,zul-zul,19 +lre22_dev_abvjt,zul-zul,19 +lre22_dev_abwgm,ara-ayl,13 +lre22_dev_acepd,eng-iaf,19 +lre22_dev_acspt,eng-ens,12 +lre22_dev_aczdh,eng-ens,13 +lre22_dev_adkkm,tso-tso,19 +lre22_dev_adpus,tso-tso,13 +lre22_dev_adwju,ara-aeb,14 +lre22_dev_afnfn,afr-afr,20 +lre22_dev_afohq,ara-aeb,13 +lre22_dev_agnnp,afr-afr,17 +lre22_dev_agquw,fra-ntf,20 +lre22_dev_ahoow,ara-ayl,15 +lre22_dev_ahqxq,fra-ntf,22 +lre22_dev_aieqr,eng-iaf,17 +lre22_dev_ainix,eng-iaf,16 +lre22_dev_aiojl,fra-ntf,18 +lre22_dev_aiypg,nbl-nbl,17 +lre22_dev_ajcpi,orm-orm,22 +lre22_dev_ajeqv,ara-aeb,11 +lre22_dev_ajlqy,xho-xho,16 +lre22_dev_ajlyw,orm-orm,21 +lre22_dev_ajmrs,ara-aeb,11 +lre22_dev_ajzjc,eng-iaf,16 +lre22_dev_ajzyq,ara-ayl,14 +lre22_dev_akmfp,orm-orm,19 +lre22_dev_aleeu,ara-arq,14 +lre22_dev_aliba,ara-aeb,15 +lre22_dev_alkwi,eng-iaf,14 +lre22_dev_aluwk,nbl-nbl,16 +lre22_dev_alvdl,ara-arq,14 +lre22_dev_amrca,ara-aeb,11 +lre22_dev_aoanh,ara-ayl,15 +lre22_dev_aoeql,eng-ens,16 +lre22_dev_apfpk,eng-iaf,14 +lre22_dev_apufs,tir-tir,17 +lre22_dev_apvko,orm-orm,20 +lre22_dev_arefe,orm-orm,23 +lre22_dev_arvyp,ara-ayl,11 +lre22_dev_arwsc,fra-ntf,20 +lre22_dev_asqwa,ara-aeb,14 +lre22_dev_asrng,fra-ntf,18 +lre22_dev_aswjo,afr-afr,18 +lre22_dev_aulzk,ven-ven,21 
+lre22_dev_aupcr,zul-zul,18 +lre22_dev_auqcy,eng-ens,18 +lre22_dev_auxdy,nbl-nbl,16 +lre22_dev_auycg,ara-ayl,11 +lre22_dev_aviiv,tso-tso,14 +lre22_dev_avrwo,tso-tso,19 +lre22_dev_avwim,ara-arq,13 +lre22_dev_avzdv,zul-zul,18 +lre22_dev_awtna,ara-arq,13 +lre22_dev_awxbj,orm-orm,23 +lre22_dev_axejc,fra-ntf,17 +lre22_dev_axtso,eng-ens,16 +lre22_dev_axwoo,ara-aeb,15 +lre22_dev_axyma,ara-arq,15 +lre22_dev_aycai,ven-ven,17 +lre22_dev_ayfjz,orm-orm,20 +lre22_dev_aylrz,eng-iaf,16 +lre22_dev_aynwz,tso-tso,18 +lre22_dev_aypyt,ara-aeb,11 +lre22_dev_ayszn,zul-zul,18 +lre22_dev_ayvge,ara-aeb,11 +lre22_dev_ayvmo,afr-afr,23 +lre22_dev_ayzdz,xho-xho,20 +lre22_dev_azbmt,xho-xho,19 +lre22_dev_azjsr,tir-tir,19 +lre22_dev_azkdh,nbl-nbl,20 +lre22_dev_azwrd,fra-ntf,15 +lre22_dev_badwe,ara-aeb,13 +lre22_dev_baiaf,zul-zul,17 +lre22_dev_baiwb,ara-aeb,13 +lre22_dev_baxuo,zul-zul,18 +lre22_dev_bbbtf,eng-ens,18 +lre22_dev_bbdws,ara-ayl,12 +lre22_dev_bbitq,eng-ens,16 +lre22_dev_bbnvu,ara-arq,13 +lre22_dev_bbunq,eng-iaf,14 +lre22_dev_bcinm,ara-aeb,14 +lre22_dev_bcrhs,zul-zul,17 +lre22_dev_bcwpu,ara-aeb,13 +lre22_dev_bcxdq,fra-ntf,21 +lre22_dev_bdgbr,ara-aeb,12 +lre22_dev_bdgrw,orm-orm,17 +lre22_dev_bdiml,ara-aeb,11 +lre22_dev_bdyue,xho-xho,21 +lre22_dev_bdzsj,tir-tir,13 +lre22_dev_beanp,tso-tso,12 +lre22_dev_beigo,ara-aeb,14 +lre22_dev_belhi,orm-orm,23 +lre22_dev_bfoej,ven-ven,20 +lre22_dev_bfznf,ara-ayl,11 +lre22_dev_bgeiq,ven-ven,15 +lre22_dev_bgeyp,ara-aeb,11 +lre22_dev_bgomt,afr-afr,14 +lre22_dev_bgrfd,nbl-nbl,19 +lre22_dev_bgwlu,tir-tir,17 +lre22_dev_bifkp,nbl-nbl,18 +lre22_dev_bipvh,nbl-nbl,17 +lre22_dev_biuyu,eng-ens,12 +lre22_dev_bixnf,ara-ayl,11 +lre22_dev_bjhdf,tso-tso,17 +lre22_dev_bjsmm,ara-ayl,10 +lre22_dev_bkhqg,eng-ens,17 +lre22_dev_bkpah,ven-ven,14 +lre22_dev_blaco,afr-afr,17 +lre22_dev_bleum,xho-xho,18 +lre22_dev_bnhvt,nbl-nbl,16 +lre22_dev_bowyn,ara-arq,14 +lre22_dev_bpeqb,xho-xho,21 +lre22_dev_bpgqs,tir-tir,13 +lre22_dev_bpzpv,afr-afr,16 +lre22_dev_bqenu,eng-ens,12 +lre22_dev_bqfxw,zul-zul,14 +lre22_dev_bqowg,tir-tir,19 +lre22_dev_bqxyq,tir-tir,19 +lre22_dev_brjud,xho-xho,21 +lre22_dev_bruwl,xho-xho,16 +lre22_dev_brzld,fra-ntf,20 +lre22_dev_bsgqz,eng-ens,13 +lre22_dev_bsocl,eng-ens,12 +lre22_dev_bszou,ara-arq,13 +lre22_dev_btapz,zul-zul,15 +lre22_dev_btjlk,ara-aeb,14 +lre22_dev_btkry,xho-xho,19 +lre22_dev_btyeu,ara-ayl,15 +lre22_dev_bvnuu,fra-ntf,19 +lre22_dev_bvqag,eng-iaf,20 +lre22_dev_bvvho,eng-ens,16 +lre22_dev_bvwaj,tir-tir,14 +lre22_dev_bvymi,eng-ens,15 +lre22_dev_bwgmj,eng-iaf,20 +lre22_dev_bwqpz,ara-arq,14 +lre22_dev_bwyrh,ara-aeb,12 +lre22_dev_bxkrj,ven-ven,18 +lre22_dev_bxkti,afr-afr,20 +lre22_dev_bxzms,nbl-nbl,17 +lre22_dev_bygrw,tso-tso,18 +lre22_dev_byjqr,ven-ven,18 +lre22_dev_bylkl,eng-iaf,16 +lre22_dev_bzmkn,fra-ntf,22 +lre22_dev_bzntz,ara-arq,13 +lre22_dev_bzwkf,eng-iaf,19 +lre22_dev_caijh,ven-ven,18 +lre22_dev_canou,tir-tir,19 +lre22_dev_caqxh,afr-afr,20 +lre22_dev_cayuc,eng-ens,12 +lre22_dev_cbruy,xho-xho,23 +lre22_dev_cbyyw,ara-arq,14 +lre22_dev_cbzbe,afr-afr,22 +lre22_dev_cclfh,ara-arq,15 +lre22_dev_ccovd,ara-arq,11 +lre22_dev_ccpns,eng-ens,17 +lre22_dev_ccsjt,eng-iaf,16 +lre22_dev_ccsql,fra-ntf,21 +lre22_dev_ccugm,eng-ens,18 +lre22_dev_ccyfn,afr-afr,23 +lre22_dev_cdmgw,tir-tir,16 +lre22_dev_cdshg,eng-iaf,17 +lre22_dev_ceccy,orm-orm,20 +lre22_dev_cecwt,fra-ntf,22 +lre22_dev_cegvk,ara-arq,11 +lre22_dev_cferi,zul-zul,15 +lre22_dev_cfojx,ara-arq,11 +lre22_dev_cfzoe,tir-tir,20 +lre22_dev_cgfna,zul-zul,18 +lre22_dev_cggzh,ara-ayl,13 +lre22_dev_cgims,tir-tir,20 
+lre22_dev_cgixe,tir-tir,19 +lre22_dev_cgjov,zul-zul,14 +lre22_dev_chhio,ara-aeb,14 +lre22_dev_chnvd,tir-tir,13 +lre22_dev_chpww,nbl-nbl,21 +lre22_dev_churq,ara-ayl,13 +lre22_dev_cifqp,zul-zul,17 +lre22_dev_cijnx,xho-xho,22 +lre22_dev_ciozp,nbl-nbl,16 +lre22_dev_citpi,ara-aeb,12 +lre22_dev_cjrav,tir-tir,15 +lre22_dev_cksrw,ara-aeb,14 +lre22_dev_cktce,tir-tir,17 +lre22_dev_ckzhf,nbl-nbl,20 +lre22_dev_cleyn,ara-aeb,11 +lre22_dev_clhmt,fra-ntf,19 +lre22_dev_clrjd,orm-orm,21 +lre22_dev_clssx,eng-iaf,14 +lre22_dev_cluxm,ara-ayl,13 +lre22_dev_clzwe,ara-aeb,14 +lre22_dev_cminq,ara-aeb,11 +lre22_dev_cmmap,afr-afr,23 +lre22_dev_cmssr,orm-orm,20 +lre22_dev_cmufu,tso-tso,16 +lre22_dev_cnapz,orm-orm,19 +lre22_dev_cndba,tso-tso,12 +lre22_dev_cnkjh,tso-tso,15 +lre22_dev_cnvfe,orm-orm,18 +lre22_dev_cobbz,ara-arq,12 +lre22_dev_coppu,nbl-nbl,21 +lre22_dev_coqoj,eng-ens,17 +lre22_dev_cotun,ven-ven,16 +lre22_dev_cowrt,xho-xho,19 +lre22_dev_cppma,afr-afr,20 +lre22_dev_cpqkz,ara-arq,14 +lre22_dev_cpraw,afr-afr,17 +lre22_dev_cpsrb,fra-ntf,20 +lre22_dev_cpuax,zul-zul,16 +lre22_dev_cpudb,nbl-nbl,16 +lre22_dev_cqqds,afr-afr,22 +lre22_dev_cquib,ven-ven,21 +lre22_dev_cqwxe,nbl-nbl,16 +lre22_dev_cqyad,eng-iaf,15 +lre22_dev_crkut,eng-ens,17 +lre22_dev_crozj,fra-ntf,17 +lre22_dev_crrro,orm-orm,16 +lre22_dev_csavn,ara-aeb,15 +lre22_dev_cschy,afr-afr,16 +lre22_dev_csegr,tso-tso,14 +lre22_dev_csgvq,fra-ntf,17 +lre22_dev_csltj,ara-aeb,14 +lre22_dev_csmtr,ara-ayl,14 +lre22_dev_csqxl,ven-ven,20 +lre22_dev_ctjqw,nbl-nbl,16 +lre22_dev_ctxxt,nbl-nbl,17 +lre22_dev_cuaoy,ara-aeb,13 +lre22_dev_cudpj,ara-arq,13 +lre22_dev_cuhdf,afr-afr,21 +lre22_dev_cuoju,ven-ven,21 +lre22_dev_cupti,nbl-nbl,21 +lre22_dev_cusej,ara-aeb,14 +lre22_dev_cvfle,tir-tir,14 +lre22_dev_cvnqu,eng-ens,14 +lre22_dev_cvvjc,zul-zul,18 +lre22_dev_cvwht,fra-ntf,18 +lre22_dev_cvwtu,fra-ntf,21 +lre22_dev_cwlvk,tso-tso,16 +lre22_dev_cwnky,xho-xho,17 +lre22_dev_cxdlr,afr-afr,14 +lre22_dev_cxfii,ara-arq,13 +lre22_dev_cxpzt,zul-zul,16 +lre22_dev_cxqri,fra-ntf,21 +lre22_dev_cyaug,xho-xho,22 +lre22_dev_czdbd,fra-ntf,15 +lre22_dev_czvoy,ven-ven,16 +lre22_dev_czzrm,afr-afr,17 +lre22_dev_dahzr,ven-ven,17 +lre22_dev_dapny,ven-ven,17 +lre22_dev_dapug,nbl-nbl,19 +lre22_dev_dcbnz,xho-xho,16 +lre22_dev_dciaf,nbl-nbl,22 +lre22_dev_dcljn,afr-afr,19 +lre22_dev_dcmrn,afr-afr,20 +lre22_dev_dcobq,xho-xho,16 +lre22_dev_dcohp,tir-tir,16 +lre22_dev_dcsep,tso-tso,12 +lre22_dev_dctlw,ara-arq,12 +lre22_dev_dctvv,ara-arq,12 +lre22_dev_dcyoy,eng-iaf,17 +lre22_dev_ddgeb,xho-xho,23 +lre22_dev_ddsab,eng-ens,18 +lre22_dev_ddtpk,eng-ens,18 +lre22_dev_debjr,xho-xho,16 +lre22_dev_defkv,eng-ens,15 +lre22_dev_dejub,ara-arq,11 +lre22_dev_delok,eng-ens,14 +lre22_dev_dezlg,nbl-nbl,17 +lre22_dev_dffbj,fra-ntf,21 +lre22_dev_dfkox,xho-xho,19 +lre22_dev_dfpcn,ara-ayl,13 +lre22_dev_dfqgl,afr-afr,18 +lre22_dev_dfras,eng-iaf,19 +lre22_dev_dftpm,eng-iaf,20 +lre22_dev_dfvta,tso-tso,17 +lre22_dev_dgarp,eng-ens,13 +lre22_dev_dgntq,zul-zul,17 +lre22_dev_dgssb,tir-tir,19 +lre22_dev_dgvtc,xho-xho,23 +lre22_dev_dhdvp,ara-ayl,10 +lre22_dev_dhmbl,fra-ntf,22 +lre22_dev_diiry,orm-orm,16 +lre22_dev_disrs,afr-afr,16 +lre22_dev_ditsk,xho-xho,21 +lre22_dev_djbbz,ara-arq,14 +lre22_dev_djevu,tso-tso,16 +lre22_dev_djlaf,tir-tir,20 +lre22_dev_djoim,zul-zul,15 +lre22_dev_djvvp,zul-zul,17 +lre22_dev_djwyo,ven-ven,18 +lre22_dev_dkbfm,ara-ayl,12 +lre22_dev_dkpcy,ara-aeb,12 +lre22_dev_dlxzj,orm-orm,19 +lre22_dev_dmnjo,ven-ven,14 +lre22_dev_dmtsm,zul-zul,16 +lre22_dev_dnaql,orm-orm,23 +lre22_dev_dnkpf,ara-aeb,15 
+lre22_dev_dnscr,tso-tso,12 +lre22_dev_dnygt,eng-ens,15 +lre22_dev_dobre,xho-xho,19 +lre22_dev_dohlp,xho-xho,23 +lre22_dev_doioo,orm-orm,19 +lre22_dev_donaq,ara-aeb,13 +lre22_dev_dooht,ara-arq,11 +lre22_dev_dpmbt,zul-zul,14 +lre22_dev_dptyy,xho-xho,17 +lre22_dev_dqmud,eng-iaf,15 +lre22_dev_dqmxb,xho-xho,20 +lre22_dev_dqopt,eng-ens,14 +lre22_dev_dqpgr,ara-aeb,14 +lre22_dev_drkux,eng-ens,14 +lre22_dev_dsfha,ven-ven,18 +lre22_dev_dsftc,tso-tso,16 +lre22_dev_dskaq,ven-ven,15 +lre22_dev_dtdmp,zul-zul,18 +lre22_dev_dtdux,afr-afr,14 +lre22_dev_dtyki,ara-arq,11 +lre22_dev_durlr,orm-orm,18 +lre22_dev_dutdz,tso-tso,12 +lre22_dev_dvbol,ara-ayl,15 +lre22_dev_dwesk,nbl-nbl,22 +lre22_dev_dwtjw,ven-ven,14 +lre22_dev_dxckb,tso-tso,12 +lre22_dev_dxizq,eng-iaf,14 +lre22_dev_dxtnq,fra-ntf,18 +lre22_dev_dxvib,zul-zul,14 +lre22_dev_dyago,eng-iaf,16 +lre22_dev_dyipl,eng-iaf,18 +lre22_dev_dyqlo,ara-arq,13 +lre22_dev_dyvml,eng-iaf,15 +lre22_dev_dzkui,tso-tso,12 +lre22_dev_dzqta,ven-ven,20 +lre22_dev_dzxio,eng-ens,18 +lre22_dev_eachn,tir-tir,16 +lre22_dev_eapvu,eng-iaf,20 +lre22_dev_ebfdv,ara-ayl,10 +lre22_dev_ebgbd,eng-ens,17 +lre22_dev_eblhy,eng-iaf,20 +lre22_dev_ebtrq,ara-aeb,13 +lre22_dev_ebymv,tir-tir,14 +lre22_dev_ebzhg,nbl-nbl,21 +lre22_dev_ecbwo,ven-ven,21 +lre22_dev_ecllm,fra-ntf,21 +lre22_dev_eclpf,ven-ven,16 +lre22_dev_ecmhd,ara-aeb,14 +lre22_dev_ecnqi,eng-ens,14 +lre22_dev_ecpdc,ara-ayl,10 +lre22_dev_ecslx,afr-afr,22 +lre22_dev_ecuyo,xho-xho,23 +lre22_dev_edgur,tso-tso,16 +lre22_dev_edjtb,nbl-nbl,22 +lre22_dev_edsls,tso-tso,16 +lre22_dev_edssc,orm-orm,23 +lre22_dev_edvab,zul-zul,19 +lre22_dev_eehzu,zul-zul,18 +lre22_dev_eekci,afr-afr,15 +lre22_dev_eekcw,zul-zul,17 +lre22_dev_efihg,nbl-nbl,16 +lre22_dev_efsxw,tso-tso,16 +lre22_dev_efxjv,ara-aeb,14 +lre22_dev_efymf,ara-aeb,14 +lre22_dev_ehcvr,tir-tir,19 +lre22_dev_ehehw,xho-xho,20 +lre22_dev_ehewh,eng-ens,18 +lre22_dev_ehvyp,zul-zul,14 +lre22_dev_eifqv,zul-zul,19 +lre22_dev_eifxu,ara-ayl,10 +lre22_dev_ejcvy,fra-ntf,18 +lre22_dev_ejeek,eng-ens,16 +lre22_dev_ejfyn,fra-ntf,22 +lre22_dev_ejjqg,tso-tso,12 +lre22_dev_ejtox,ven-ven,19 +lre22_dev_ejwch,fra-ntf,21 +lre22_dev_ejzhx,xho-xho,17 +lre22_dev_ekbkm,afr-afr,21 +lre22_dev_ekzhk,ara-ayl,10 +lre22_dev_elanj,tso-tso,18 +lre22_dev_elvvn,tir-tir,16 +lre22_dev_emadg,xho-xho,22 +lre22_dev_emkzr,afr-afr,21 +lre22_dev_emmck,ara-arq,15 +lre22_dev_enwfu,afr-afr,15 +lre22_dev_eodro,ara-arq,15 +lre22_dev_eoisu,ven-ven,18 +lre22_dev_eomzr,xho-xho,23 +lre22_dev_eorva,xho-xho,21 +lre22_dev_epbwh,nbl-nbl,17 +lre22_dev_epeou,xho-xho,20 +lre22_dev_epifq,nbl-nbl,22 +lre22_dev_epqqo,ara-ayl,14 +lre22_dev_epsld,tso-tso,12 +lre22_dev_epsza,ara-ayl,12 +lre22_dev_eqmgm,ara-aeb,12 +lre22_dev_eqrhr,afr-afr,22 +lre22_dev_eqvan,ara-ayl,13 +lre22_dev_ersgd,orm-orm,22 +lre22_dev_erxig,zul-zul,15 +lre22_dev_esbrw,fra-ntf,19 +lre22_dev_esuug,nbl-nbl,20 +lre22_dev_etczk,tir-tir,14 +lre22_dev_etelz,fra-ntf,21 +lre22_dev_ettsh,fra-ntf,20 +lre22_dev_etuwp,ven-ven,19 +lre22_dev_eubgy,fra-ntf,18 +lre22_dev_euewj,orm-orm,18 +lre22_dev_euzyb,ara-aeb,13 +lre22_dev_ewatn,zul-zul,18 +lre22_dev_ewehs,orm-orm,17 +lre22_dev_ewexz,fra-ntf,18 +lre22_dev_ewgop,tir-tir,20 +lre22_dev_ewmgd,fra-ntf,21 +lre22_dev_ewzma,orm-orm,18 +lre22_dev_expvn,xho-xho,17 +lre22_dev_eyoqu,tir-tir,16 +lre22_dev_eyylz,nbl-nbl,16 +lre22_dev_eyzqu,tir-tir,18 +lre22_dev_ezdty,afr-afr,18 +lre22_dev_ezgcl,ara-aeb,13 +lre22_dev_eznzd,zul-zul,19 +lre22_dev_ezzwj,eng-iaf,18 +lre22_dev_facyr,zul-zul,18 +lre22_dev_faejb,tso-tso,16 +lre22_dev_famjw,orm-orm,18 
+lre22_dev_favzh,ara-arq,11 +lre22_dev_fbsre,orm-orm,23 +lre22_dev_fbtkl,fra-ntf,22 +lre22_dev_fbvxh,ara-ayl,14 +lre22_dev_fbyhp,nbl-nbl,20 +lre22_dev_fbysf,nbl-nbl,17 +lre22_dev_fcckx,ara-arq,12 +lre22_dev_fczba,eng-iaf,17 +lre22_dev_fdouw,eng-ens,14 +lre22_dev_fdtmf,tso-tso,13 +lre22_dev_fdtnc,fra-ntf,20 +lre22_dev_fdwme,afr-afr,19 +lre22_dev_fdyhr,eng-ens,18 +lre22_dev_feanh,fra-ntf,22 +lre22_dev_femmc,ara-arq,12 +lre22_dev_fevab,orm-orm,19 +lre22_dev_fexsi,orm-orm,17 +lre22_dev_fflai,ara-aeb,14 +lre22_dev_fgblw,tso-tso,14 +lre22_dev_fglhf,nbl-nbl,22 +lre22_dev_fhucm,ara-ayl,14 +lre22_dev_fhzwp,nbl-nbl,17 +lre22_dev_fifon,eng-iaf,14 +lre22_dev_fipff,orm-orm,19 +lre22_dev_fipyx,zul-zul,14 +lre22_dev_firtn,zul-zul,18 +lre22_dev_fjdqb,nbl-nbl,16 +lre22_dev_fjdxl,tir-tir,14 +lre22_dev_fjocp,ara-ayl,12 +lre22_dev_fjudb,ara-aeb,15 +lre22_dev_fkbjz,afr-afr,22 +lre22_dev_fkwaq,afr-afr,19 +lre22_dev_flbgp,afr-afr,16 +lre22_dev_flgxs,tir-tir,13 +lre22_dev_fljfm,tir-tir,19 +lre22_dev_fmauu,tso-tso,18 +lre22_dev_fmbvf,fra-ntf,19 +lre22_dev_fmhfa,ara-arq,12 +lre22_dev_fmije,ara-ayl,13 +lre22_dev_fnafq,tir-tir,20 +lre22_dev_fofmo,eng-ens,15 +lre22_dev_foikm,tir-tir,16 +lre22_dev_fosfi,eng-iaf,19 +lre22_dev_fotti,eng-ens,13 +lre22_dev_fozzx,zul-zul,15 +lre22_dev_fpehr,ara-aeb,12 +lre22_dev_fpiig,orm-orm,21 +lre22_dev_fqfag,ara-ayl,16 +lre22_dev_fqogo,tir-tir,13 +lre22_dev_frdqe,ara-arq,11 +lre22_dev_fremq,afr-afr,22 +lre22_dev_frjdx,zul-zul,18 +lre22_dev_fruha,ara-ayl,12 +lre22_dev_frxmu,eng-iaf,18 +lre22_dev_fsbeo,tso-tso,13 +lre22_dev_fsijy,fra-ntf,22 +lre22_dev_fsjwh,nbl-nbl,18 +lre22_dev_fspmb,tso-tso,19 +lre22_dev_ftbak,tir-tir,13 +lre22_dev_ftxuo,eng-iaf,20 +lre22_dev_fupee,ara-aeb,13 +lre22_dev_fupla,ara-aeb,11 +lre22_dev_fvmdq,fra-ntf,22 +lre22_dev_fvmjb,fra-ntf,20 +lre22_dev_fvubo,fra-ntf,22 +lre22_dev_fvwze,afr-afr,23 +lre22_dev_fvxxt,ara-arq,13 +lre22_dev_fwcye,ven-ven,21 +lre22_dev_fwkwv,orm-orm,18 +lre22_dev_fxezd,orm-orm,17 +lre22_dev_fxuir,nbl-nbl,19 +lre22_dev_fzgcm,zul-zul,14 +lre22_dev_fzncb,nbl-nbl,16 +lre22_dev_gaezu,ara-aeb,11 +lre22_dev_gawox,ara-aeb,13 +lre22_dev_gbcfq,zul-zul,14 +lre22_dev_gbdkv,orm-orm,17 +lre22_dev_gbevf,eng-iaf,20 +lre22_dev_gchke,ara-aeb,12 +lre22_dev_gcncr,ara-arq,13 +lre22_dev_gdeqd,ara-ayl,14 +lre22_dev_gdncj,eng-iaf,14 +lre22_dev_gdobt,ven-ven,21 +lre22_dev_geeoy,xho-xho,22 +lre22_dev_geraa,afr-afr,20 +lre22_dev_gfigd,nbl-nbl,16 +lre22_dev_gfjzm,ara-ayl,12 +lre22_dev_gftlv,tir-tir,20 +lre22_dev_ggaux,xho-xho,16 +lre22_dev_ggbgc,zul-zul,15 +lre22_dev_gghhn,zul-zul,18 +lre22_dev_ggrwj,eng-iaf,17 +lre22_dev_ghdur,eng-ens,15 +lre22_dev_ghgbo,ara-ayl,14 +lre22_dev_ghhop,nbl-nbl,20 +lre22_dev_ghnwg,ara-ayl,14 +lre22_dev_ghpmd,ara-ayl,14 +lre22_dev_ghqbh,orm-orm,19 +lre22_dev_gihvo,eng-ens,16 +lre22_dev_giueq,tso-tso,19 +lre22_dev_giuix,ara-aeb,15 +lre22_dev_gjaqj,eng-iaf,20 +lre22_dev_gjgcw,xho-xho,18 +lre22_dev_gjirh,eng-iaf,16 +lre22_dev_gjvwy,nbl-nbl,22 +lre22_dev_gkeql,eng-iaf,16 +lre22_dev_gkhas,tso-tso,16 +lre22_dev_glmyp,nbl-nbl,16 +lre22_dev_glqft,eng-ens,18 +lre22_dev_glsnb,afr-afr,17 +lre22_dev_gmfcb,eng-iaf,16 +lre22_dev_gmlwo,afr-afr,16 +lre22_dev_gmpjq,tso-tso,12 +lre22_dev_gmrvk,ara-aeb,14 +lre22_dev_gmryq,ara-ayl,13 +lre22_dev_gmsds,eng-ens,16 +lre22_dev_gmztl,xho-xho,16 +lre22_dev_gnbyu,eng-iaf,15 +lre22_dev_gntym,zul-zul,17 +lre22_dev_gocpa,tso-tso,15 +lre22_dev_gpyxs,orm-orm,17 +lre22_dev_grgvb,afr-afr,16 +lre22_dev_grspj,orm-orm,19 +lre22_dev_grvjm,xho-xho,19 +lre22_dev_gsidj,eng-ens,18 +lre22_dev_gslzy,afr-afr,22 
+lre22_dev_gtwcl,tir-tir,14 +lre22_dev_gulky,orm-orm,21 +lre22_dev_gvlhy,tir-tir,20 +lre22_dev_gvljx,tso-tso,15 +lre22_dev_gvmma,tso-tso,13 +lre22_dev_gvtvb,afr-afr,23 +lre22_dev_gweym,xho-xho,19 +lre22_dev_gwljh,ara-aeb,11 +lre22_dev_gwxtn,ara-ayl,14 +lre22_dev_gxdpw,fra-ntf,16 +lre22_dev_gxext,afr-afr,15 +lre22_dev_gxkqq,nbl-nbl,19 +lre22_dev_gxkxo,xho-xho,21 +lre22_dev_gxnkr,xho-xho,18 +lre22_dev_gxxbk,fra-ntf,21 +lre22_dev_gydvv,afr-afr,20 +lre22_dev_gytkt,ara-arq,12 +lre22_dev_gzmvp,afr-afr,18 +lre22_dev_gzoou,ven-ven,19 +lre22_dev_gzvza,tir-tir,15 +lre22_dev_gzwee,eng-iaf,17 +lre22_dev_haewp,tir-tir,19 +lre22_dev_haokb,fra-ntf,19 +lre22_dev_hazis,nbl-nbl,20 +lre22_dev_hbbbc,eng-ens,16 +lre22_dev_hblqa,nbl-nbl,17 +lre22_dev_hbmfy,zul-zul,15 +lre22_dev_hbndl,zul-zul,17 +lre22_dev_hcgfc,eng-ens,13 +lre22_dev_hcjnx,orm-orm,17 +lre22_dev_hcont,tir-tir,17 +lre22_dev_hcvik,tso-tso,13 +lre22_dev_hczom,zul-zul,19 +lre22_dev_hdaca,xho-xho,19 +lre22_dev_hdijt,fra-ntf,15 +lre22_dev_hdkyr,afr-afr,18 +lre22_dev_hdnoq,orm-orm,23 +lre22_dev_hdtlb,eng-iaf,16 +lre22_dev_hever,nbl-nbl,18 +lre22_dev_hfirj,nbl-nbl,17 +lre22_dev_hgbxp,xho-xho,21 +lre22_dev_hgcax,xho-xho,19 +lre22_dev_hgkwa,tso-tso,13 +lre22_dev_hgljd,ara-arq,15 +lre22_dev_hgvrh,nbl-nbl,21 +lre22_dev_hhovn,eng-iaf,16 +lre22_dev_hhpzm,fra-ntf,22 +lre22_dev_hhuab,ven-ven,20 +lre22_dev_hicev,ven-ven,18 +lre22_dev_hickz,ara-arq,12 +lre22_dev_hilii,orm-orm,23 +lre22_dev_hjenx,eng-iaf,19 +lre22_dev_hjiui,orm-orm,18 +lre22_dev_hkfts,eng-ens,18 +lre22_dev_hkhvl,zul-zul,19 +lre22_dev_hkobh,xho-xho,17 +lre22_dev_hkvay,ara-arq,13 +lre22_dev_hkvtj,orm-orm,21 +lre22_dev_hlevc,fra-ntf,17 +lre22_dev_hliut,ara-aeb,14 +lre22_dev_hlntc,zul-zul,18 +lre22_dev_hlprm,zul-zul,18 +lre22_dev_hmeav,ven-ven,17 +lre22_dev_hnelt,tir-tir,15 +lre22_dev_hniiy,ara-arq,15 +lre22_dev_hoepv,ara-aeb,13 +lre22_dev_hofkm,orm-orm,19 +lre22_dev_hoilz,tir-tir,19 +lre22_dev_hookr,ara-aeb,13 +lre22_dev_hpbhl,tir-tir,16 +lre22_dev_hpbzf,ara-aeb,11 +lre22_dev_hpizl,eng-ens,15 +lre22_dev_hplhi,ara-ayl,13 +lre22_dev_hplrq,xho-xho,20 +lre22_dev_hqdva,ven-ven,21 +lre22_dev_hqnus,xho-xho,16 +lre22_dev_hqoiz,orm-orm,18 +lre22_dev_hrerz,eng-ens,14 +lre22_dev_hrgjq,tir-tir,19 +lre22_dev_hrrhr,zul-zul,17 +lre22_dev_hsfbi,ara-ayl,14 +lre22_dev_hsjlg,tir-tir,17 +lre22_dev_hskug,afr-afr,16 +lre22_dev_hszzt,tso-tso,19 +lre22_dev_htgrl,tso-tso,18 +lre22_dev_htxah,zul-zul,17 +lre22_dev_htxrs,xho-xho,23 +lre22_dev_hudwz,nbl-nbl,17 +lre22_dev_huuqj,fra-ntf,18 +lre22_dev_hvsds,afr-afr,21 +lre22_dev_hwbhz,orm-orm,23 +lre22_dev_hwbvs,tso-tso,13 +lre22_dev_hwdlb,tso-tso,19 +lre22_dev_hwyki,eng-iaf,16 +lre22_dev_hxcmj,eng-iaf,20 +lre22_dev_hxdly,ara-arq,11 +lre22_dev_hyeqm,xho-xho,19 +lre22_dev_hyofm,ara-arq,12 +lre22_dev_hyogg,ara-arq,13 +lre22_dev_hyouu,tso-tso,13 +lre22_dev_hzfpc,fra-ntf,16 +lre22_dev_hzkjt,ara-aeb,12 +lre22_dev_hzrgv,fra-ntf,20 +lre22_dev_hzuus,tir-tir,19 +lre22_dev_hzzbp,xho-xho,19 +lre22_dev_iautt,afr-afr,20 +lre22_dev_ibdnu,tir-tir,13 +lre22_dev_ibuww,ara-aeb,13 +lre22_dev_icbuo,ven-ven,21 +lre22_dev_icqmr,tso-tso,14 +lre22_dev_ictwj,tir-tir,14 +lre22_dev_ifumz,ven-ven,14 +lre22_dev_igcgi,tso-tso,19 +lre22_dev_igder,tir-tir,19 +lre22_dev_igexm,xho-xho,21 +lre22_dev_igfxi,fra-ntf,20 +lre22_dev_igoxr,afr-afr,15 +lre22_dev_igxyt,ven-ven,21 +lre22_dev_ihqtn,ara-aeb,11 +lre22_dev_ihxfl,tir-tir,13 +lre22_dev_ihyrb,nbl-nbl,18 +lre22_dev_iifuu,tir-tir,15 +lre22_dev_iiien,xho-xho,20 +lre22_dev_ijccu,eng-iaf,16 +lre22_dev_ijrun,afr-afr,18 +lre22_dev_ijwlx,ara-arq,14 
+lre22_dev_ijydw,xho-xho,21 +lre22_dev_ikdjt,xho-xho,23 +lre22_dev_iklbv,ara-arq,13 +lre22_dev_ikyai,fra-ntf,18 +lre22_dev_ildmr,orm-orm,21 +lre22_dev_ilebo,orm-orm,19 +lre22_dev_ilptc,eng-ens,18 +lre22_dev_ilsku,fra-ntf,16 +lre22_dev_ilyti,ara-arq,11 +lre22_dev_imnqh,zul-zul,17 +lre22_dev_imxdr,eng-ens,16 +lre22_dev_indww,fra-ntf,19 +lre22_dev_iokar,eng-iaf,15 +lre22_dev_iomtu,eng-iaf,15 +lre22_dev_ioobz,tir-tir,14 +lre22_dev_iosom,zul-zul,17 +lre22_dev_iowyd,ara-arq,14 +lre22_dev_iphzy,nbl-nbl,18 +lre22_dev_ipmrc,nbl-nbl,16 +lre22_dev_ipomi,ara-aeb,12 +lre22_dev_ipour,afr-afr,15 +lre22_dev_ippjq,ara-ayl,16 +lre22_dev_ipvjc,ara-aeb,13 +lre22_dev_iqfdc,ven-ven,19 +lre22_dev_iqppw,tso-tso,15 +lre22_dev_iqtde,tso-tso,14 +lre22_dev_irlee,eng-iaf,14 +lre22_dev_irxuq,ara-aeb,14 +lre22_dev_isjzo,ara-arq,14 +lre22_dev_isnwz,ara-ayl,14 +lre22_dev_isqvk,afr-afr,15 +lre22_dev_isqww,orm-orm,19 +lre22_dev_istdz,tir-tir,18 +lre22_dev_iszhe,fra-ntf,20 +lre22_dev_itblz,ven-ven,18 +lre22_dev_itfez,ara-arq,13 +lre22_dev_itjqm,zul-zul,18 +lre22_dev_itnap,nbl-nbl,21 +lre22_dev_itrms,xho-xho,21 +lre22_dev_itroi,fra-ntf,17 +lre22_dev_ittds,zul-zul,16 +lre22_dev_iuknz,tso-tso,16 +lre22_dev_iumnm,ara-ayl,15 +lre22_dev_iunul,afr-afr,23 +lre22_dev_iverq,ven-ven,16 +lre22_dev_ivwzd,ara-ayl,14 +lre22_dev_ivzjf,tso-tso,12 +lre22_dev_iwbta,nbl-nbl,16 +lre22_dev_iwdeh,orm-orm,21 +lre22_dev_iwgel,ara-aeb,11 +lre22_dev_ixbhj,ara-aeb,11 +lre22_dev_ixbnl,fra-ntf,16 +lre22_dev_ixcef,ven-ven,20 +lre22_dev_ixfdf,orm-orm,18 +lre22_dev_ixjey,orm-orm,19 +lre22_dev_ixlve,tir-tir,17 +lre22_dev_ixutu,ara-ayl,12 +lre22_dev_ixxoj,xho-xho,23 +lre22_dev_ixyko,afr-afr,22 +lre22_dev_iylls,eng-iaf,19 +lre22_dev_izegw,orm-orm,23 +lre22_dev_izglb,ara-ayl,13 +lre22_dev_iziar,ara-arq,13 +lre22_dev_jadvz,afr-afr,18 +lre22_dev_jajtw,ara-aeb,14 +lre22_dev_janvu,tso-tso,16 +lre22_dev_japrb,xho-xho,21 +lre22_dev_jarvz,ara-aeb,12 +lre22_dev_jazcn,tso-tso,13 +lre22_dev_jbfxj,tso-tso,12 +lre22_dev_jbnfg,fra-ntf,15 +lre22_dev_jbwgd,afr-afr,20 +lre22_dev_jceug,tso-tso,15 +lre22_dev_jcqtd,eng-ens,14 +lre22_dev_jcxry,ven-ven,20 +lre22_dev_jdbli,tir-tir,20 +lre22_dev_jegmb,orm-orm,18 +lre22_dev_jegqj,ara-ayl,12 +lre22_dev_jenns,xho-xho,22 +lre22_dev_jfarf,ven-ven,14 +lre22_dev_jfcve,zul-zul,17 +lre22_dev_jfgyq,xho-xho,23 +lre22_dev_jftnz,afr-afr,14 +lre22_dev_jftsj,afr-afr,22 +lre22_dev_jgnid,nbl-nbl,16 +lre22_dev_jgsju,eng-ens,13 +lre22_dev_jifal,orm-orm,19 +lre22_dev_jihsd,orm-orm,21 +lre22_dev_jihwf,ara-ayl,11 +lre22_dev_jiptp,eng-iaf,15 +lre22_dev_jizij,tir-tir,14 +lre22_dev_jjpzg,orm-orm,23 +lre22_dev_jkezw,fra-ntf,18 +lre22_dev_jkmux,fra-ntf,20 +lre22_dev_jkpnt,orm-orm,22 +lre22_dev_jlkfj,eng-ens,18 +lre22_dev_jlmtf,ven-ven,19 +lre22_dev_jlrfm,ara-arq,12 +lre22_dev_jmojg,orm-orm,19 +lre22_dev_jmrcv,ara-aeb,13 +lre22_dev_jmsxc,eng-iaf,16 +lre22_dev_jnjpw,tir-tir,14 +lre22_dev_jnzvu,ara-aeb,14 +lre22_dev_jocyh,xho-xho,19 +lre22_dev_joezr,tso-tso,16 +lre22_dev_jofqy,ara-arq,11 +lre22_dev_jpbyf,eng-ens,15 +lre22_dev_jppuy,ara-arq,13 +lre22_dev_jptts,ara-aeb,12 +lre22_dev_jqdyx,fra-ntf,22 +lre22_dev_jqjbq,zul-zul,17 +lre22_dev_jqpnb,ven-ven,21 +lre22_dev_jqqin,zul-zul,17 +lre22_dev_jqzkq,ara-ayl,13 +lre22_dev_jrroq,orm-orm,21 +lre22_dev_jruru,eng-ens,16 +lre22_dev_jskbr,ara-arq,11 +lre22_dev_jskdd,nbl-nbl,19 +lre22_dev_jslnc,eng-ens,12 +lre22_dev_jsmat,orm-orm,17 +lre22_dev_jsmdw,ara-aeb,11 +lre22_dev_jsvaz,afr-afr,19 +lre22_dev_jsxcy,afr-afr,21 +lre22_dev_jszgk,eng-iaf,19 +lre22_dev_jthui,ven-ven,20 +lre22_dev_jtpvz,ven-ven,17 
+lre22_dev_jtwdi,ven-ven,14 +lre22_dev_jtwfh,ven-ven,18 +lre22_dev_juwid,tir-tir,20 +lre22_dev_jvdww,fra-ntf,21 +lre22_dev_jweyx,tir-tir,19 +lre22_dev_jwuto,afr-afr,19 +lre22_dev_jwwgs,afr-afr,19 +lre22_dev_jxhxf,nbl-nbl,17 +lre22_dev_jxtxk,orm-orm,20 +lre22_dev_jxzvy,eng-ens,15 +lre22_dev_jyjlm,nbl-nbl,19 +lre22_dev_jynvf,ara-ayl,13 +lre22_dev_jyzmh,nbl-nbl,19 +lre22_dev_jzivf,eng-ens,14 +lre22_dev_jzpns,tso-tso,14 +lre22_dev_kadwu,fra-ntf,18 +lre22_dev_kbnbi,tir-tir,13 +lre22_dev_kbqbd,fra-ntf,16 +lre22_dev_kbscm,tso-tso,15 +lre22_dev_kbxko,ara-aeb,12 +lre22_dev_kcegv,tso-tso,15 +lre22_dev_kcibo,afr-afr,17 +lre22_dev_kcmky,ara-ayl,14 +lre22_dev_kctrd,nbl-nbl,22 +lre22_dev_kcvbf,fra-ntf,16 +lre22_dev_kdbqy,zul-zul,15 +lre22_dev_kdgpz,ara-arq,14 +lre22_dev_kdhgq,nbl-nbl,22 +lre22_dev_kdvtu,eng-iaf,16 +lre22_dev_kdyhm,tso-tso,12 +lre22_dev_keeyz,zul-zul,18 +lre22_dev_kejvy,ven-ven,18 +lre22_dev_kerpr,ven-ven,21 +lre22_dev_keweh,ara-aeb,13 +lre22_dev_keysx,orm-orm,23 +lre22_dev_kezyv,ara-ayl,13 +lre22_dev_kgbiq,ven-ven,18 +lre22_dev_kgovz,tso-tso,15 +lre22_dev_kgxka,eng-ens,16 +lre22_dev_khkcx,fra-ntf,20 +lre22_dev_khobl,orm-orm,19 +lre22_dev_khttn,afr-afr,17 +lre22_dev_khvss,tir-tir,15 +lre22_dev_kiezl,tso-tso,16 +lre22_dev_kihlw,eng-ens,14 +lre22_dev_kipuq,ara-arq,14 +lre22_dev_kiqcx,tir-tir,16 +lre22_dev_kjiks,xho-xho,19 +lre22_dev_kjmpa,zul-zul,18 +lre22_dev_kjocf,eng-iaf,16 +lre22_dev_kkbur,ven-ven,16 +lre22_dev_kksdi,xho-xho,22 +lre22_dev_kkytv,ara-aeb,11 +lre22_dev_kmkgx,nbl-nbl,17 +lre22_dev_kmpkm,zul-zul,19 +lre22_dev_kmyzy,ara-ayl,13 +lre22_dev_knfsj,afr-afr,15 +lre22_dev_knyuq,orm-orm,19 +lre22_dev_koacp,orm-orm,19 +lre22_dev_koket,eng-ens,18 +lre22_dev_kovdn,zul-zul,15 +lre22_dev_kowqf,ven-ven,19 +lre22_dev_kozfr,nbl-nbl,21 +lre22_dev_kpmyz,orm-orm,19 +lre22_dev_kqfdc,eng-ens,17 +lre22_dev_kqumw,fra-ntf,22 +lre22_dev_kqwdi,nbl-nbl,16 +lre22_dev_krczb,ven-ven,19 +lre22_dev_kremz,nbl-nbl,16 +lre22_dev_ksruw,ven-ven,18 +lre22_dev_kszdw,eng-iaf,20 +lre22_dev_ktgvi,ara-arq,11 +lre22_dev_ktjax,fra-ntf,20 +lre22_dev_ktlvc,orm-orm,19 +lre22_dev_kvqgp,afr-afr,21 +lre22_dev_kvyoz,afr-afr,20 +lre22_dev_kvzim,afr-afr,14 +lre22_dev_kvzwc,eng-iaf,14 +lre22_dev_kwcwa,ara-arq,14 +lre22_dev_kwomo,zul-zul,19 +lre22_dev_kwxau,xho-xho,18 +lre22_dev_kxawf,tir-tir,19 +lre22_dev_kxjhn,ara-aeb,11 +lre22_dev_kxklh,tir-tir,19 +lre22_dev_kxlgg,tir-tir,16 +lre22_dev_kyqbp,fra-ntf,21 +lre22_dev_kyzio,ven-ven,20 +lre22_dev_kzcgh,ara-ayl,13 +lre22_dev_kzeyf,ven-ven,18 +lre22_dev_kzfwf,fra-ntf,19 +lre22_dev_kzjuz,orm-orm,21 +lre22_dev_kzjwx,ara-ayl,11 +lre22_dev_lamjl,tso-tso,17 +lre22_dev_laowh,xho-xho,16 +lre22_dev_larex,ara-ayl,11 +lre22_dev_laycs,tso-tso,12 +lre22_dev_lbxfn,eng-iaf,20 +lre22_dev_lcrog,zul-zul,18 +lre22_dev_ldczz,xho-xho,17 +lre22_dev_ldkgv,ara-aeb,13 +lre22_dev_ldkst,fra-ntf,20 +lre22_dev_ldkwr,orm-orm,22 +lre22_dev_lenxf,ven-ven,14 +lre22_dev_lfbey,ara-ayl,12 +lre22_dev_lfmml,fra-ntf,18 +lre22_dev_lfmxu,ven-ven,18 +lre22_dev_lfqfj,afr-afr,17 +lre22_dev_lgetu,ara-aeb,14 +lre22_dev_lgleu,ara-ayl,11 +lre22_dev_lgoat,eng-iaf,16 +lre22_dev_lhgaj,tso-tso,15 +lre22_dev_lhqyw,nbl-nbl,17 +lre22_dev_lhrmr,eng-iaf,17 +lre22_dev_lhtsd,tir-tir,19 +lre22_dev_lhydp,fra-ntf,22 +lre22_dev_livbf,tir-tir,15 +lre22_dev_ljdrg,ara-arq,13 +lre22_dev_ljniw,tso-tso,16 +lre22_dev_ljpmq,tso-tso,12 +lre22_dev_lkjon,tso-tso,15 +lre22_dev_lkszp,nbl-nbl,19 +lre22_dev_llbim,ara-ayl,15 +lre22_dev_llkkt,fra-ntf,15 +lre22_dev_llvcc,orm-orm,22 +lre22_dev_lmbug,ara-arq,12 +lre22_dev_lmmmw,nbl-nbl,19 
+lre22_dev_lmsek,ven-ven,16 +lre22_dev_lmudp,ara-ayl,10 +lre22_dev_lmzmv,eng-iaf,19 +lre22_dev_lnlae,ara-arq,14 +lre22_dev_lnlvt,zul-zul,17 +lre22_dev_lnppu,ara-ayl,13 +lre22_dev_lnpyc,tso-tso,19 +lre22_dev_lolkv,xho-xho,19 +lre22_dev_lorcx,nbl-nbl,20 +lre22_dev_lparq,xho-xho,16 +lre22_dev_lqlft,ara-arq,11 +lre22_dev_lqlyq,ara-arq,12 +lre22_dev_lqoeu,tso-tso,14 +lre22_dev_lqueh,ara-ayl,11 +lre22_dev_lquzk,ara-arq,12 +lre22_dev_lqvav,zul-zul,18 +lre22_dev_lrgpy,eng-iaf,16 +lre22_dev_lrjbn,ven-ven,21 +lre22_dev_lrtad,ara-arq,14 +lre22_dev_lrtxd,ara-aeb,11 +lre22_dev_lrvkn,ven-ven,16 +lre22_dev_lrzwy,ara-ayl,13 +lre22_dev_lsefk,ara-arq,13 +lre22_dev_ltmmt,orm-orm,22 +lre22_dev_lutgh,ara-aeb,15 +lre22_dev_lvhmd,tso-tso,14 +lre22_dev_lvqim,ara-aeb,14 +lre22_dev_lvuuo,fra-ntf,17 +lre22_dev_lvzri,ven-ven,16 +lre22_dev_lweml,ara-arq,14 +lre22_dev_lwstj,eng-iaf,16 +lre22_dev_lwzdj,afr-afr,18 +lre22_dev_lxdsk,eng-ens,16 +lre22_dev_lxlcr,ara-aeb,13 +lre22_dev_lxshv,eng-iaf,20 +lre22_dev_lxxvv,eng-ens,16 +lre22_dev_lyfhc,ven-ven,18 +lre22_dev_lyikp,zul-zul,19 +lre22_dev_lyjix,tso-tso,14 +lre22_dev_lyxyh,eng-iaf,19 +lre22_dev_lyzxd,tir-tir,17 +lre22_dev_lzguf,orm-orm,21 +lre22_dev_lzpmk,tir-tir,16 +lre22_dev_lzugv,xho-xho,19 +lre22_dev_maeeb,tir-tir,15 +lre22_dev_maemn,zul-zul,16 +lre22_dev_manpw,orm-orm,19 +lre22_dev_mavli,ara-aeb,12 +lre22_dev_mbywd,orm-orm,19 +lre22_dev_mcath,nbl-nbl,22 +lre22_dev_mcjtw,xho-xho,16 +lre22_dev_mcndd,ven-ven,15 +lre22_dev_mcxqb,tir-tir,13 +lre22_dev_mdlia,fra-ntf,16 +lre22_dev_mdxsp,eng-ens,18 +lre22_dev_menex,eng-iaf,16 +lre22_dev_merfk,orm-orm,21 +lre22_dev_mfipk,zul-zul,16 +lre22_dev_mfuqh,ara-arq,14 +lre22_dev_mgcvo,xho-xho,19 +lre22_dev_mggbx,zul-zul,18 +lre22_dev_mgghl,tso-tso,12 +lre22_dev_mgwqd,ara-arq,14 +lre22_dev_mhswt,ara-ayl,15 +lre22_dev_mhwmt,tso-tso,16 +lre22_dev_miayn,ara-aeb,12 +lre22_dev_miley,tso-tso,16 +lre22_dev_mjfmb,nbl-nbl,21 +lre22_dev_mkbyx,tir-tir,19 +lre22_dev_mlbzi,xho-xho,23 +lre22_dev_mlduq,xho-xho,16 +lre22_dev_mljnp,ara-arq,14 +lre22_dev_mljpb,orm-orm,22 +lre22_dev_mlrsm,xho-xho,17 +lre22_dev_mlwzr,eng-ens,13 +lre22_dev_mlyeo,ven-ven,15 +lre22_dev_mmaed,ara-ayl,14 +lre22_dev_mmbns,eng-ens,12 +lre22_dev_mneyt,xho-xho,17 +lre22_dev_mnhsk,ven-ven,14 +lre22_dev_mnnvk,eng-ens,15 +lre22_dev_mnswo,tso-tso,16 +lre22_dev_mntdk,eng-ens,18 +lre22_dev_mogwl,orm-orm,22 +lre22_dev_mpbun,nbl-nbl,21 +lre22_dev_mpmuf,ara-aeb,14 +lre22_dev_mpoet,nbl-nbl,16 +lre22_dev_mptyi,afr-afr,18 +lre22_dev_mpzxy,orm-orm,18 +lre22_dev_mqxni,ara-arq,11 +lre22_dev_mqzga,tso-tso,19 +lre22_dev_mrgdh,xho-xho,17 +lre22_dev_mrgko,afr-afr,18 +lre22_dev_mrksc,tir-tir,19 +lre22_dev_mrogp,eng-iaf,15 +lre22_dev_mscwd,fra-ntf,16 +lre22_dev_mshco,ara-ayl,12 +lre22_dev_msptn,ara-ayl,16 +lre22_dev_msslk,ara-aeb,14 +lre22_dev_mtaus,fra-ntf,19 +lre22_dev_mtpgl,tso-tso,13 +lre22_dev_mttly,tir-tir,19 +lre22_dev_mubqn,fra-ntf,15 +lre22_dev_muskv,tso-tso,12 +lre22_dev_muzkp,ara-arq,14 +lre22_dev_mvdus,ven-ven,19 +lre22_dev_mvngl,xho-xho,19 +lre22_dev_mvrpq,tso-tso,12 +lre22_dev_mvtcj,afr-afr,22 +lre22_dev_mwhsu,xho-xho,21 +lre22_dev_mwkyp,nbl-nbl,20 +lre22_dev_mxcey,ara-ayl,12 +lre22_dev_mxcub,ara-aeb,12 +lre22_dev_myekh,ara-aeb,11 +lre22_dev_mzxhf,zul-zul,17 +lre22_dev_mzyru,ara-arq,12 +lre22_dev_nakax,eng-iaf,15 +lre22_dev_naymc,ara-ayl,13 +lre22_dev_nbgid,orm-orm,19 +lre22_dev_nbmnl,xho-xho,16 +lre22_dev_ncffi,zul-zul,14 +lre22_dev_ncjtj,fra-ntf,22 +lre22_dev_ncpix,ara-ayl,11 +lre22_dev_nctqc,xho-xho,16 +lre22_dev_ndkuo,orm-orm,20 +lre22_dev_ndqfw,nbl-nbl,17 
+lre22_dev_nedes,ven-ven,15 +lre22_dev_neomw,zul-zul,18 +lre22_dev_neziz,tir-tir,19 +lre22_dev_nfcvg,eng-iaf,17 +lre22_dev_nfdfc,afr-afr,17 +lre22_dev_ngijv,xho-xho,21 +lre22_dev_ngrxk,ara-ayl,13 +lre22_dev_ngzja,ara-aeb,13 +lre22_dev_nhaub,tso-tso,13 +lre22_dev_nhkro,xho-xho,23 +lre22_dev_nhlvt,ara-arq,14 +lre22_dev_nhlxm,eng-ens,14 +lre22_dev_nhyjy,afr-afr,17 +lre22_dev_nifei,zul-zul,19 +lre22_dev_nikpx,ven-ven,18 +lre22_dev_njceq,afr-afr,18 +lre22_dev_njmlt,eng-ens,17 +lre22_dev_njqfj,orm-orm,18 +lre22_dev_nkdje,eng-iaf,19 +lre22_dev_nkkqo,nbl-nbl,22 +lre22_dev_nknrw,orm-orm,21 +lre22_dev_nkogd,fra-ntf,19 +lre22_dev_nksfc,tir-tir,19 +lre22_dev_nkwmm,orm-orm,22 +lre22_dev_nmhdg,ara-ayl,10 +lre22_dev_nmoux,ven-ven,20 +lre22_dev_nmrsq,ven-ven,21 +lre22_dev_nnbhc,fra-ntf,20 +lre22_dev_nnbpy,tir-tir,18 +lre22_dev_nnpwd,ara-aeb,13 +lre22_dev_nodin,ara-ayl,14 +lre22_dev_nogji,nbl-nbl,20 +lre22_dev_nonvr,afr-afr,15 +lre22_dev_notcl,eng-iaf,19 +lre22_dev_noufn,ara-aeb,11 +lre22_dev_noveb,ara-ayl,11 +lre22_dev_npajm,nbl-nbl,19 +lre22_dev_npehj,ara-ayl,14 +lre22_dev_nqdaj,tso-tso,12 +lre22_dev_nqkon,xho-xho,18 +lre22_dev_nqlhw,ara-aeb,13 +lre22_dev_nraqr,eng-ens,14 +lre22_dev_nrino,tso-tso,14 +lre22_dev_nrzgt,xho-xho,16 +lre22_dev_nscrg,orm-orm,18 +lre22_dev_nstgp,orm-orm,23 +lre22_dev_ntgqz,afr-afr,23 +lre22_dev_nthzr,eng-iaf,18 +lre22_dev_ntwzb,afr-afr,16 +lre22_dev_nudwv,eng-ens,14 +lre22_dev_nuerz,eng-iaf,18 +lre22_dev_nujfy,xho-xho,21 +lre22_dev_nurlx,eng-ens,13 +lre22_dev_nvakd,zul-zul,17 +lre22_dev_nvgkj,eng-ens,17 +lre22_dev_nvhvv,fra-ntf,20 +lre22_dev_nwbnz,ara-arq,14 +lre22_dev_nwjed,nbl-nbl,19 +lre22_dev_nwrto,ara-aeb,11 +lre22_dev_nwunl,zul-zul,14 +lre22_dev_nwvyy,tir-tir,19 +lre22_dev_nxwlo,nbl-nbl,17 +lre22_dev_nxxzy,zul-zul,16 +lre22_dev_nxzpp,nbl-nbl,20 +lre22_dev_nyhwg,ara-arq,14 +lre22_dev_nykvr,eng-ens,17 +lre22_dev_nyvkc,tir-tir,15 +lre22_dev_nyyui,ara-arq,11 +lre22_dev_nzbfh,zul-zul,19 +lre22_dev_nzxsk,xho-xho,21 +lre22_dev_oasrh,ara-arq,11 +lre22_dev_oavaf,xho-xho,21 +lre22_dev_obfrf,orm-orm,20 +lre22_dev_obocn,ara-arq,14 +lre22_dev_obumo,eng-ens,15 +lre22_dev_ocbuj,eng-ens,12 +lre22_dev_ocbxu,nbl-nbl,21 +lre22_dev_ocdvw,ara-ayl,13 +lre22_dev_ocdzj,xho-xho,19 +lre22_dev_ocveq,fra-ntf,22 +lre22_dev_odest,ara-ayl,11 +lre22_dev_odjlq,ven-ven,18 +lre22_dev_odpoq,ara-ayl,12 +lre22_dev_odrcm,fra-ntf,21 +lre22_dev_oeavx,ara-arq,12 +lre22_dev_oefoy,ara-aeb,12 +lre22_dev_oefqy,ven-ven,16 +lre22_dev_oehxk,ara-ayl,12 +lre22_dev_oeqbo,ara-aeb,14 +lre22_dev_oeqjq,fra-ntf,20 +lre22_dev_ofdgy,ara-ayl,15 +lre22_dev_ofgkq,fra-ntf,21 +lre22_dev_ofpva,ara-arq,11 +lre22_dev_ofufy,eng-iaf,17 +lre22_dev_ogglz,ara-aeb,13 +lre22_dev_oggtr,nbl-nbl,19 +lre22_dev_ogpxk,ara-aeb,11 +lre22_dev_ogsay,tso-tso,19 +lre22_dev_ogtvj,zul-zul,19 +lre22_dev_ohqwz,ara-arq,13 +lre22_dev_ohuxo,afr-afr,20 +lre22_dev_ohweb,ven-ven,16 +lre22_dev_ohzpg,fra-ntf,21 +lre22_dev_oijcy,xho-xho,19 +lre22_dev_oijgv,tir-tir,16 +lre22_dev_oikqj,eng-iaf,17 +lre22_dev_oinvl,ven-ven,15 +lre22_dev_oiofr,fra-ntf,19 +lre22_dev_oipks,eng-ens,17 +lre22_dev_ojzos,ara-arq,14 +lre22_dev_okbnu,ara-ayl,10 +lre22_dev_okpcp,eng-iaf,18 +lre22_dev_okwpq,tso-tso,16 +lre22_dev_oleie,ara-arq,12 +lre22_dev_oljep,ven-ven,21 +lre22_dev_oljsa,fra-ntf,16 +lre22_dev_olkup,nbl-nbl,16 +lre22_dev_olqbh,ara-ayl,14 +lre22_dev_omjqo,ara-aeb,14 +lre22_dev_omwiy,ara-ayl,12 +lre22_dev_omxnk,ara-arq,13 +lre22_dev_onqke,eng-iaf,16 +lre22_dev_onzje,tir-tir,13 +lre22_dev_ooktw,afr-afr,18 +lre22_dev_oosff,ara-aeb,12 +lre22_dev_ootbi,xho-xho,21 
+lre22_dev_opciz,orm-orm,23 +lre22_dev_opgny,xho-xho,19 +lre22_dev_opifd,ara-arq,12 +lre22_dev_oporo,eng-iaf,19 +lre22_dev_opryj,nbl-nbl,16 +lre22_dev_opuzh,eng-ens,12 +lre22_dev_oqbaw,ven-ven,18 +lre22_dev_oqeuj,tir-tir,14 +lre22_dev_oqmhb,xho-xho,21 +lre22_dev_oqmrs,ara-arq,14 +lre22_dev_oqqwq,tso-tso,12 +lre22_dev_oquaq,xho-xho,17 +lre22_dev_oriap,fra-ntf,20 +lre22_dev_orsjj,tir-tir,20 +lre22_dev_orvna,fra-ntf,21 +lre22_dev_oskoe,orm-orm,20 +lre22_dev_otlyk,nbl-nbl,18 +lre22_dev_oujnj,nbl-nbl,17 +lre22_dev_oumka,ven-ven,14 +lre22_dev_ouqsx,ara-arq,13 +lre22_dev_outyl,zul-zul,16 +lre22_dev_owlwt,ara-ayl,14 +lre22_dev_owvfd,orm-orm,18 +lre22_dev_oxizc,tir-tir,15 +lre22_dev_oxpht,eng-ens,18 +lre22_dev_oxqlz,afr-afr,15 +lre22_dev_oydiw,nbl-nbl,16 +lre22_dev_oyfcl,fra-ntf,22 +lre22_dev_oyhba,eng-ens,18 +lre22_dev_oyiif,afr-afr,17 +lre22_dev_oyslg,afr-afr,21 +lre22_dev_ozfpi,tir-tir,15 +lre22_dev_ozlww,ven-ven,19 +lre22_dev_paxnc,eng-ens,17 +lre22_dev_pbbgx,eng-iaf,14 +lre22_dev_pcfmw,nbl-nbl,21 +lre22_dev_pclpc,fra-ntf,15 +lre22_dev_pcmmj,afr-afr,16 +lre22_dev_pcsqz,tso-tso,18 +lre22_dev_pdcfm,ara-ayl,10 +lre22_dev_pdtuf,eng-ens,18 +lre22_dev_pdzuj,zul-zul,17 +lre22_dev_pehfu,fra-ntf,15 +lre22_dev_pewpj,orm-orm,22 +lre22_dev_pexjz,orm-orm,17 +lre22_dev_pfioj,eng-iaf,15 +lre22_dev_pfkcf,eng-iaf,16 +lre22_dev_pfknl,ara-arq,14 +lre22_dev_pfucv,ara-ayl,12 +lre22_dev_pfyha,fra-ntf,21 +lre22_dev_pgavf,ara-ayl,13 +lre22_dev_phket,nbl-nbl,22 +lre22_dev_piabk,afr-afr,19 +lre22_dev_picvg,orm-orm,17 +lre22_dev_piina,eng-ens,14 +lre22_dev_pjahm,afr-afr,20 +lre22_dev_pjcso,nbl-nbl,17 +lre22_dev_pjggp,ven-ven,16 +lre22_dev_pjohw,xho-xho,19 +lre22_dev_pkpxo,ara-ayl,11 +lre22_dev_pktgk,nbl-nbl,22 +lre22_dev_plojq,eng-ens,12 +lre22_dev_pmayg,ven-ven,21 +lre22_dev_pmjyi,xho-xho,20 +lre22_dev_pmkcp,nbl-nbl,20 +lre22_dev_pnfhk,fra-ntf,18 +lre22_dev_pnust,nbl-nbl,20 +lre22_dev_pnwey,eng-iaf,15 +lre22_dev_pnwti,ara-aeb,13 +lre22_dev_pohmm,afr-afr,14 +lre22_dev_pojvr,nbl-nbl,22 +lre22_dev_poxsw,ara-aeb,13 +lre22_dev_ppjvq,tir-tir,16 +lre22_dev_ppkfc,fra-ntf,19 +lre22_dev_ppmnu,tso-tso,12 +lre22_dev_ppzno,tso-tso,12 +lre22_dev_pqksl,afr-afr,14 +lre22_dev_pqnvh,zul-zul,19 +lre22_dev_prcus,tso-tso,15 +lre22_dev_prhoh,tir-tir,19 +lre22_dev_prkth,ara-arq,12 +lre22_dev_prnhd,xho-xho,18 +lre22_dev_psjma,fra-ntf,18 +lre22_dev_psldq,tir-tir,19 +lre22_dev_psnvo,afr-afr,15 +lre22_dev_psnzj,zul-zul,19 +lre22_dev_pudqr,eng-ens,17 +lre22_dev_pufnl,orm-orm,19 +lre22_dev_pusxa,nbl-nbl,22 +lre22_dev_pvsqi,ara-arq,11 +lre22_dev_pvteg,fra-ntf,17 +lre22_dev_pvvay,tir-tir,14 +lre22_dev_pvxcv,ara-aeb,15 +lre22_dev_pvygc,ara-aeb,11 +lre22_dev_pwcxu,tir-tir,13 +lre22_dev_pwhdm,nbl-nbl,17 +lre22_dev_pwnkz,ven-ven,20 +lre22_dev_pwrqe,ara-aeb,14 +lre22_dev_pxbhi,afr-afr,16 +lre22_dev_pxeyk,zul-zul,18 +lre22_dev_pxkzd,ara-arq,14 +lre22_dev_pydgm,afr-afr,19 +lre22_dev_pyiju,ven-ven,20 +lre22_dev_pzhrc,tso-tso,13 +lre22_dev_pzkea,ven-ven,14 +lre22_dev_pzqka,ara-arq,11 +lre22_dev_pzuis,ara-arq,13 +lre22_dev_qabac,ven-ven,19 +lre22_dev_qahym,ara-ayl,11 +lre22_dev_qaxfr,xho-xho,17 +lre22_dev_qazyc,ara-ayl,14 +lre22_dev_qbcoz,nbl-nbl,22 +lre22_dev_qcavr,eng-iaf,20 +lre22_dev_qcbkh,fra-ntf,18 +lre22_dev_qcbtt,afr-afr,18 +lre22_dev_qclly,xho-xho,22 +lre22_dev_qcqdt,eng-iaf,18 +lre22_dev_qdqzp,zul-zul,17 +lre22_dev_qdwut,eng-ens,16 +lre22_dev_qehxr,afr-afr,22 +lre22_dev_qeqah,tir-tir,16 +lre22_dev_qeyjd,afr-afr,17 +lre22_dev_qfprv,ara-ayl,13 +lre22_dev_qfqhi,ara-ayl,15 +lre22_dev_qgoge,tso-tso,13 +lre22_dev_qgrlb,eng-iaf,16 
+lre22_dev_qgrsu,zul-zul,14 +lre22_dev_qheor,xho-xho,23 +lre22_dev_qhfdz,tso-tso,14 +lre22_dev_qhlol,ven-ven,21 +lre22_dev_qhnfr,zul-zul,15 +lre22_dev_qhvuq,tso-tso,14 +lre22_dev_qibby,afr-afr,23 +lre22_dev_qicen,orm-orm,16 +lre22_dev_qiehd,eng-iaf,14 +lre22_dev_qjbfh,eng-iaf,15 +lre22_dev_qjdln,afr-afr,19 +lre22_dev_qjmro,ara-ayl,11 +lre22_dev_qkgor,zul-zul,16 +lre22_dev_qlgvf,ara-aeb,12 +lre22_dev_qlpjn,eng-iaf,16 +lre22_dev_qmoop,nbl-nbl,16 +lre22_dev_qmqhy,afr-afr,20 +lre22_dev_qmreh,ara-ayl,10 +lre22_dev_qmucf,ven-ven,18 +lre22_dev_qmvnu,fra-ntf,15 +lre22_dev_qmzke,ara-ayl,13 +lre22_dev_qmzxw,orm-orm,21 +lre22_dev_qnams,ven-ven,20 +lre22_dev_qnefv,xho-xho,23 +lre22_dev_qodht,zul-zul,19 +lre22_dev_qoqtk,eng-ens,16 +lre22_dev_qotto,fra-ntf,18 +lre22_dev_qoudd,tso-tso,18 +lre22_dev_qpego,ara-ayl,14 +lre22_dev_qphcb,fra-ntf,22 +lre22_dev_qqkiv,ara-arq,13 +lre22_dev_qqmeu,eng-ens,17 +lre22_dev_qqudk,orm-orm,21 +lre22_dev_qqvdr,orm-orm,23 +lre22_dev_qrbmq,ara-arq,12 +lre22_dev_qrfvx,fra-ntf,22 +lre22_dev_qrsqg,zul-zul,19 +lre22_dev_qrylo,eng-ens,18 +lre22_dev_qsbdh,nbl-nbl,16 +lre22_dev_qsqzo,afr-afr,14 +lre22_dev_qsudg,nbl-nbl,22 +lre22_dev_qszwt,fra-ntf,21 +lre22_dev_qtcmx,nbl-nbl,21 +lre22_dev_qtfpf,zul-zul,16 +lre22_dev_qtkhk,afr-afr,22 +lre22_dev_qtydg,afr-afr,22 +lre22_dev_qujmp,zul-zul,19 +lre22_dev_qulse,eng-ens,17 +lre22_dev_qutbz,eng-ens,18 +lre22_dev_quvqg,ara-aeb,13 +lre22_dev_qvpjs,eng-iaf,19 +lre22_dev_qvtdy,tso-tso,12 +lre22_dev_qvzol,orm-orm,19 +lre22_dev_qwvgm,ara-ayl,13 +lre22_dev_qwzxt,zul-zul,19 +lre22_dev_qxigw,tir-tir,19 +lre22_dev_qxkuu,tso-tso,13 +lre22_dev_qxtss,afr-afr,15 +lre22_dev_qxvbe,nbl-nbl,17 +lre22_dev_qxysh,afr-afr,22 +lre22_dev_qyfba,zul-zul,14 +lre22_dev_qyfov,fra-ntf,19 +lre22_dev_qyjgj,afr-afr,22 +lre22_dev_qyuwy,ara-aeb,15 +lre22_dev_qzfdr,nbl-nbl,18 +lre22_dev_qzldb,eng-iaf,19 +lre22_dev_ranrd,nbl-nbl,22 +lre22_dev_raurj,eng-ens,12 +lre22_dev_rbntq,ara-arq,11 +lre22_dev_rbssw,ara-aeb,11 +lre22_dev_rbwgx,ara-ayl,16 +lre22_dev_rcooi,fra-ntf,18 +lre22_dev_rcyom,ara-ayl,11 +lre22_dev_rdcns,zul-zul,18 +lre22_dev_rdrhv,ara-arq,11 +lre22_dev_rdyxn,eng-iaf,19 +lre22_dev_repec,tir-tir,19 +lre22_dev_rgbby,tso-tso,19 +lre22_dev_rgdvt,fra-ntf,20 +lre22_dev_rguqm,tso-tso,14 +lre22_dev_rgwjy,afr-afr,19 +lre22_dev_rijeq,orm-orm,19 +lre22_dev_rincv,tir-tir,16 +lre22_dev_rindo,zul-zul,17 +lre22_dev_rirhy,ara-arq,11 +lre22_dev_rjikw,fra-ntf,20 +lre22_dev_rjsik,tso-tso,16 +lre22_dev_rjvvj,tso-tso,19 +lre22_dev_rksid,nbl-nbl,22 +lre22_dev_rkycg,ven-ven,21 +lre22_dev_rlamm,zul-zul,15 +lre22_dev_rllya,tso-tso,15 +lre22_dev_rlzrk,eng-ens,14 +lre22_dev_rmxbg,tir-tir,14 +lre22_dev_rnrsy,tir-tir,19 +lre22_dev_rokej,xho-xho,17 +lre22_dev_rooaf,fra-ntf,17 +lre22_dev_rorob,ven-ven,15 +lre22_dev_rowwe,nbl-nbl,17 +lre22_dev_rqcuw,ara-ayl,11 +lre22_dev_rqdte,ara-ayl,10 +lre22_dev_rqpau,tso-tso,15 +lre22_dev_rquba,ven-ven,19 +lre22_dev_rrbgv,afr-afr,20 +lre22_dev_rsvjn,fra-ntf,16 +lre22_dev_rsynm,tir-tir,19 +lre22_dev_rtezn,tir-tir,19 +lre22_dev_rtkum,orm-orm,21 +lre22_dev_rturg,zul-zul,17 +lre22_dev_runwu,tir-tir,16 +lre22_dev_rvbmf,tso-tso,12 +lre22_dev_rvfls,tso-tso,16 +lre22_dev_rvhxb,ara-aeb,11 +lre22_dev_rvufk,orm-orm,20 +lre22_dev_rvzbo,ara-ayl,14 +lre22_dev_rwhfu,xho-xho,16 +lre22_dev_rwhiz,ara-ayl,10 +lre22_dev_rwimz,ven-ven,16 +lre22_dev_rwish,eng-ens,16 +lre22_dev_rwpzp,xho-xho,19 +lre22_dev_rwqlq,tir-tir,19 +lre22_dev_rwsnw,afr-afr,15 +lre22_dev_rwzwb,tso-tso,19 +lre22_dev_rxcjq,ara-arq,13 +lre22_dev_rxcka,ara-arq,14 +lre22_dev_rxgxu,tir-tir,19 
+lre22_dev_rxqxn,nbl-nbl,20 +lre22_dev_rxwip,ara-ayl,10 +lre22_dev_rycca,ven-ven,14 +lre22_dev_rydpu,eng-ens,17 +lre22_dev_ryksb,ven-ven,14 +lre22_dev_rysmu,afr-afr,23 +lre22_dev_rzisy,ara-aeb,13 +lre22_dev_rzpus,ara-arq,15 +lre22_dev_rzqyn,ara-ayl,11 +lre22_dev_rzzca,orm-orm,21 +lre22_dev_sazdy,tso-tso,15 +lre22_dev_sbkip,afr-afr,14 +lre22_dev_sbyek,ara-arq,11 +lre22_dev_scjzn,xho-xho,21 +lre22_dev_scobo,ven-ven,17 +lre22_dev_scqui,orm-orm,16 +lre22_dev_sdccf,ara-arq,14 +lre22_dev_sdcty,tso-tso,19 +lre22_dev_sdebh,ara-ayl,12 +lre22_dev_sedif,orm-orm,21 +lre22_dev_sedug,xho-xho,18 +lre22_dev_seynu,tso-tso,13 +lre22_dev_seyxt,ara-aeb,13 +lre22_dev_sezun,ara-aeb,14 +lre22_dev_sfeyl,ara-aeb,12 +lre22_dev_sfnux,afr-afr,18 +lre22_dev_sfqnk,zul-zul,15 +lre22_dev_sftvb,ara-ayl,11 +lre22_dev_sfwkd,ven-ven,17 +lre22_dev_shgbp,fra-ntf,22 +lre22_dev_shikk,tir-tir,19 +lre22_dev_shpve,afr-afr,21 +lre22_dev_sidjm,ara-ayl,10 +lre22_dev_sihvc,orm-orm,17 +lre22_dev_siiaw,ven-ven,16 +lre22_dev_sinfr,xho-xho,19 +lre22_dev_sipnk,eng-iaf,16 +lre22_dev_sjbcr,tir-tir,19 +lre22_dev_sjdzp,eng-iaf,16 +lre22_dev_sjmsx,ven-ven,19 +lre22_dev_sjsnf,afr-afr,16 +lre22_dev_sjwmd,tir-tir,19 +lre22_dev_sjxce,nbl-nbl,16 +lre22_dev_sjzcc,eng-ens,13 +lre22_dev_sjzsv,fra-ntf,22 +lre22_dev_skegk,afr-afr,18 +lre22_dev_skpib,ven-ven,14 +lre22_dev_slgub,orm-orm,18 +lre22_dev_slryu,nbl-nbl,17 +lre22_dev_slupt,ara-ayl,13 +lre22_dev_smfbl,ara-aeb,14 +lre22_dev_smfon,xho-xho,20 +lre22_dev_smvms,afr-afr,18 +lre22_dev_snegl,xho-xho,18 +lre22_dev_snvvg,tso-tso,14 +lre22_dev_sobpf,orm-orm,19 +lre22_dev_soely,eng-iaf,14 +lre22_dev_sorzd,tir-tir,19 +lre22_dev_spixz,nbl-nbl,18 +lre22_dev_spjcl,fra-ntf,17 +lre22_dev_spzra,tso-tso,17 +lre22_dev_sqaei,xho-xho,23 +lre22_dev_sqime,ven-ven,14 +lre22_dev_srgaw,eng-iaf,15 +lre22_dev_srnhq,ven-ven,16 +lre22_dev_srsng,orm-orm,21 +lre22_dev_srysc,nbl-nbl,17 +lre22_dev_srzgk,eng-ens,16 +lre22_dev_srzsi,ara-aeb,14 +lre22_dev_ssjtt,nbl-nbl,16 +lre22_dev_stajf,xho-xho,21 +lre22_dev_sttfd,ara-aeb,15 +lre22_dev_suevr,ara-aeb,15 +lre22_dev_sumum,afr-afr,18 +lre22_dev_svukm,fra-ntf,20 +lre22_dev_swkzf,tir-tir,17 +lre22_dev_sxqmv,ara-aeb,11 +lre22_dev_sxvuf,ara-aeb,11 +lre22_dev_sydqt,eng-ens,18 +lre22_dev_syooe,eng-ens,14 +lre22_dev_szpip,tir-tir,17 +lre22_dev_szsgp,fra-ntf,19 +lre22_dev_szzuj,ara-ayl,11 +lre22_dev_tabof,orm-orm,19 +lre22_dev_tavcw,ven-ven,19 +lre22_dev_tbjal,xho-xho,22 +lre22_dev_tbxzb,fra-ntf,21 +lre22_dev_tdalr,nbl-nbl,18 +lre22_dev_tdfzf,eng-iaf,17 +lre22_dev_tdlyk,tir-tir,15 +lre22_dev_tefms,fra-ntf,15 +lre22_dev_telgo,xho-xho,19 +lre22_dev_teric,eng-ens,14 +lre22_dev_tfcgx,orm-orm,21 +lre22_dev_tgiid,xho-xho,19 +lre22_dev_tgoea,ara-ayl,13 +lre22_dev_tgrrk,eng-iaf,18 +lre22_dev_tgtyv,tso-tso,12 +lre22_dev_tgzex,tso-tso,12 +lre22_dev_thone,nbl-nbl,17 +lre22_dev_thpnk,afr-afr,18 +lre22_dev_thwls,ven-ven,17 +lre22_dev_tibov,tir-tir,14 +lre22_dev_tidld,tso-tso,16 +lre22_dev_tiezu,eng-ens,17 +lre22_dev_tioqa,nbl-nbl,16 +lre22_dev_tiuym,zul-zul,15 +lre22_dev_tjivp,afr-afr,22 +lre22_dev_tjltd,orm-orm,20 +lre22_dev_tkcqj,ara-aeb,12 +lre22_dev_tkpij,tir-tir,19 +lre22_dev_tkpwp,orm-orm,19 +lre22_dev_tkyuh,tso-tso,12 +lre22_dev_tlkrm,zul-zul,19 +lre22_dev_tlspo,zul-zul,18 +lre22_dev_tmdvx,zul-zul,17 +lre22_dev_tmynp,afr-afr,20 +lre22_dev_tntmu,xho-xho,22 +lre22_dev_tnwok,orm-orm,21 +lre22_dev_toccu,eng-iaf,16 +lre22_dev_tofur,tir-tir,14 +lre22_dev_tokhl,ven-ven,21 +lre22_dev_tonkq,zul-zul,15 +lre22_dev_topxu,zul-zul,14 +lre22_dev_touna,ara-arq,15 +lre22_dev_towvr,tso-tso,12 
+lre22_dev_tpasn,tir-tir,15 +lre22_dev_tpmen,ara-ayl,10 +lre22_dev_tpuws,tir-tir,19 +lre22_dev_tqbqi,xho-xho,17 +lre22_dev_tqtfo,tso-tso,17 +lre22_dev_traqh,fra-ntf,21 +lre22_dev_trdfp,ara-ayl,15 +lre22_dev_trdml,xho-xho,23 +lre22_dev_trmpg,nbl-nbl,19 +lre22_dev_tsdyg,tso-tso,19 +lre22_dev_tsvmo,ara-ayl,11 +lre22_dev_ttcul,afr-afr,19 +lre22_dev_ttrfr,ara-arq,12 +lre22_dev_tuhrp,ven-ven,14 +lre22_dev_twaba,afr-afr,15 +lre22_dev_twcnd,tir-tir,13 +lre22_dev_twtog,ven-ven,15 +lre22_dev_twvne,tir-tir,19 +lre22_dev_txcqg,orm-orm,19 +lre22_dev_txjsy,eng-ens,18 +lre22_dev_txmpu,afr-afr,19 +lre22_dev_txqde,eng-iaf,16 +lre22_dev_tyaup,eng-ens,17 +lre22_dev_tyaym,afr-afr,17 +lre22_dev_tybrl,nbl-nbl,16 +lre22_dev_tyduc,eng-ens,17 +lre22_dev_tyhsa,fra-ntf,21 +lre22_dev_tyigo,ara-ayl,11 +lre22_dev_tykte,zul-zul,18 +lre22_dev_tymil,tir-tir,16 +lre22_dev_tyofb,ven-ven,20 +lre22_dev_tysph,fra-ntf,16 +lre22_dev_tzamn,ara-aeb,11 +lre22_dev_tzrpp,ven-ven,15 +lre22_dev_tzukm,ara-aeb,12 +lre22_dev_uabum,xho-xho,19 +lre22_dev_uankd,nbl-nbl,18 +lre22_dev_uazyk,ara-ayl,14 +lre22_dev_ubdfa,eng-iaf,15 +lre22_dev_ubugi,orm-orm,22 +lre22_dev_ucetp,ven-ven,21 +lre22_dev_ucsxt,eng-ens,12 +lre22_dev_uczke,zul-zul,14 +lre22_dev_udldh,ara-arq,11 +lre22_dev_uejdk,orm-orm,17 +lre22_dev_uekog,zul-zul,17 +lre22_dev_uemql,xho-xho,16 +lre22_dev_ueovt,eng-ens,14 +lre22_dev_uesao,zul-zul,19 +lre22_dev_ueyxm,ara-ayl,13 +lre22_dev_ufafi,tir-tir,17 +lre22_dev_ufaig,tso-tso,12 +lre22_dev_uffpc,ara-arq,14 +lre22_dev_ufrmg,ven-ven,20 +lre22_dev_ugieb,ara-aeb,12 +lre22_dev_ugoiy,ara-ayl,10 +lre22_dev_ugzkq,ara-aeb,12 +lre22_dev_uhdrj,xho-xho,18 +lre22_dev_uhjdn,ara-ayl,16 +lre22_dev_uhkcq,ara-ayl,11 +lre22_dev_uhrjo,ara-aeb,13 +lre22_dev_uhrow,afr-afr,16 +lre22_dev_uikqm,ara-arq,12 +lre22_dev_uitct,eng-ens,13 +lre22_dev_uitqu,ara-ayl,12 +lre22_dev_ujiby,eng-ens,18 +lre22_dev_ujmtl,orm-orm,22 +lre22_dev_ukdpu,ven-ven,17 +lre22_dev_ukfpb,xho-xho,19 +lre22_dev_ukklw,fra-ntf,22 +lre22_dev_ukwjy,xho-xho,17 +lre22_dev_uljbx,fra-ntf,20 +lre22_dev_uljgh,tir-tir,13 +lre22_dev_uljvo,fra-ntf,21 +lre22_dev_undfd,orm-orm,20 +lre22_dev_unmiu,ara-arq,14 +lre22_dev_updar,nbl-nbl,17 +lre22_dev_uprkv,eng-iaf,16 +lre22_dev_urkok,ara-ayl,11 +lre22_dev_urolj,orm-orm,22 +lre22_dev_uscpv,eng-ens,14 +lre22_dev_ushtk,fra-ntf,20 +lre22_dev_usiey,ven-ven,19 +lre22_dev_usitw,ara-arq,14 +lre22_dev_utkxp,nbl-nbl,19 +lre22_dev_utnvo,tir-tir,16 +lre22_dev_utyjg,tso-tso,18 +lre22_dev_uuwaa,ara-arq,12 +lre22_dev_uuxla,eng-iaf,15 +lre22_dev_uuzuj,ara-arq,14 +lre22_dev_uvcxs,eng-ens,12 +lre22_dev_uveah,ven-ven,17 +lre22_dev_uvfqy,ara-arq,13 +lre22_dev_uvnhb,fra-ntf,20 +lre22_dev_uvqbm,afr-afr,19 +lre22_dev_uvsus,zul-zul,15 +lre22_dev_uvyev,fra-ntf,20 +lre22_dev_uwicd,tso-tso,12 +lre22_dev_uwnlz,zul-zul,18 +lre22_dev_uwwyj,afr-afr,20 +lre22_dev_uwyxc,eng-iaf,17 +lre22_dev_uxjzh,xho-xho,21 +lre22_dev_uxpyg,tso-tso,15 +lre22_dev_uxrxr,tso-tso,12 +lre22_dev_uyciz,eng-ens,14 +lre22_dev_uycza,xho-xho,17 +lre22_dev_uyvyb,eng-ens,17 +lre22_dev_uziar,zul-zul,15 +lre22_dev_uzlxd,fra-ntf,22 +lre22_dev_uznjr,tir-tir,13 +lre22_dev_vagda,ara-ayl,12 +lre22_dev_vanjm,ven-ven,18 +lre22_dev_vaqia,tir-tir,19 +lre22_dev_vasjz,ara-arq,11 +lre22_dev_vcexs,tir-tir,17 +lre22_dev_vchpm,fra-ntf,21 +lre22_dev_vctsa,nbl-nbl,19 +lre22_dev_vcxit,ven-ven,15 +lre22_dev_vcyqv,xho-xho,19 +lre22_dev_vdjlh,afr-afr,22 +lre22_dev_vdogx,ven-ven,15 +lre22_dev_veutb,eng-ens,16 +lre22_dev_vezrd,tso-tso,12 +lre22_dev_vfbfg,tso-tso,12 +lre22_dev_vffqd,orm-orm,21 +lre22_dev_vfhum,afr-afr,16 
+lre22_dev_vfjtw,ara-arq,11 +lre22_dev_vfnjb,eng-ens,15 +lre22_dev_vgbbh,ara-arq,13 +lre22_dev_vgcao,eng-iaf,20 +lre22_dev_vgpnk,xho-xho,19 +lre22_dev_vityk,zul-zul,18 +lre22_dev_vjeuy,tir-tir,19 +lre22_dev_vjltt,zul-zul,17 +lre22_dev_vjqrm,tir-tir,13 +lre22_dev_vjvbs,tso-tso,18 +lre22_dev_vlcbq,tso-tso,16 +lre22_dev_vlnlb,tso-tso,13 +lre22_dev_vlscu,ara-ayl,15 +lre22_dev_vlwhz,fra-ntf,22 +lre22_dev_vlyeh,tso-tso,16 +lre22_dev_vmnps,zul-zul,14 +lre22_dev_vmqxk,tso-tso,18 +lre22_dev_vmrez,ven-ven,18 +lre22_dev_vmsnh,ara-aeb,11 +lre22_dev_vmuti,ara-aeb,14 +lre22_dev_vncre,afr-afr,22 +lre22_dev_vnkqv,afr-afr,15 +lre22_dev_vnmlt,zul-zul,18 +lre22_dev_vpkra,ara-ayl,11 +lre22_dev_vpoit,ara-arq,14 +lre22_dev_vpruu,orm-orm,23 +lre22_dev_vptiv,tir-tir,18 +lre22_dev_vqhcn,tso-tso,16 +lre22_dev_vqura,tir-tir,16 +lre22_dev_vrqfs,xho-xho,23 +lre22_dev_vrvtr,zul-zul,15 +lre22_dev_vrxvj,fra-ntf,17 +lre22_dev_vsbay,eng-iaf,19 +lre22_dev_vsbvi,fra-ntf,19 +lre22_dev_vslkb,eng-ens,12 +lre22_dev_vsrdg,tso-tso,12 +lre22_dev_vsrnz,zul-zul,14 +lre22_dev_vsryb,nbl-nbl,19 +lre22_dev_vtlab,zul-zul,19 +lre22_dev_vtrff,eng-iaf,17 +lre22_dev_vtztf,ara-aeb,11 +lre22_dev_vucth,eng-ens,14 +lre22_dev_vucug,orm-orm,21 +lre22_dev_vufuu,eng-ens,18 +lre22_dev_vujbs,zul-zul,19 +lre22_dev_vuufm,afr-afr,19 +lre22_dev_vvgdf,eng-ens,18 +lre22_dev_vvlcx,ara-aeb,12 +lre22_dev_vvvho,tir-tir,18 +lre22_dev_vwait,eng-iaf,14 +lre22_dev_vwdcw,ara-arq,14 +lre22_dev_vwyzq,ara-arq,14 +lre22_dev_vwzon,eng-ens,12 +lre22_dev_vxhoc,ara-aeb,11 +lre22_dev_vxkgz,ven-ven,18 +lre22_dev_vxlgl,tir-tir,18 +lre22_dev_vxsqt,eng-ens,15 +lre22_dev_vyqsd,nbl-nbl,17 +lre22_dev_vzcai,zul-zul,19 +lre22_dev_vzgoj,eng-iaf,14 +lre22_dev_vzlon,zul-zul,16 +lre22_dev_vznrg,nbl-nbl,16 +lre22_dev_vzqme,xho-xho,19 +lre22_dev_wabqx,ven-ven,18 +lre22_dev_wafdh,fra-ntf,21 +lre22_dev_wagmt,eng-iaf,18 +lre22_dev_waocz,ven-ven,20 +lre22_dev_wavrh,zul-zul,16 +lre22_dev_wawqg,ara-ayl,13 +lre22_dev_waznj,nbl-nbl,22 +lre22_dev_wbepu,fra-ntf,19 +lre22_dev_wbygw,eng-ens,16 +lre22_dev_wccgz,tso-tso,17 +lre22_dev_wcpwx,tir-tir,18 +lre22_dev_wczkn,eng-iaf,17 +lre22_dev_wdfmt,tir-tir,17 +lre22_dev_wdgbh,ara-arq,12 +lre22_dev_wdind,tso-tso,19 +lre22_dev_wdkit,nbl-nbl,16 +lre22_dev_wdmpt,eng-ens,17 +lre22_dev_wdpya,nbl-nbl,16 +lre22_dev_wdrxo,orm-orm,21 +lre22_dev_wdyiy,ara-ayl,13 +lre22_dev_weccy,afr-afr,15 +lre22_dev_wfmco,ara-arq,14 +lre22_dev_wfnon,nbl-nbl,17 +lre22_dev_wgdui,eng-iaf,14 +lre22_dev_wgkmr,eng-iaf,17 +lre22_dev_wgnex,tir-tir,19 +lre22_dev_wgucy,eng-iaf,18 +lre22_dev_wgwdn,eng-iaf,17 +lre22_dev_whqhx,eng-iaf,15 +lre22_dev_whxwv,eng-ens,14 +lre22_dev_witnq,fra-ntf,17 +lre22_dev_wixzu,tso-tso,16 +lre22_dev_wjhbw,eng-iaf,16 +lre22_dev_wjist,orm-orm,16 +lre22_dev_wjnhh,zul-zul,19 +lre22_dev_wjnyo,ven-ven,20 +lre22_dev_wjtnm,orm-orm,19 +lre22_dev_wjzhz,ara-aeb,13 +lre22_dev_wkacx,eng-iaf,15 +lre22_dev_wkqey,fra-ntf,16 +lre22_dev_wldli,zul-zul,14 +lre22_dev_wlnst,nbl-nbl,16 +lre22_dev_wltvq,zul-zul,17 +lre22_dev_wlwhq,orm-orm,19 +lre22_dev_wmdan,xho-xho,21 +lre22_dev_wmfce,nbl-nbl,20 +lre22_dev_wmigl,ven-ven,20 +lre22_dev_wmwmc,eng-iaf,19 +lre22_dev_wmypk,xho-xho,19 +lre22_dev_wmzpv,eng-ens,17 +lre22_dev_wnjpz,ven-ven,19 +lre22_dev_wnmkt,orm-orm,23 +lre22_dev_wnpep,nbl-nbl,16 +lre22_dev_wnqhz,nbl-nbl,16 +lre22_dev_wnxpz,ven-ven,15 +lre22_dev_wnxrw,ven-ven,18 +lre22_dev_woawg,ven-ven,18 +lre22_dev_wobzv,eng-ens,14 +lre22_dev_wocbv,tso-tso,18 +lre22_dev_woerb,fra-ntf,21 +lre22_dev_wojrt,orm-orm,19 +lre22_dev_wosus,tir-tir,17 +lre22_dev_wozuc,xho-xho,19 
+lre22_dev_wqcyu,tso-tso,15 +lre22_dev_wqfuv,eng-ens,17 +lre22_dev_wqhag,zul-zul,19 +lre22_dev_wqmsd,tir-tir,13 +lre22_dev_wqthl,ara-aeb,12 +lre22_dev_wqtvm,eng-ens,15 +lre22_dev_wrmnw,zul-zul,18 +lre22_dev_wrtec,zul-zul,17 +lre22_dev_wrvls,zul-zul,14 +lre22_dev_wscfs,nbl-nbl,16 +lre22_dev_wssqw,eng-ens,15 +lre22_dev_wtbdf,tir-tir,14 +lre22_dev_wtcpe,ara-aeb,11 +lre22_dev_wthrk,orm-orm,18 +lre22_dev_wtofd,eng-iaf,20 +lre22_dev_wtuol,tso-tso,18 +lre22_dev_wuqez,ara-aeb,11 +lre22_dev_wuquc,tir-tir,18 +lre22_dev_wvlde,tso-tso,13 +lre22_dev_wwbmg,ara-aeb,11 +lre22_dev_wwduf,fra-ntf,18 +lre22_dev_wwvuw,ara-arq,13 +lre22_dev_wxaev,orm-orm,17 +lre22_dev_wycsj,ven-ven,18 +lre22_dev_wypwj,ara-ayl,10 +lre22_dev_wytpq,fra-ntf,17 +lre22_dev_wzhqk,xho-xho,22 +lre22_dev_wzpmq,eng-ens,12 +lre22_dev_wztdj,zul-zul,19 +lre22_dev_wzxgv,ven-ven,18 +lre22_dev_xacjk,fra-ntf,18 +lre22_dev_xaevp,tir-tir,14 +lre22_dev_xaldr,eng-iaf,14 +lre22_dev_xapdy,ara-aeb,12 +lre22_dev_xaurw,nbl-nbl,16 +lre22_dev_xawdd,tir-tir,20 +lre22_dev_xbcpb,ara-arq,12 +lre22_dev_xbfrs,ven-ven,17 +lre22_dev_xbqsr,nbl-nbl,22 +lre22_dev_xbvcc,nbl-nbl,17 +lre22_dev_xbvqw,orm-orm,23 +lre22_dev_xcame,xho-xho,16 +lre22_dev_xcrnp,ara-aeb,13 +lre22_dev_xcswu,ven-ven,18 +lre22_dev_xcuok,orm-orm,21 +lre22_dev_xcvkj,tso-tso,16 +lre22_dev_xdtdp,fra-ntf,17 +lre22_dev_xdyea,ara-ayl,10 +lre22_dev_xerqi,fra-ntf,17 +lre22_dev_xetdb,eng-ens,14 +lre22_dev_xfecy,nbl-nbl,16 +lre22_dev_xfgcu,eng-iaf,19 +lre22_dev_xfing,tir-tir,20 +lre22_dev_xgaig,ara-aeb,15 +lre22_dev_xgoyq,eng-ens,18 +lre22_dev_xhdtx,eng-iaf,14 +lre22_dev_xhvkx,orm-orm,19 +lre22_dev_xiblr,tir-tir,17 +lre22_dev_xifty,ara-aeb,12 +lre22_dev_xigtx,ara-arq,14 +lre22_dev_xijus,tso-tso,14 +lre22_dev_xipox,xho-xho,20 +lre22_dev_xittq,ara-aeb,13 +lre22_dev_xjpwq,ara-ayl,15 +lre22_dev_xjrla,afr-afr,20 +lre22_dev_xkdof,ara-ayl,13 +lre22_dev_xkiba,eng-ens,18 +lre22_dev_xlcxh,fra-ntf,18 +lre22_dev_xlsxb,tso-tso,16 +lre22_dev_xmhpj,ven-ven,20 +lre22_dev_xnqct,ara-arq,11 +lre22_dev_xoayi,eng-ens,13 +lre22_dev_xohps,ara-arq,11 +lre22_dev_xokpn,zul-zul,18 +lre22_dev_xonym,eng-ens,14 +lre22_dev_xozod,afr-afr,14 +lre22_dev_xpenp,ara-arq,11 +lre22_dev_xpnti,ara-aeb,11 +lre22_dev_xpqyr,orm-orm,22 +lre22_dev_xpswt,orm-orm,23 +lre22_dev_xpumn,ven-ven,14 +lre22_dev_xpvcf,orm-orm,20 +lre22_dev_xqhoa,ara-ayl,13 +lre22_dev_xqnpt,orm-orm,22 +lre22_dev_xqooi,xho-xho,20 +lre22_dev_xqupu,fra-ntf,21 +lre22_dev_xresy,eng-iaf,17 +lre22_dev_xrouj,ara-ayl,16 +lre22_dev_xsnxu,ara-aeb,12 +lre22_dev_xtaof,ara-ayl,13 +lre22_dev_xtbxk,orm-orm,20 +lre22_dev_xtgak,nbl-nbl,20 +lre22_dev_xuauh,ara-aeb,13 +lre22_dev_xubei,eng-iaf,17 +lre22_dev_xubol,ara-aeb,11 +lre22_dev_xuieb,orm-orm,19 +lre22_dev_xunxs,ara-ayl,14 +lre22_dev_xutjo,nbl-nbl,20 +lre22_dev_xvbos,afr-afr,22 +lre22_dev_xvcfn,eng-ens,16 +lre22_dev_xvgqo,eng-ens,12 +lre22_dev_xwemk,zul-zul,18 +lre22_dev_xwsyq,ara-ayl,14 +lre22_dev_xxdbg,tso-tso,18 +lre22_dev_xyoua,fra-ntf,22 +lre22_dev_xzoej,ara-aeb,13 +lre22_dev_xzrdl,ara-arq,13 +lre22_dev_xztsz,tso-tso,16 +lre22_dev_xzxbd,zul-zul,15 +lre22_dev_yagvv,tso-tso,13 +lre22_dev_ybqju,tso-tso,13 +lre22_dev_ybrji,ara-arq,11 +lre22_dev_ybsmy,ven-ven,21 +lre22_dev_ycbaf,ara-aeb,14 +lre22_dev_ychsm,ven-ven,14 +lre22_dev_ycrlj,xho-xho,17 +lre22_dev_ycuhc,orm-orm,21 +lre22_dev_ydhqc,ara-arq,13 +lre22_dev_ydmnb,nbl-nbl,17 +lre22_dev_yduem,xho-xho,21 +lre22_dev_yemzu,ara-aeb,11 +lre22_dev_yeoyx,eng-ens,18 +lre22_dev_yersp,ara-ayl,13 +lre22_dev_yeshv,eng-iaf,17 +lre22_dev_yexec,ven-ven,20 +lre22_dev_yeyna,ara-ayl,14 
+lre22_dev_yfxmd,ara-arq,14 +lre22_dev_yfzah,ara-arq,14 +lre22_dev_ygkvo,ara-arq,11 +lre22_dev_yhgvr,ara-arq,15 +lre22_dev_yhwin,ara-arq,12 +lre22_dev_yirig,ara-ayl,16 +lre22_dev_yixgu,xho-xho,16 +lre22_dev_yjbfl,xho-xho,19 +lre22_dev_yjodc,eng-ens,14 +lre22_dev_yjoht,ara-aeb,12 +lre22_dev_yjqkb,ara-arq,14 +lre22_dev_yjrkq,ara-arq,15 +lre22_dev_yjrng,afr-afr,16 +lre22_dev_ykpzq,afr-afr,21 +lre22_dev_yktop,eng-iaf,20 +lre22_dev_ylfah,zul-zul,15 +lre22_dev_ylgex,tso-tso,14 +lre22_dev_ylkds,nbl-nbl,17 +lre22_dev_ylvyc,xho-xho,20 +lre22_dev_ylzic,eng-iaf,20 +lre22_dev_ymoon,afr-afr,17 +lre22_dev_yncqr,ara-arq,13 +lre22_dev_ynjtn,ven-ven,18 +lre22_dev_ynmzy,tso-tso,16 +lre22_dev_ynozi,fra-ntf,21 +lre22_dev_yntec,orm-orm,19 +lre22_dev_ynurl,tso-tso,14 +lre22_dev_ypdtt,ara-aeb,11 +lre22_dev_yprom,tso-tso,13 +lre22_dev_yptsk,xho-xho,23 +lre22_dev_ypyft,eng-iaf,14 +lre22_dev_yqhwt,orm-orm,23 +lre22_dev_yqtxe,eng-iaf,19 +lre22_dev_yquja,ara-ayl,10 +lre22_dev_yqxhl,eng-ens,14 +lre22_dev_yqyby,nbl-nbl,18 +lre22_dev_yqzua,fra-ntf,16 +lre22_dev_yrfxo,ven-ven,21 +lre22_dev_yrgzf,ara-aeb,13 +lre22_dev_yruqe,tso-tso,17 +lre22_dev_yrwgb,zul-zul,18 +lre22_dev_yrxsi,orm-orm,21 +lre22_dev_ysdkl,tso-tso,15 +lre22_dev_ytgav,xho-xho,16 +lre22_dev_ytoet,ara-arq,14 +lre22_dev_yuabg,eng-ens,16 +lre22_dev_yundm,tso-tso,14 +lre22_dev_yuvux,ara-ayl,13 +lre22_dev_yvdcv,fra-ntf,21 +lre22_dev_yvoli,orm-orm,23 +lre22_dev_yweox,orm-orm,21 +lre22_dev_ywgoc,eng-iaf,19 +lre22_dev_ywoyx,ven-ven,18 +lre22_dev_ywxql,zul-zul,19 +lre22_dev_yxkyl,eng-iaf,15 +lre22_dev_yxtmn,ara-aeb,14 +lre22_dev_yycsn,ara-ayl,12 +lre22_dev_yyswd,eng-iaf,16 +lre22_dev_yyugr,ven-ven,21 +lre22_dev_yzitu,orm-orm,20 +lre22_dev_yzwmi,eng-ens,16 +lre22_dev_yzzww,zul-zul,17 +lre22_dev_zabub,ara-ayl,16 +lre22_dev_zabuv,eng-iaf,14 +lre22_dev_zacuc,zul-zul,19 +lre22_dev_zavru,zul-zul,19 +lre22_dev_zbfgy,ara-arq,12 +lre22_dev_zbjez,nbl-nbl,17 +lre22_dev_zbtpo,ven-ven,18 +lre22_dev_zbzip,tso-tso,19 +lre22_dev_zcevz,nbl-nbl,16 +lre22_dev_zcnsv,afr-afr,21 +lre22_dev_zcqkl,eng-iaf,20 +lre22_dev_zczer,ven-ven,14 +lre22_dev_zdcdt,nbl-nbl,18 +lre22_dev_zddua,xho-xho,19 +lre22_dev_zdvsh,ara-arq,14 +lre22_dev_zdwxx,ara-ayl,14 +lre22_dev_zdyxi,tir-tir,14 +lre22_dev_zetju,eng-iaf,17 +lre22_dev_zfsek,ara-arq,11 +lre22_dev_zfvfa,eng-ens,18 +lre22_dev_zggiu,zul-zul,19 +lre22_dev_zgndz,tso-tso,14 +lre22_dev_zgxth,eng-ens,16 +lre22_dev_zhlxa,ara-ayl,14 +lre22_dev_zhnsb,ara-ayl,15 +lre22_dev_zhsmo,ara-aeb,13 +lre22_dev_zhvbf,xho-xho,18 +lre22_dev_zhzrh,eng-iaf,15 +lre22_dev_ziigd,orm-orm,21 +lre22_dev_zilud,tir-tir,19 +lre22_dev_zjivp,zul-zul,19 +lre22_dev_zjleg,zul-zul,19 +lre22_dev_zjquq,orm-orm,16 +lre22_dev_zkgjo,nbl-nbl,22 +lre22_dev_zkhes,fra-ntf,16 +lre22_dev_zkioq,ara-aeb,12 +lre22_dev_zkwaw,afr-afr,21 +lre22_dev_zlapc,ara-ayl,13 +lre22_dev_zlntm,zul-zul,19 +lre22_dev_zmmyn,xho-xho,23 +lre22_dev_zmxld,ven-ven,17 +lre22_dev_znhcf,ven-ven,21 +lre22_dev_znwsk,afr-afr,22 +lre22_dev_znxvg,eng-ens,18 +lre22_dev_znycz,ara-aeb,13 +lre22_dev_zoayx,zul-zul,18 +lre22_dev_zogte,nbl-nbl,16 +lre22_dev_zoldl,ara-aeb,12 +lre22_dev_zoqzl,eng-ens,17 +lre22_dev_zorfv,eng-iaf,16 +lre22_dev_zoseh,ara-arq,12 +lre22_dev_zpotb,xho-xho,16 +lre22_dev_zptbg,tir-tir,14 +lre22_dev_zqjzi,ara-aeb,11 +lre22_dev_zqljj,ara-aeb,14 +lre22_dev_zqlri,orm-orm,18 +lre22_dev_zqoif,zul-zul,19 +lre22_dev_zqorv,ara-aeb,12 +lre22_dev_zqwgs,fra-ntf,18 +lre22_dev_zrhbt,tir-tir,19 +lre22_dev_zrqar,ara-aeb,13 +lre22_dev_zrqec,eng-iaf,17 +lre22_dev_ztdrx,fra-ntf,15 +lre22_dev_ztdwr,orm-orm,17 
+lre22_dev_zthiv,ara-arq,15 +lre22_dev_ztknh,xho-xho,18 +lre22_dev_ztlcq,ara-aeb,13 +lre22_dev_ztufj,fra-ntf,19 +lre22_dev_zubjl,fra-ntf,20 +lre22_dev_zunuw,tso-tso,17 +lre22_dev_zutul,tir-tir,13 +lre22_dev_zutvv,eng-ens,12 +lre22_dev_zuugc,eng-iaf,17 +lre22_dev_zuvqx,eng-iaf,14 +lre22_dev_zvthu,orm-orm,20 +lre22_dev_zvvov,ara-aeb,11 +lre22_dev_zvyuh,ara-arq,14 +lre22_dev_zwfqq,eng-iaf,17 +lre22_dev_zwosr,xho-xho,16 +lre22_dev_zwvhw,tso-tso,12 +lre22_dev_zxihz,ven-ven,14 +lre22_dev_zydma,eng-ens,12 +lre22_dev_zyqlz,zul-zul,19 +lre22_dev_zyyie,orm-orm,23 +lre22_dev_zyywo,eng-iaf,14 +lre22_dev_zzyze,ara-ayl,12 diff --git a/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_1/train_segments.csv b/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_1/train_segments.csv new file mode 100644 index 00000000..6518f24e --- /dev/null +++ b/egs/lre22/fixed.v1.8k/resources/dev_splits/fold_1/train_segments.csv @@ -0,0 +1,2114 @@ +id,class_id,subclass_idx +lre22_dev_aadaq,afr-afr,5 +lre22_dev_aaxdt,xho-xho,14 +lre22_dev_abujj,xho-xho,15 +lre22_dev_acgiu,zul-zul,6 +lre22_dev_acnyv,ven-ven,7 +lre22_dev_adbku,ara-ayl,4 +lre22_dev_ademr,orm-orm,3 +lre22_dev_adgoy,xho-xho,4 +lre22_dev_adnpi,eng-ens,1 +lre22_dev_adqaa,ven-ven,10 +lre22_dev_adwzf,zul-zul,2 +lre22_dev_aeiuj,afr-afr,4 +lre22_dev_afhui,eng-ens,4 +lre22_dev_afuav,nbl-nbl,15 +lre22_dev_afvvg,ven-ven,10 +lre22_dev_afxjf,eng-iaf,10 +lre22_dev_agmwb,ara-aeb,10 +lre22_dev_agnik,eng-ens,3 +lre22_dev_ahcja,orm-orm,14 +lre22_dev_ahobp,afr-afr,13 +lre22_dev_ahupk,eng-ens,11 +lre22_dev_aicjg,xho-xho,12 +lre22_dev_aikrz,eng-ens,9 +lre22_dev_ailwo,orm-orm,7 +lre22_dev_aiqhl,tir-tir,10 +lre22_dev_aiuwf,ara-ayl,5 +lre22_dev_aizyr,ara-arq,0 +lre22_dev_ajbui,zul-zul,12 +lre22_dev_ajigk,ara-aeb,10 +lre22_dev_ajuwq,ara-ayl,3 +lre22_dev_akbly,nbl-nbl,3 +lre22_dev_akhwr,xho-xho,6 +lre22_dev_aksxd,nbl-nbl,6 +lre22_dev_aktcg,afr-afr,1 +lre22_dev_aktzw,eng-ens,11 +lre22_dev_akulq,orm-orm,14 +lre22_dev_alcie,orm-orm,11 +lre22_dev_alunz,xho-xho,6 +lre22_dev_amaec,tir-tir,10 +lre22_dev_amnvo,ara-arq,6 +lre22_dev_amxrk,zul-zul,9 +lre22_dev_anmuv,tso-tso,11 +lre22_dev_aomcz,ara-aeb,7 +lre22_dev_aooht,fra-ntf,11 +lre22_dev_aprbe,ara-arq,3 +lre22_dev_apxxx,orm-orm,12 +lre22_dev_aqdwu,ven-ven,6 +lre22_dev_aqejl,xho-xho,5 +lre22_dev_aqnyy,tso-tso,5 +lre22_dev_arjuc,afr-afr,5 +lre22_dev_arrkp,tir-tir,1 +lre22_dev_atdgp,zul-zul,13 +lre22_dev_atoxn,eng-ens,10 +lre22_dev_audls,afr-afr,6 +lre22_dev_auilj,ven-ven,11 +lre22_dev_auqgt,eng-iaf,3 +lre22_dev_autlo,zul-zul,7 +lre22_dev_avait,zul-zul,3 +lre22_dev_avvik,nbl-nbl,14 +lre22_dev_awgem,ara-ayl,3 +lre22_dev_awgnb,fra-ntf,14 +lre22_dev_awvym,ara-ayl,9 +lre22_dev_axhbz,tir-tir,12 +lre22_dev_axici,tir-tir,8 +lre22_dev_axtpv,xho-xho,6 +lre22_dev_aygsz,ara-aeb,4 +lre22_dev_ayiif,ven-ven,7 +lre22_dev_azqvo,zul-zul,3 +lre22_dev_basml,eng-ens,11 +lre22_dev_bawje,tir-tir,6 +lre22_dev_bbana,zul-zul,7 +lre22_dev_bbtpz,ven-ven,5 +lre22_dev_bcbrw,eng-iaf,2 +lre22_dev_bchvx,zul-zul,9 +lre22_dev_bcllp,afr-afr,13 +lre22_dev_bcsmi,fra-ntf,6 +lre22_dev_bdqaw,ven-ven,6 +lre22_dev_bdwle,ara-arq,6 +lre22_dev_behbh,ara-ayl,4 +lre22_dev_bexda,ara-arq,6 +lre22_dev_bfbyn,ara-aeb,9 +lre22_dev_bfjgx,ara-ayl,7 +lre22_dev_bgbjo,nbl-nbl,1 +lre22_dev_bgebs,ara-ayl,5 +lre22_dev_bgnod,fra-ntf,3 +lre22_dev_bhezb,ara-ayl,7 +lre22_dev_bhyuy,afr-afr,13 +lre22_dev_bidge,tir-tir,12 +lre22_dev_bimnd,eng-ens,7 +lre22_dev_biyaj,ara-ayl,5 +lre22_dev_bjsgu,afr-afr,10 +lre22_dev_blmfp,eng-iaf,5 +lre22_dev_blohd,ven-ven,4 +lre22_dev_bmebz,ara-arq,4 +lre22_dev_bmjuo,ara-aeb,6 
+lre22_dev_bmkrm,fra-ntf,10 +lre22_dev_bmzym,zul-zul,5 +lre22_dev_bnfuu,orm-orm,13 +lre22_dev_bnilb,zul-zul,8 +lre22_dev_bnxna,eng-ens,1 +lre22_dev_boikl,orm-orm,7 +lre22_dev_boisz,ven-ven,2 +lre22_dev_boqxy,zul-zul,13 +lre22_dev_bpqhd,tso-tso,2 +lre22_dev_briiw,ara-aeb,8 +lre22_dev_brohj,fra-ntf,1 +lre22_dev_brqdv,nbl-nbl,3 +lre22_dev_brwcj,afr-afr,6 +lre22_dev_bsclv,orm-orm,8 +lre22_dev_bsdbb,ara-arq,4 +lre22_dev_bstjt,nbl-nbl,10 +lre22_dev_btbke,ara-aeb,0 +lre22_dev_btcfj,ven-ven,12 +lre22_dev_btomw,ven-ven,6 +lre22_dev_btpvy,afr-afr,1 +lre22_dev_btrtb,ara-arq,4 +lre22_dev_btruf,zul-zul,8 +lre22_dev_btsll,ara-ayl,7 +lre22_dev_butrw,ara-ayl,6 +lre22_dev_buwrj,ara-ayl,2 +lre22_dev_bvlhb,fra-ntf,8 +lre22_dev_bvmql,xho-xho,10 +lre22_dev_bvnsc,tir-tir,10 +lre22_dev_bwrej,ven-ven,9 +lre22_dev_bxial,eng-ens,2 +lre22_dev_bxnbf,fra-ntf,9 +lre22_dev_bybim,afr-afr,6 +lre22_dev_byegp,orm-orm,15 +lre22_dev_byngq,ven-ven,9 +lre22_dev_byytf,fra-ntf,6 +lre22_dev_bzies,tso-tso,3 +lre22_dev_bzipd,afr-afr,7 +lre22_dev_cacop,nbl-nbl,5 +lre22_dev_caent,afr-afr,12 +lre22_dev_capsb,ven-ven,0 +lre22_dev_cawbw,orm-orm,12 +lre22_dev_cblep,ven-ven,3 +lre22_dev_cblig,fra-ntf,6 +lre22_dev_ccexy,ven-ven,7 +lre22_dev_ccsye,ara-aeb,8 +lre22_dev_cctyt,eng-iaf,11 +lre22_dev_ccuie,eng-ens,7 +lre22_dev_ccvzf,eng-iaf,1 +lre22_dev_cdlkq,tso-tso,8 +lre22_dev_cdtiu,ara-ayl,9 +lre22_dev_cemyb,tir-tir,12 +lre22_dev_ceprg,eng-iaf,9 +lre22_dev_ceqow,nbl-nbl,15 +lre22_dev_cfdsu,fra-ntf,7 +lre22_dev_cfhbm,ven-ven,3 +lre22_dev_cfsew,afr-afr,12 +lre22_dev_cgges,eng-iaf,11 +lre22_dev_cgjnr,eng-iaf,10 +lre22_dev_cgotg,eng-ens,11 +lre22_dev_cgovb,nbl-nbl,15 +lre22_dev_cgssg,tir-tir,7 +lre22_dev_chhsl,tir-tir,7 +lre22_dev_chjuh,nbl-nbl,9 +lre22_dev_chpoe,nbl-nbl,11 +lre22_dev_chtgu,ara-aeb,10 +lre22_dev_chtlt,eng-iaf,10 +lre22_dev_cigir,eng-ens,9 +lre22_dev_ciyeh,ara-ayl,2 +lre22_dev_cjswm,orm-orm,12 +lre22_dev_cjtdl,ven-ven,13 +lre22_dev_ckzie,ara-aeb,10 +lre22_dev_cldfc,ara-ayl,8 +lre22_dev_clxqz,ara-arq,9 +lre22_dev_cmahj,afr-afr,13 +lre22_dev_cmqxm,tir-tir,6 +lre22_dev_cmrdt,afr-afr,5 +lre22_dev_cmvpq,ara-ayl,2 +lre22_dev_cnbfw,eng-iaf,5 +lre22_dev_cnbvd,afr-afr,9 +lre22_dev_cnomp,orm-orm,15 +lre22_dev_cnrvj,xho-xho,11 +lre22_dev_cnszu,ara-ayl,4 +lre22_dev_cnudd,xho-xho,14 +lre22_dev_cnuoi,orm-orm,14 +lre22_dev_cnxjs,orm-orm,8 +lre22_dev_coarm,xho-xho,4 +lre22_dev_cocyn,zul-zul,6 +lre22_dev_colxc,zul-zul,13 +lre22_dev_cosfn,ara-aeb,10 +lre22_dev_cosgu,ara-ayl,7 +lre22_dev_cpjab,ara-aeb,10 +lre22_dev_cpple,tso-tso,6 +lre22_dev_cqhjy,ara-ayl,3 +lre22_dev_cqkmy,ara-aeb,10 +lre22_dev_cqukb,tso-tso,9 +lre22_dev_cqusc,orm-orm,6 +lre22_dev_cqyzf,fra-ntf,13 +lre22_dev_crcwu,xho-xho,12 +lre22_dev_crqjz,nbl-nbl,10 +lre22_dev_crtpm,ara-arq,5 +lre22_dev_crucu,tir-tir,6 +lre22_dev_crvby,eng-iaf,12 +lre22_dev_crvoh,eng-ens,7 +lre22_dev_csjxv,ara-arq,3 +lre22_dev_ctfiv,ara-aeb,5 +lre22_dev_ctgpr,ven-ven,12 +lre22_dev_ctlrz,tir-tir,8 +lre22_dev_ctzhm,zul-zul,6 +lre22_dev_cudew,ven-ven,8 +lre22_dev_cusin,ara-arq,10 +lre22_dev_cvaad,eng-iaf,5 +lre22_dev_cvedm,zul-zul,12 +lre22_dev_cvgfx,eng-iaf,8 +lre22_dev_cvujh,ara-ayl,2 +lre22_dev_cweil,ara-aeb,10 +lre22_dev_cweuh,eng-ens,7 +lre22_dev_cwiro,afr-afr,6 +lre22_dev_cwtby,ara-arq,7 +lre22_dev_cxggy,afr-afr,4 +lre22_dev_cxnqr,tso-tso,7 +lre22_dev_cxpan,nbl-nbl,14 +lre22_dev_cxsxl,ara-aeb,10 +lre22_dev_cxyti,tso-tso,8 +lre22_dev_cypcg,zul-zul,12 +lre22_dev_czcmz,zul-zul,10 +lre22_dev_czdzw,orm-orm,7 +lre22_dev_czppj,zul-zul,10 +lre22_dev_czxff,zul-zul,9 +lre22_dev_czxld,fra-ntf,9 
+lre22_dev_dajnt,zul-zul,12 +lre22_dev_dbcxi,orm-orm,9 +lre22_dev_dbdbv,tso-tso,8 +lre22_dev_dbdwv,orm-orm,5 +lre22_dev_dbgof,nbl-nbl,15 +lre22_dev_dblhh,eng-iaf,0 +lre22_dev_dbljb,xho-xho,12 +lre22_dev_dcibg,eng-iaf,4 +lre22_dev_dcobk,ara-arq,8 +lre22_dev_dcvcu,afr-afr,4 +lre22_dev_dcvyc,fra-ntf,14 +lre22_dev_ddfeo,ara-ayl,5 +lre22_dev_ddhaq,zul-zul,10 +lre22_dev_ddhes,afr-afr,6 +lre22_dev_ddsds,afr-afr,12 +lre22_dev_ddxvn,ven-ven,5 +lre22_dev_dfdrs,ven-ven,7 +lre22_dev_dfifl,ara-ayl,9 +lre22_dev_dfjek,ven-ven,4 +lre22_dev_dflco,zul-zul,12 +lre22_dev_dftta,tso-tso,6 +lre22_dev_dfxnq,eng-ens,11 +lre22_dev_dgjdi,orm-orm,8 +lre22_dev_dgqwo,tir-tir,9 +lre22_dev_dhapq,ara-aeb,8 +lre22_dev_dhdfk,eng-ens,8 +lre22_dev_dhfjj,ara-arq,4 +lre22_dev_dhlxh,ara-aeb,4 +lre22_dev_dhnne,eng-ens,10 +lre22_dev_dhtlz,eng-ens,6 +lre22_dev_diarz,ara-ayl,2 +lre22_dev_diggg,tir-tir,9 +lre22_dev_diqtw,ara-aeb,8 +lre22_dev_dixuw,orm-orm,9 +lre22_dev_diypf,orm-orm,13 +lre22_dev_djzsk,nbl-nbl,13 +lre22_dev_dksey,nbl-nbl,11 +lre22_dev_dlzwh,fra-ntf,12 +lre22_dev_dmdpv,eng-ens,2 +lre22_dev_dmeea,orm-orm,14 +lre22_dev_dmhdv,xho-xho,10 +lre22_dev_dmics,fra-ntf,14 +lre22_dev_dmiiu,ara-aeb,6 +lre22_dev_dmjxr,xho-xho,10 +lre22_dev_dmzxn,afr-afr,4 +lre22_dev_dngtw,ara-ayl,3 +lre22_dev_dnjdq,eng-ens,7 +lre22_dev_dnprz,zul-zul,12 +lre22_dev_dobdj,fra-ntf,0 +lre22_dev_dobwk,orm-orm,8 +lre22_dev_donqm,ara-arq,3 +lre22_dev_dpbyt,tso-tso,6 +lre22_dev_dpfns,ara-aeb,4 +lre22_dev_dpjjp,fra-ntf,7 +lre22_dev_dpomx,eng-iaf,5 +lre22_dev_dpwhs,eng-ens,8 +lre22_dev_dpygj,eng-iaf,8 +lre22_dev_dqzex,xho-xho,3 +lre22_dev_drcqx,eng-iaf,7 +lre22_dev_drfhb,ara-aeb,10 +lre22_dev_drfte,ara-arq,8 +lre22_dev_driks,eng-ens,11 +lre22_dev_drofs,fra-ntf,1 +lre22_dev_dslxl,ara-ayl,7 +lre22_dev_dsmwd,ven-ven,13 +lre22_dev_dsyyk,tir-tir,9 +lre22_dev_dthcb,zul-zul,12 +lre22_dev_dtumd,fra-ntf,5 +lre22_dev_dtwmj,afr-afr,7 +lre22_dev_duegm,tso-tso,9 +lre22_dev_dvirs,afr-afr,6 +lre22_dev_dvtzf,eng-iaf,7 +lre22_dev_dwcfi,ven-ven,7 +lre22_dev_dwfle,fra-ntf,7 +lre22_dev_dwgsv,tir-tir,6 +lre22_dev_dwlay,ara-arq,3 +lre22_dev_dwnit,xho-xho,15 +lre22_dev_dwvoh,tso-tso,6 +lre22_dev_dxgpq,afr-afr,12 +lre22_dev_dxhpf,ara-ayl,9 +lre22_dev_dxlhq,ara-arq,5 +lre22_dev_dxrcj,zul-zul,5 +lre22_dev_dywox,tir-tir,9 +lre22_dev_dzjrv,eng-iaf,8 +lre22_dev_dzsql,tso-tso,6 +lre22_dev_dzxkv,orm-orm,13 +lre22_dev_eabne,xho-xho,2 +lre22_dev_eacdl,fra-ntf,14 +lre22_dev_eaupg,eng-iaf,11 +lre22_dev_eawug,eng-iaf,6 +lre22_dev_ebbgx,nbl-nbl,15 +lre22_dev_ecber,afr-afr,10 +lre22_dev_ecdgv,ara-arq,5 +lre22_dev_ecneb,afr-afr,6 +lre22_dev_ecxrr,tir-tir,9 +lre22_dev_edldw,tir-tir,10 +lre22_dev_edofc,afr-afr,6 +lre22_dev_edvaf,xho-xho,13 +lre22_dev_edydw,eng-ens,5 +lre22_dev_eejtn,zul-zul,4 +lre22_dev_eekzc,fra-ntf,4 +lre22_dev_eenhx,eng-iaf,9 +lre22_dev_efcgi,fra-ntf,0 +lre22_dev_efdoz,ven-ven,8 +lre22_dev_efioy,tso-tso,9 +lre22_dev_efiwx,eng-ens,9 +lre22_dev_efrlw,ven-ven,2 +lre22_dev_eghmh,eng-ens,11 +lre22_dev_ehhyu,nbl-nbl,10 +lre22_dev_eiomi,ven-ven,12 +lre22_dev_eisiy,orm-orm,8 +lre22_dev_ejaiq,ara-aeb,1 +lre22_dev_ejkmr,eng-iaf,5 +lre22_dev_ejthv,ven-ven,12 +lre22_dev_ejtyd,fra-ntf,14 +lre22_dev_ekfzq,ara-ayl,6 +lre22_dev_ekgjp,zul-zul,3 +lre22_dev_ekixu,nbl-nbl,2 +lre22_dev_ekjxx,ara-arq,6 +lre22_dev_ekvxc,eng-iaf,4 +lre22_dev_eldrg,orm-orm,11 +lre22_dev_elitc,ara-arq,3 +lre22_dev_emdtf,xho-xho,0 +lre22_dev_emhqx,tir-tir,4 +lre22_dev_emxnm,afr-afr,4 +lre22_dev_emzaa,xho-xho,3 +lre22_dev_engqe,xho-xho,15 +lre22_dev_ennjl,tso-tso,10 +lre22_dev_eokyg,nbl-nbl,2 +lre22_dev_epkwr,tir-tir,5 
+lre22_dev_epojj,tir-tir,7 +lre22_dev_epsdk,nbl-nbl,12 +lre22_dev_epsfl,xho-xho,14 +lre22_dev_epuno,eng-ens,1 +lre22_dev_epylu,eng-iaf,10 +lre22_dev_ereen,ara-arq,10 +lre22_dev_eriaf,eng-ens,4 +lre22_dev_ermqx,ara-arq,2 +lre22_dev_escob,fra-ntf,9 +lre22_dev_esjsk,ara-ayl,7 +lre22_dev_esqti,xho-xho,9 +lre22_dev_etaln,zul-zul,12 +lre22_dev_etarn,nbl-nbl,6 +lre22_dev_etndu,ven-ven,13 +lre22_dev_etpdc,afr-afr,3 +lre22_dev_etsam,zul-zul,7 +lre22_dev_etwge,eng-ens,6 +lre22_dev_etxyc,orm-orm,12 +lre22_dev_eumsq,zul-zul,10 +lre22_dev_eusfl,orm-orm,8 +lre22_dev_eutkk,tso-tso,0 +lre22_dev_euxuy,orm-orm,13 +lre22_dev_evaon,ara-aeb,4 +lre22_dev_evkaz,eng-iaf,8 +lre22_dev_evret,fra-ntf,8 +lre22_dev_evvep,tso-tso,9 +lre22_dev_evvvd,tir-tir,10 +lre22_dev_ewems,ven-ven,7 +lre22_dev_ewijw,orm-orm,11 +lre22_dev_ewqpv,eng-iaf,6 +lre22_dev_ewywf,nbl-nbl,10 +lre22_dev_exaia,afr-afr,3 +lre22_dev_exbum,afr-afr,4 +lre22_dev_exhhd,ara-aeb,5 +lre22_dev_exkkf,afr-afr,3 +lre22_dev_extrh,zul-zul,6 +lre22_dev_exzyo,xho-xho,15 +lre22_dev_eyrzt,ara-ayl,1 +lre22_dev_eysdu,zul-zul,4 +lre22_dev_eyshz,xho-xho,1 +lre22_dev_eyuyq,ara-ayl,7 +lre22_dev_ezsyu,ven-ven,3 +lre22_dev_faahr,afr-afr,9 +lre22_dev_fabli,ven-ven,6 +lre22_dev_fatah,zul-zul,12 +lre22_dev_fccpw,orm-orm,12 +lre22_dev_fcpbu,xho-xho,8 +lre22_dev_fcqbx,tso-tso,3 +lre22_dev_fcwnw,fra-ntf,8 +lre22_dev_fdgia,orm-orm,10 +lre22_dev_febnk,eng-ens,5 +lre22_dev_fedau,eng-iaf,5 +lre22_dev_fehxn,xho-xho,8 +lre22_dev_fejsd,ven-ven,8 +lre22_dev_feqjc,eng-iaf,12 +lre22_dev_fesss,nbl-nbl,15 +lre22_dev_feuww,fra-ntf,8 +lre22_dev_fevex,zul-zul,2 +lre22_dev_ffban,ara-arq,6 +lre22_dev_ffefw,orm-orm,13 +lre22_dev_ffsps,fra-ntf,8 +lre22_dev_ffwid,tso-tso,11 +lre22_dev_fgbtr,nbl-nbl,15 +lre22_dev_fgmbr,ara-arq,6 +lre22_dev_fgmxd,eng-ens,9 +lre22_dev_fgnfs,tir-tir,12 +lre22_dev_fgrze,eng-ens,11 +lre22_dev_fhlhy,ara-aeb,7 +lre22_dev_fihvr,eng-iaf,7 +lre22_dev_fiizm,xho-xho,14 +lre22_dev_fiksd,fra-ntf,12 +lre22_dev_fitjt,tso-tso,6 +lre22_dev_fiuun,eng-ens,7 +lre22_dev_fjdul,ara-ayl,3 +lre22_dev_fjgrh,ven-ven,8 +lre22_dev_fkaqj,nbl-nbl,13 +lre22_dev_flfgv,ara-aeb,9 +lre22_dev_flirl,fra-ntf,13 +lre22_dev_fljab,fra-ntf,14 +lre22_dev_flnzm,tir-tir,11 +lre22_dev_flsmp,orm-orm,15 +lre22_dev_fmjvq,ven-ven,2 +lre22_dev_fmmxd,afr-afr,4 +lre22_dev_fnglh,afr-afr,13 +lre22_dev_fnsax,xho-xho,6 +lre22_dev_fojyn,eng-ens,5 +lre22_dev_foqgk,ven-ven,2 +lre22_dev_fovba,ara-arq,4 +lre22_dev_fozyj,ara-arq,2 +lre22_dev_fpavw,ara-aeb,8 +lre22_dev_fptba,eng-ens,3 +lre22_dev_fqdfc,tso-tso,11 +lre22_dev_fqdhm,eng-iaf,8 +lre22_dev_fqfet,nbl-nbl,7 +lre22_dev_fqgty,fra-ntf,4 +lre22_dev_fqgyd,zul-zul,10 +lre22_dev_fqvup,tso-tso,2 +lre22_dev_frviu,ara-aeb,10 +lre22_dev_frwfk,nbl-nbl,9 +lre22_dev_fsygm,eng-iaf,5 +lre22_dev_ftfjv,orm-orm,11 +lre22_dev_ftjvg,afr-afr,12 +lre22_dev_ftmnu,ara-aeb,10 +lre22_dev_ftrcl,eng-ens,3 +lre22_dev_ftygz,eng-ens,8 +lre22_dev_fughv,eng-iaf,3 +lre22_dev_fuhuk,ara-ayl,5 +lre22_dev_fusyr,ven-ven,13 +lre22_dev_futhm,zul-zul,5 +lre22_dev_fvbzh,ara-ayl,7 +lre22_dev_fvecf,ven-ven,9 +lre22_dev_fvktn,fra-ntf,8 +lre22_dev_fvpts,orm-orm,6 +lre22_dev_fvsmm,eng-iaf,12 +lre22_dev_fvvgc,ara-arq,5 +lre22_dev_fwvzh,zul-zul,2 +lre22_dev_fwwsy,xho-xho,5 +lre22_dev_fxggn,fra-ntf,1 +lre22_dev_fxqfi,orm-orm,10 +lre22_dev_fxuqw,ara-ayl,3 +lre22_dev_fxwfc,eng-iaf,12 +lre22_dev_fymdc,tso-tso,4 +lre22_dev_fywir,tso-tso,10 +lre22_dev_fzjzu,xho-xho,14 +lre22_dev_fzpeh,ara-aeb,10 +lre22_dev_fztdi,tir-tir,9 +lre22_dev_gcced,ven-ven,6 +lre22_dev_gchqj,zul-zul,10 +lre22_dev_gctmk,xho-xho,12 
+lre22_dev_gcupw,ven-ven,7 +lre22_dev_gdfdn,tir-tir,5 +lre22_dev_gdlpg,tir-tir,3 +lre22_dev_gdrwq,fra-ntf,14 +lre22_dev_gdvjh,afr-afr,5 +lre22_dev_gdvtc,eng-iaf,13 +lre22_dev_gdxck,orm-orm,4 +lre22_dev_gecgq,afr-afr,12 +lre22_dev_gevbs,nbl-nbl,13 +lre22_dev_gfqxw,tir-tir,11 +lre22_dev_gfujh,eng-ens,8 +lre22_dev_gfwqx,fra-ntf,10 +lre22_dev_ggchj,tir-tir,10 +lre22_dev_ggeie,ara-arq,8 +lre22_dev_ggqob,ara-aeb,9 +lre22_dev_ghllb,eng-ens,8 +lre22_dev_ghlqh,afr-afr,12 +lre22_dev_ghmuk,afr-afr,13 +lre22_dev_ghskg,tso-tso,4 +lre22_dev_ghwmw,ara-arq,2 +lre22_dev_giijn,ven-ven,6 +lre22_dev_gised,xho-xho,9 +lre22_dev_gisrt,tir-tir,9 +lre22_dev_gjptx,nbl-nbl,4 +lre22_dev_gjvkc,ara-arq,7 +lre22_dev_gjxkc,eng-iaf,13 +lre22_dev_gkywh,ara-aeb,7 +lre22_dev_glhtl,eng-iaf,3 +lre22_dev_glulw,ara-aeb,8 +lre22_dev_gmpja,nbl-nbl,3 +lre22_dev_gmpjm,nbl-nbl,12 +lre22_dev_gnkvz,eng-iaf,13 +lre22_dev_gnmcz,nbl-nbl,4 +lre22_dev_goggr,afr-afr,5 +lre22_dev_goqov,ara-aeb,8 +lre22_dev_gpzgq,tso-tso,9 +lre22_dev_gpzuz,fra-ntf,5 +lre22_dev_gqpul,ara-arq,10 +lre22_dev_gratu,tir-tir,7 +lre22_dev_grewx,afr-afr,9 +lre22_dev_grizt,eng-ens,2 +lre22_dev_grsam,afr-afr,11 +lre22_dev_grsyr,zul-zul,1 +lre22_dev_grxus,nbl-nbl,15 +lre22_dev_gsanj,ven-ven,13 +lre22_dev_gsbwz,nbl-nbl,9 +lre22_dev_gtwjj,tso-tso,4 +lre22_dev_gtxwq,orm-orm,12 +lre22_dev_gubts,ara-ayl,0 +lre22_dev_gvawh,xho-xho,11 +lre22_dev_gvfsb,ara-aeb,10 +lre22_dev_gvhgg,afr-afr,9 +lre22_dev_gvnaj,fra-ntf,8 +lre22_dev_gvysc,ara-aeb,10 +lre22_dev_gwfkz,xho-xho,2 +lre22_dev_gwnqp,xho-xho,7 +lre22_dev_gwumi,tso-tso,3 +lre22_dev_gwvcw,xho-xho,11 +lre22_dev_gwwxz,eng-iaf,1 +lre22_dev_gwzrc,eng-ens,11 +lre22_dev_gxtlx,fra-ntf,13 +lre22_dev_gxygl,tso-tso,9 +lre22_dev_gycld,orm-orm,4 +lre22_dev_gzakl,nbl-nbl,15 +lre22_dev_gzrgo,ara-arq,9 +lre22_dev_hbkul,orm-orm,6 +lre22_dev_hbodn,eng-ens,10 +lre22_dev_hbwgy,ara-arq,6 +lre22_dev_hbwyc,nbl-nbl,5 +lre22_dev_hczek,fra-ntf,7 +lre22_dev_hdpsb,nbl-nbl,6 +lre22_dev_hdvsb,ara-aeb,8 +lre22_dev_hetsy,xho-xho,10 +lre22_dev_hfgrm,ven-ven,12 +lre22_dev_hfurz,afr-afr,13 +lre22_dev_hfwyw,nbl-nbl,11 +lre22_dev_hgdqx,tso-tso,3 +lre22_dev_hgwdk,eng-ens,8 +lre22_dev_hgxqf,eng-iaf,8 +lre22_dev_hgyuk,ven-ven,11 +lre22_dev_hhetm,fra-ntf,14 +lre22_dev_hhjki,ara-arq,8 +lre22_dev_hhvtc,ara-arq,10 +lre22_dev_hhxqv,tso-tso,5 +lre22_dev_hiisb,nbl-nbl,15 +lre22_dev_hioxp,tso-tso,3 +lre22_dev_hjqaf,ara-aeb,9 +lre22_dev_hjqid,orm-orm,6 +lre22_dev_hjzwc,eng-iaf,3 +lre22_dev_hkdzu,ara-arq,9 +lre22_dev_hlatl,eng-iaf,12 +lre22_dev_hlywv,nbl-nbl,2 +lre22_dev_hlzxa,ven-ven,7 +lre22_dev_hmvzg,ara-ayl,3 +lre22_dev_hnjgb,eng-ens,9 +lre22_dev_hntdv,eng-ens,11 +lre22_dev_hoish,tir-tir,2 +lre22_dev_hokbg,ara-ayl,6 +lre22_dev_hondp,eng-iaf,8 +lre22_dev_hpbve,tir-tir,11 +lre22_dev_hpdvc,fra-ntf,8 +lre22_dev_hpgst,orm-orm,5 +lre22_dev_hqbjb,xho-xho,5 +lre22_dev_hqdev,tso-tso,2 +lre22_dev_hqidg,tir-tir,1 +lre22_dev_hqids,afr-afr,9 +lre22_dev_hqltr,tir-tir,4 +lre22_dev_hqqhq,eng-ens,11 +lre22_dev_hrmcg,zul-zul,13 +lre22_dev_hrrcp,afr-afr,8 +lre22_dev_hstgi,xho-xho,9 +lre22_dev_hsvpq,ara-ayl,9 +lre22_dev_hswsy,ara-aeb,4 +lre22_dev_htcgm,eng-iaf,6 +lre22_dev_htedo,xho-xho,13 +lre22_dev_hthkx,eng-iaf,7 +lre22_dev_htohd,afr-afr,6 +lre22_dev_htxik,fra-ntf,0 +lre22_dev_huqbr,xho-xho,10 +lre22_dev_hvdom,afr-afr,8 +lre22_dev_hvkoa,afr-afr,13 +lre22_dev_hvnkg,tir-tir,9 +lre22_dev_hvocp,nbl-nbl,12 +lre22_dev_hvqzj,zul-zul,12 +lre22_dev_hvwph,afr-afr,3 +lre22_dev_hwaqg,zul-zul,8 +lre22_dev_hwgvu,ara-aeb,6 +lre22_dev_hwhlz,ven-ven,11 +lre22_dev_hwkes,fra-ntf,12 +lre22_dev_hwvna,eng-ens,2 
+lre22_dev_hxfim,eng-iaf,12 +lre22_dev_hxmdw,afr-afr,10 +lre22_dev_hxrnp,zul-zul,6 +lre22_dev_hxvie,tir-tir,9 +lre22_dev_hxvju,zul-zul,3 +lre22_dev_hxzxm,zul-zul,6 +lre22_dev_hybef,nbl-nbl,14 +lre22_dev_hyfok,eng-ens,2 +lre22_dev_hyscv,ara-arq,4 +lre22_dev_hyzod,eng-iaf,6 +lre22_dev_hzdpb,tso-tso,7 +lre22_dev_hzjwn,ara-aeb,5 +lre22_dev_hzljv,tir-tir,8 +lre22_dev_hzomy,tso-tso,9 +lre22_dev_iaaar,tso-tso,9 +lre22_dev_iaimu,afr-afr,13 +lre22_dev_iakmg,orm-orm,15 +lre22_dev_iarxv,ara-aeb,9 +lre22_dev_iaywv,ara-ayl,6 +lre22_dev_ibcne,eng-ens,11 +lre22_dev_ibeth,zul-zul,2 +lre22_dev_ibwbi,tir-tir,9 +lre22_dev_ibyqr,tso-tso,7 +lre22_dev_iccwp,eng-iaf,6 +lre22_dev_ichmi,afr-afr,12 +lre22_dev_idjrt,zul-zul,8 +lre22_dev_iegng,afr-afr,8 +lre22_dev_iezrr,ara-ayl,7 +lre22_dev_ifaib,ara-ayl,5 +lre22_dev_ifhil,tso-tso,9 +lre22_dev_ifptd,ven-ven,12 +lre22_dev_ifriu,ara-aeb,6 +lre22_dev_ignvp,zul-zul,13 +lre22_dev_igxzy,eng-iaf,12 +lre22_dev_ihdva,fra-ntf,10 +lre22_dev_iiydv,eng-iaf,5 +lre22_dev_ijoyg,ara-ayl,9 +lre22_dev_ikghg,eng-iaf,7 +lre22_dev_ikijv,ven-ven,2 +lre22_dev_ilawb,ara-aeb,8 +lre22_dev_ilgnm,orm-orm,6 +lre22_dev_ilqhp,orm-orm,13 +lre22_dev_imrsx,tso-tso,8 +lre22_dev_inrfz,ara-arq,1 +lre22_dev_inrlw,eng-ens,1 +lre22_dev_inttm,tso-tso,8 +lre22_dev_iorip,ven-ven,13 +lre22_dev_ioryq,ara-aeb,8 +lre22_dev_iosse,afr-afr,1 +lre22_dev_ipahz,tir-tir,12 +lre22_dev_ipaup,tir-tir,10 +lre22_dev_ipllz,tir-tir,12 +lre22_dev_iprih,ara-aeb,4 +lre22_dev_iqkpj,tir-tir,6 +lre22_dev_iqowb,ara-aeb,0 +lre22_dev_iqzfp,orm-orm,15 +lre22_dev_irhue,tso-tso,8 +lre22_dev_irkvo,orm-orm,15 +lre22_dev_irnie,ara-aeb,8 +lre22_dev_irnxg,zul-zul,9 +lre22_dev_irsgt,ven-ven,2 +lre22_dev_isavf,nbl-nbl,0 +lre22_dev_isfpd,nbl-nbl,11 +lre22_dev_iskfd,ara-arq,4 +lre22_dev_isndz,ara-arq,6 +lre22_dev_istwz,nbl-nbl,15 +lre22_dev_isxpy,orm-orm,5 +lre22_dev_iszkk,tir-tir,9 +lre22_dev_itdot,ara-ayl,9 +lre22_dev_itfgh,eng-iaf,9 +lre22_dev_itlqd,tir-tir,12 +lre22_dev_itmbo,ara-aeb,10 +lre22_dev_itznp,ara-aeb,3 +lre22_dev_iucwv,zul-zul,5 +lre22_dev_iuowb,ara-aeb,8 +lre22_dev_iupes,zul-zul,4 +lre22_dev_iurgk,fra-ntf,4 +lre22_dev_ivcpr,nbl-nbl,12 +lre22_dev_ivrwa,ven-ven,3 +lre22_dev_ivvlb,afr-afr,11 +lre22_dev_ivwhm,tir-tir,6 +lre22_dev_iwoya,ara-aeb,4 +lre22_dev_iwpvu,orm-orm,5 +lre22_dev_ixpuq,ara-ayl,5 +lre22_dev_ixpyb,tso-tso,11 +lre22_dev_iyfiz,eng-iaf,5 +lre22_dev_iylyu,xho-xho,12 +lre22_dev_iyuli,zul-zul,13 +lre22_dev_iyupt,orm-orm,5 +lre22_dev_iyxjf,zul-zul,12 +lre22_dev_iyzgz,tso-tso,10 +lre22_dev_izepb,ara-arq,4 +lre22_dev_izkix,ven-ven,10 +lre22_dev_izknz,ven-ven,12 +lre22_dev_jadfl,ara-arq,9 +lre22_dev_jafja,zul-zul,9 +lre22_dev_jamvn,ven-ven,1 +lre22_dev_jbach,eng-iaf,2 +lre22_dev_jbqcq,ara-aeb,6 +lre22_dev_jcxgo,afr-afr,6 +lre22_dev_jddrh,fra-ntf,13 +lre22_dev_jdjpg,tir-tir,12 +lre22_dev_jdtrb,eng-iaf,11 +lre22_dev_jdwjj,zul-zul,7 +lre22_dev_jdzqw,tir-tir,3 +lre22_dev_jeaev,nbl-nbl,8 +lre22_dev_jeobs,ara-aeb,9 +lre22_dev_jesxq,eng-ens,10 +lre22_dev_jgcla,ara-arq,2 +lre22_dev_jggxv,fra-ntf,3 +lre22_dev_jgntz,orm-orm,5 +lre22_dev_jhcao,ven-ven,7 +lre22_dev_jhgik,eng-ens,11 +lre22_dev_jhpkj,ara-arq,4 +lre22_dev_jhuof,orm-orm,15 +lre22_dev_jignq,ara-ayl,9 +lre22_dev_jjffc,ven-ven,13 +lre22_dev_jjkfe,eng-ens,9 +lre22_dev_jjqxi,ara-aeb,8 +lre22_dev_jjrgq,eng-iaf,4 +lre22_dev_jkacy,tso-tso,3 +lre22_dev_jkmin,orm-orm,15 +lre22_dev_jkobe,xho-xho,7 +lre22_dev_jkosd,zul-zul,10 +lre22_dev_jkovc,tso-tso,3 +lre22_dev_jktcq,zul-zul,7 +lre22_dev_jlodp,eng-ens,9 +lre22_dev_jmbjo,nbl-nbl,9 +lre22_dev_jmccw,ara-arq,3 +lre22_dev_jminj,fra-ntf,5 
+lre22_dev_jmmyw,afr-afr,3 +lre22_dev_jobae,fra-ntf,13 +lre22_dev_jobsv,nbl-nbl,14 +lre22_dev_jobxi,ara-arq,5 +lre22_dev_joghi,ara-arq,6 +lre22_dev_johkj,xho-xho,7 +lre22_dev_jolqw,ara-ayl,5 +lre22_dev_jplye,fra-ntf,11 +lre22_dev_jpsmt,ara-arq,9 +lre22_dev_jqdnf,eng-iaf,13 +lre22_dev_jqqpg,orm-orm,5 +lre22_dev_jqqrs,nbl-nbl,11 +lre22_dev_jrmnp,tir-tir,9 +lre22_dev_jsahe,fra-ntf,12 +lre22_dev_jsciw,eng-ens,5 +lre22_dev_jsisu,eng-iaf,4 +lre22_dev_jstjq,zul-zul,4 +lre22_dev_jsxuw,eng-iaf,8 +lre22_dev_jtaxh,ven-ven,4 +lre22_dev_jtgjo,ara-arq,9 +lre22_dev_jtxor,orm-orm,3 +lre22_dev_junyj,orm-orm,5 +lre22_dev_juykt,ara-ayl,7 +lre22_dev_jvqzf,fra-ntf,9 +lre22_dev_jvvxl,afr-afr,7 +lre22_dev_jvxpt,nbl-nbl,1 +lre22_dev_jwfeb,eng-iaf,4 +lre22_dev_jwmmp,eng-ens,3 +lre22_dev_jwyiq,tso-tso,10 +lre22_dev_jxcmp,ara-aeb,10 +lre22_dev_jxfsy,ara-ayl,9 +lre22_dev_jxjar,tso-tso,10 +lre22_dev_jylrr,ara-aeb,9 +lre22_dev_jzciw,orm-orm,5 +lre22_dev_jzcyt,tso-tso,5 +lre22_dev_jzhpf,tso-tso,4 +lre22_dev_jzidh,afr-afr,11 +lre22_dev_jznzw,eng-iaf,6 +lre22_dev_jzoqd,afr-afr,7 +lre22_dev_jzwnu,ven-ven,11 +lre22_dev_kaoyk,afr-afr,6 +lre22_dev_kasoe,zul-zul,12 +lre22_dev_kaygq,eng-ens,9 +lre22_dev_kayqh,fra-ntf,8 +lre22_dev_kbpcw,eng-iaf,3 +lre22_dev_kbtrx,orm-orm,10 +lre22_dev_kcebk,ven-ven,7 +lre22_dev_kdbil,orm-orm,15 +lre22_dev_kddhf,ara-arq,10 +lre22_dev_kdeij,ara-ayl,3 +lre22_dev_kdiak,zul-zul,12 +lre22_dev_kedwl,nbl-nbl,12 +lre22_dev_keouf,fra-ntf,9 +lre22_dev_keozw,ara-aeb,10 +lre22_dev_kervm,eng-ens,7 +lre22_dev_kflpm,xho-xho,1 +lre22_dev_kfqpd,ara-arq,8 +lre22_dev_kgaqj,ara-aeb,8 +lre22_dev_kghnx,fra-ntf,3 +lre22_dev_kgoze,zul-zul,4 +lre22_dev_kgrxe,fra-ntf,9 +lre22_dev_kgsdu,ara-arq,5 +lre22_dev_kheef,xho-xho,15 +lre22_dev_khgyl,xho-xho,8 +lre22_dev_khsgr,tso-tso,7 +lre22_dev_khxvm,nbl-nbl,9 +lre22_dev_kijjo,ara-aeb,3 +lre22_dev_kiush,xho-xho,2 +lre22_dev_kiyso,ara-arq,1 +lre22_dev_kjewo,ven-ven,6 +lre22_dev_kjgkg,ara-ayl,5 +lre22_dev_kjksh,ven-ven,3 +lre22_dev_kjomd,afr-afr,4 +lre22_dev_kjrcy,afr-afr,11 +lre22_dev_kkauw,fra-ntf,10 +lre22_dev_kkiew,orm-orm,15 +lre22_dev_kkyyu,zul-zul,8 +lre22_dev_klafc,ara-ayl,4 +lre22_dev_klalo,eng-ens,5 +lre22_dev_kliip,afr-afr,1 +lre22_dev_klkxg,tso-tso,8 +lre22_dev_klqwc,ara-arq,7 +lre22_dev_kmbgg,tir-tir,12 +lre22_dev_kmgoo,tir-tir,8 +lre22_dev_kmnko,zul-zul,3 +lre22_dev_kmtyc,ara-aeb,8 +lre22_dev_kmxqj,xho-xho,8 +lre22_dev_kmzdw,fra-ntf,3 +lre22_dev_knxsi,ara-arq,9 +lre22_dev_kofob,orm-orm,7 +lre22_dev_kokfk,fra-ntf,14 +lre22_dev_kokir,nbl-nbl,12 +lre22_dev_kooxu,ara-arq,9 +lre22_dev_korip,tso-tso,7 +lre22_dev_kpbnd,zul-zul,4 +lre22_dev_kpnyf,eng-iaf,3 +lre22_dev_kpwts,ara-ayl,8 +lre22_dev_kpxne,orm-orm,6 +lre22_dev_kpzbl,ven-ven,12 +lre22_dev_kqact,zul-zul,0 +lre22_dev_kqfbl,eng-iaf,12 +lre22_dev_kqfsm,zul-zul,5 +lre22_dev_kqfyp,ara-arq,1 +lre22_dev_kqkqj,ara-ayl,7 +lre22_dev_kqvwr,xho-xho,13 +lre22_dev_kragl,zul-zul,13 +lre22_dev_krbdn,xho-xho,14 +lre22_dev_ksake,ara-aeb,8 +lre22_dev_ksoly,nbl-nbl,11 +lre22_dev_kttyt,orm-orm,5 +lre22_dev_kttzq,tso-tso,9 +lre22_dev_ktwaf,zul-zul,3 +lre22_dev_ktwqf,ven-ven,6 +lre22_dev_ktxef,zul-zul,0 +lre22_dev_ktztb,orm-orm,12 +lre22_dev_kufkm,nbl-nbl,15 +lre22_dev_kuqsu,afr-afr,9 +lre22_dev_kuyka,tir-tir,4 +lre22_dev_kvcpn,ara-ayl,3 +lre22_dev_kvghz,eng-iaf,10 +lre22_dev_kvswv,ven-ven,11 +lre22_dev_kxkos,orm-orm,10 +lre22_dev_kxkzg,ara-ayl,9 +lre22_dev_kxqef,ven-ven,12 +lre22_dev_kyjpf,ven-ven,7 +lre22_dev_kynap,ara-ayl,9 +lre22_dev_kyptg,ven-ven,8 +lre22_dev_kytyr,nbl-nbl,11 +lre22_dev_kywmf,orm-orm,4 +lre22_dev_kzibn,zul-zul,3 
+lre22_dev_kzqxx,fra-ntf,1 +lre22_dev_lacgv,tso-tso,7 +lre22_dev_lagpe,tso-tso,6 +lre22_dev_lanuu,tso-tso,9 +lre22_dev_lapag,afr-afr,6 +lre22_dev_larnq,zul-zul,4 +lre22_dev_lbbvq,xho-xho,8 +lre22_dev_lbfca,ara-arq,8 +lre22_dev_lbhoj,orm-orm,11 +lre22_dev_lbiin,ara-ayl,4 +lre22_dev_lcdyj,ara-arq,9 +lre22_dev_ldasz,fra-ntf,9 +lre22_dev_ldbur,tso-tso,1 +lre22_dev_lddhs,orm-orm,12 +lre22_dev_ldedw,ara-aeb,5 +lre22_dev_ldmbr,ara-ayl,5 +lre22_dev_ldmqc,tir-tir,7 +lre22_dev_leadw,eng-iaf,3 +lre22_dev_leaqq,tso-tso,10 +lre22_dev_ledsh,afr-afr,11 +lre22_dev_leovk,afr-afr,6 +lre22_dev_lexlh,ara-aeb,2 +lre22_dev_lfilk,eng-ens,10 +lre22_dev_lfyll,zul-zul,10 +lre22_dev_lgada,zul-zul,6 +lre22_dev_lgcjy,afr-afr,9 +lre22_dev_lgfri,ara-aeb,5 +lre22_dev_lgkbt,xho-xho,4 +lre22_dev_lhbjq,ara-arq,0 +lre22_dev_lhemi,xho-xho,9 +lre22_dev_lhfne,ara-arq,6 +lre22_dev_lhmtg,ara-arq,9 +lre22_dev_lieso,ara-aeb,8 +lre22_dev_likcy,afr-afr,13 +lre22_dev_lipyu,zul-zul,12 +lre22_dev_lisum,ven-ven,4 +lre22_dev_ljevp,ara-ayl,3 +lre22_dev_ljijh,orm-orm,3 +lre22_dev_ljylg,nbl-nbl,13 +lre22_dev_lkfig,ara-ayl,2 +lre22_dev_lklnc,ara-arq,3 +lre22_dev_lkopy,tir-tir,9 +lre22_dev_lllwi,eng-iaf,5 +lre22_dev_llstb,nbl-nbl,10 +lre22_dev_lmeax,eng-iaf,10 +lre22_dev_lmkui,ara-arq,7 +lre22_dev_lmrbp,tir-tir,9 +lre22_dev_lnejh,eng-ens,10 +lre22_dev_lnttv,ven-ven,10 +lre22_dev_loxqz,eng-iaf,8 +lre22_dev_loybq,ara-aeb,10 +lre22_dev_lpadb,fra-ntf,4 +lre22_dev_lpahk,nbl-nbl,11 +lre22_dev_lphgs,tir-tir,7 +lre22_dev_lphoa,eng-ens,2 +lre22_dev_lpkie,eng-iaf,5 +lre22_dev_lpkpc,zul-zul,6 +lre22_dev_lptpx,eng-iaf,4 +lre22_dev_lqwcv,xho-xho,13 +lre22_dev_lrgwx,orm-orm,10 +lre22_dev_lruoj,orm-orm,2 +lre22_dev_lrwee,fra-ntf,10 +lre22_dev_lsess,ven-ven,1 +lre22_dev_lsycj,tir-tir,9 +lre22_dev_ltaoe,eng-ens,8 +lre22_dev_ltish,ara-aeb,5 +lre22_dev_ltqeb,eng-ens,8 +lre22_dev_ltzfg,ven-ven,10 +lre22_dev_luuhd,ara-arq,2 +lre22_dev_lvejl,zul-zul,11 +lre22_dev_lvgsm,tir-tir,10 +lre22_dev_lvwle,xho-xho,7 +lre22_dev_lvxea,tir-tir,8 +lre22_dev_lwsmk,eng-ens,10 +lre22_dev_lwzhq,ara-ayl,3 +lre22_dev_lxbdd,ara-ayl,8 +lre22_dev_lxdgx,nbl-nbl,1 +lre22_dev_lxjij,ara-ayl,7 +lre22_dev_lxldm,tso-tso,8 +lre22_dev_lxmsa,zul-zul,11 +lre22_dev_lxugv,zul-zul,13 +lre22_dev_lxwig,tso-tso,4 +lre22_dev_lyigi,xho-xho,4 +lre22_dev_lymzv,ara-arq,6 +lre22_dev_lyuls,ara-arq,4 +lre22_dev_lyyzw,ara-ayl,5 +lre22_dev_lzhrm,ara-arq,8 +lre22_dev_lzjgb,xho-xho,12 +lre22_dev_lzrpe,xho-xho,8 +lre22_dev_lzvmq,fra-ntf,13 +lre22_dev_maagy,ven-ven,6 +lre22_dev_mabmx,ara-arq,4 +lre22_dev_macre,zul-zul,7 +lre22_dev_maggb,nbl-nbl,7 +lre22_dev_margf,ara-ayl,6 +lre22_dev_maydg,eng-iaf,4 +lre22_dev_mbsgm,zul-zul,7 +lre22_dev_mbttd,fra-ntf,14 +lre22_dev_mcebh,tso-tso,8 +lre22_dev_mcfve,ara-ayl,3 +lre22_dev_mclrc,zul-zul,12 +lre22_dev_mcvgl,ara-ayl,5 +lre22_dev_mdgok,ara-aeb,5 +lre22_dev_mdilb,ven-ven,3 +lre22_dev_mdzqr,nbl-nbl,11 +lre22_dev_mehfu,ara-arq,3 +lre22_dev_meiyg,eng-ens,11 +lre22_dev_merbq,orm-orm,9 +lre22_dev_mfoys,afr-afr,8 +lre22_dev_mgpfx,xho-xho,8 +lre22_dev_mgtzj,zul-zul,12 +lre22_dev_mgxxc,ven-ven,11 +lre22_dev_mhldj,nbl-nbl,14 +lre22_dev_mhvio,eng-iaf,6 +lre22_dev_mhxgi,tir-tir,9 +lre22_dev_miegc,fra-ntf,6 +lre22_dev_miwyu,ara-aeb,8 +lre22_dev_mjocm,ara-aeb,2 +lre22_dev_mjqij,orm-orm,12 +lre22_dev_mjxgy,afr-afr,8 +lre22_dev_mkeyt,tir-tir,12 +lre22_dev_mklub,ven-ven,4 +lre22_dev_mknzf,ara-aeb,10 +lre22_dev_mlhes,ara-arq,9 +lre22_dev_mlhse,tso-tso,3 +lre22_dev_mlhtc,orm-orm,8 +lre22_dev_mlpuq,ven-ven,10 +lre22_dev_mluow,orm-orm,2 +lre22_dev_mmwtu,ara-arq,4 +lre22_dev_mmwzf,tso-tso,7 
+lre22_dev_mnjdq,tir-tir,10 +lre22_dev_mnkfe,nbl-nbl,4 +lre22_dev_mnmcm,ara-arq,3 +lre22_dev_mocss,xho-xho,9 +lre22_dev_mohxo,zul-zul,12 +lre22_dev_mojui,fra-ntf,1 +lre22_dev_mojvy,xho-xho,7 +lre22_dev_molqa,fra-ntf,14 +lre22_dev_mopiq,nbl-nbl,14 +lre22_dev_moqto,tir-tir,12 +lre22_dev_morri,ara-aeb,8 +lre22_dev_mpxyg,eng-ens,4 +lre22_dev_mqiap,xho-xho,14 +lre22_dev_mqxep,ara-ayl,2 +lre22_dev_mrcoe,ara-ayl,7 +lre22_dev_mriiq,tso-tso,4 +lre22_dev_mryoy,eng-ens,11 +lre22_dev_mryzh,ara-arq,4 +lre22_dev_msadm,ven-ven,2 +lre22_dev_msghz,nbl-nbl,11 +lre22_dev_mtpfp,ara-aeb,9 +lre22_dev_mtqft,orm-orm,14 +lre22_dev_mtzvt,ara-aeb,10 +lre22_dev_munim,xho-xho,15 +lre22_dev_murhb,nbl-nbl,1 +lre22_dev_mvbra,xho-xho,4 +lre22_dev_mvhza,afr-afr,13 +lre22_dev_mviud,xho-xho,12 +lre22_dev_mvxjk,afr-afr,9 +lre22_dev_mwnkm,orm-orm,8 +lre22_dev_mwoml,xho-xho,9 +lre22_dev_mxhup,eng-ens,8 +lre22_dev_mykuh,ara-ayl,5 +lre22_dev_myqfn,eng-iaf,4 +lre22_dev_mywmj,ven-ven,9 +lre22_dev_mzbrr,ara-arq,10 +lre22_dev_mzsiq,afr-afr,9 +lre22_dev_mztms,eng-ens,3 +lre22_dev_mzuxc,ara-arq,9 +lre22_dev_nbdbe,ara-ayl,7 +lre22_dev_nbjqz,ara-aeb,9 +lre22_dev_nbyhp,afr-afr,3 +lre22_dev_ncnyb,ven-ven,8 +lre22_dev_ncocl,nbl-nbl,6 +lre22_dev_ndecq,ara-ayl,8 +lre22_dev_ndjsl,nbl-nbl,6 +lre22_dev_nelsk,orm-orm,0 +lre22_dev_nenly,eng-iaf,11 +lre22_dev_neqkb,ven-ven,2 +lre22_dev_nfjid,orm-orm,12 +lre22_dev_nfkqr,orm-orm,8 +lre22_dev_nfoas,orm-orm,15 +lre22_dev_ngjbm,eng-ens,10 +lre22_dev_ngmbz,eng-iaf,9 +lre22_dev_ngnua,fra-ntf,10 +lre22_dev_nguuu,fra-ntf,13 +lre22_dev_ngyse,ven-ven,7 +lre22_dev_nhfso,fra-ntf,14 +lre22_dev_nhuue,zul-zul,1 +lre22_dev_niack,ara-ayl,8 +lre22_dev_niari,ven-ven,7 +lre22_dev_nibme,ara-arq,9 +lre22_dev_nikby,tso-tso,10 +lre22_dev_nimex,ara-ayl,8 +lre22_dev_nivmv,xho-xho,11 +lre22_dev_nkebu,eng-ens,5 +lre22_dev_nkgml,eng-ens,10 +lre22_dev_nkofi,fra-ntf,11 +lre22_dev_nkrez,xho-xho,5 +lre22_dev_nkscn,tso-tso,5 +lre22_dev_nkwrs,ara-aeb,2 +lre22_dev_nkxcy,afr-afr,4 +lre22_dev_nlast,xho-xho,12 +lre22_dev_nlcun,eng-ens,0 +lre22_dev_nljyr,afr-afr,5 +lre22_dev_nlkdv,eng-iaf,12 +lre22_dev_nlpcs,ara-ayl,7 +lre22_dev_nlrcn,ara-ayl,4 +lre22_dev_nlxla,xho-xho,0 +lre22_dev_nmmij,ara-ayl,4 +lre22_dev_nmrkv,fra-ntf,12 +lre22_dev_nmufp,tso-tso,10 +lre22_dev_nnbmo,tso-tso,10 +lre22_dev_nnnpi,afr-afr,4 +lre22_dev_nnzok,tir-tir,5 +lre22_dev_noqch,fra-ntf,12 +lre22_dev_nownd,xho-xho,2 +lre22_dev_npabl,nbl-nbl,5 +lre22_dev_npjhu,afr-afr,6 +lre22_dev_nqbks,afr-afr,11 +lre22_dev_nqijo,orm-orm,7 +lre22_dev_nqljj,ara-arq,6 +lre22_dev_nqvfr,tir-tir,7 +lre22_dev_nrtej,tir-tir,11 +lre22_dev_nshvj,nbl-nbl,7 +lre22_dev_nsmyy,tir-tir,12 +lre22_dev_nsqcm,fra-ntf,13 +lre22_dev_nstrj,nbl-nbl,9 +lre22_dev_nsvla,nbl-nbl,10 +lre22_dev_nthbx,eng-ens,0 +lre22_dev_nvwkf,ven-ven,0 +lre22_dev_nvwzy,tso-tso,11 +lre22_dev_nvyyg,orm-orm,7 +lre22_dev_nxdml,eng-ens,1 +lre22_dev_nxmxb,zul-zul,12 +lre22_dev_nxqpl,nbl-nbl,13 +lre22_dev_nxslf,fra-ntf,9 +lre22_dev_nyaof,nbl-nbl,5 +lre22_dev_nzeot,zul-zul,12 +lre22_dev_nzhhf,ara-ayl,7 +lre22_dev_nzpbh,fra-ntf,14 +lre22_dev_nzyjp,orm-orm,4 +lre22_dev_nzzyd,xho-xho,11 +lre22_dev_oaiij,ven-ven,7 +lre22_dev_oaimr,orm-orm,14 +lre22_dev_oatzl,fra-ntf,13 +lre22_dev_oaycx,ara-ayl,8 +lre22_dev_objwd,eng-ens,1 +lre22_dev_oboem,tir-tir,9 +lre22_dev_obzyj,xho-xho,5 +lre22_dev_occhn,fra-ntf,9 +lre22_dev_ocfcr,ven-ven,7 +lre22_dev_ochni,ven-ven,13 +lre22_dev_ociva,tir-tir,5 +lre22_dev_odofq,xho-xho,5 +lre22_dev_odtjr,eng-ens,11 +lre22_dev_oejjy,fra-ntf,4 +lre22_dev_offnw,afr-afr,8 +lre22_dev_ofgqs,ara-ayl,6 
+lre22_dev_ofkvj,xho-xho,15 +lre22_dev_ofzhh,orm-orm,11 +lre22_dev_ogilp,afr-afr,6 +lre22_dev_oglxd,ara-ayl,4 +lre22_dev_ogoyt,tso-tso,8 +lre22_dev_ogpou,ven-ven,3 +lre22_dev_ohatz,eng-ens,10 +lre22_dev_ohlzs,nbl-nbl,15 +lre22_dev_ohpzj,tir-tir,4 +lre22_dev_ohzdt,ara-aeb,5 +lre22_dev_oicrh,eng-ens,9 +lre22_dev_oigem,orm-orm,14 +lre22_dev_ojbnw,ara-arq,4 +lre22_dev_ojebm,ven-ven,7 +lre22_dev_ojila,ara-arq,4 +lre22_dev_ojiso,fra-ntf,5 +lre22_dev_ojpdy,tso-tso,9 +lre22_dev_ojtki,tir-tir,11 +lre22_dev_ojxso,nbl-nbl,4 +lre22_dev_okdqa,fra-ntf,14 +lre22_dev_oktvp,ara-ayl,7 +lre22_dev_okvsg,zul-zul,10 +lre22_dev_okyah,tso-tso,11 +lre22_dev_olabw,ara-arq,4 +lre22_dev_omhry,tir-tir,4 +lre22_dev_omnrf,eng-iaf,13 +lre22_dev_omptm,ven-ven,6 +lre22_dev_omqfq,fra-ntf,4 +lre22_dev_onqdn,fra-ntf,13 +lre22_dev_onsyx,tso-tso,9 +lre22_dev_onvgj,tir-tir,6 +lre22_dev_onzha,zul-zul,10 +lre22_dev_ooptw,nbl-nbl,5 +lre22_dev_oowvo,eng-ens,11 +lre22_dev_ooyea,tso-tso,2 +lre22_dev_oozri,ven-ven,0 +lre22_dev_opazz,ara-ayl,1 +lre22_dev_opqkl,nbl-nbl,11 +lre22_dev_oqsva,ara-ayl,2 +lre22_dev_oquxw,nbl-nbl,15 +lre22_dev_orktv,afr-afr,5 +lre22_dev_ornjf,ara-ayl,6 +lre22_dev_ortbp,ara-arq,0 +lre22_dev_osauy,fra-ntf,12 +lre22_dev_osnch,afr-afr,1 +lre22_dev_otelo,eng-iaf,7 +lre22_dev_otewx,tso-tso,10 +lre22_dev_otnwj,eng-ens,3 +lre22_dev_ouecw,ara-aeb,10 +lre22_dev_ouzui,ara-arq,3 +lre22_dev_ovdtj,ara-ayl,6 +lre22_dev_ovjny,tso-tso,1 +lre22_dev_ovqwp,ara-ayl,7 +lre22_dev_ovvkn,afr-afr,11 +lre22_dev_ovvmi,tso-tso,2 +lre22_dev_owyeq,ara-arq,6 +lre22_dev_oxlrt,ara-aeb,10 +lre22_dev_oybst,zul-zul,9 +lre22_dev_oybua,nbl-nbl,2 +lre22_dev_oykjs,tso-tso,4 +lre22_dev_oyswm,ara-arq,8 +lre22_dev_oyxbj,ven-ven,8 +lre22_dev_oyxtq,eng-ens,11 +lre22_dev_oyyxh,ara-arq,8 +lre22_dev_ozbct,tir-tir,12 +lre22_dev_ozcvt,ara-aeb,10 +lre22_dev_ozjel,ara-arq,10 +lre22_dev_ozmuj,zul-zul,3 +lre22_dev_ozuvk,tir-tir,10 +lre22_dev_paguh,fra-ntf,1 +lre22_dev_paspj,tir-tir,6 +lre22_dev_pbmai,fra-ntf,6 +lre22_dev_pbpug,zul-zul,10 +lre22_dev_pbsbs,tso-tso,10 +lre22_dev_pbszl,tso-tso,1 +lre22_dev_pbxxf,eng-iaf,2 +lre22_dev_pcgvn,eng-iaf,3 +lre22_dev_pcmbn,eng-ens,1 +lre22_dev_pcqce,ara-arq,8 +lre22_dev_pdlnr,tso-tso,2 +lre22_dev_pdrus,orm-orm,1 +lre22_dev_pedyx,eng-iaf,12 +lre22_dev_pegyr,nbl-nbl,11 +lre22_dev_pesej,ara-arq,4 +lre22_dev_pevhh,tir-tir,12 +lre22_dev_peykl,xho-xho,13 +lre22_dev_pezwc,tso-tso,4 +lre22_dev_pfemh,eng-iaf,4 +lre22_dev_pfrfc,ven-ven,8 +lre22_dev_pfsoa,nbl-nbl,15 +lre22_dev_pgeoo,tso-tso,9 +lre22_dev_pgwei,orm-orm,2 +lre22_dev_pgxyv,tso-tso,4 +lre22_dev_phofb,ara-ayl,8 +lre22_dev_phula,nbl-nbl,14 +lre22_dev_phwnf,tso-tso,9 +lre22_dev_pifyx,orm-orm,9 +lre22_dev_pilvp,tso-tso,11 +lre22_dev_pinzj,nbl-nbl,11 +lre22_dev_piocw,ara-aeb,8 +lre22_dev_pipas,zul-zul,13 +lre22_dev_pipgo,afr-afr,3 +lre22_dev_pitmn,ara-arq,10 +lre22_dev_pizdz,ara-aeb,2 +lre22_dev_pizlx,ara-ayl,6 +lre22_dev_pjatg,ven-ven,9 +lre22_dev_pjavt,orm-orm,11 +lre22_dev_pjcec,eng-iaf,12 +lre22_dev_pjdwy,afr-afr,1 +lre22_dev_pjlmw,ara-ayl,7 +lre22_dev_pjsqe,eng-ens,7 +lre22_dev_pkdij,ara-ayl,3 +lre22_dev_pkekq,ara-aeb,3 +lre22_dev_pkpst,eng-iaf,9 +lre22_dev_plhqb,nbl-nbl,13 +lre22_dev_plowv,nbl-nbl,5 +lre22_dev_plrjb,xho-xho,12 +lre22_dev_pmove,eng-iaf,4 +lre22_dev_pneax,eng-ens,11 +lre22_dev_pnexr,nbl-nbl,9 +lre22_dev_pngea,nbl-nbl,11 +lre22_dev_pnipe,eng-ens,9 +lre22_dev_pnmlr,ara-arq,5 +lre22_dev_pnsuk,xho-xho,2 +lre22_dev_pnuct,tir-tir,10 +lre22_dev_pocev,ara-arq,4 +lre22_dev_powkd,eng-ens,9 +lre22_dev_pprvm,ara-ayl,7 +lre22_dev_ppyle,ara-aeb,7 +lre22_dev_pqfda,fra-ntf,5 
+lre22_dev_pqryo,afr-afr,4 +lre22_dev_prrzc,afr-afr,9 +lre22_dev_psjuf,afr-afr,13 +lre22_dev_psngm,zul-zul,13 +lre22_dev_psroz,fra-ntf,13 +lre22_dev_pssqo,orm-orm,10 +lre22_dev_psvlh,fra-ntf,13 +lre22_dev_pswld,tir-tir,10 +lre22_dev_ptcns,nbl-nbl,11 +lre22_dev_ptobm,afr-afr,6 +lre22_dev_ptowg,tir-tir,8 +lre22_dev_ptreu,xho-xho,15 +lre22_dev_ptwru,fra-ntf,14 +lre22_dev_ptyff,ara-ayl,1 +lre22_dev_ptygm,tir-tir,3 +lre22_dev_pudne,ara-arq,4 +lre22_dev_puelp,zul-zul,9 +lre22_dev_purej,nbl-nbl,9 +lre22_dev_puyvb,ara-ayl,3 +lre22_dev_pvrdh,ara-aeb,9 +lre22_dev_pvryr,eng-ens,11 +lre22_dev_pwets,tir-tir,9 +lre22_dev_pwgnk,tir-tir,10 +lre22_dev_pwhyy,tir-tir,11 +lre22_dev_pwkgs,zul-zul,2 +lre22_dev_pwtdp,eng-iaf,0 +lre22_dev_pxccc,ara-ayl,5 +lre22_dev_pxpdo,xho-xho,14 +lre22_dev_pxsot,xho-xho,14 +lre22_dev_pxuhy,ara-aeb,6 +lre22_dev_pybxn,eng-iaf,11 +lre22_dev_pyoft,eng-iaf,12 +lre22_dev_pyvql,eng-iaf,7 +lre22_dev_pzcnz,nbl-nbl,2 +lre22_dev_pzhrk,ara-aeb,4 +lre22_dev_qadjy,ven-ven,7 +lre22_dev_qaeek,ven-ven,7 +lre22_dev_qafse,eng-iaf,11 +lre22_dev_qahft,ven-ven,13 +lre22_dev_qakoa,zul-zul,9 +lre22_dev_qalhd,ara-ayl,2 +lre22_dev_qazjh,ven-ven,11 +lre22_dev_qbfkw,eng-iaf,6 +lre22_dev_qbgcd,fra-ntf,14 +lre22_dev_qbisr,ara-ayl,3 +lre22_dev_qcnbm,ven-ven,3 +lre22_dev_qdcbb,tir-tir,5 +lre22_dev_qdfgi,zul-zul,12 +lre22_dev_qdmbj,eng-ens,4 +lre22_dev_qdwtg,fra-ntf,11 +lre22_dev_qefvt,ara-ayl,7 +lre22_dev_qffki,orm-orm,13 +lre22_dev_qfplk,tir-tir,8 +lre22_dev_qgxdl,xho-xho,14 +lre22_dev_qhadd,afr-afr,2 +lre22_dev_qhgaf,ara-ayl,7 +lre22_dev_qhinf,tir-tir,6 +lre22_dev_qhkjz,ara-aeb,6 +lre22_dev_qhlwj,ara-arq,8 +lre22_dev_qiarf,ara-arq,4 +lre22_dev_qidwl,ara-arq,5 +lre22_dev_qivzc,orm-orm,12 +lre22_dev_qizyt,ara-ayl,2 +lre22_dev_qjeue,ara-arq,9 +lre22_dev_qjgxh,ara-arq,1 +lre22_dev_qkdhb,afr-afr,1 +lre22_dev_qkiqi,orm-orm,4 +lre22_dev_qkoth,tir-tir,5 +lre22_dev_qkucq,fra-ntf,3 +lre22_dev_qltea,nbl-nbl,2 +lre22_dev_qlube,ara-aeb,5 +lre22_dev_qmcji,nbl-nbl,15 +lre22_dev_qmpzc,nbl-nbl,11 +lre22_dev_qmsog,tir-tir,3 +lre22_dev_qoech,eng-iaf,7 +lre22_dev_qovfg,ara-arq,10 +lre22_dev_qozzv,tir-tir,2 +lre22_dev_qpasx,tir-tir,3 +lre22_dev_qpauj,ara-aeb,4 +lre22_dev_qpfch,orm-orm,6 +lre22_dev_qpvea,orm-orm,9 +lre22_dev_qrgka,ara-arq,8 +lre22_dev_qrqmm,ara-ayl,7 +lre22_dev_qsaol,xho-xho,14 +lre22_dev_qsgpx,ara-arq,10 +lre22_dev_qspeg,eng-ens,7 +lre22_dev_qsvbe,fra-ntf,3 +lre22_dev_qsxoh,fra-ntf,5 +lre22_dev_qtbnc,xho-xho,7 +lre22_dev_qthzi,afr-afr,12 +lre22_dev_qtmaw,fra-ntf,13 +lre22_dev_qtnqh,eng-iaf,13 +lre22_dev_qtpsb,tso-tso,8 +lre22_dev_qtqpc,eng-iaf,12 +lre22_dev_qtwfv,eng-iaf,4 +lre22_dev_qvamq,fra-ntf,9 +lre22_dev_qveuq,tir-tir,9 +lre22_dev_qvffg,orm-orm,0 +lre22_dev_qvplf,xho-xho,6 +lre22_dev_qvqvi,ven-ven,7 +lre22_dev_qwhsh,afr-afr,7 +lre22_dev_qwiwm,eng-ens,9 +lre22_dev_qxbch,ara-aeb,9 +lre22_dev_qxlca,nbl-nbl,2 +lre22_dev_qxscb,afr-afr,2 +lre22_dev_qyoqn,fra-ntf,9 +lre22_dev_qyrgs,nbl-nbl,3 +lre22_dev_qytdl,fra-ntf,9 +lre22_dev_qyyeb,eng-iaf,12 +lre22_dev_qyzqb,tso-tso,8 +lre22_dev_qzayi,orm-orm,12 +lre22_dev_qzexr,eng-iaf,5 +lre22_dev_qzrfi,ara-arq,10 +lre22_dev_qztjh,orm-orm,3 +lre22_dev_qztze,eng-iaf,12 +lre22_dev_raent,eng-iaf,2 +lre22_dev_ragjh,orm-orm,14 +lre22_dev_ramzu,ara-ayl,6 +lre22_dev_ratmr,ven-ven,7 +lre22_dev_rawak,ara-arq,9 +lre22_dev_rbbne,ven-ven,7 +lre22_dev_rbcul,eng-iaf,10 +lre22_dev_rbsoy,eng-iaf,12 +lre22_dev_rbxqy,tso-tso,9 +lre22_dev_rcejf,xho-xho,7 +lre22_dev_rdbzt,zul-zul,7 +lre22_dev_rdhpu,ara-aeb,8 +lre22_dev_rdsew,ven-ven,2 +lre22_dev_rdtkf,ven-ven,11 +lre22_dev_reeba,ara-ayl,6 
+lre22_dev_relip,eng-iaf,11 +lre22_dev_rfdoh,ara-aeb,9 +lre22_dev_rfkja,xho-xho,11 +lre22_dev_rflev,ven-ven,3 +lre22_dev_rfqcx,nbl-nbl,14 +lre22_dev_rfwuv,eng-ens,1 +lre22_dev_rgsil,fra-ntf,6 +lre22_dev_rhcuj,ara-aeb,8 +lre22_dev_rhdgz,eng-iaf,12 +lre22_dev_rhpmn,ven-ven,7 +lre22_dev_rhtoe,eng-iaf,11 +lre22_dev_rhyqq,ara-aeb,2 +lre22_dev_riltn,ara-aeb,10 +lre22_dev_rinti,xho-xho,12 +lre22_dev_rioxh,xho-xho,12 +lre22_dev_ripix,tir-tir,10 +lre22_dev_rjbji,ven-ven,10 +lre22_dev_rjqbz,eng-iaf,0 +lre22_dev_rkemd,tir-tir,8 +lre22_dev_rktzl,nbl-nbl,13 +lre22_dev_rkuni,xho-xho,15 +lre22_dev_rlsgd,fra-ntf,5 +lre22_dev_rlypa,afr-afr,7 +lre22_dev_rmeav,ven-ven,8 +lre22_dev_rmejy,fra-ntf,12 +lre22_dev_rmeuz,zul-zul,6 +lre22_dev_rmjsj,nbl-nbl,5 +lre22_dev_rmtxj,eng-iaf,13 +lre22_dev_rnpyc,ara-ayl,2 +lre22_dev_rnunw,orm-orm,9 +lre22_dev_rnvvw,tso-tso,9 +lre22_dev_roavh,fra-ntf,6 +lre22_dev_rodbi,xho-xho,15 +lre22_dev_roeph,xho-xho,13 +lre22_dev_rolun,ara-ayl,3 +lre22_dev_roydh,xho-xho,7 +lre22_dev_rpajy,ara-aeb,8 +lre22_dev_rpdsm,ara-ayl,5 +lre22_dev_rpfae,afr-afr,9 +lre22_dev_rpvyc,eng-iaf,9 +lre22_dev_rqxot,tso-tso,9 +lre22_dev_rumiv,ara-aeb,9 +lre22_dev_runhh,afr-afr,6 +lre22_dev_ruvpd,eng-iaf,4 +lre22_dev_rvpkd,fra-ntf,1 +lre22_dev_rvqxq,orm-orm,12 +lre22_dev_rvstc,ara-arq,7 +lre22_dev_rwbea,tir-tir,9 +lre22_dev_rweyk,nbl-nbl,2 +lre22_dev_rwnfb,eng-ens,8 +lre22_dev_rwrhn,afr-afr,11 +lre22_dev_rxhkp,ara-arq,3 +lre22_dev_rxixz,nbl-nbl,15 +lre22_dev_rxmft,zul-zul,7 +lre22_dev_ryknh,ara-ayl,5 +lre22_dev_rytyf,zul-zul,12 +lre22_dev_rywss,tso-tso,1 +lre22_dev_rzjrd,nbl-nbl,7 +lre22_dev_rzpyx,tso-tso,2 +lre22_dev_satbk,ven-ven,7 +lre22_dev_sbfhc,fra-ntf,6 +lre22_dev_sboxi,xho-xho,15 +lre22_dev_scxxn,eng-iaf,5 +lre22_dev_scyvp,ara-aeb,6 +lre22_dev_sdbou,tir-tir,10 +lre22_dev_sddua,tir-tir,11 +lre22_dev_seasj,afr-afr,7 +lre22_dev_sevcw,tir-tir,12 +lre22_dev_sfevx,tso-tso,4 +lre22_dev_sfqgm,fra-ntf,1 +lre22_dev_sgaza,ara-aeb,8 +lre22_dev_sgkrh,afr-afr,9 +lre22_dev_sgmjh,nbl-nbl,14 +lre22_dev_shafn,ven-ven,8 +lre22_dev_shaob,orm-orm,10 +lre22_dev_shnns,afr-afr,6 +lre22_dev_siprc,ven-ven,7 +lre22_dev_sisge,afr-afr,13 +lre22_dev_siuwu,ara-arq,10 +lre22_dev_sivik,fra-ntf,2 +lre22_dev_sjyoo,afr-afr,1 +lre22_dev_skacz,fra-ntf,13 +lre22_dev_skcai,orm-orm,12 +lre22_dev_skctw,nbl-nbl,0 +lre22_dev_skygk,afr-afr,13 +lre22_dev_slraf,ara-aeb,6 +lre22_dev_slrzl,eng-ens,11 +lre22_dev_sltzh,xho-xho,6 +lre22_dev_sluki,ven-ven,1 +lre22_dev_slyez,tso-tso,8 +lre22_dev_slzuh,xho-xho,15 +lre22_dev_smdsm,nbl-nbl,7 +lre22_dev_smhae,ara-ayl,3 +lre22_dev_smxhe,ara-aeb,10 +lre22_dev_snayr,afr-afr,2 +lre22_dev_snbxs,eng-ens,8 +lre22_dev_sngol,tso-tso,9 +lre22_dev_snhun,fra-ntf,13 +lre22_dev_snkib,ven-ven,8 +lre22_dev_snqld,eng-iaf,2 +lre22_dev_sntvb,eng-ens,11 +lre22_dev_snzbl,tir-tir,12 +lre22_dev_sobid,afr-afr,3 +lre22_dev_soknx,orm-orm,15 +lre22_dev_spesw,ven-ven,13 +lre22_dev_sphuq,eng-iaf,12 +lre22_dev_spqcy,xho-xho,11 +lre22_dev_sqcyu,zul-zul,9 +lre22_dev_sqdkr,eng-iaf,13 +lre22_dev_sqfnt,ara-aeb,9 +lre22_dev_sqhrr,eng-ens,11 +lre22_dev_sqyiu,ara-ayl,4 +lre22_dev_srbwp,ara-aeb,10 +lre22_dev_srokn,afr-afr,6 +lre22_dev_srzck,ara-ayl,3 +lre22_dev_ssbei,tso-tso,10 +lre22_dev_ssfmz,eng-iaf,12 +lre22_dev_ssmgk,xho-xho,10 +lre22_dev_ssmsy,xho-xho,4 +lre22_dev_stgcb,afr-afr,10 +lre22_dev_stihb,afr-afr,0 +lre22_dev_stkav,ara-aeb,9 +lre22_dev_stkrw,xho-xho,3 +lre22_dev_sttnk,fra-ntf,8 +lre22_dev_stwkk,eng-iaf,12 +lre22_dev_stwrt,nbl-nbl,1 +lre22_dev_subio,afr-afr,1 +lre22_dev_sumjk,ara-arq,6 +lre22_dev_suocb,nbl-nbl,6 
+lre22_dev_svcbx,tso-tso,9 +lre22_dev_svllg,fra-ntf,14 +lre22_dev_svvqs,afr-afr,3 +lre22_dev_svxyz,ara-ayl,1 +lre22_dev_swhlf,ara-aeb,10 +lre22_dev_swhnk,fra-ntf,12 +lre22_dev_swnrg,ven-ven,12 +lre22_dev_swofz,zul-zul,4 +lre22_dev_swuls,tso-tso,8 +lre22_dev_sxfkn,ara-aeb,2 +lre22_dev_sycoz,tir-tir,10 +lre22_dev_syoek,fra-ntf,5 +lre22_dev_sypnb,ven-ven,13 +lre22_dev_syvrt,eng-iaf,8 +lre22_dev_szmoc,ven-ven,6 +lre22_dev_szmwp,eng-ens,8 +lre22_dev_talec,ven-ven,11 +lre22_dev_tasfs,ven-ven,7 +lre22_dev_tbbrr,xho-xho,5 +lre22_dev_tbcun,ara-aeb,3 +lre22_dev_tbhnw,nbl-nbl,15 +lre22_dev_tblhf,ven-ven,12 +lre22_dev_tbozq,xho-xho,1 +lre22_dev_tcckd,ara-ayl,3 +lre22_dev_tcele,tso-tso,11 +lre22_dev_tciob,tso-tso,10 +lre22_dev_tcpxj,tir-tir,9 +lre22_dev_tdejo,tir-tir,6 +lre22_dev_tdfqo,tso-tso,0 +lre22_dev_tdhhf,zul-zul,10 +lre22_dev_tdjje,ven-ven,10 +lre22_dev_tdkrp,orm-orm,6 +lre22_dev_tebop,tso-tso,10 +lre22_dev_teeqm,ven-ven,6 +lre22_dev_tejsn,tir-tir,12 +lre22_dev_teptc,ara-arq,10 +lre22_dev_tetmt,orm-orm,9 +lre22_dev_tfkij,ara-aeb,2 +lre22_dev_tfnin,tir-tir,3 +lre22_dev_tfyqz,tir-tir,3 +lre22_dev_tgbui,ara-aeb,5 +lre22_dev_tgixi,xho-xho,13 +lre22_dev_tgmud,eng-iaf,6 +lre22_dev_tgult,eng-ens,2 +lre22_dev_thcjv,tso-tso,5 +lre22_dev_thzir,eng-ens,11 +lre22_dev_tisfm,fra-ntf,9 +lre22_dev_tixou,xho-xho,2 +lre22_dev_tiyuw,afr-afr,5 +lre22_dev_tjdcc,afr-afr,13 +lre22_dev_tjikt,zul-zul,12 +lre22_dev_tjpdw,ara-arq,8 +lre22_dev_tkadi,ven-ven,12 +lre22_dev_tkcbm,afr-afr,6 +lre22_dev_tkgfw,eng-ens,11 +lre22_dev_tkiks,ara-aeb,6 +lre22_dev_tlgzi,xho-xho,1 +lre22_dev_tlhlw,tir-tir,6 +lre22_dev_tloqn,afr-afr,6 +lre22_dev_tmcje,eng-ens,4 +lre22_dev_tmjpw,eng-iaf,2 +lre22_dev_tmxtu,ven-ven,2 +lre22_dev_tngwh,tir-tir,8 +lre22_dev_tnqdv,ara-aeb,9 +lre22_dev_tnqro,xho-xho,15 +lre22_dev_tnqzy,orm-orm,7 +lre22_dev_tnskm,xho-xho,12 +lre22_dev_tnvhc,ven-ven,12 +lre22_dev_tofhy,zul-zul,6 +lre22_dev_tohkd,zul-zul,9 +lre22_dev_tonqb,ven-ven,6 +lre22_dev_tpbib,tso-tso,1 +lre22_dev_tpejq,ara-arq,3 +lre22_dev_tpfir,eng-ens,11 +lre22_dev_tphgn,zul-zul,12 +lre22_dev_tpidd,ara-arq,6 +lre22_dev_tpkce,eng-ens,11 +lre22_dev_tpszi,orm-orm,15 +lre22_dev_tpwcn,eng-iaf,6 +lre22_dev_trdfy,ara-ayl,3 +lre22_dev_tsbms,ara-ayl,4 +lre22_dev_tslui,tso-tso,6 +lre22_dev_tsvvy,zul-zul,10 +lre22_dev_tsyey,xho-xho,10 +lre22_dev_ttlco,eng-iaf,12 +lre22_dev_tubpr,orm-orm,13 +lre22_dev_tugpl,eng-ens,9 +lre22_dev_tuoiq,tir-tir,4 +lre22_dev_tuxfx,zul-zul,3 +lre22_dev_tvahj,tir-tir,9 +lre22_dev_tvewc,eng-iaf,3 +lre22_dev_tvfvc,ara-ayl,8 +lre22_dev_tvkod,xho-xho,5 +lre22_dev_tvkwe,zul-zul,9 +lre22_dev_tvopo,xho-xho,12 +lre22_dev_tvqui,eng-ens,7 +lre22_dev_tvsbw,ara-arq,6 +lre22_dev_tvxvk,ven-ven,8 +lre22_dev_twbkf,nbl-nbl,9 +lre22_dev_twfot,ara-arq,6 +lre22_dev_twkns,ara-ayl,4 +lre22_dev_twuvf,eng-ens,10 +lre22_dev_txahv,eng-ens,8 +lre22_dev_txcob,ara-aeb,6 +lre22_dev_txnvi,zul-zul,3 +lre22_dev_txurh,afr-afr,7 +lre22_dev_txzkl,ara-arq,5 +lre22_dev_tyfad,tso-tso,7 +lre22_dev_tyhwp,ara-aeb,8 +lre22_dev_tzism,tir-tir,12 +lre22_dev_tzsfj,tir-tir,12 +lre22_dev_tzwof,eng-iaf,9 +lre22_dev_uahzm,afr-afr,5 +lre22_dev_uajwt,tso-tso,7 +lre22_dev_uanlr,zul-zul,13 +lre22_dev_uaoju,zul-zul,8 +lre22_dev_uaryk,xho-xho,15 +lre22_dev_ubfaf,ven-ven,12 +lre22_dev_ucbje,ara-aeb,8 +lre22_dev_ucrpa,ara-arq,3 +lre22_dev_udtzx,eng-iaf,7 +lre22_dev_uduja,fra-ntf,6 +lre22_dev_udxpl,tso-tso,2 +lre22_dev_uesmx,eng-iaf,5 +lre22_dev_ufewk,eng-iaf,8 +lre22_dev_ugjxy,tir-tir,4 +lre22_dev_ugsxl,eng-ens,3 +lre22_dev_ugvov,tso-tso,8 +lre22_dev_uhmdw,tso-tso,10 +lre22_dev_uhqng,nbl-nbl,12 
+lre22_dev_uhymw,tir-tir,8 +lre22_dev_uhzmr,eng-ens,2 +lre22_dev_uimtg,ara-ayl,4 +lre22_dev_uirdr,nbl-nbl,13 +lre22_dev_uiszj,ara-aeb,8 +lre22_dev_ujada,ara-ayl,9 +lre22_dev_ujmqw,ven-ven,4 +lre22_dev_ujswr,afr-afr,11 +lre22_dev_ujvve,xho-xho,10 +lre22_dev_ukfha,ara-ayl,6 +lre22_dev_ukkpr,eng-ens,10 +lre22_dev_ukpdg,fra-ntf,13 +lre22_dev_ukpoy,nbl-nbl,15 +lre22_dev_uktod,ara-ayl,4 +lre22_dev_uktvh,zul-zul,13 +lre22_dev_ukuwo,ara-ayl,5 +lre22_dev_ukynv,zul-zul,12 +lre22_dev_ulepv,ara-ayl,5 +lre22_dev_ulgtj,zul-zul,7 +lre22_dev_ulofk,eng-iaf,11 +lre22_dev_uluog,ara-arq,3 +lre22_dev_umbpy,zul-zul,13 +lre22_dev_umjzo,tso-tso,5 +lre22_dev_uncdb,ara-arq,9 +lre22_dev_unffr,ara-ayl,8 +lre22_dev_unpif,eng-ens,9 +lre22_dev_uoikj,eng-iaf,13 +lre22_dev_uopfp,nbl-nbl,7 +lre22_dev_upenl,eng-iaf,13 +lre22_dev_uphuw,xho-xho,11 +lre22_dev_upkbw,ara-ayl,4 +lre22_dev_uplen,xho-xho,9 +lre22_dev_upqod,orm-orm,6 +lre22_dev_upspe,afr-afr,12 +lre22_dev_uqnkk,tir-tir,12 +lre22_dev_uqvxc,eng-ens,0 +lre22_dev_urgqx,ara-ayl,8 +lre22_dev_urkgk,tir-tir,12 +lre22_dev_uscky,xho-xho,3 +lre22_dev_usiwx,tir-tir,9 +lre22_dev_usnzj,zul-zul,5 +lre22_dev_usopt,xho-xho,8 +lre22_dev_uswgv,nbl-nbl,11 +lre22_dev_uszcb,ara-arq,4 +lre22_dev_utahf,ara-ayl,7 +lre22_dev_utaxq,tso-tso,9 +lre22_dev_utcwb,afr-afr,10 +lre22_dev_uuhry,tir-tir,9 +lre22_dev_uuprr,eng-ens,7 +lre22_dev_uuvqh,zul-zul,2 +lre22_dev_uwcmh,orm-orm,4 +lre22_dev_uwiev,zul-zul,13 +lre22_dev_uwjzb,ven-ven,10 +lre22_dev_uwony,orm-orm,1 +lre22_dev_uwqeq,orm-orm,2 +lre22_dev_uwvfl,nbl-nbl,5 +lre22_dev_uxdjn,xho-xho,12 +lre22_dev_uxqte,zul-zul,13 +lre22_dev_uxryh,ven-ven,11 +lre22_dev_uyhzp,orm-orm,15 +lre22_dev_uyrjl,tso-tso,10 +lre22_dev_uyzcl,eng-ens,11 +lre22_dev_uzbqz,fra-ntf,4 +lre22_dev_uzoxq,ara-aeb,9 +lre22_dev_vabxl,nbl-nbl,11 +lre22_dev_vafyo,nbl-nbl,15 +lre22_dev_vascl,nbl-nbl,0 +lre22_dev_vauqx,ara-arq,10 +lre22_dev_vbscm,xho-xho,3 +lre22_dev_vbulh,xho-xho,12 +lre22_dev_vbwwp,xho-xho,15 +lre22_dev_vbznk,ara-arq,6 +lre22_dev_vcibu,nbl-nbl,9 +lre22_dev_vcjun,zul-zul,12 +lre22_dev_vckxt,xho-xho,7 +lre22_dev_vdkjy,fra-ntf,14 +lre22_dev_vdmyt,ara-ayl,0 +lre22_dev_vdoif,ven-ven,13 +lre22_dev_vdvjv,orm-orm,12 +lre22_dev_vebet,ara-aeb,1 +lre22_dev_velkr,ara-aeb,1 +lre22_dev_vgbmm,tir-tir,9 +lre22_dev_vgucw,nbl-nbl,7 +lre22_dev_vhiyb,afr-afr,9 +lre22_dev_vhoej,tir-tir,5 +lre22_dev_vhryd,orm-orm,13 +lre22_dev_vhzdh,tso-tso,10 +lre22_dev_viapx,tso-tso,3 +lre22_dev_vifdj,ara-ayl,4 +lre22_dev_vijbo,zul-zul,12 +lre22_dev_virnr,eng-ens,6 +lre22_dev_vjhbd,orm-orm,6 +lre22_dev_vjoca,ara-aeb,10 +lre22_dev_vjtou,eng-ens,5 +lre22_dev_vjxpv,ara-aeb,10 +lre22_dev_vkmab,fra-ntf,2 +lre22_dev_vkrvz,tir-tir,8 +lre22_dev_vkwwf,tso-tso,9 +lre22_dev_vlbdk,zul-zul,6 +lre22_dev_vliie,orm-orm,9 +lre22_dev_vlrve,eng-iaf,2 +lre22_dev_vmaet,tir-tir,3 +lre22_dev_vmdhi,eng-ens,10 +lre22_dev_vmdjw,nbl-nbl,13 +lre22_dev_vmjut,fra-ntf,9 +lre22_dev_vmrrg,eng-ens,3 +lre22_dev_vnjxn,nbl-nbl,7 +lre22_dev_vnmxm,ven-ven,12 +lre22_dev_vnykj,zul-zul,10 +lre22_dev_vovab,zul-zul,11 +lre22_dev_vovvl,zul-zul,11 +lre22_dev_vpcey,tir-tir,6 +lre22_dev_vpodd,nbl-nbl,11 +lre22_dev_vptke,eng-ens,4 +lre22_dev_vpulr,xho-xho,15 +lre22_dev_vpuve,tir-tir,8 +lre22_dev_vqttr,eng-iaf,12 +lre22_dev_vqzae,eng-iaf,11 +lre22_dev_vrnsg,tso-tso,8 +lre22_dev_vshpc,ara-aeb,6 +lre22_dev_vslbh,ara-arq,9 +lre22_dev_vsmaz,tir-tir,5 +lre22_dev_vsnez,tso-tso,8 +lre22_dev_vsnjp,fra-ntf,14 +lre22_dev_vsocn,ven-ven,7 +lre22_dev_vsvom,afr-afr,8 +lre22_dev_vtnfc,tir-tir,4 +lre22_dev_vtnlb,eng-ens,4 +lre22_dev_vubwb,eng-ens,8 
+lre22_dev_vufsn,ara-aeb,3 +lre22_dev_vuiqu,tir-tir,8 +lre22_dev_vumeq,xho-xho,0 +lre22_dev_vupse,ven-ven,6 +lre22_dev_vvauz,xho-xho,14 +lre22_dev_vvfze,eng-ens,11 +lre22_dev_vviyr,zul-zul,12 +lre22_dev_vvwiq,fra-ntf,5 +lre22_dev_vwnkj,zul-zul,5 +lre22_dev_vwoww,orm-orm,7 +lre22_dev_vwtne,afr-afr,5 +lre22_dev_vwxgt,ara-arq,10 +lre22_dev_vxabl,eng-ens,8 +lre22_dev_vxnsl,afr-afr,7 +lre22_dev_vxslj,tir-tir,10 +lre22_dev_vxsvc,tir-tir,11 +lre22_dev_vxuiz,ara-aeb,10 +lre22_dev_vzarl,ara-ayl,7 +lre22_dev_vzeew,ven-ven,6 +lre22_dev_vzjtc,ara-arq,0 +lre22_dev_vzkdb,tso-tso,10 +lre22_dev_vzvpq,ara-arq,9 +lre22_dev_waqyh,xho-xho,15 +lre22_dev_wawwu,xho-xho,14 +lre22_dev_wbgqi,tso-tso,11 +lre22_dev_wcctp,eng-ens,10 +lre22_dev_wdcer,afr-afr,3 +lre22_dev_wdeor,fra-ntf,14 +lre22_dev_wdfdd,eng-iaf,2 +lre22_dev_wdkvb,eng-ens,11 +lre22_dev_wdogx,ara-aeb,7 +lre22_dev_wdqdq,ara-arq,10 +lre22_dev_wdxwu,tir-tir,5 +lre22_dev_weaek,ara-arq,4 +lre22_dev_wefui,tso-tso,10 +lre22_dev_wehjh,tir-tir,10 +lre22_dev_weypz,nbl-nbl,12 +lre22_dev_wffdy,zul-zul,12 +lre22_dev_wffgq,tso-tso,8 +lre22_dev_wfvlh,ven-ven,8 +lre22_dev_wgago,eng-ens,5 +lre22_dev_wglzd,afr-afr,11 +lre22_dev_wgsbu,afr-afr,5 +lre22_dev_whdhw,nbl-nbl,7 +lre22_dev_whogu,eng-iaf,13 +lre22_dev_whpee,tso-tso,9 +lre22_dev_whqpd,ara-aeb,9 +lre22_dev_wikrr,ven-ven,11 +lre22_dev_witju,fra-ntf,11 +lre22_dev_wjcme,orm-orm,10 +lre22_dev_wkare,ara-arq,2 +lre22_dev_wkbfe,afr-afr,9 +lre22_dev_wkecn,xho-xho,13 +lre22_dev_wkhxo,afr-afr,9 +lre22_dev_wlgae,ara-arq,6 +lre22_dev_wlnls,eng-iaf,7 +lre22_dev_wlsxb,eng-ens,1 +lre22_dev_wlwuc,nbl-nbl,8 +lre22_dev_wnaqr,nbl-nbl,9 +lre22_dev_wndpq,fra-ntf,13 +lre22_dev_wnkdc,ara-ayl,2 +lre22_dev_wnknc,nbl-nbl,9 +lre22_dev_wnppz,orm-orm,15 +lre22_dev_wpzgm,afr-afr,13 +lre22_dev_wqhqj,ara-ayl,9 +lre22_dev_wqreb,afr-afr,11 +lre22_dev_wqrez,eng-ens,4 +lre22_dev_wqtsf,ara-arq,8 +lre22_dev_wqwtc,orm-orm,3 +lre22_dev_wrfwf,ven-ven,7 +lre22_dev_wrqqt,orm-orm,15 +lre22_dev_wrutf,afr-afr,7 +lre22_dev_wrvzk,nbl-nbl,1 +lre22_dev_wrxly,fra-ntf,13 +lre22_dev_wsbiw,ara-aeb,8 +lre22_dev_wshay,zul-zul,8 +lre22_dev_wsous,tso-tso,5 +lre22_dev_wszpj,ven-ven,7 +lre22_dev_wtksi,afr-afr,8 +lre22_dev_wugbw,xho-xho,6 +lre22_dev_wujfv,afr-afr,11 +lre22_dev_wuwek,xho-xho,12 +lre22_dev_wvhhk,fra-ntf,2 +lre22_dev_wvosz,nbl-nbl,3 +lre22_dev_wwagu,xho-xho,14 +lre22_dev_wwbuj,eng-iaf,2 +lre22_dev_wwgnr,afr-afr,10 +lre22_dev_wwjev,afr-afr,12 +lre22_dev_wwmsu,ara-arq,4 +lre22_dev_wwrmy,ven-ven,7 +lre22_dev_wwvhd,ara-arq,9 +lre22_dev_wxdjv,ara-ayl,6 +lre22_dev_wygox,tir-tir,6 +lre22_dev_wyhuq,zul-zul,13 +lre22_dev_wzoir,xho-xho,15 +lre22_dev_wzvwa,orm-orm,6 +lre22_dev_xapvn,tso-tso,8 +lre22_dev_xarkl,eng-ens,5 +lre22_dev_xavhh,nbl-nbl,10 +lre22_dev_xazuy,orm-orm,3 +lre22_dev_xbnft,eng-iaf,0 +lre22_dev_xbqbc,fra-ntf,7 +lre22_dev_xbzfw,tir-tir,11 +lre22_dev_xccde,ara-arq,3 +lre22_dev_xcdty,zul-zul,8 +lre22_dev_xcjkb,ara-ayl,7 +lre22_dev_xcmty,ara-arq,10 +lre22_dev_xcsbc,tso-tso,1 +lre22_dev_xdkjb,nbl-nbl,11 +lre22_dev_xdknq,nbl-nbl,11 +lre22_dev_xdoik,eng-ens,10 +lre22_dev_xdtyd,nbl-nbl,4 +lre22_dev_xearl,eng-iaf,3 +lre22_dev_xedqa,nbl-nbl,11 +lre22_dev_xefnx,eng-ens,11 +lre22_dev_xeipr,tir-tir,11 +lre22_dev_xekhs,zul-zul,9 +lre22_dev_xelzr,ara-aeb,9 +lre22_dev_xenhb,ara-aeb,3 +lre22_dev_xfdsx,xho-xho,12 +lre22_dev_xfggl,xho-xho,9 +lre22_dev_xgspz,eng-iaf,13 +lre22_dev_xgwmu,tso-tso,8 +lre22_dev_xhbmk,orm-orm,15 +lre22_dev_xhdtl,orm-orm,3 +lre22_dev_xisjn,ara-arq,8 +lre22_dev_xitdz,nbl-nbl,10 +lre22_dev_xizbg,xho-xho,14 +lre22_dev_xjcph,xho-xho,10 
+lre22_dev_xjcvd,zul-zul,7 +lre22_dev_xjlgm,ara-aeb,3 +lre22_dev_xjxzy,eng-ens,2 +lre22_dev_xkfsd,ven-ven,12 +lre22_dev_xkktj,eng-iaf,12 +lre22_dev_xkmmy,ara-aeb,10 +lre22_dev_xltgz,ara-ayl,5 +lre22_dev_xmbby,orm-orm,3 +lre22_dev_xmcmv,xho-xho,14 +lre22_dev_xngam,fra-ntf,14 +lre22_dev_xnsev,ara-ayl,8 +lre22_dev_xnwsq,ara-arq,8 +lre22_dev_xnwwh,zul-zul,13 +lre22_dev_xobeh,tir-tir,11 +lre22_dev_xolau,ven-ven,13 +lre22_dev_xoqtn,eng-iaf,10 +lre22_dev_xovpd,eng-iaf,10 +lre22_dev_xpaff,eng-ens,9 +lre22_dev_xpahm,ara-arq,4 +lre22_dev_xpcrs,tso-tso,5 +lre22_dev_xpdsg,eng-iaf,5 +lre22_dev_xpjqj,nbl-nbl,6 +lre22_dev_xqwtk,ara-arq,10 +lre22_dev_xrfge,ara-arq,8 +lre22_dev_xrhka,orm-orm,9 +lre22_dev_xrpup,zul-zul,8 +lre22_dev_xsbff,ara-aeb,9 +lre22_dev_xsffv,tso-tso,1 +lre22_dev_xstnu,eng-ens,5 +lre22_dev_xthfd,ara-aeb,8 +lre22_dev_xthzz,ven-ven,4 +lre22_dev_xtmgg,eng-iaf,13 +lre22_dev_xtyic,nbl-nbl,14 +lre22_dev_xucyl,eng-ens,7 +lre22_dev_xudii,ara-ayl,3 +lre22_dev_xugux,afr-afr,0 +lre22_dev_xuqnj,ara-ayl,4 +lre22_dev_xvaoh,nbl-nbl,9 +lre22_dev_xvclh,afr-afr,9 +lre22_dev_xveae,xho-xho,4 +lre22_dev_xxpqz,ara-arq,9 +lre22_dev_xxqad,tso-tso,10 +lre22_dev_xybed,tir-tir,9 +lre22_dev_xyrex,eng-ens,11 +lre22_dev_xzlas,eng-iaf,9 +lre22_dev_xztyr,orm-orm,9 +lre22_dev_yaxkb,zul-zul,12 +lre22_dev_ybcvu,xho-xho,13 +lre22_dev_ybjon,orm-orm,2 +lre22_dev_ybubm,ven-ven,5 +lre22_dev_ycarc,eng-ens,6 +lre22_dev_ychjj,orm-orm,2 +lre22_dev_ycnyc,tir-tir,7 +lre22_dev_ycsvt,afr-afr,12 +lre22_dev_ydaxa,nbl-nbl,8 +lre22_dev_ydrxu,nbl-nbl,1 +lre22_dev_yeekw,fra-ntf,13 +lre22_dev_yevan,tir-tir,11 +lre22_dev_yfaan,tir-tir,10 +lre22_dev_yfayx,afr-afr,6 +lre22_dev_yfpsd,fra-ntf,1 +lre22_dev_yfxkm,ven-ven,7 +lre22_dev_yguqk,ven-ven,3 +lre22_dev_yhrgj,afr-afr,8 +lre22_dev_yhzyq,ara-ayl,5 +lre22_dev_yiqui,eng-iaf,12 +lre22_dev_yjens,ara-ayl,7 +lre22_dev_yjkxx,eng-ens,8 +lre22_dev_yjypk,ara-ayl,9 +lre22_dev_ykchd,ven-ven,8 +lre22_dev_ykktl,xho-xho,0 +lre22_dev_ylhwh,orm-orm,9 +lre22_dev_ylnms,tso-tso,2 +lre22_dev_ylsdz,ven-ven,7 +lre22_dev_ymcmp,eng-iaf,8 +lre22_dev_ymfzx,tso-tso,7 +lre22_dev_ymizm,fra-ntf,0 +lre22_dev_ympvj,tir-tir,9 +lre22_dev_ymslh,tir-tir,12 +lre22_dev_ynavg,zul-zul,9 +lre22_dev_ynhlk,tir-tir,9 +lre22_dev_ynnkb,eng-ens,10 +lre22_dev_yogkc,fra-ntf,7 +lre22_dev_yokld,eng-ens,4 +lre22_dev_yokve,tir-tir,6 +lre22_dev_yomdz,ara-ayl,6 +lre22_dev_yomuu,xho-xho,12 +lre22_dev_yoobm,ara-ayl,8 +lre22_dev_yoocz,eng-ens,10 +lre22_dev_yopyf,eng-iaf,5 +lre22_dev_yoxoc,tir-tir,8 +lre22_dev_ypaem,afr-afr,5 +lre22_dev_ypamp,afr-afr,7 +lre22_dev_ypjpq,tir-tir,8 +lre22_dev_yplba,ara-arq,9 +lre22_dev_ypnrh,fra-ntf,1 +lre22_dev_ypqfg,eng-ens,7 +lre22_dev_yrdsl,eng-ens,2 +lre22_dev_yrtkv,afr-afr,7 +lre22_dev_yrwrb,nbl-nbl,9 +lre22_dev_ysmlk,eng-ens,11 +lre22_dev_yspja,orm-orm,5 +lre22_dev_ytfnn,fra-ntf,14 +lre22_dev_yturp,ara-aeb,6 +lre22_dev_ytvbd,afr-afr,4 +lre22_dev_yuhvo,tso-tso,8 +lre22_dev_yundi,ara-arq,3 +lre22_dev_yvmnx,ara-arq,10 +lre22_dev_yvqud,xho-xho,15 +lre22_dev_yvxdd,ara-ayl,4 +lre22_dev_ywjtq,xho-xho,5 +lre22_dev_ywnza,fra-ntf,12 +lre22_dev_yxnno,tso-tso,10 +lre22_dev_yxoww,tir-tir,7 +lre22_dev_yxpgi,ara-arq,5 +lre22_dev_yxsta,eng-ens,7 +lre22_dev_yyltz,xho-xho,8 +lre22_dev_yyqqx,fra-ntf,12 +lre22_dev_yzloh,ara-ayl,7 +lre22_dev_zacdy,ara-ayl,3 +lre22_dev_zadkk,tir-tir,9 +lre22_dev_zalpc,afr-afr,6 +lre22_dev_zarod,orm-orm,8 +lre22_dev_zasvb,afr-afr,11 +lre22_dev_zazom,ara-arq,9 +lre22_dev_zbfqk,afr-afr,13 +lre22_dev_zbqew,tso-tso,2 +lre22_dev_zbrkn,eng-ens,7 +lre22_dev_zbubp,zul-zul,9 +lre22_dev_zbytc,ara-arq,8 
+lre22_dev_zcfns,tir-tir,6 +lre22_dev_zcfzk,afr-afr,7 +lre22_dev_zcrgv,ara-arq,10 +lre22_dev_zdxdn,ara-ayl,7 +lre22_dev_zdydi,eng-ens,1 +lre22_dev_zebzq,ven-ven,4 +lre22_dev_zedlk,xho-xho,14 +lre22_dev_zeqpp,tir-tir,12 +lre22_dev_zfjbm,ara-arq,10 +lre22_dev_zfkne,nbl-nbl,13 +lre22_dev_zflnr,ven-ven,13 +lre22_dev_zfoyd,xho-xho,4 +lre22_dev_zgdyu,eng-iaf,8 +lre22_dev_zgmja,zul-zul,9 +lre22_dev_zgvfs,ara-arq,6 +lre22_dev_zhmud,orm-orm,14 +lre22_dev_zhoml,tso-tso,9 +lre22_dev_zijcb,xho-xho,10 +lre22_dev_ziktm,ara-aeb,10 +lre22_dev_zipxy,ara-arq,9 +lre22_dev_ziqxc,eng-iaf,1 +lre22_dev_zjhir,ven-ven,7 +lre22_dev_zjmqp,orm-orm,13 +lre22_dev_zjrrk,tso-tso,11 +lre22_dev_zjtwd,ara-aeb,3 +lre22_dev_zkfcf,xho-xho,6 +lre22_dev_zkftc,nbl-nbl,4 +lre22_dev_zkqei,ara-ayl,7 +lre22_dev_zkwqo,zul-zul,11 +lre22_dev_zlamn,nbl-nbl,6 +lre22_dev_zlbor,xho-xho,14 +lre22_dev_zloet,ven-ven,8 +lre22_dev_zlvhk,zul-zul,5 +lre22_dev_zlzqv,fra-ntf,12 +lre22_dev_zmobq,ara-ayl,7 +lre22_dev_zmuiv,zul-zul,9 +lre22_dev_znvqw,zul-zul,4 +lre22_dev_znzuu,tir-tir,0 +lre22_dev_zoava,eng-iaf,6 +lre22_dev_zodvu,tso-tso,0 +lre22_dev_zosdw,nbl-nbl,15 +lre22_dev_zpnvq,xho-xho,6 +lre22_dev_zqeby,eng-iaf,12 +lre22_dev_zqgdd,nbl-nbl,9 +lre22_dev_zqhaw,nbl-nbl,5 +lre22_dev_zqkau,orm-orm,8 +lre22_dev_zqkel,ara-ayl,9 +lre22_dev_zqlnd,ara-aeb,8 +lre22_dev_zrnpw,orm-orm,8 +lre22_dev_zrqvc,afr-afr,9 +lre22_dev_zrrgq,ven-ven,8 +lre22_dev_zryit,zul-zul,8 +lre22_dev_zsckt,zul-zul,4 +lre22_dev_zucqq,orm-orm,4 +lre22_dev_zusln,orm-orm,11 +lre22_dev_zuxzw,tir-tir,0 +lre22_dev_zvabs,tir-tir,11 +lre22_dev_zvlid,tso-tso,11 +lre22_dev_zvned,eng-iaf,5 +lre22_dev_zvtwr,xho-xho,11 +lre22_dev_zwmim,orm-orm,11 +lre22_dev_zwnsu,ara-arq,8 +lre22_dev_zwtxn,ara-arq,10 +lre22_dev_zxfcm,orm-orm,3 +lre22_dev_zxsgm,tir-tir,5 +lre22_dev_zybya,eng-iaf,10 +lre22_dev_zygak,zul-zul,1 +lre22_dev_zylqc,eng-ens,3 +lre22_dev_zyppc,fra-ntf,8 +lre22_dev_zywem,eng-ens,8 +lre22_dev_zzapx,ara-ayl,5 +lre22_dev_zzumc,ara-arq,2 +lre22_dev_zzvdl,fra-ntf,5 +lre22_dev_zzvjv,nbl-nbl,14 diff --git a/egs/lre22/fixed.v1.8k/resources/lre17_ara-ary/segs_ara-ary.csv b/egs/lre22/fixed.v1.8k/resources/lre17_ara-ary/segs_ara-ary.csv new file mode 100644 index 00000000..4f5caa4d --- /dev/null +++ b/egs/lre22/fixed.v1.8k/resources/lre17_ara-ary/segs_ara-ary.csv @@ -0,0 +1,1306 @@ +id,class_id,logp +20110112_085632_25-a.sph,ara-arq,0.9999215882183581 +20110112_085632_25-b.sph,ara-arq,0.9933264028811798 +20110112_093821_26-a.sph,ara-arq,0.9982419072530201 +20110112_093821_26-b.sph,ara-arq,0.9877989962861538 +20110112_100739_27-a.sph,ara-arq,0.9998601825318931 +20110112_100739_27-b.sph,ara-arq,0.9998461026324816 +20110112_102931_28-a.sph,ara-arq,0.9988240996203235 +20110112_102931_28-b.sph,ara-arq,0.9992945069664346 +20110112_110035_29-a.sph,ara-arq,0.9956050254373241 +20110112_110035_29-b.sph,ara-arq,0.9998179655506749 +20110112_120034_30-a.sph,ara-arq,0.9999956254632509 +20110112_120034_30-b.sph,ara-arq,0.9999961650306969 +20110112_121837_31-a.sph,ara-arq,0.9992484722468692 +20110112_121837_31-b.sph,ara-arq,0.9988521768999281 +20110112_125124_32-a.sph,ara-arq,0.9992562768302394 +20110112_125124_32-b.sph,ara-arq,0.9965659470692162 +20110112_131159_33-a.sph,ara-arq,0.9999484673386585 +20110112_131159_33-b.sph,ara-arq,0.999988595598022 +20110112_135057_34-a.sph,ara-arq,0.9964035382836839 +20110112_140409_35-b.sph,ara-arq,0.9993163776118849 +20110112_143151_37-a.sph,ara-arq,0.9978091561892594 +20110112_144321_38-a.sph,ara-arq,0.9882030389155663 +20110112_151915_40-a.sph,ara-arq,0.9999863814255752 
+20110112_151915_40-b.sph,ara-arq,0.9999867872908822 +20110112_164438_41-a.sph,ara-arq,0.9911464286505152 +20110112_164438_41-b.sph,ara-arq,0.9982784286981317 +20110112_170310_42-b.sph,ara-arq,0.9957287722811907 +20110112_174334_44-a.sph,ara-arq,0.9994637389176244 +20110112_174334_44-b.sph,ara-arq,0.9999979913404728 +20110112_175917_46-a.sph,ara-arq,0.999896350765443 +20110112_175917_46-b.sph,ara-arq,0.9992906517833624 +20110112_181316_47-a.sph,ara-arq,0.9865135533386489 +20110112_181316_47-b.sph,ara-arq,0.9799283164010801 +20110112_184303_48-a.sph,ara-arq,0.9999941253436267 +20110112_184303_48-b.sph,ara-arq,0.9894727409462367 +20110112_185018_49-a.sph,ara-arq,0.9993235515178335 +20110112_185018_49-b.sph,ara-arq,0.9964110986149859 +20110112_190919_51-a.sph,ara-arq,0.9975136717392243 +20110112_195355_54-a.sph,ara-arq,0.9994973714549525 +20110112_210716_56-b.sph,ara-arq,0.9999953505624561 +20110113_154325_58-a.sph,ara-arq,0.9999971862717217 +20110113_155707_60-a.sph,ara-aeb,0.999254762801491 +20110113_155707_60-b.sph,ara-arq,0.9999588882350571 +20110113_160907_61-a.sph,ara-arq,0.9999960880781135 +20110113_160907_61-b.sph,ara-arq,0.9999863740819315 +20110113_210803_66-a.sph,ara-arq,0.9973323794741106 +20110113_210803_66-b.sph,ara-arq,0.9989444092443852 +20110114_010743_69-a.sph,ara-arq,0.9999099284602719 +20110114_010743_69-b.sph,ara-arq,0.9763890642555946 +20110114_132253_70-a.sph,ara-arq,0.9999970659217092 +20110114_170901_71-a.sph,ara-arq,0.9999919868727419 +20110114_170901_71-b.sph,ara-arq,0.9854190368540645 +20110114_174847_72-a.sph,ara-arq,0.9999794141191631 +20110114_174847_72-b.sph,ara-arq,0.9999942469709167 +20110115_083054_73-a.sph,ara-arq,0.999835593793634 +20110115_083054_73-b.sph,ara-arq,0.9874468440342952 +20110115_090248_75-a.sph,ara-arq,0.9999406720119036 +20110115_090248_75-b.sph,ara-arq,0.992340537440409 +20110115_093602_76-a.sph,ara-arq,0.999989554911121 +20110115_093602_76-b.sph,ara-arq,0.9999977173414476 +20110115_094928_77-a.sph,ara-arq,0.9978338195939027 +20110115_094928_77-b.sph,ara-arq,0.9999543347173833 +20110115_101940_80-a.sph,ara-arq,0.9999919712722404 +20110115_101940_80-b.sph,ara-arq,0.9999936335315381 +20110115_114622_88-a.sph,ara-arq,0.9994230181724559 +20110115_114622_88-b.sph,ara-arq,0.9987494039544832 +20110115_115414_89-a.sph,ara-arq,0.9997873068050884 +20110115_115414_89-b.sph,ara-arq,0.9996879869582883 +20110115_120333_90-a.sph,ara-arq,0.9999911829644114 +20110115_120333_90-b.sph,ara-aeb,0.9907357013205295 +20110115_154229_93-a.sph,ara-arq,0.9999908976134848 +20110115_160534_94-a.sph,ara-arq,0.9999941336401025 +20110115_160534_94-b.sph,ara-arq,0.9970317402712736 +20110115_170405_96-a.sph,ara-arq,0.9991143532054212 +20110115_170405_96-b.sph,ara-arq,0.9996419863942765 +20110115_172633_97-a.sph,ara-arq,0.983493662520541 +20110115_172633_97-b.sph,ara-arq,0.9999960891934105 +20110115_173918_98-b.sph,ara-arq,0.9899208181081557 +20110116_212111_99-a.sph,ara-arq,0.9998892280312053 +20110118_122005_102-a.sph,ara-arq,0.9999988862765399 +20110118_122005_102-b.sph,ara-arq,0.9909318482173783 +20110118_154651_104-a.sph,ara-arq,0.999987636189934 +20110119_101115_108-a.sph,ara-arq,0.9999649201104261 +20110119_101115_108-b.sph,ara-arq,0.9970793271378511 +20110119_103907_109-a.sph,ara-arq,0.999982064110703 +20110119_103907_109-b.sph,ara-arq,0.9930181910779016 +20110119_123138_110-b.sph,ara-arq,0.9865006019221569 +20110119_130923_111-a.sph,ara-arq,0.997152167078472 +20110119_130923_111-b.sph,ara-arq,0.9981643655714929 +20110119_131501_113-b.sph,ara-arq,0.9999035286024007 
+20110119_162158_114-a.sph,ara-arq,0.9999999535234435 +20110119_162158_114-b.sph,ara-arq,0.9972175314415649 +20110119_164045_115-a.sph,ara-arq,0.999995752838363 +20110119_164045_115-b.sph,ara-arq,0.9997628034439311 +20110119_185412_118-a.sph,ara-arq,0.9998202289146529 +20110119_185412_118-b.sph,ara-arq,0.9984931074535468 +20110119_191933_119-a.sph,ara-arq,0.9998427532239414 +20110119_191933_119-b.sph,ara-arq,0.9999158034849497 +20110120_063303_126-a.sph,ara-aeb,0.9983408495530686 +20110120_065333_127-a.sph,ara-arq,0.9992096176188296 +20110120_065333_127-b.sph,ara-arq,0.9999999917747343 +20110120_103241_131-a.sph,ara-arq,0.9993182345500153 +20110121_133744_171-a.sph,ara-arq,0.9999999215696308 +20110121_135108_172-a.sph,ara-arq,0.9997247255224011 +20110121_135108_172-b.sph,ara-arq,0.9948115916234687 +20110121_150759_174-a.sph,ara-arq,0.9999584736982609 +20110121_205639_189-a.sph,ara-arq,0.9999897466147079 +20110121_205639_189-b.sph,ara-arq,0.9999990343864227 +20110122_102217_196-a.sph,ara-arq,0.980904081808585 +20110122_182307_237-b.sph,ara-arq,0.9999628382800599 +20110122_213252_254-a.sph,ara-aeb,0.9977468847806278 +20110123_065916_259-a.sph,ara-arq,0.9998361236710239 +20110123_065916_259-b.sph,ara-arq,0.9999198626609019 +20110123_082139_260-a.sph,ara-arq,0.9998122368739342 +20110123_091452_261-a.sph,ara-arq,0.9937530884216169 +20110124_150410_307-b.sph,ara-arq,0.9998523760407785 +20110124_160331_310-a.sph,ara-arq,0.9999119580552518 +20110124_160331_310-b.sph,ara-arq,0.9996355765737956 +20110126_231521_427-a.sph,ara-arq,0.9999990527242926 +20110126_231521_427-b.sph,ara-arq,0.999082522282352 +20110126_233137_428-a.sph,ara-arq,0.9999980559906911 +20110126_233137_428-b.sph,ara-arq,0.999549593523283 +20110127_113123_434-a.sph,ara-arq,0.9998900000422434 +20110127_133351_443-a.sph,ara-arq,0.9999924768392251 +20110127_133351_443-b.sph,ara-arq,0.9984673062603949 +20110127_200135_452-a.sph,ara-arq,0.9995916079616751 +20110127_200135_452-b.sph,ara-arq,0.9928940070693326 +20110127_201455_453-a.sph,ara-arq,0.9999998409583539 +20110127_201455_453-b.sph,ara-arq,0.9917325527348603 +20110127_211633_454-a.sph,ara-arq,0.9999252600002078 +20110128_182748_472-a.sph,ara-arq,0.9999937657077407 +20110128_182748_472-b.sph,ara-arq,0.996288694787586 +20110128_185835_473-b.sph,ara-arq,0.9993941153143588 +20110128_193520_475-a.sph,ara-arq,0.9999530106214491 +20110128_193520_475-b.sph,ara-arq,0.9999527486872603 +20110128_200815_476-a.sph,ara-arq,0.9999873059955551 +20110128_200815_476-b.sph,ara-arq,0.9964487496070723 +20110128_203824_477-b.sph,ara-arq,0.9997165845051637 +20110128_222333_480-a.sph,ara-arq,0.9999970783462304 +20110130_080611_510-a.sph,ara-arq,0.9999749215499425 +20110130_080611_510-b.sph,ara-aeb,0.9999966593413755 +20110130_085820_512-a.sph,ara-arq,0.9999636011539246 +20110130_085820_512-b.sph,ara-arq,0.9813498028090423 +20110130_092246_513-b.sph,ara-arq,0.9999059749555838 +20110130_100253_514-a.sph,ara-arq,0.9921278081859116 +20110130_100253_514-b.sph,ara-arq,0.9999516899828312 +20110130_155522_528-a.sph,ara-arq,0.9999069680862643 +20110130_155522_528-b.sph,ara-arq,0.9997900343245884 +20110130_161649_529-a.sph,ara-arq,0.9998613523919572 +20110130_161649_529-b.sph,ara-arq,0.9977029871347945 +20110130_164452_531-a.sph,ara-arq,0.9999976354441193 +20110130_164452_531-b.sph,ara-arq,0.9965312960271767 +20110130_184540_532-a.sph,ara-ayl,0.999999999998713 +20110130_184540_532-b.sph,ara-arq,0.9849336598416535 +20110201_140835_576-a.sph,ara-arq,0.9975300712529358 
+20110201_140835_576-b.sph,ara-arq,0.9999993561949782 +20110201_163316_581-a.sph,ara-arq,0.9999983446810359 +20110201_163316_581-b.sph,ara-aeb,0.9924557962400737 +20110203_191239_616-b.sph,ara-arq,0.9998885155856538 +20110204_153604_625-a.sph,ara-arq,0.9999972105890849 +20110204_153604_625-b.sph,ara-arq,0.9999410840414572 +20110204_163201_626-a.sph,ara-arq,0.9976110910766881 +20110204_163201_626-b.sph,ara-arq,0.9999876934539865 +20110204_164625_627-b.sph,ara-arq,0.9999477802299377 +20110204_171649_628-a.sph,ara-arq,0.999905415195764 +20110204_171649_628-b.sph,ara-arq,0.9999920273859438 +20110204_174823_629-a.sph,ara-arq,0.9999950593350131 +20110204_183311_631-a.sph,ara-arq,0.9999953899932322 +20110204_183311_631-b.sph,ara-arq,0.9999635149153278 +20110204_190013_632-a.sph,ara-arq,0.9999387012014681 +20110204_190013_632-b.sph,ara-arq,0.9999962749405605 +20110204_190208_633-a.sph,ara-arq,0.9999999768635758 +20110204_190208_633-b.sph,ara-arq,0.9977926594648339 +20110204_200618_634-a.sph,ara-arq,0.9936747368214611 +20110204_200618_634-b.sph,ara-arq,0.9999479114428771 +20110204_203655_635-b.sph,ara-arq,0.9996727006128494 +20110204_205300_638-a.sph,ara-arq,0.9992486308165472 +20110205_153631_666-b.sph,ara-arq,0.9996547635561664 +20110205_172120_671-a.sph,ara-arq,0.9999915914855924 +20110206_095118_685-a.sph,ara-arq,0.9999999541857382 +20110206_105102_688-a.sph,ara-arq,0.9999997707236105 +20110206_105102_688-b.sph,ara-arq,0.9993810260864877 +20110206_105820_689-a.sph,ara-arq,0.9987592088484722 +20110206_113326_691-a.sph,ara-arq,0.9998279296292647 +20110206_120354_693-b.sph,ara-arq,0.9980948428595703 +20110206_122113_696-a.sph,ara-arq,0.9995909791238744 +20110206_122113_696-b.sph,ara-arq,0.9999192768200029 +20110206_132644_702-a.sph,ara-arq,0.9999998034776016 +20110206_152016_714-a.sph,ara-arq,0.9999867036857935 +20110206_155159_717-a.sph,ara-arq,0.9998418634479207 +20110206_155159_717-b.sph,ara-arq,0.9888916327994056 +20110206_165119_720-a.sph,ara-arq,0.9999902498906699 +20110206_165119_720-b.sph,ara-arq,0.9920166455204702 +20110206_172320_721-a.sph,ara-arq,0.9997423969682552 +20110206_172320_721-b.sph,ara-arq,0.9804934273026574 +20110206_192709_726-a.sph,ara-arq,0.9840175835722847 +20110206_192709_726-b.sph,ara-arq,0.9977287985909209 +20110206_194621_727-a.sph,ara-arq,0.9831344460618126 +20110208_175519_758-b.sph,ara-arq,0.9999988769528441 +20110209_181948_779-a.sph,ara-arq,0.9936522404471088 +20110209_181948_779-b.sph,ara-arq,0.9997791918710421 +20110209_183724_782-a.sph,ara-arq,0.9998784380878949 +20110209_183724_782-b.sph,ara-arq,0.9999246025038221 +20110210_183402_800-a.sph,ara-arq,0.9984594573811986 +20110210_183402_800-b.sph,ara-arq,0.9980188028033471 +20110210_185230_803-a.sph,ara-arq,0.9999982013809502 +20110210_185230_803-b.sph,ara-arq,0.9995995024241807 +20110211_120852_808-a.sph,ara-arq,0.9932259220823395 +20110211_152026_820-a.sph,ara-arq,0.9999980021493747 +20110211_153702_822-a.sph,ara-arq,0.9969999901106751 +20110211_153702_822-b.sph,ara-arq,0.9997316547372318 +20110211_155607_823-a.sph,ara-arq,0.9999999999999418 +20110211_155607_823-b.sph,ara-arq,0.9998718954393995 +20110212_181444_870-a.sph,ara-arq,0.9989480808880217 +20110212_181444_870-b.sph,ara-arq,0.9999872566336785 +20110212_183328_871-a.sph,ara-arq,0.9903448729423271 +20110212_183328_871-b.sph,ara-arq,0.9810930960909464 +20110212_185203_872-a.sph,ara-arq,0.981273333555111 +20110212_191246_873-b.sph,ara-arq,0.9995332526132436 +20110213_114400_879-a.sph,ara-arq,0.9960907157237656 
+20110213_114400_879-b.sph,ara-arq,0.9857366670372355 +20110213_120949_881-a.sph,ara-arq,0.9844207993983157 +20110213_120949_881-b.sph,ara-arq,0.9903953303779026 +20110213_122816_882-a.sph,ara-arq,0.9891871489623377 +20110213_122816_882-b.sph,ara-arq,0.9999935070606318 +20110213_131054_885-b.sph,ara-arq,0.9999028302287125 +20110213_133818_888-b.sph,ara-arq,0.9970198413986752 +20110213_142146_896-a.sph,ara-arq,0.9997721189705135 +20110213_142146_896-b.sph,ara-arq,0.998800376513235 +20110213_144952_900-a.sph,ara-arq,0.9968885288172212 +20110213_164838_913-b.sph,ara-arq,0.9999808228052904 +20110213_181716_914-a.sph,ara-arq,0.9996267031488214 +20110213_181716_914-b.sph,ara-arq,0.9978692419798673 +20110214_210504_930-a.sph,ara-arq,0.9999999993069508 +20110214_212408_932-b.sph,ara-arq,0.9994783176628016 +20110217_133012_975-b.sph,ara-arq,0.9999978652333108 +20110217_134937_976-b.sph,ara-arq,0.9981620062883075 +20110217_135627_977-a.sph,ara-arq,0.9923823762680786 +20110217_135627_977-b.sph,ara-ayl,0.99994304153697 +20110217_140828_980-b.sph,ara-arq,0.9998865552711499 +20110217_142557_982-a.sph,ara-arq,0.9927005912288147 +20110217_145020_983-a.sph,ara-arq,0.9992976883509107 +20110217_145020_983-b.sph,ara-arq,0.9994242961468159 +20110217_171932_987-a.sph,ara-arq,0.9998300688490686 +20110217_171932_987-b.sph,ara-arq,0.9999938868544527 +20110217_173619_988-a.sph,ara-arq,0.9998859524996734 +20110217_173619_988-b.sph,ara-arq,0.9993538778019107 +20110218_143916_1008-a.sph,ara-arq,0.9995276489555424 +20110218_152219_1012-a.sph,ara-arq,0.9989564931737804 +20110218_152219_1012-b.sph,ara-arq,0.983424400422087 +20110218_154208_1013-a.sph,ara-arq,0.9996610622089865 +20110218_171114_1015-a.sph,ara-arq,0.9983391451448304 +20110218_171114_1015-b.sph,ara-aeb,0.9839049923252243 +20110219_130356_1026-a.sph,ara-arq,0.9800875696973508 +20110219_130356_1026-b.sph,ara-arq,0.9981097426100024 +20110220_153604_1050-b.sph,ara-arq,0.9992922307310227 +20110226_102551_1168-a.sph,ara-arq,0.9956164145717841 +20110226_104245_1169-a.sph,ara-arq,0.999615990129898 +20110226_104245_1169-b.sph,ara-arq,0.9999432762407108 +20110226_105951_1171-b.sph,ara-arq,0.9963320797245594 +20110227_115638_1179-a.sph,ara-arq,0.9979731787181892 +20110227_115638_1179-b.sph,ara-arq,0.9999984412629119 +20110227_123734_1181-a.sph,ara-arq,0.9996205551160018 +20110227_125439_1182-a.sph,ara-arq,0.9996946469526378 +20110227_125439_1182-b.sph,ara-arq,0.9995090769571758 +20110227_131635_1183-a.sph,ara-arq,0.9981612635005762 +20110227_131635_1183-b.sph,ara-arq,0.9999225601135852 +20110227_134655_1184-a.sph,ara-arq,0.9993111982898277 +20110227_140420_1185-b.sph,ara-arq,0.9988598965943818 +20110227_142125_1186-a.sph,ara-arq,0.9948006601325144 +20110227_142125_1186-b.sph,ara-arq,0.9988307215422513 +20110227_154132_1189-a.sph,ara-arq,0.9938418813114719 +20110227_154132_1189-b.sph,ara-arq,0.9999920687652308 +20110227_155909_1191-b.sph,ara-arq,0.998912461185742 +20110227_162241_1192-a.sph,ara-arq,0.9984703552540448 +20110227_162241_1192-b.sph,ara-arq,0.9999895197829509 +20110227_163935_1195-b.sph,ara-arq,0.9949971842748578 +20110228_174826_1217-a.sph,ara-arq,0.9999886137143743 +20110228_174826_1217-b.sph,ara-arq,0.9993742634620741 +20110301_154921_1230-a.sph,ara-arq,0.9997093954036395 +20110301_154921_1230-b.sph,ara-arq,0.9986268296407617 +20110305_101932_1323-a.sph,ara-arq,0.9961912502914853 +20110305_101932_1323-b.sph,ara-arq,0.9992166940246625 +20110305_103655_1326-a.sph,ara-arq,0.9964829278234681 +20110305_175842_1332-a.sph,ara-arq,0.9971054262198007 
+20110305_175842_1332-b.sph,ara-arq,0.9999802993572223 +20110305_181929_1333-b.sph,ara-arq,0.9999661470430923 +20110306_111437_1342-b.sph,ara-arq,0.9997018164726993 +20110306_113229_1344-b.sph,ara-arq,0.9999988318789892 +20110306_115706_1347-b.sph,ara-arq,0.9978191157389156 +20110306_121619_1348-a.sph,ara-arq,0.9827291278544082 +20110306_123404_1350-a.sph,ara-arq,0.9988325513754784 +20110306_155835_1353-b.sph,ara-arq,0.9841830936168241 +20110308_141939_1391-a.sph,ara-arq,0.9927596828605247 +20110309_090633_1407-a.sph,ara-arq,0.9974682774936627 +20110309_090633_1407-b.sph,ara-arq,0.9999472047179431 +20110309_092426_1408-a.sph,ara-arq,0.9891944796228922 +20110309_190600_1415-b.sph,ara-arq,0.9992401337610775 +20110312_100116_1442-b.sph,ara-aeb,0.9999964907283014 +lre11ablk.sph,ara-arq,0.9999907113220694 +lre11aedq.sph,ara-arq,0.9998876721921264 +lre11afar.sph,ara-arq,0.9998482741863788 +lre11aglc.sph,ara-arq,0.9999898456118099 +lre11ahqo.sph,ara-arq,0.9918603244627976 +lre11alas.sph,ara-arq,0.9908901959831016 +lre11alwj.sph,ara-arq,0.9894410455076774 +lre11amzo.sph,ara-arq,0.9786892682080185 +lre11anjz.sph,ara-arq,0.9999981759198692 +lre11aojl.sph,ara-arq,0.9997424488929775 +lre11apsf.sph,ara-arq,0.9849324254750552 +lre11avmm.sph,ara-arq,0.9999504288541092 +lre11axmy.sph,ara-arq,0.9999523003483873 +lre11azex.sph,ara-arq,0.9995398052261274 +lre11bbvj.sph,ara-arq,0.9999335460724018 +lre11bcek.sph,ara-arq,0.9999851926128998 +lre11bcpv.sph,ara-arq,0.984682382200886 +lre11biws.sph,ara-arq,0.9998120788157547 +lre11bnkp.sph,ara-arq,0.9995987125056803 +lre11bnsh.sph,ara-arq,0.9996962089592643 +lre11bnsx.sph,ara-arq,0.9977111633190938 +lre11bpyg.sph,ara-arq,0.9959055192068114 +lre11bpzi.sph,ara-arq,0.9905831011969168 +lre11bqon.sph,ara-arq,0.9969555758000546 +lre11brct.sph,ara-arq,0.9975246452439199 +lre11bsry.sph,ara-arq,0.9894503783769195 +lre11byco.sph,ara-arq,0.9999976971030877 +lre11bzjc.sph,ara-arq,0.999543540290902 +lre11bzlo.sph,ara-arq,0.9999970319421916 +lre11cejv.sph,ara-arq,0.9982811975618141 +lre11cesz.sph,ara-arq,0.998990976426668 +lre11cfgz.sph,ara-arq,0.9935160910053205 +lre11cfwm.sph,ara-arq,0.9999964881312391 +lre11cgay.sph,ara-arq,0.9996390794313366 +lre11cgxl.sph,ara-arq,0.9888137475556131 +lre11cian.sph,ara-arq,0.9999456258260779 +lre11cjxu.sph,ara-arq,0.9999091559138382 +lre11cmat.sph,ara-arq,0.999993807597705 +lre11cmnm.sph,ara-arq,0.9968753878911075 +lre11cpyg.sph,ara-arq,0.99778707361397 +lre11cysx.sph,ara-arq,0.9999531509783556 +lre11czoc.sph,ara-arq,0.9999723971738445 +lre11czzz.sph,ara-arq,0.9967642546011689 +lre11dcxm.sph,ara-arq,0.9988254822426347 +lre11dkdu.sph,ara-arq,0.9995208011254988 +lre11dmgu.sph,ara-arq,0.9999964088803398 +lre11dnsn.sph,ara-arq,0.9999932007597087 +lre11dtba.sph,ara-arq,0.9999993605172935 +lre11dtee.sph,ara-arq,0.9998725513672501 +lre11dtma.sph,ara-arq,0.977093779334459 +lre11dwvy.sph,ara-arq,0.9912064733461909 +lre11dzmv.sph,ara-arq,0.9893557384712441 +lre11edst.sph,ara-arq,0.9996483218194355 +lre11efjk.sph,ara-arq,0.9997175005082578 +lre11eiyw.sph,ara-arq,0.9876725818861913 +lre11ekip.sph,ara-arq,0.9996941527919115 +lre11eohx.sph,ara-arq,0.9999635932561415 +lre11erez.sph,ara-arq,0.9965516335686703 +lre11erxq.sph,ara-arq,0.9996087806099485 +lre11erxr.sph,ara-arq,0.9775705269985646 +lre11eufb.sph,ara-arq,0.9843849296538485 +lre11fagv.sph,ara-arq,0.9998946883634996 +lre11fbda.sph,ara-arq,0.9978295206364557 +lre11fcjj.sph,ara-ayl,0.993383627457564 +lre11fkvi.sph,ara-arq,0.9995855863185078 +lre11fodl.sph,ara-aeb,0.9926903655566781 
+lre11fqsk.sph,ara-arq,0.9999977131862029 +lre11ftfz.sph,ara-arq,0.9992187521546018 +lre11fvvi.sph,ara-arq,0.9998858777480336 +lre11fwev.sph,ara-arq,0.9895982019894587 +lre11fwgy.sph,ara-arq,0.9999585016943806 +lre11fxxe.sph,ara-arq,0.9999324150603885 +lre11fyfu.sph,ara-arq,0.9998347600563473 +lre11fyul.sph,ara-arq,0.9936234560510464 +lre11fzut.sph,ara-arq,0.9842511617654447 +lre11gdzy.sph,ara-arq,0.999889190713416 +lre11gezd.sph,ara-arq,0.9998699060210541 +lre11gfzz.sph,ara-arq,0.9999891185814981 +lre11ggoj.sph,ara-arq,0.9939957237418172 +lre11ggpo.sph,ara-arq,0.9856255396828421 +lre11goba.sph,ara-arq,0.990344531513245 +lre11gobo.sph,ara-arq,0.9999875942826566 +lre11grhs.sph,ara-arq,0.9997556743150159 +lre11grvu.sph,ara-arq,0.9999968541025068 +lre11gugo.sph,ara-arq,0.9965937007797033 +lre11hfea.sph,ara-arq,0.9998608609896967 +lre11hhql.sph,ara-arq,0.9998338106542838 +lre11hnhd.sph,ara-arq,0.9772497743518446 +lre11honr.sph,ara-arq,0.9998932196048694 +lre11hqam.sph,ara-arq,0.9981273699564014 +lre11hqxf.sph,ara-arq,0.9994436545087109 +lre11hxhj.sph,ara-arq,0.9999969785031192 +lre11iape.sph,ara-arq,0.9805984363079878 +lre11ibqb.sph,ara-arq,0.9818806715807676 +lre11ijgj.sph,ara-arq,0.9990861345342709 +lre11ilih.sph,ara-arq,0.9999502485361578 +lre11imki.sph,ara-ayl,0.9999999999342657 +lre11iqwq.sph,ara-arq,0.9989577296816973 +lre11irup.sph,ara-arq,0.9987522767488314 +lre11itok.sph,ara-arq,0.9999898498294778 +lre11ivvj.sph,ara-arq,0.9989286012303883 +lre11ixke.sph,ara-arq,0.9999500945534188 +lre11jepu.sph,ara-arq,0.9972127532313085 +lre11jfrt.sph,ara-arq,0.999937410524841 +lre11jgdm.sph,ara-arq,0.9999313841413808 +lre11jgex.sph,ara-arq,0.9996981300593533 +lre11jjzk.sph,ara-arq,0.9969824384056464 +lre11jkcg.sph,ara-arq,0.9999901763442688 +lre11jlbb.sph,ara-ayl,0.9999797418290254 +lre11jmkp.sph,ara-aeb,0.9829506107982495 +lre11jnbo.sph,ara-arq,0.9955088149420692 +lre11joqm.sph,ara-arq,0.9979145377503337 +lre11jpnp.sph,ara-arq,0.9820469567638314 +lre11jqay.sph,ara-arq,0.9999059550640546 +lre11jsem.sph,ara-arq,0.9999659015380002 +lre11jtsu.sph,ara-arq,0.9999985874275487 +lre11jxjq.sph,ara-arq,0.9996252775252656 +lre11kcxw.sph,ara-arq,0.9999980198875125 +lre11kghl.sph,ara-arq,0.9999969246150942 +lre11khai.sph,ara-arq,0.9999850849749775 +lre11khpn.sph,ara-arq,0.9996319733879265 +lre11kizl.sph,ara-arq,0.9946944777387341 +lre11kjhr.sph,ara-arq,0.9999999478107263 +lre11kkvl.sph,ara-arq,0.9999331725841073 +lre11kmrd.sph,ara-arq,0.998407471871956 +lre11kvlp.sph,ara-arq,0.9881647482815468 +lre11kweb.sph,ara-arq,0.9959584099323461 +lre11laah.sph,ara-arq,0.9999994459184748 +lre11laym.sph,ara-arq,0.9996946633832319 +lre11lcve.sph,ara-ayl,0.9754071685038981 +lre11lgwf.sph,ara-arq,0.999998185212916 +lre11lkfn.sph,ara-arq,0.9871985523328064 +lre11lljy.sph,ara-arq,0.9999951992577 +lre11ltbl.sph,ara-arq,0.9925424060658127 +lre11lvrc.sph,ara-arq,0.999994692352155 +lre11lxeu.sph,ara-arq,0.9998494543892836 +lre11mciy.sph,ara-arq,0.9991649802638065 +lre11mcpb.sph,ara-arq,0.9998812879498469 +lre11mdaq.sph,ara-arq,0.9999993536606017 +lre11mdlw.sph,ara-arq,0.9999933631714529 +lre11megz.sph,ara-arq,0.999991578971874 +lre11mgcj.sph,ara-arq,0.9997465534552311 +lre11mgda.sph,ara-arq,0.9998675569243112 +lre11mhts.sph,ara-arq,0.9993162979080465 +lre11mimu.sph,ara-arq,0.9853693267657061 +lre11mmil.sph,ara-arq,0.9998013743994384 +lre11moic.sph,ara-arq,0.9999975487340111 +lre11mpyg.sph,ara-arq,0.9999918690235887 +lre11mrgx.sph,ara-arq,0.9916396537114318 +lre11muco.sph,ara-arq,0.9990051655062514 
+lre11myev.sph,ara-arq,0.9999999709774632 +lre11mzsf.sph,ara-arq,0.9814633660769434 +lre11nfrs.sph,ara-arq,0.9993363405655639 +lre11nhol.sph,ara-arq,0.9997583313854068 +lre11nhpm.sph,ara-arq,0.9999766347438241 +lre11nisq.sph,ara-arq,0.9999493082338785 +lre11njwd.sph,ara-arq,0.9999968670068229 +lre11nlof.sph,ara-arq,0.997025558783378 +lre11nsiw.sph,ara-arq,0.9759841774215573 +lre11ntcf.sph,ara-arq,0.9777836287483279 +lre11nted.sph,ara-arq,0.9998141941637515 +lre11nvno.sph,ara-arq,0.9999964888490706 +lre11oavt.sph,ara-arq,0.9965200220599933 +lre11ocsv.sph,ara-arq,0.9999827956617842 +lre11oege.sph,ara-arq,0.9950563664377374 +lre11ofei.sph,ara-arq,0.9998876820742316 +lre11ohag.sph,ara-arq,0.9985768786597264 +lre11oije.sph,ara-arq,0.992072008446929 +lre11ojgd.sph,ara-arq,0.9940659470468965 +lre11ojvf.sph,ara-arq,0.9993122001400614 +lre11okxt.sph,ara-arq,0.9994222299446635 +lre11omni.sph,ara-arq,0.9998835501527243 +lre11onrg.sph,ara-arq,0.9916365803164747 +lre11ontl.sph,ara-arq,0.9995751330000039 +lre11opue.sph,ara-arq,0.9997738097677723 +lre11oqro.sph,ara-arq,0.999967793829243 +lre11otxd.sph,ara-arq,0.9999418179160698 +lre11ouii.sph,ara-arq,0.9998576517513276 +lre11ovwf.sph,ara-arq,0.9889696901091004 +lre11oydk.sph,ara-arq,0.9819224710181479 +lre11ozdn.sph,ara-arq,0.9998106016178588 +lre11pagq.sph,ara-arq,0.9801218480437873 +lre11paur.sph,ara-arq,0.999987431916008 +lre11pfti.sph,ara-arq,0.9863893498230394 +lre11pfzy.sph,ara-arq,0.9999851535829934 +lre11pprb.sph,ara-arq,0.9907367260657962 +lre11pqno.sph,ara-arq,0.9999947908245772 +lre11pvoj.sph,ara-arq,0.9999986601480322 +lre11pysj.sph,ara-arq,0.9818927362425611 +lre11pzsc.sph,ara-arq,0.9777996051185309 +lre11qaml.sph,ara-arq,0.9901897881820463 +lre11qcse.sph,ara-arq,0.9786650402081483 +lre11qhrk.sph,ara-arq,0.99996447871608 +lre11qilb.sph,ara-arq,0.9999944023407891 +lre11qjbu.sph,ara-arq,0.999998909993637 +lre11qoxa.sph,ara-arq,0.9992628389476516 +lre11qpqk.sph,ara-arq,0.9988889605651897 +lre11qrlt.sph,ara-arq,0.9993564789200029 +lre11qtkd.sph,ara-arq,0.9964050771568211 +lre11qupc.sph,ara-arq,0.9758848861520171 +lre11qwil.sph,ara-arq,0.9999982738573114 +lre11qwqs.sph,ara-arq,0.9998351735862747 +lre11rafm.sph,ara-arq,0.9999610086553676 +lre11rdod.sph,ara-arq,0.9883226566986822 +lre11rdqv.sph,ara-ayl,0.985738149564001 +lre11relv.sph,ara-arq,0.999999468502387 +lre11rjui.sph,ara-arq,0.9999924251546126 +lre11rkhs.sph,ara-arq,0.9956440478348277 +lre11rldx.sph,ara-arq,0.9989857178552667 +lre11rwqr.sph,ara-arq,0.9999995661106464 +lre11sgia.sph,ara-arq,0.9792334621995509 +lre11skba.sph,ara-arq,0.9971597662211976 +lre11smpy.sph,ara-arq,0.9999299231162642 +lre11snqm.sph,ara-arq,0.9998770993281684 +lre11snzs.sph,ara-arq,0.9998957560021872 +lre11svhq.sph,ara-arq,0.9926690106361062 +lre11sxdk.sph,ara-arq,0.9999894072598812 +lre11szjx.sph,ara-arq,0.9997496078258093 +lre11tisp.sph,ara-aeb,0.9935967845696344 +lre11tkeq.sph,ara-arq,0.999992668852024 +lre11tkgv.sph,ara-arq,0.9999634239721431 +lre11tlbn.sph,ara-arq,0.9916960032980205 +lre11tlgc.sph,ara-arq,0.9921670149117343 +lre11tnbu.sph,ara-arq,0.9999917662026707 +lre11tqjp.sph,ara-arq,0.9999997186207273 +lre11trmj.sph,ara-aeb,0.9985641023002019 +lre11txsn.sph,ara-arq,0.9895624098081941 +lre11ubjy.sph,ara-arq,0.9991221016840601 +lre11ubmu.sph,ara-arq,0.9999965435512681 +lre11uhux.sph,ara-arq,0.997435675415044 +lre11ujqi.sph,ara-arq,0.9939996473996353 +lre11ullo.sph,ara-arq,0.9999999998276814 +lre11umdt.sph,ara-arq,0.9962758965298869 +lre11unmt.sph,ara-arq,0.9985618690998576 
+lre11uqzm.sph,ara-arq,0.9985040805093104 +lre11urlw.sph,ara-arq,0.9998924522602656 +lre11usmv.sph,ara-arq,0.9997207805439943 +lre11uvte.sph,ara-arq,0.9983265140452946 +lre11uwxi.sph,ara-arq,0.9993165982905879 +lre11vcwy.sph,ara-arq,0.9969565461227344 +lre11veuu.sph,ara-arq,0.9999796896377858 +lre11vezt.sph,ara-arq,0.9767680653788202 +lre11vfna.sph,ara-aeb,0.9964120446674009 +lre11vhhz.sph,ara-arq,0.998103902690531 +lre11vhvh.sph,ara-arq,0.9999927635072146 +lre11vjcl.sph,ara-arq,0.9999819610209169 +lre11vkma.sph,ara-arq,0.9945794427407135 +lre11vncd.sph,ara-arq,0.9907523248594148 +lre11vrrg.sph,ara-arq,0.99460105641934 +lre11vsry.sph,ara-arq,0.9951631752607728 +lre11vssm.sph,ara-arq,0.9804358152668605 +lre11vukq.sph,ara-arq,0.998095638681888 +lre11vwzy.sph,ara-arq,0.9999994453927953 +lre11vxev.sph,ara-arq,0.9986174583419248 +lre11vyma.sph,ara-arq,0.9935618499200927 +lre11vzdv.sph,ara-arq,0.9940242404482954 +lre11wjmo.sph,ara-arq,0.9946379138594132 +lre11wlmf.sph,ara-arq,0.9985332278711876 +lre11wogz.sph,ara-arq,0.9999996831958213 +lre11wpeu.sph,ara-arq,0.987053003738009 +lre11xesf.sph,ara-arq,0.9926552158707163 +lre11xlhq.sph,ara-arq,0.9861699702078971 +lre11xmop.sph,ara-arq,0.9998106693232437 +lre11xncb.sph,ara-arq,0.9997518363731595 +lre11xsib.sph,ara-arq,0.9999999575778398 +lre11yfkq.sph,ara-arq,0.9978416409757853 +lre11yfuh.sph,ara-arq,0.9852732209813364 +lre11yjtr.sph,ara-arq,0.9999875286020034 +lre11ykqy.sph,ara-arq,0.999994599228917 +lre11ynky.sph,ara-arq,0.9870374231633215 +lre11ynub.sph,ara-arq,0.9975464312675795 +lre11ynut.sph,ara-arq,0.994865816956002 +lre11yohv.sph,ara-arq,0.9999860886998846 +lre11ypuu.sph,ara-arq,0.999939937136318 +lre11yqmg.sph,ara-arq,0.9988753925216532 +lre11yskr.sph,ara-arq,0.9990482482843873 +lre11yysp.sph,ara-arq,0.9909036055745113 +lre11zaix.sph,ara-arq,0.9993370825297897 +lre11zcny.sph,ara-arq,0.9863060880274914 +lre11zgmi.sph,ara-arq,0.9852780607073358 +lre11znqr.sph,ara-arq,0.999970678330779 +lre11zosk.sph,ara-aeb,0.9992041227652806 +lre11zsfl.sph,ara-arq,0.9999316377930149 +lre11zvte.sph,ara-arq,0.9994693690287269 +lre11zwzv.sph,ara-arq,0.998921262378038 +lre11zxvd.sph,ara-arq,0.9970232759937795 +lre11zzww.sph,ara-arq,0.9968858579018414 +lre17_abtnjqwo.sph,ara-arq,0.999946799291851 +lre17_acckmchx.sph,ara-aeb,0.9986711227355097 +lre17_acoxtkfz.sph,ara-aeb,0.9842666785196265 +lre17_adharrss.flac,ara-arq,0.9947182189896948 +lre17_adharrss.flac-gsm,ara-arq,0.9956384390235652 +lre17_aduhvtel.sph,ara-arq,0.9999889349109844 +lre17_aekqbfnc.sph,ara-arq,0.9984883022389972 +lre17_afpeboji.sph,ara-arq,0.9999593134555593 +lre17_agovhiqf.sph,ara-arq,0.9980746218401729 +lre17_aiesnqgx.sph,ara-arq,0.9999860691612521 +lre17_aipdwmxb.sph,ara-aeb,0.9989779255268202 +lre17_anlfomhj.flac,ara-ayl,0.9994229196677115 +lre17_anocqhav.sph,ara-arq,0.9997793277424729 +lre17_ansqkmxg.sph,ara-arq,0.9990411219484128 +lre17_aokadzsc.sph,ara-arq,0.9999717598295709 +lre17_aqfwvqpg.sph,ara-arq,0.9999999815159134 +lre17_aquebikd.sph,ara-arq,0.9999983875736405 +lre17_arzwedtw.sph,ara-arq,0.9945282050060794 +lre17_asbkyxts.sph,ara-arq,0.9902930286287294 +lre17_astrrnby.flac,ara-arq,0.9755268847782662 +lre17_astrrnby.flac-g711a,ara-arq,0.9854914061367727 +lre17_avfgalrs.flac-gsm,ara-arq,0.9999972882283532 +lre17_avrkkwph.sph,ara-arq,0.9988768871380727 +lre17_axiutoza.sph,ara-arq,0.9997779954741397 +lre17_ayizvbkc.sph,ara-arq,0.9999840812557923 +lre17_azjbrozk.sph,ara-arq,0.9953169139803245 +lre17_bdpgpxku.flac-g711a,ara-ayl,0.9999977914337187 
+lre17_beeeutoh.flac,ara-arq,0.9942343106320238 +lre17_beeeutoh.flac-g723_1,ara-arq,0.9974204443646679 +lre17_bfaxqjqb.sph,ara-arq,0.999936007945235 +lre17_bfdjopui.sph,ara-aeb,0.9781197289083158 +lre17_bjfsfjit.flac,ara-arq,0.9879397124065223 +lre17_bjkkkuno.sph,ara-arq,0.999487026079353 +lre17_bjoiupem.sph,ara-arq,0.9880927563645315 +lre17_bjzozier.sph,ara-aeb,0.9997062951168889 +lre17_bkcpyhve.sph,ara-arq,0.9993564259542776 +lre17_bkjcaggk.flac,ara-arq,0.9994997776308957 +lre17_bkjcaggk.flac-g711mu,ara-arq,0.9993395177228688 +lre17_bktxvmar.sph,ara-arq,0.9999917820037525 +lre17_blazxkfa.sph,ara-arq,0.9999639560062823 +lre17_blbxhpiv.sph,ara-arq,0.9949809784805863 +lre17_blljbkpf.sph,ara-arq,0.9934970373407573 +lre17_bmujmfhj.sph,ara-arq,0.9775708820251491 +lre17_boryyjhf.sph,ara-arq,0.9977484576765338 +lre17_bowjoyjr.sph,ara-arq,0.9816765571144581 +lre17_bqxjnfxx.sph,ara-aeb,0.9929235091866946 +lre17_bqyznxui.flac-g711a,ara-arq,0.9999829540600199 +lre17_brpyutxm.sph,ara-aeb,0.9832823165542701 +lre17_bswfxzyr.sph,ara-arq,0.9999980843040208 +lre17_btafdkdg.flac,ara-arq,0.9992005312956213 +lre17_btafdkdg.flac-gsm,ara-arq,0.9992352599146054 +lre17_buwtqeqb.flac,ara-arq,0.987792729025184 +lre17_buwtqeqb.flac-g726,ara-arq,0.999145758855682 +lre17_bvqgsidl.sph,ara-aeb,0.9862447101786331 +lre17_bwxfqusr.sph,ara-aeb,0.9936207466860856 +lre17_bxwvpnfw.flac-opus,ara-arq,0.9999995104818462 +lre17_bymvcgmj.sph,ara-aeb,0.9997234164043465 +lre17_byzcayjn.flac,ara-arq,0.998985645903786 +lre17_byzcayjn.flac-opus,ara-arq,0.9999450148660525 +lre17_bzmjxehu.flac-opus,ara-arq,0.9999664531787248 +lre17_cairjuvk.sph,ara-arq,0.9987867029788313 +lre17_campowcv.sph,ara-arq,0.999787205693806 +lre17_ccazlpob.flac-g723_1,ara-arq,0.9974334229828895 +lre17_cccspkdm.flac,ara-arq,0.9880424513373823 +lre17_cccspkdm.flac-g726,ara-arq,0.9954627517627038 +lre17_ccjrvsph.sph,ara-arq,0.9999054835372087 +lre17_ccoewvvh.sph,ara-arq,0.9893765175449871 +lre17_ccypdbbu.sph,ara-arq,0.9999252890248448 +lre17_cdavzdsz.flac-g722,ara-arq,0.9997403404294325 +lre17_cflgybxg.sph,ara-arq,0.9940658569786607 +lre17_cfnizhql.sph,ara-arq,0.9961060724590401 +lre17_cfwfsjev.sph,ara-arq,0.9999998965888017 +lre17_cfznpgjd.sph,ara-arq,0.9971102748785 +lre17_cgmytvfk.sph,ara-aeb,0.9997875043473322 +lre17_cipisqbs.flac-g723_1,ara-arq,0.999066551053694 +lre17_cluexwgz.sph,ara-arq,0.9998680970232108 +lre17_cnfohesd.flac,ara-arq,0.9999887142160848 +lre17_cnfohesd.flac-g723_1,ara-arq,0.9995963322893509 +lre17_cphdyjdq.sph,ara-arq,0.9906215457248232 +lre17_csenuaki.sph,ara-arq,0.9999827796312072 +lre17_ctilbvnd.sph,ara-arq,0.9941490810957968 +lre17_ctrojttf.sph,ara-arq,0.9997415475987689 +lre17_ctudkyri.sph,ara-arq,0.999816925839242 +lre17_cupizrsx.sph,ara-arq,0.9977854992043466 +lre17_cureptst.flac,ara-arq,0.9966603829855006 +lre17_cureptst.flac-g726,ara-arq,0.9981724402252548 +lre17_cuvtxdbp.sph,ara-arq,0.9992149913711046 +lre17_cvdmebty.sph,ara-arq,0.9986034345626393 +lre17_cwbzqjzi.sph,ara-arq,0.9947360860803344 +lre17_cwdccgrs.sph,ara-arq,0.9954471876114118 +lre17_cwlcovrq.flac,ara-arq,0.9836373295795586 +lre17_cwlcovrq.flac-g726,ara-arq,0.9772066393285307 +lre17_cxfmtvjk.flac,ara-arq,0.998626631969006 +lre17_cxfmtvjk.flac-gsm,ara-arq,0.9995994571705398 +lre17_czdgssvb.sph,ara-arq,0.9839800680795405 +lre17_daifemlo.flac,ara-arq,0.9999923124695197 +lre17_daifemlo.flac-opus,ara-arq,0.9984999152656753 +lre17_dbwacwxo.sph,ara-aeb,0.9959928678878073 +lre17_dctjgdcf.sph,ara-arq,0.9999693971426666 +lre17_degmucpq.flac,ara-arq,0.9998238238013132 
+lre17_degmucpq.flac-gsm,ara-arq,0.9997379324783554 +lre17_dfotbhmi.sph,ara-arq,0.9999967791115594 +lre17_dhsngizg.flac,ara-arq,0.9982745689022119 +lre17_dhsngizg.flac-g722,ara-arq,0.9899076755215684 +lre17_dhttmloy.flac,ara-aeb,0.9965398617526191 +lre17_dhttmloy.flac-g726,ara-arq,0.9999896912229259 +lre17_dimkfdga.sph,ara-aeb,0.9968909392210388 +lre17_dkorjmpr.sph,ara-arq,0.9993720459297395 +lre17_dlkdkiml.sph,ara-arq,0.9998570457972521 +lre17_dmptasts.sph,ara-arq,0.9999900227916614 +lre17_dmxpkcsa.sph,ara-aeb,0.9886627341769952 +lre17_dqynyyeg.flac-gsm,ara-arq,0.9885091570772832 +lre17_dreturny.sph,ara-arq,0.9999666233198918 +lre17_drvwwpat.sph,ara-arq,0.9986475256791328 +lre17_dsyovtja.sph,ara-arq,0.9993324032717326 +lre17_dtfklpze.flac,ara-arq,0.980875299084855 +lre17_duwzoctt.sph,ara-arq,0.9944810760803787 +lre17_dxcmnnvm.sph,ara-arq,0.998013143951749 +lre17_dyhzanuz.flac,ara-arq,0.9990022950435253 +lre17_dzdfoalc.flac-g711mu,ara-arq,0.9992312655669962 +lre17_dzpjlevc.sph,ara-arq,0.9999957026648084 +lre17_ecoxuoxn.sph,ara-arq,0.9991002623934314 +lre17_ecphppxx.sph,ara-arq,0.9999618844845741 +lre17_ecwsvpey.sph,ara-arq,0.9976190788174759 +lre17_edrerhyd.flac,ara-ayl,0.9999948782776695 +lre17_edrerhyd.flac-g726,ara-ayl,0.999989105843972 +lre17_efuktxso.sph,ara-arq,0.9941980497799132 +lre17_ehubbeoo.sph,ara-arq,0.9996933627528989 +lre17_ejqromcl.sph,ara-arq,0.9925673764218849 +lre17_ekecvked.sph,ara-arq,0.9994069294232564 +lre17_eknvksdj.sph,ara-arq,0.9999954297332013 +lre17_elihcnoy.sph,ara-arq,0.9999453376193964 +lre17_elkbffyz.sph,ara-aeb,0.9994985958843845 +lre17_elyyerit.sph,ara-arq,0.9969900208425778 +lre17_emvsmkok.sph,ara-arq,0.9999992133320064 +lre17_enrykydq.flac,ara-arq,0.9997480550009473 +lre17_enrykydq.flac-g726,ara-ayl,0.9999971853826948 +lre17_eqodnzbt.flac,ara-arq,0.9975299669768147 +lre17_eqzepcqb.sph,ara-arq,0.9999815043421167 +lre17_erwwlbkn.sph,ara-arq,0.997330169701168 +lre17_esimguhv.flac-g723_1,ara-arq,0.9990357824171696 +lre17_ewmwbivr.flac,ara-aeb,0.9941407951069882 +lre17_ewsegeoy.sph,ara-aeb,0.9999996013414438 +lre17_extnxkey.sph,ara-arq,0.9899611206184787 +lre17_fgvuyqrc.sph,ara-arq,0.9999740198189395 +lre17_fheeozab.sph,ara-arq,0.9999998531340115 +lre17_fhjonuvo.flac,ara-arq,0.9912514441770434 +lre17_fhjonuvo.flac-gsm,ara-arq,0.9986942437203434 +lre17_fhobhhji.flac-g711mu,ara-arq,0.9999982807618869 +lre17_fhqkrhdc.sph,ara-aeb,0.9974882737208554 +lre17_fjeaknag.sph,ara-arq,0.9999528433654035 +lre17_flghevgj.sph,ara-arq,0.9999275347920555 +lre17_flllshvw.sph,ara-arq,0.9995890566887757 +lre17_fmaaifty.sph,ara-arq,0.9967330411235821 +lre17_fmyxmvuh.sph,ara-arq,0.9947613058963332 +lre17_fobsmsvj.sph,ara-arq,0.9985126437061927 +lre17_fosfumyj.flac,ara-arq,0.9981175606728612 +lre17_fosfumyj.flac-opus,ara-arq,0.9973412918879427 +lre17_fovgucqc.flac-gsm,ara-arq,0.9902063770771822 +lre17_fpzybapz.flac,ara-arq,0.9999831117898691 +lre17_fpzybapz.flac-g722,ara-arq,0.9999745011543022 +lre17_frfvxgkm.flac,ara-arq,0.9982105807022026 +lre17_frfvxgkm.flac-g723_1,ara-arq,0.9999990437299212 +lre17_frldxzov.flac,ara-arq,0.9999911187378006 +lre17_frnemphs.sph,ara-aeb,0.9999309467267882 +lre17_frrujsta.sph,ara-aeb,0.9827694350674886 +lre17_fsibsssn.flac,ara-arq,0.9967716611519729 +lre17_fsibsssn.flac-opus,ara-arq,0.9943914629735336 +lre17_fstjhoom.sph,ara-arq,0.9999560494459958 +lre17_fuelrqpq.sph,ara-arq,0.998314155825479 +lre17_fwyhddxz.sph,ara-arq,0.9999653658276243 +lre17_fxhpiabv.flac-g722,ara-arq,0.9785533261819718 +lre17_fyoimwzn.sph,ara-aeb,0.9933070038389972 
+lre17_fyousbwl.sph,ara-arq,0.9997738038053198 +lre17_fzetpzrs.sph,ara-arq,0.9845858022736108 +lre17_gbdwksrl.flac-opus,ara-ayl,0.9999810055915502 +lre17_gbkeixqy.sph,ara-arq,0.9995010489207078 +lre17_gbmrfptf.sph,ara-arq,0.9995997838188411 +lre17_gcwvbecw.flac,ara-arq,0.9999989525506976 +lre17_gcwvbecw.flac-g726,ara-arq,0.9999995241973817 +lre17_gekpnsqw.flac,ara-arq,0.9995617602232915 +lre17_gekpnsqw.flac-g711a,ara-arq,0.9990205101656683 +lre17_gfmhcimo.flac,ara-arq,0.9843261830443644 +lre17_gfmhcimo.flac-g711a,ara-arq,0.9920939572460264 +lre17_giljetfl.sph,ara-arq,0.9998866157683133 +lre17_givvturo.flac,ara-arq,0.9999960772188857 +lre17_givvturo.flac-g722,ara-arq,0.9998983053609016 +lre17_gkfwivzq.sph,ara-arq,0.9980134657798864 +lre17_gokkodsj.flac-g722,ara-ayl,0.9962500403266442 +lre17_gpvtlzov.flac-g711a,ara-arq,0.9999996204042616 +lre17_gqcxwuze.sph,ara-arq,0.9997783053110009 +lre17_gqpcfrwm.flac-g711mu,ara-arq,0.9817550583044142 +lre17_grjpzakf.sph,ara-arq,0.9839396690676935 +lre17_grjzqftr.sph,ara-arq,0.9877772556918923 +lre17_gszgcsjf.sph,ara-arq,0.998185259970527 +lre17_gvcqvsap.sph,ara-ayl,0.9997241868465031 +lre17_gxvmjddr.sph,ara-arq,0.9986899594684224 +lre17_hbopaybj.flac,ara-arq,0.987858946064221 +lre17_hbopaybj.flac-g726,ara-arq,0.999994436902088 +lre17_hchvsbqr.sph,ara-arq,0.999906917330984 +lre17_hdofrwsf.sph,ara-arq,0.9933958450004624 +lre17_heemkdqp.flac,ara-arq,0.9999909671052553 +lre17_heemkdqp.flac-g711mu,ara-arq,0.9985133817101537 +lre17_hezbzaqo.flac,ara-arq,0.9850199928962854 +lre17_hezbzaqo.flac-opus,ara-arq,0.9999841295369671 +lre17_hfcpmeoa.flac,ara-arq,0.9947181969213107 +lre17_hfcpmeoa.flac-g711a,ara-arq,0.9999138159106336 +lre17_hfjennzi.sph,ara-aeb,0.9983301362771589 +lre17_hhbqfxfc.sph,ara-aeb,0.9906856058776015 +lre17_hhdplflf.sph,ara-arq,0.9999949491011441 +lre17_hjimhzob.sph,ara-arq,0.9983113233299764 +lre17_hkeqbypc.flac,ara-arq,0.999999661618148 +lre17_hkeqbypc.flac-gsm,ara-arq,0.9966513627962669 +lre17_hlegmknx.sph,ara-arq,0.9999782289720263 +lre17_hmmdberw.sph,ara-aeb,0.9998194744091253 +lre17_hmptzweu.sph,ara-arq,0.9999971779992906 +lre17_hmqodybe.sph,ara-ayl,0.999996399948908 +lre17_hqrhzhyj.sph,ara-arq,0.9999231926652757 +lre17_hqzkhrhn.sph,ara-arq,0.9998770075415304 +lre17_hromittp.flac-g711a,ara-arq,0.9873451303247496 +lre17_hsdzydln.flac,ara-arq,0.9821628698106489 +lre17_hsdzydln.flac-g722,ara-arq,0.9988122191294789 +lre17_hsyuvhtp.sph,ara-arq,0.9990478816052286 +lre17_hvweyrfw.sph,ara-aeb,0.9988668377871749 +lre17_hwnjyblc.sph,ara-arq,0.9999249104513325 +lre17_hxpvwduf.flac-g711a,ara-aeb,0.9938488854312174 +lre17_hyhwjuli.sph,ara-arq,0.998858421685253 +lre17_hyreqvpy.flac,ara-arq,0.9878561156668769 +lre17_hyreqvpy.flac-g711mu,ara-arq,0.9999928431157828 +lre17_ibclsyjb.sph,ara-aeb,0.9846988495735338 +lre17_ifdrxwfj.sph,ara-arq,0.9988623308711881 +lre17_igayvnul.sph,ara-arq,0.9858583197264382 +lre17_igvjetcy.sph,ara-arq,0.9997565397210374 +lre17_igvlwujq.sph,ara-aeb,0.9942243168589683 +lre17_iibcchiq.flac-gsm,ara-arq,0.9989051845669153 +lre17_ilmlmyvv.sph,ara-aeb,0.9879290883225061 +lre17_inhzmrxh.sph,ara-arq,0.9999602544207984 +lre17_inufxzrc.sph,ara-arq,0.9997778215419035 +lre17_iqtqtuvc.flac,ara-arq,0.999987834966952 +lre17_iqtqtuvc.flac-opus,ara-arq,0.9817938892370449 +lre17_itjgcxig.sph,ara-arq,0.999561265994042 +lre17_itsqwgkz.sph,ara-arq,0.9999999957865953 +lre17_ittvvvfb.sph,ara-arq,0.9999964209775712 +lre17_ivcdeiky.flac,ara-arq,0.9873438502201111 +lre17_iwtlmazd.sph,ara-arq,0.9873719419778358 
+lre17_ixbvjxte.sph,ara-arq,0.9997976143150719 +lre17_iycttrsq.sph,ara-arq,0.987846742780538 +lre17_iyqnjpod.sph,ara-arq,0.9936664779953471 +lre17_izhxudfa.sph,ara-arq,0.9999249686091597 +lre17_javisjpg.sph,ara-arq,0.9933263960275387 +lre17_jclfqqom.sph,ara-arq,0.9996552571484193 +lre17_jcperagi.sph,ara-arq,0.9999535310829344 +lre17_jcueuvkk.sph,ara-arq,0.9998819304923648 +lre17_jgqtrgqt.sph,ara-arq,0.9999823610331084 +lre17_jgzyarns.sph,ara-arq,0.9999898713367306 +lre17_jhjgasxv.sph,ara-aeb,0.9991139740455672 +lre17_jhoqfjpk.flac,ara-arq,0.9999995523948527 +lre17_jhoqfjpk.flac-g711a,ara-arq,0.9944430263756097 +lre17_jiakkjtr.sph,ara-arq,0.9999993323735444 +lre17_jilypibp.flac-gsm,ara-arq,0.9996434093761065 +lre17_jiowcahg.sph,ara-arq,0.992648625274396 +lre17_jlvgsuxh.sph,ara-aeb,0.9948123012485498 +lre17_jlvtorab.sph,ara-arq,0.983513534636461 +lre17_jmkuwbpc.sph,ara-arq,0.9994527050835158 +lre17_jnipskqx.flac-g711mu,ara-arq,0.9999742870473751 +lre17_jpeqxepv.sph,ara-aeb,0.9994104144919757 +lre17_jpeyombi.sph,ara-arq,0.9999998044387237 +lre17_jpjtuxvw.flac,ara-arq,0.9996847495267612 +lre17_jqmoqqfm.flac,ara-arq,0.9999869216223071 +lre17_jqmoqqfm.flac-g726,ara-arq,0.9999999894357187 +lre17_jtdfvpln.sph,ara-arq,0.999997871153658 +lre17_jtqoxxtm.sph,ara-aeb,0.9965086342211626 +lre17_jvpfjwdp.flac,ara-ayl,0.9999999997451912 +lre17_jvurmddm.flac,ara-arq,0.9987187046194855 +lre17_jvurmddm.flac-gsm,ara-arq,0.9983730947085013 +lre17_jwkybctt.sph,ara-arq,0.9999989593481196 +lre17_jxcmtrxm.sph,ara-ayl,0.9996284167713838 +lre17_jywavsuu.flac,ara-arq,0.9913930400541082 +lre17_kaaesmko.flac,ara-arq,0.9999294732363818 +lre17_kbmrgfwm.sph,ara-arq,0.9999528508197458 +lre17_kbodxjcn.flac,ara-arq,0.9982992372902407 +lre17_kcdcpzly.sph,ara-arq,0.9759220472115765 +lre17_keetepyz.flac,ara-arq,0.9927333842986636 +lre17_keetepyz.flac-g722,ara-arq,0.9999998412855006 +lre17_kfmsssrs.sph,ara-arq,0.9997193659178423 +lre17_kfsotues.sph,ara-arq,0.999998669422541 +lre17_khygxcdj.sph,ara-arq,0.9868874065356342 +lre17_kjtqnjgt.sph,ara-arq,0.9791232775307577 +lre17_kkcxpjzr.flac,ara-arq,0.9999999887350973 +lre17_kkcxpjzr.flac-g711a,ara-arq,0.9999944954328739 +lre17_kmzwffxp.sph,ara-arq,0.9782750887595135 +lre17_knkvczhw.flac-g722,ara-arq,0.9999106890724243 +lre17_kpbzatbg.sph,ara-arq,0.9935992901995973 +lre17_kpcquycc.sph,ara-arq,0.9988726069205118 +lre17_kugvrfiw.sph,ara-arq,0.9986475838029554 +lre17_kuzbruhc.sph,ara-arq,0.9998879396014413 +lre17_kwvzftsa.sph,ara-arq,0.9906568648956764 +lre17_kzutiwjm.sph,ara-arq,0.9883352489803169 +lre17_larfsawf.sph,ara-arq,0.9833813699639339 +lre17_lectmxiy.sph,ara-arq,0.9997601079206343 +lre17_lfdmjqzk.sph,ara-arq,0.9997174449458649 +lre17_lfqfgpty.flac,ara-arq,0.9877470265323836 +lre17_lfqfgpty.flac-opus,ara-arq,0.9998989537391589 +lre17_lgimdxjv.sph,ara-arq,0.9897020483952464 +lre17_lgmtfuaf.sph,ara-arq,0.9997034751344174 +lre17_lgzhdvir.flac,ara-arq,0.9999666603862899 +lre17_lgzhdvir.flac-g723_1,ara-arq,0.9999814711501472 +lre17_litfqatc.sph,ara-arq,0.9999697307957149 +lre17_ljqkqvuk.sph,ara-arq,0.9931204062930487 +lre17_lkeepofx.sph,ara-aeb,0.9815662246718163 +lre17_lkvpiaco.flac-g711mu,ara-aeb,0.9894714726927342 +lre17_llwfixbt.flac,ara-arq,0.9998571656021117 +lre17_llwfixbt.flac-opus,ara-arq,0.9999840733365404 +lre17_llxcpovx.sph,ara-arq,0.9999726611852431 +lre17_lmtexhdt.sph,ara-arq,0.9955378310817409 +lre17_lnlzbiqv.sph,ara-arq,0.9845486175862881 +lre17_lnwqjgum.sph,ara-arq,0.9994084640832857 +lre17_logsuwkc.sph,ara-ayl,0.9973010083242871 
+lre17_lpdrjcmf.sph,ara-arq,0.9999999987748978 +lre17_lpnxjclp.sph,ara-arq,0.999990551187932 +lre17_lpwlbnvd.sph,ara-aeb,0.9985688096228789 +lre17_lqcxhbgx.flac,ara-aeb,0.9789437284063228 +lre17_lqeynset.sph,ara-arq,0.9946714116424836 +lre17_lqqtwkna.sph,ara-arq,0.9959561849558086 +lre17_lrchzlnf.sph,ara-arq,0.9999964986490049 +lre17_lriptaxa.sph,ara-arq,0.9999861992203515 +lre17_lrmpuslv.sph,ara-arq,0.9999833158908321 +lre17_lsglcrqu.sph,ara-arq,0.9994974702145716 +lre17_ltobvlca.flac,ara-aeb,0.9998321920878662 +lre17_ltobvlca.flac-g726,ara-arq,0.9808235035540288 +lre17_lumlsydt.flac,ara-arq,0.9993847597571562 +lre17_lvwbcjui.sph,ara-arq,0.9999990130410765 +lre17_lwbqplua.flac,ara-arq,0.9985377563185653 +lre17_lyvsulsp.sph,ara-arq,0.9996779798745427 +lre17_lzzfbiwk.sph,ara-arq,0.9999780111773144 +lre17_mazmicwf.flac,ara-arq,0.9761802678092957 +lre17_mazmicwf.flac-g726,ara-arq,0.9999987851548972 +lre17_mcchuzqa.flac,ara-arq,0.9994453172253329 +lre17_mcchuzqa.flac-g726,ara-arq,0.9994722195518764 +lre17_mhelcckx.sph,ara-aeb,0.9921911924278494 +lre17_minmrdvv.flac,ara-arq,0.9999806822091847 +lre17_minmrdvv.flac-g711mu,ara-arq,0.9999798110768492 +lre17_miyeplrp.flac,ara-ayl,0.999967537246669 +lre17_miyeplrp.flac-g722,ara-ayl,0.9999974966675732 +lre17_mjkrjctc.sph,ara-arq,0.9996993211891599 +lre17_mjuhytod.flac-g722,ara-aeb,0.9942683057158186 +lre17_mjxevtqw.flac,ara-arq,0.9789540403486894 +lre17_mllyvrkw.sph,ara-arq,0.9984655364684033 +lre17_mneiaioi.sph,ara-aeb,0.985452121186191 +lre17_mnoswtar.flac,ara-arq,0.9999465324732042 +lre17_mnoswtar.flac-g722,ara-arq,0.9994314827928369 +lre17_moihuogw.sph,ara-arq,0.9999981054273598 +lre17_moohuqbu.flac-opus,ara-arq,0.9946789594259231 +lre17_mpewcntj.sph,ara-arq,0.9999987697239342 +lre17_mtyfveku.sph,ara-arq,0.9829721690668127 +lre17_mvbpdkqz.sph,ara-ayl,0.9948321204607391 +lre17_mxcghtfj.sph,ara-arq,0.9927362055311203 +lre17_mxhoedfe.sph,ara-arq,0.9999999146737504 +lre17_mxmdmamo.sph,ara-aeb,0.988517700201585 +lre17_mxmjurdd.sph,ara-arq,0.9980132953988482 +lre17_mzdpsrvs.sph,ara-aeb,0.992494003405007 +lre17_mzsfsjad.sph,ara-aeb,0.99997586512649 +lre17_naeguqak.sph,ara-arq,0.9999929946428248 +lre17_nblzukhx.flac,ara-arq,0.9999359009222737 +lre17_nblzukhx.flac-gsm,ara-arq,0.9999999966088737 +lre17_ndkkdwgy.sph,ara-arq,0.9975163999653704 +lre17_negphusk.sph,ara-arq,0.9998989889366274 +lre17_nhdlsoit.sph,ara-arq,0.9916584056978099 +lre17_njbwudbl.sph,ara-arq,0.999934592749547 +lre17_njontgtu.sph,ara-arq,0.9794969009896114 +lre17_nkgdldta.sph,ara-arq,0.9916521956821477 +lre17_nkqygxxz.sph,ara-arq,0.9997656446176615 +lre17_nocucjva.sph,ara-arq,0.9995204775364295 +lre17_nojsrnhx.sph,ara-arq,0.999919125620621 +lre17_nowvnwzc.sph,ara-arq,0.995585233402159 +lre17_nqfliycm.sph,ara-arq,0.9999912069728009 +lre17_nqkyimjt.sph,ara-arq,0.9881291130932576 +lre17_nqxowwop.flac,ara-arq,0.9997443038852292 +lre17_nqxowwop.flac-g723_1,ara-arq,0.9914457208775102 +lre17_nrunzxja.flac,ara-arq,0.9993221612062564 +lre17_nrunzxja.flac-g711mu,ara-arq,0.9999566225291738 +lre17_nsiynodu.sph,ara-arq,0.9908214588078317 +lre17_nssuzfbr.sph,ara-arq,0.9999396177844772 +lre17_ntbrwymu.sph,ara-arq,0.9993012725372231 +lre17_nuvzuxee.sph,ara-arq,0.9996041721916568 +lre17_nvgpubxb.sph,ara-arq,0.9759857598176621 +lre17_nxjuqezl.flac,ara-arq,0.9995754800524955 +lre17_nxjuqezl.flac-gsm,ara-arq,0.9793987540104333 +lre17_nxvquxsr.sph,ara-arq,0.9990399807148835 +lre17_nzeyrrcl.sph,ara-arq,0.9999461953593082 +lre17_nzmnjjpc.flac,ara-aeb,0.9847092271434903 
+lre17_obbtvsaj.flac-g711a,ara-arq,0.9999409869224803 +lre17_obkyiehe.sph,ara-arq,0.9998679965082828 +lre17_obrcwlmw.sph,ara-aeb,0.9998778475538475 +lre17_ogwcxkjw.sph,ara-arq,0.9895315802827847 +lre17_oireqedt.sph,ara-arq,0.9917281473076983 +lre17_oirnebxz.flac,ara-arq,0.9967110495563957 +lre17_oirnebxz.flac-opus,ara-aeb,0.9933118074655622 +lre17_oiveluew.sph,ara-arq,0.9968284888503907 +lre17_oizxklej.sph,ara-arq,0.9986908296100067 +lre17_olqpjrwd.sph,ara-ayl,0.985872505893845 +lre17_olwownje.sph,ara-arq,0.9999818242744661 +lre17_onckhujt.sph,ara-aeb,0.9979436467237117 +lre17_onknnaim.sph,ara-aeb,0.9987555260169619 +lre17_opsncnkb.sph,ara-aeb,0.9997592175168953 +lre17_opxoeses.flac-g711mu,ara-arq,0.9895267895164883 +lre17_oqnuceey.flac,ara-arq,0.9818564260274837 +lre17_oqnuceey.flac-opus,ara-arq,0.9960254767681471 +lre17_orthumig.sph,ara-arq,0.9979787737264081 +lre17_ouhrqmvj.sph,ara-arq,0.9999999735219096 +lre17_oukunjzc.flac,ara-arq,0.9999998874015028 +lre17_oukunjzc.flac-g722,ara-arq,0.9999998022867953 +lre17_ouvsypqp.sph,ara-arq,0.9999862194709894 +lre17_ownmyzum.sph,ara-arq,0.9983899224785795 +lre17_owxndsay.sph,ara-arq,0.9991660737491793 +lre17_oxoeettt.sph,ara-aeb,0.994764323060291 +lre17_oxvlijdf.sph,ara-arq,0.9980756971870425 +lre17_oylngzoh.sph,ara-arq,0.9999772205491734 +lre17_pbmuxcky.flac,ara-arq,0.9980374961356401 +lre17_pbmuxcky.flac-opus,ara-arq,0.9999987347640981 +lre17_pdcigndc.sph,ara-arq,0.9956870254382242 +lre17_pfcsmyfp.flac,ara-arq,0.999994210275427 +lre17_pfcsmyfp.flac-opus,ara-arq,0.9995142367581035 +lre17_pfecwivw.flac,ara-arq,0.997995447936321 +lre17_pfecwivw.flac-gsm,ara-arq,0.979556413285578 +lre17_pfenqxed.sph,ara-arq,0.9935592984355501 +lre17_pgqzdpfq.sph,ara-arq,0.9987770018281733 +lre17_phvwlddn.sph,ara-arq,0.9996126414779914 +lre17_piiiaqsg.sph,ara-arq,0.9883006332201746 +lre17_piixpsbr.flac,ara-arq,0.9999739707108446 +lre17_piixpsbr.flac-g722,ara-arq,0.9996474019470863 +lre17_pixqbtbm.flac,ara-arq,0.9949782923210799 +lre17_pixqbtbm.flac-g726,ara-ayl,0.9966265706203424 +lre17_pjfvtjab.sph,ara-arq,0.999885331527543 +lre17_pklmiexr.sph,ara-arq,0.9992720760130763 +lre17_pnlxhqnm.sph,ara-ayl,0.9999929993566911 +lre17_pnrhsfou.flac-g722,ara-arq,0.9999640996706576 +lre17_pnwenjwm.sph,ara-arq,0.9960418034539658 +lre17_poheolla.sph,ara-aeb,0.9985638045139876 +lre17_poysotsv.sph,ara-aeb,0.9856503882631178 +lre17_ppvtutvt.sph,ara-arq,0.9998354077570467 +lre17_pqawpvfb.flac,ara-arq,0.9950780853489194 +lre17_pqawpvfb.flac-g711a,ara-arq,0.9994370686040279 +lre17_pqwwzwxo.sph,ara-arq,0.999990243213515 +lre17_psacvdup.flac,ara-arq,0.9999630353117823 +lre17_psacvdup.flac-opus,ara-arq,0.9994055674097663 +lre17_pslkpzhl.sph,ara-arq,0.9997159742438066 +lre17_pufnzdvd.flac,ara-arq,0.9997950371376702 +lre17_pufnzdvd.flac-gsm,ara-arq,0.9999959634431062 +lre17_pujabbev.sph,ara-arq,0.9994079718102534 +lre17_pvfvlhsq.flac,ara-aeb,0.9999713112925558 +lre17_pvfvlhsq.flac-gsm,ara-arq,0.9999254611931253 +lre17_pwhqsovd.sph,ara-arq,0.9917133657396171 +lre17_pxekbodb.sph,ara-arq,0.9984745978775882 +lre17_qdwsexfm.sph,ara-arq,0.9999974580577462 +lre17_qhiyavse.sph,ara-arq,0.9912788455576231 +lre17_qivtcmgk.sph,ara-arq,0.9836174820047392 +lre17_qjitoyxc.sph,ara-arq,0.9999874042742806 +lre17_qkxouubm.sph,ara-arq,0.9998872491727429 +lre17_qljscllj.sph,ara-aeb,0.9979913152483216 +lre17_qlzldcpe.sph,ara-aeb,0.997053186781475 +lre17_qmcrgdzz.sph,ara-arq,0.9996004571017476 +lre17_qmjbylrs.flac,ara-arq,0.9999120076761361 +lre17_qmjbylrs.flac-g723_1,ara-arq,0.9942971237057362 
+lre17_qmjpvlvg.sph,ara-arq,0.9998030655795183 +lre17_qogybjhz.sph,ara-arq,0.9994621165166646 +lre17_qpntxzjb.sph,ara-aeb,0.9957626204201693 +lre17_qpredbkv.sph,ara-arq,0.9990911968221025 +lre17_qrbdlmjx.sph,ara-arq,0.9999999999999865 +lre17_qrcvlqts.sph,ara-arq,0.9767514676069964 +lre17_qscgrzxe.flac,ara-arq,0.9998709648180928 +lre17_qsewfkyh.sph,ara-arq,0.9999999864987743 +lre17_qstdyztt.flac-g711mu,ara-arq,0.9999759932517555 +lre17_qszrgiyz.sph,ara-arq,0.9999496361715189 +lre17_qtaulytr.sph,ara-arq,0.9770776315818761 +lre17_qudalolg.sph,ara-arq,0.998897750323492 +lre17_qufteqvo.sph,ara-aeb,0.992027129263138 +lre17_qwiyjayz.sph,ara-arq,0.9995498080059056 +lre17_qwvrxfzu.sph,ara-arq,0.9988604816072997 +lre17_qyiarywg.flac,ara-arq,0.9999605279920688 +lre17_qyiarywg.flac-g723_1,ara-arq,0.9999999619112184 +lre17_qyzhxzvj.sph,ara-arq,0.9759510962602079 +lre17_rajrtwbo.sph,ara-aeb,0.9999999999944702 +lre17_rcryqfgn.sph,ara-arq,0.9972776568740012 +lre17_rcueudci.flac,ara-arq,0.9988837735514282 +lre17_rcueudci.flac-gsm,ara-arq,0.9999697760125505 +lre17_reicsaat.sph,ara-arq,0.9997754476127328 +lre17_reyualuk.flac,ara-arq,0.9992844879623304 +lre17_rfwyqutk.sph,ara-arq,0.9997722244477082 +lre17_rggtfbrd.sph,ara-aeb,0.9931773563621665 +lre17_rhepwrug.sph,ara-arq,0.9998787750778266 +lre17_rkocbhzs.sph,ara-arq,0.9960601282813184 +lre17_rlcyzlcy.sph,ara-arq,0.9994472570443922 +lre17_rlpbjbed.sph,ara-arq,0.9942325885969098 +lre17_rlqkwaeh.sph,ara-arq,0.9953431894962037 +lre17_rnveyooi.sph,ara-ayl,0.9996778752622651 +lre17_rnvyrkwg.flac-g723_1,ara-arq,0.9853171747622366 +lre17_rqacreai.sph,ara-arq,0.999207518939918 +lre17_rqlzthlg.sph,ara-aeb,0.989048786309874 +lre17_ruzqcwpn.sph,ara-arq,0.9999863065050799 +lre17_rwvdctfg.flac,ara-aeb,0.9815766373873294 +lre17_rynppewk.flac,ara-arq,0.9999708695439152 +lre17_rypzhghv.flac,ara-arq,0.9947011510267938 +lre17_rypzhghv.flac-g711a,ara-arq,0.9847132293141271 +lre17_sagynpjo.sph,ara-arq,0.999744534125517 +lre17_sbxerjvo.sph,ara-arq,0.999987473908599 +lre17_scfolxob.flac,ara-arq,0.9999713591244429 +lre17_serpsscu.flac,ara-arq,0.9793042704401821 +lre17_serpsscu.flac-g723_1,ara-arq,0.9999130486126522 +lre17_sffusbzg.sph,ara-arq,0.9999268097555194 +lre17_sfjwayps.flac,ara-ayl,0.9946715937173086 +lre17_sfjwayps.flac-g726,ara-arq,0.9982714238405073 +lre17_sgkgyjvk.flac,ara-arq,0.9995433812540649 +lre17_sgkgyjvk.flac-gsm,ara-arq,0.9996437880979923 +lre17_sjnfbigi.sph,ara-arq,0.99999366855751 +lre17_skdclppi.sph,ara-arq,0.9889180838738156 +lre17_smjdgznr.flac,ara-aeb,0.9964248254318828 +lre17_snfzxijz.sph,ara-ayl,0.9827938327458273 +lre17_sofspqyi.sph,ara-arq,0.9999994382673698 +lre17_sqoxhftl.sph,ara-arq,0.9996790538981134 +lre17_stbhhhou.sph,ara-arq,0.9945710415211226 +lre17_stpksvvi.sph,ara-aeb,0.996004757361174 +lre17_stxkelkq.sph,ara-ayl,0.9752463023195366 +lre17_suqttdyg.sph,ara-arq,0.997742051798683 +lre17_susdosey.sph,ara-arq,0.9977231084345539 +lre17_suvxbjhl.sph,ara-arq,0.9986381780682658 +lre17_svetuuie.sph,ara-arq,0.9998479453288084 +lre17_svzozbfk.sph,ara-arq,0.9998881824847226 +lre17_swgrlydv.sph,ara-arq,0.9990560832648376 +lre17_sxgfwork.sph,ara-arq,0.9999763564539524 +lre17_syatmwze.sph,ara-arq,0.9848649335693501 +lre17_syxmxolu.sph,ara-aeb,0.9867936744030255 +lre17_tbbuisna.sph,ara-arq,0.9999805669714006 +lre17_tbplljcp.flac,ara-arq,0.9998129818454303 +lre17_tbplljcp.flac-gsm,ara-arq,0.9999999839340195 +lre17_tcmjqsvf.sph,ara-arq,0.9936464811055075 +lre17_tcvunuvp.sph,ara-aeb,0.9946331881971427 +lre17_tduxpzqq.sph,ara-aeb,0.9996190225252365 
+lre17_teyvymzd.flac-g711mu,ara-ayl,0.9787413632582724 +lre17_tfngvqdf.flac,ara-arq,0.999990203549186 +lre17_tfngvqdf.flac-g726,ara-arq,0.9983466771871533 +lre17_tforvtmc.sph,ara-arq,0.9935448102639823 +lre17_tfxmolis.sph,ara-arq,0.9998286292942293 +lre17_thjcyqwr.flac,ara-arq,0.9994467118163807 +lre17_thjcyqwr.flac-opus,ara-arq,0.9976422823383214 +lre17_thxeccdu.sph,ara-arq,0.9998750920305819 +lre17_ticjhhbi.sph,ara-arq,0.9794445128724558 +lre17_tjcshvrl.sph,ara-arq,0.9999926102290503 +lre17_tjremugr.sph,ara-arq,0.9999991435250514 +lre17_tlutsejs.sph,ara-arq,0.9988207070133517 +lre17_tnrvafxe.sph,ara-arq,0.9944573271724075 +lre17_tnxtgdnc.sph,ara-aeb,0.9942622096810594 +lre17_tolpbvsc.flac,ara-arq,0.9999996570853448 +lre17_tolpbvsc.flac-g711mu,ara-arq,0.9999930938174156 +lre17_totoyxhm.sph,ara-arq,0.999996710305506 +lre17_tqkpkxgu.sph,ara-arq,0.9999960590328173 +lre17_tqmnzgyb.sph,ara-arq,0.9999682738390965 +lre17_tsppppzj.sph,ara-arq,0.9999889892476231 +lre17_tssmuwge.sph,ara-arq,0.9993471439476459 +lre17_ttkmfmkk.sph,ara-arq,0.9961646913300042 +lre17_ttvvzlvt.sph,ara-arq,0.9945500617775027 +lre17_twkrspxj.flac-g711mu,ara-arq,0.9970066534454132 +lre17_tyqxhlrh.sph,ara-arq,0.9947460028171129 +lre17_tzwuzntv.flac,ara-arq,0.9999848694087901 +lre17_tzwuzntv.flac-g711a,ara-arq,0.9999999505751382 +lre17_uawwqpsa.sph,ara-arq,0.9857085643990153 +lre17_ubnnanex.sph,ara-arq,0.9999993559208963 +lre17_ucfvsgyr.flac,ara-arq,0.9961772950424368 +lre17_ucfvsgyr.flac-g711mu,ara-arq,0.9999597640499912 +lre17_ufifckts.flac-gsm,ara-ayl,0.9928071586629514 +lre17_uiyescxr.sph,ara-arq,0.9997891506043249 +lre17_ukkxkxxt.sph,ara-aeb,0.9935449493739165 +lre17_umissmzv.sph,ara-arq,0.9990005119204275 +lre17_unxhwqmy.flac,ara-aeb,0.9963270393303603 +lre17_upseluva.sph,ara-arq,0.9909948021770557 +lre17_upvapoke.sph,ara-arq,0.9930629481999376 +lre17_uqtiiong.sph,ara-arq,0.9999999685612003 +lre17_usdeaflg.sph,ara-arq,0.9857851998633298 +lre17_uszjbbko.sph,ara-arq,0.9999941374675029 +lre17_utjkjjcn.sph,ara-arq,0.9979757853366961 +lre17_utooogzo.sph,ara-ayl,0.9959130999661093 +lre17_uwescwtn.sph,ara-arq,0.9999992993153919 +lre17_uwldzayo.sph,ara-arq,0.9999916059792026 +lre17_uwuytsxe.sph,ara-arq,0.9947486052008054 +lre17_uzxmtvue.sph,ara-arq,0.9968064808522498 +lre17_vaugwmvv.sph,ara-arq,0.9994338833370221 +lre17_vbgmqfuo.flac,ara-arq,0.980847451266026 +lre17_vbjsoyeh.sph,ara-arq,0.9999116642269064 +lre17_vcksyiuy.flac-g711a,ara-aeb,0.9835977136688748 +lre17_vgxwjuno.sph,ara-arq,0.999983609611863 +lre17_vingckxa.flac,ara-arq,0.9986237456025335 +lre17_vjffccpz.sph,ara-arq,0.9999954081046549 +lre17_vjfjqitw.flac-gsm,ara-arq,0.9994035904368442 +lre17_vjtprfjw.flac,ara-ayl,0.9964430683674823 +lre17_vjtprfjw.flac-g726,ara-arq,0.9805106671407414 +lre17_vjvrlhfs.sph,ara-arq,0.9833183398241712 +lre17_vkqxvmtc.sph,ara-arq,0.9994368014427134 +lre17_vmssxzzd.sph,ara-arq,0.9996477418713372 +lre17_vndndpzq.sph,ara-arq,0.9791108970484209 +lre17_vnlvmhpc.flac-g711mu,ara-arq,0.9999365684834217 +lre17_vnxwpwge.sph,ara-aeb,0.9986622760430225 +lre17_vovpsxcd.sph,ara-arq,0.9998378398538086 +lre17_vpossvdt.flac-g711a,ara-arq,0.9999460791991176 +lre17_vrejajcm.sph,ara-arq,0.9776138943860346 +lre17_vswsposp.sph,ara-arq,0.9994020716061514 +lre17_vtigorkv.sph,ara-arq,0.9999623163215805 +lre17_vtkffspm.flac,ara-arq,0.9997898857084506 +lre17_vuznysrk.flac-g711mu,ara-arq,0.9979511877631668 +lre17_vvyqmniq.sph,ara-arq,0.9999515987872877 +lre17_vwijmoke.sph,ara-arq,0.9989975351908933 +lre17_vynkvprp.sph,ara-arq,0.9999845129327151 
+lre17_wagzvxqz.sph,ara-arq,0.9973278031174633 +lre17_wahbanqs.sph,ara-arq,0.9998842310344779 +lre17_wairvblk.sph,ara-arq,0.9987016619787147 +lre17_wesfzmws.sph,ara-arq,0.9908093051797177 +lre17_wfvvkjuv.sph,ara-arq,0.999944678936039 +lre17_wggunlcp.sph,ara-arq,0.9996749635832616 +lre17_wgnbrmfd.sph,ara-aeb,0.980616930826586 +lre17_whjqstnl.sph,ara-ayl,0.999979615699586 +lre17_whqbhubs.sph,ara-arq,0.9898499177391032 +lre17_widuepdg.sph,ara-aeb,0.9817918573044054 +lre17_wkhkxpmr.sph,ara-ayl,0.9998286566004881 +lre17_wmtiighi.sph,ara-arq,0.987786199224793 +lre17_wnevoywa.sph,ara-arq,0.9999869741085323 +lre17_woccwvjw.sph,ara-arq,0.9999711680680078 +lre17_wrwmvkyy.sph,ara-arq,0.9999793167856921 +lre17_wryaaaay.sph,ara-arq,0.9791656416113507 +lre17_wtkatcwm.sph,ara-arq,0.9850427630920813 +lre17_wvgqdrqk.sph,ara-arq,0.9791490717212691 +lre17_wvyabqbx.sph,ara-arq,0.9990947686607856 +lre17_wwypkyea.sph,ara-aeb,0.9892611131137926 +lre17_wxaxnvpq.sph,ara-arq,0.996190338175843 +lre17_wxirsbfe.sph,ara-arq,0.9998855761433262 +lre17_wxwauidm.sph,ara-arq,0.9899615630910984 +lre17_wyjetcgf.sph,ara-arq,0.9999367576307792 +lre17_xaowthgy.sph,ara-arq,0.9983944376668455 +lre17_xdcmpfbl.sph,ara-arq,0.9914795091793974 +lre17_xdhrhgmk.flac,ara-arq,0.9999939811339105 +lre17_xdhrhgmk.flac-opus,ara-arq,0.9999380489585059 +lre17_xepisjpn.sph,ara-arq,0.998123428276411 +lre17_xhpkbvei.sph,ara-aeb,0.9896471029490118 +lre17_xhqfsfkf.sph,ara-arq,0.9999985545272336 +lre17_xkayfgzq.sph,ara-arq,0.9985227970239359 +lre17_xllwincb.sph,ara-arq,0.9999999189534862 +lre17_xlqqxoym.sph,ara-arq,0.9999541071953805 +lre17_xmvmloxn.flac,ara-arq,0.9827789901954631 +lre17_xmvmloxn.flac-g723_1,ara-arq,0.9998467402490113 +lre17_xnyjhsyy.sph,ara-arq,0.9999956069402056 +lre17_xovcjkso.sph,ara-aeb,0.9920305514596128 +lre17_xroveufz.sph,ara-aeb,0.9993682416393447 +lre17_xskjgkzq.sph,ara-arq,0.9970587807073615 +lre17_xsuhxjmz.sph,ara-arq,0.9999908698954791 +lre17_xtwbrgfu.sph,ara-aeb,0.9964922765834566 +lre17_xujatdxg.sph,ara-arq,0.9999544795771792 +lre17_xviuupwl.sph,ara-arq,0.9999998045117351 +lre17_xvxlncwz.sph,ara-arq,0.9999098345731946 +lre17_xyyhmsku.flac,ara-arq,0.998537851661698 +lre17_ybtygbuu.sph,ara-arq,0.9999292474625724 +lre17_yctdihii.sph,ara-arq,0.999999574179243 +lre17_ydmmannh.flac,ara-arq,0.9983920334462054 +lre17_ydmmannh.flac-g723_1,ara-arq,0.9995348938940224 +lre17_yekpxxwc.flac,ara-arq,0.9988877246272224 +lre17_yekpxxwc.flac-g711mu,ara-aeb,0.9923589185563311 +lre17_ygndvzfp.sph,ara-arq,0.9999911328372215 +lre17_yhjzokrv.sph,ara-arq,0.9958319219062072 +lre17_yilroulj.flac-g711a,ara-aeb,0.9795574945355306 +lre17_yivtnzkg.sph,ara-arq,0.9999715099952994 +lre17_yjoblztq.flac,ara-arq,0.9999973207194949 +lre17_yjoblztq.flac-opus,ara-arq,0.9998238058972634 +lre17_ykxiohej.sph,ara-arq,0.9990315895452987 +lre17_ylsidleu.flac-g711a,ara-arq,0.9984473055243461 +lre17_yltydxpy.sph,ara-arq,0.9956478528044228 +lre17_yownwnlt.flac,ara-arq,0.9998812691288554 +lre17_yownwnlt.flac-g722,ara-arq,0.9916894249368101 +lre17_ypetotbw.flac,ara-arq,0.9999949516854025 +lre17_ypetotbw.flac-gsm,ara-aeb,0.9972546803878858 +lre17_yqgdczse.flac,ara-arq,0.9930023849936759 +lre17_yqhtqtnl.flac,ara-arq,0.9993399870284819 +lre17_yqhtqtnl.flac-opus,ara-arq,0.9998349097677763 +lre17_yrzjdbif.sph,ara-aeb,0.9999958037591928 +lre17_ysadxqiw.sph,ara-arq,0.9999106154927021 +lre17_ysdzkrmo.flac-g711a,ara-arq,0.9974528853995988 +lre17_ytgfvwpa.flac,ara-arq,0.9998217502864875 +lre17_ytgfvwpa.flac-opus,ara-arq,0.9998524581773589 
+lre17_yuduwhrd.flac,ara-arq,0.9955028997292512 +lre17_yuoequzk.sph,ara-arq,0.9999099394309094 +lre17_yuxtqtbd.sph,ara-arq,0.9999994359070692 +lre17_yvybpria.sph,ara-arq,0.9973286645664943 +lre17_ywssuzqt.sph,ara-arq,0.9999990761916652 +lre17_yygbpsdg.sph,ara-arq,0.9999490833528133 +lre17_yynyldnq.sph,ara-arq,0.9999228504305794 +lre17_yzbbhyzt.sph,ara-arq,0.9993694348705324 +lre17_yzjlvluy.sph,ara-arq,0.9996272929446929 +lre17_zaopfwhd.flac,ara-arq,0.9904595900538987 +lre17_zarwpotk.sph,ara-arq,0.9999289722738884 +lre17_zbolnsoz.sph,ara-arq,0.9999999973417777 +lre17_zcjklnfe.sph,ara-arq,0.9996813639473103 +lre17_zcxzxqos.sph,ara-arq,0.9999489746159186 +lre17_zdhatipt.flac,ara-ayl,0.9990280980242568 +lre17_zdhatipt.flac-g726,ara-aeb,0.9996834792362927 +lre17_zfajxywc.sph,ara-arq,0.9999934420544287 +lre17_zgdxpveq.flac-g711a,ara-arq,0.9999999408428903 +lre17_zgiksrvx.flac-g711mu,ara-arq,0.9993189757834657 +lre17_zhdbyfcw.sph,ara-arq,0.9997585048527508 +lre17_zhdfyrxw.sph,ara-arq,0.999148999070484 +lre17_zilbjisa.flac,ara-ayl,0.9999999857011468 +lre17_zilbjisa.flac-gsm,ara-ayl,0.9998704515922084 +lre17_zkdjfgbp.sph,ara-aeb,0.9951005308493373 +lre17_zmebjusq.sph,ara-arq,0.9993030221446528 +lre17_zmodeuem.sph,ara-arq,0.9999743212165114 +lre17_zmyziuxc.flac,ara-aeb,0.9999827977872252 +lre17_zmyziuxc.flac-g711a,ara-aeb,0.9950355679983685 +lre17_zpjrydvx.sph,ara-aeb,0.9994743369849513 +lre17_zrnsvuzf.sph,ara-arq,0.994174882388934 +lre17_zruejjuh.flac,ara-arq,0.9992671462220715 +lre17_zsrybjvn.sph,ara-arq,0.9993004483160852 +lre17_zvvdwwpv.flac,ara-aeb,0.9997026564129534 +lre17_zvvdwwpv.flac-g726,ara-aeb,0.9999912235110034 +lre17_zzkdjfea.sph,ara-arq,0.9842091321709953 diff --git a/egs/lre22/fixed.v1.8k/run_001_prepare_data.sh b/egs/lre22/fixed.v1.8k/run_001_prepare_data.sh new file mode 100755 index 00000000..60eb6891 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/run_001_prepare_data.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +config_file=default_config.sh +stage=1 + +. parse_options.sh || exit 1; +. 
datapath.sh
+
+
+if [ $stage -le 1 ];then
+  # Prepares voxlingua 107 for training
+  hyp_utils/conda_env.sh \
+    local/prepare_voxlingua107.py \
+    --corpus-dir $voxlingua_root \
+    --output-dir data/voxlingua107 \
+    --remove-langs en-en es-es ar-ar pt-pt \
+    --map-langs-to-lre-codes \
+    --target-fs 8000
+
+fi
+
+if [ $stage -le 2 ];then
+  # Prepare LRE17 Training data
+  hyp_utils/conda_env.sh \
+    local/prepare_lre17.py \
+    --corpus-dir $lre17_train_root \
+    --output-dir data/lre17_train \
+    --subset train \
+    --target-fs 8000
+
+  hyp_utils/conda_env.sh \
+    local/prepare_lre17.py \
+    --corpus-dir $lre17_train_root \
+    --output-dir data/lre17_dev_cts \
+    --subset dev \
+    --source mls14 \
+    --target-fs 8000
+
+  hyp_utils/conda_env.sh \
+    local/prepare_lre17.py \
+    --corpus-dir $lre17_train_root \
+    --output-dir data/lre17_dev_afv \
+    --subset dev \
+    --source vast \
+    --target-fs 8000
+
+  hyp_utils/conda_env.sh \
+    local/prepare_lre17.py \
+    --corpus-dir $lre17_eval_root \
+    --output-dir data/lre17_eval_cts \
+    --subset eval \
+    --source mls14 \
+    --target-fs 8000
+
+  hyp_utils/conda_env.sh \
+    local/prepare_lre17.py \
+    --corpus-dir $lre17_eval_root \
+    --output-dir data/lre17_eval_afv \
+    --subset eval \
+    --source vast \
+    --target-fs 8000
+
+fi
+
+if [ $stage -le 3 ];then
+  hyp_utils/conda_env.sh \
+    local/prepare_lre22_dev.py \
+    --corpus-dir $lre22_dev_root \
+    --output-dir data/lre22_dev \
+    --target-fs 8000
+
+fi
+
+if [ $stage -le 4 ];then
+  hyp_utils/conda_env.sh \
+    local/prepare_lre22_eval.py \
+    --corpus-dir $lre22_eval_root \
+    --output-dir data/lre22_eval \
+    --target-fs 8000
+
+fi
+
+if [ $stage -le 5 ];then
+  local/download_lre22_scorer.sh
+  local/download_focal.sh
+fi
diff --git a/egs/lre22/fixed.v1.8k/run_002_compute_evad.sh b/egs/lre22/fixed.v1.8k/run_002_compute_evad.sh
new file mode 100755
index 00000000..676ed335
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/run_002_compute_evad.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Copyright
+#  2018  Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+nodes=b1
+storage_name=$(date +'%m_%d_%H_%M')
+vaddir=`pwd`/exp/vad_e
+
+stage=1
+config_file=default_config.sh
+. parse_options.sh || exit 1;
+. $config_file
+
+
+if [ $stage -le 1 ]; then
+  # Prepare to distribute data over multiple machines
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $vaddir/storage ]; then
+    dir_name=$USER/hyp-data/lre22-fixed-v1.8k-$storage_name/vad/storage
+    if [ "$nodes" == "b0" ];then
+      utils/create_split_dir.pl \
+        /export/b{04,05,06,07}/$dir_name $vaddir/storage
+    elif [ "$nodes" == "b1" ];then
+      utils/create_split_dir.pl \
+        /export/b1{0,1,2,3,4,5,6,7,8,9}/$dir_name $vaddir/storage
+    elif [ "$nodes" == "c0" ];then
+      utils/create_split_dir.pl \
+        /export/c{06,07,08,09}/$dir_name $vaddir/storage
+    elif [ "$nodes" == "fs01" ];then
+      utils/create_split_dir.pl \
+        /export/fs01/$dir_name $vaddir/storage
+    else
+      echo "we do not distribute data over multiple machines"
+    fi
+  fi
+fi
+
+# VAD Train/Test Datasets
+if [ $stage -le 2 ];then
+  for name in voxlingua107 \
+      lre17_train \
+      lre17_dev_cts lre17_dev_afv \
+      lre17_eval_cts lre17_eval_afv \
+      lre22_dev lre22_eval
+  do
+    num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}')
+    nj=$(($num_spk < 40 ? 
$num_spk:40)) + hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ + --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ + data/${name} exp/make_vad/$name $vaddir + utils/fix_data_dir.sh data/${name} + done +fi + diff --git a/egs/lre22/fixed.v1.8k/run_003_prepare_noises_rirs.sh b/egs/lre22/fixed.v1.8k/run_003_prepare_noises_rirs.sh new file mode 100755 index 00000000..638143f0 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/run_003_prepare_noises_rirs.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +# We prepare the noise files and RIR for online speech augmentation +if [ $stage -le 1 ]; then + + # Prepare the MUSAN corpus, which consists of music, speech, and noise + # suitable for augmentation. + local/make_musan.sh $musan_root 8 data + + for name in musan_noise musan_music + do + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ + --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_proc_audio exp/${name}_proc_audio + utils/fix_data_dir.sh data/${name}_proc_audio + done + +fi + +if [ $stage -le 2 ]; then + + # Create Babble noise from MUSAN speech files + for name in musan_speech + do + steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ + --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_babble exp/${name}_babble + # utils/fix_data_dir.sh data/${name}_babble + done +fi + +if [ $stage -le 3 ]; then + if [ ! -d "RIRS_NOISES" ]; then + if [ -d ../v1.16k/RIRS_NOISES ];then + ln -s ../v1.16k/RIRS_NOISES + else + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + fi + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/smallroom 8 data/rirs_smallroom + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/mediumroom 8 data/rirs_mediumroom + local/make_rirs_data.sh RIRS_NOISES/real_rirs_isotropic_noises 8 data/rirs_real + for rirs in rirs_smallroom rirs_mediumroom rirs_real + do + #pack all rirs in h5 files + steps_xvec/pack_rirs_for_nnet_train.sh data/$rirs data/$rirs exp/rirs/$rirs + done + +fi + + diff --git a/egs/lre22/fixed.v1.8k/run_004_apply_codecs.sh b/egs/lre22/fixed.v1.8k/run_004_apply_codecs.sh new file mode 100755 index 00000000..afd6a8ed --- /dev/null +++ b/egs/lre22/fixed.v1.8k/run_004_apply_codecs.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file + +if [ $stage -le 1 ];then + + for data in voxlingua107 lre17_dev_afv lre17_eval_afv + do + hyp_utils/conda_env.sh \ + local/apply_tel_codecs_to_kaldi_datadir.py \ + --input-dir data/$data \ + --output-dir data/${data}_codecs + done + +fi diff --git a/egs/lre22/fixed.v1.8k/run_010_prepare_xvec_train_data.sh b/egs/lre22/fixed.v1.8k/run_010_prepare_xvec_train_data.sh new file mode 100755 index 00000000..fbff4a02 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/run_010_prepare_xvec_train_data.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. 
./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. $config_file + +if [ $stage -le 1 ]; then + # This script preprocess audio for x-vector training + for name in voxlingua107_codecs \ + lre17_train \ + lre17_{dev,eval}_{cts,afv,afv_codecs} + do + steps_xvec/preprocess_audios_for_nnet_train.sh \ + --nj 40 --cmd "$train_cmd" \ + --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') --use-bin-vad true \ + data/${name} data/${name}_proc_audio_no_sil exp/${name}_proc_audio_no_sil + utils/fix_data_dir.sh data/${name}_proc_audio_no_sil + done +fi + +if [ $stage -le 2 ];then + utils/combine_data.sh \ + data/lre17_proc_audio_no_sil \ + data/lre17_train_proc_audio_no_sil \ + data/lre17_{dev,eval}_{cts,afv,afv_codecs}_proc_audio_no_sil +fi + +if [ $stage -le 3 ]; then + # Now, we remove files with less than 3s + hyp_utils/remove_short_audios.sh --min-len 3 data/voxlingua107_codecs_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/lre17_proc_audio_no_sil +fi + +if [ $stage -le 4 ];then + # merge voxlingua and lre17 + utils/combine_data.sh \ + data/voxlingua107_lre17_proc_audio_no_sil \ + data/voxlingua107_codecs_proc_audio_no_sil \ + data/lre17_proc_audio_no_sil +fi + +if [ $stage -le 5 ]; then + for name in lre17_proc_audio_no_sil voxlingua107_lre17_proc_audio_no_sil + do + hyp_utils/conda_env.sh \ + local/split_segments_train_val.py \ + --segments-file data/$name/utt2lang \ + --recordings-file data/$name/wav.scp \ + --durations-file data/$name/utt2dur \ + --val-percent 2. \ + --output-dir data/$name/train_val_split + done +fi + +if [ $stage -le 6 ]; then + for name in voxlingua107_lre17_proc_audio_no_sil + do + hyp_utils/conda_env.sh \ + local/split_segments_train_val.py \ + --segments-file data/$name/utt2lang \ + --recordings-file data/$name/wav.scp \ + --durations-file data/$name/utt2dur \ + --remove-langs en-en es-es ar-ar pt-pt \ + --val-percent 2. \ + --ara-ary-seg-file resources/lre17_ara-ary/segs_ara-ary.csv \ + --output-dir data/$name/train_val_split_noary + done + mkdir data/voxlingua107_lre17_noary_proc_audio_no_sil + cd data/voxlingua107_lre17_noary_proc_audio_no_sil + ln -s ../voxlingua107_lre17_proc_audio_no_sil/wav.scp + ln -s ../voxlingua107_lre17_proc_audio_no_sil/train_val_split_noary train_val_split + cd - + +fi + +if [ $stage -le 7 ]; then + awk 'BEGIN{ +adapt_langs_list="ara-acm ara-aeb ara-apc ara-arq ara-arz ara-ayl eng-gbr eng-usg por-brz zho-cmn zho-nan am-am sn-sn fra-mix haw-haw ia-ia ceb-ceb tl-tl sa-sa su-su te-te yo-yo sw-sw war-war km-km tr-tr gn-gn ha-ha ln-ln mg-mg"; +nf=split(adapt_langs_list, f, " "); +for(i=1;i<=nf;i++){ adapt_langs[f[i]]=1;}; +FS=","; OFS=","; +getline; print $0; +} +{if ($1 in adapt_langs) { $3="1."} else{ $3="0.01"}; print $0}' \ + data/voxlingua107_lre17_noary_proc_audio_no_sil/train_val_split/class_file.csv > \ + data/voxlingua107_lre17_noary_proc_audio_no_sil/train_val_split/class_file_adapt_1.csv +fi diff --git a/egs/lre22/fixed.v1.8k/run_011_train_xvector.sh b/egs/lre22/fixed.v1.8k/run_011_train_xvector.sh new file mode 100755 index 00000000..c67c8741 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/run_011_train_xvector.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh
+
+list_dir=data/${nnet_data}_proc_audio_no_sil
+
+# add extra args from the command-line arguments
+if [ -n "$num_workers" ];then
+  extra_args="--data.train.data_loader.num-workers $num_workers"
+fi
+if [ "$use_tb" == "true" ];then
+  extra_args="$extra_args --trainer.use-tensorboard"
+fi
+if [ "$use_wandb" == "true" ];then
+  extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project lre22-fixed-v1.8k --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)"
+fi
+
+if [ "$interactive" == "true" ];then
+  export cuda_cmd=run.pl
+fi
+
+# Network Training
+if [ $stage -le 1 ]; then
+  mkdir -p $nnet_s1_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s1_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    train_xvector_from_wav.py $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \
+    --data.train.dataset.recordings-file $list_dir/wav.scp \
+    --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \
+    --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \
+    --data.val.dataset.recordings-file $list_dir/wav.scp \
+    --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \
+    --trainer.exp-path $nnet_s1_dir \
+    --num-gpus $ngpu
+
+fi
+
+# Class-balanced Fine-tuning
+if [ $stage -le 2 ]; then
+  if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)"
+  fi
+  mkdir -p $nnet_s2_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s2_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \
+    --data.train.dataset.recordings-file $list_dir/wav.scp \
+    --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \
+    --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \
+    --data.val.dataset.recordings-file $list_dir/wav.scp \
+    --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \
+    --in-model-file $nnet_s1 \
+    --trainer.exp-path $nnet_s2_dir \
+    --num-gpus $ngpu
+
+fi
+exit
+
+# Class-balanced + hard prototype mining Fine-tuning
+if [ $stage -le 3 ]; then
+  if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)"
+  fi
+  mkdir -p $nnet_s3_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s3_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \
+    --data.train.dataset.recordings-file $list_dir/wav.scp \
+    --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \
+    --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \
+    --data.val.dataset.recordings-file $list_dir/wav.scp \
+    --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \
+    --in-model-file $nnet_s2 \
+    --trainer.exp-path $nnet_s3_dir \
+    --num-gpus $ngpu
+
+fi
+
+exit
+
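+# The stages below continue the fine-tuning cascade: each one starts from the
+# checkpoint saved by the previous stage (passed via --in-model-file), and
+# stage 6 switches to class_file_adapt_1.csv, which (see run_010 stage 7)
+# weights the LRE22 target/adaptation languages with 1.0 and all other
+# languages with 0.01. The exit statements above keep the later stages
+# disabled until they are needed.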
+# Fine-tuning
+if [ $stage -le 4 ]; then
+  if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.wandb.name $nnet_s4_name.$(date -Iminutes)"
+  fi
+  mkdir -p $nnet_s4_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s4_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s4_base_cfg $nnet_s4_args $extra_args \
+    --data.train.dataset.recordings-file $list_dir/wav.scp \
+    --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \
+    --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \
+    --data.val.dataset.recordings-file $list_dir/wav.scp \
+    --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \
+    --in-model-file $nnet_s3 \
+    --trainer.exp-path $nnet_s4_dir \
+    --num-gpus $ngpu
+
+fi
+
+
+# Fine-tuning
+if [ $stage -le 5 ]; then
+  if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.wandb.name $nnet_s5_name.$(date -Iminutes)"
+  fi
+  mkdir -p $nnet_s5_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s5_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s5_base_cfg $nnet_s5_args $extra_args \
+    --data.train.dataset.recordings-file $list_dir/wav.scp \
+    --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \
+    --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \
+    --data.val.dataset.recordings-file $list_dir/wav.scp \
+    --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \
+    --in-model-file $nnet_s4 \
+    --trainer.exp-path $nnet_s5_dir \
+    --num-gpus $ngpu
+
+fi
+
+# Fine-tuning
+if [ $stage -le 6 ]; then
+  if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.wandb.name $nnet_s6_name.$(date -Iminutes)"
+  fi
+  mkdir -p $nnet_s6_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s6_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s6_base_cfg $nnet_s6_args $extra_args \
+    --data.train.dataset.recordings-file $list_dir/wav.scp \
+    --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \
+    --data.train.dataset.class-files $list_dir/train_val_split/class_file_adapt_1.csv \
+    --data.val.dataset.recordings-file $list_dir/wav.scp \
+    --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \
+    --in-model-file $nnet_s5 \
+    --trainer.exp-path $nnet_s6_dir \
+    --num-gpus $ngpu
+
+fi
+
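+# Usage sketch (flags follow Kaldi's parse_options.sh convention; values are
+# illustrative):
+#   ./run_011_train_xvector.sh --stage 2 --ngpu 2 --use-wandb true
+# resumes at the class-balanced fine-tuning stage on 2 GPUs.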
diff --git a/egs/lre22/fixed.v1.8k/run_030_extract_xvectors.sh b/egs/lre22/fixed.v1.8k/run_030_extract_xvectors.sh
new file mode 100755
index 00000000..dc760d5b
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/run_030_extract_xvectors.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+# Copyright
+#  2020  Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+stage=2
+nnet_stage=2
+config_file=default_config.sh
+use_gpu=false
+do_tsne=true
+split_dev=false
+xvec_chunk_length=12800
+. parse_options.sh || exit 1;
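+# nnet_stage selects which run_011 checkpoint to extract with; the nnet_s*
+# and nnet_s*_name variables below are expected to be defined in $config_file.
+. 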
$config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" + xvec_cmd="$cuda_eval_cmd --mem 4G" +else + xvec_cmd="$train_cmd --mem 12G" +fi + +if [ $nnet_stages -lt $nnet_stage ];then + nnet_stage=$nnet_stages +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + +xvector_dir=exp/xvectors/$nnet_name + +# if [ $stage -le 1 ]; then +# # Extract xvectors for training +# for name in lre17_proc_audio_no_sil voxlingua107_codecs_proc_audio_no_sil +# do +# steps_xvec/extract_xvectors_from_wav.sh \ +# --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ +# --use-bin-vad false \ +# --random-utt-length true --min-utt-length 300 --max-utt-length 3000 \ +# --feat-config $feat_config \ +# $nnet data/${name} \ +# $xvector_dir/${name} +# done +# fi + +if [ $stage -le 2 ]; then + # Extract xvectors for training + for name in lre22_dev + do + steps_xvec/extract_xvectors_from_wav.sh \ + --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ + --use-bin-vad true --num-augs 10 --aug-config conf/reverb_noise_aug.yaml \ + --random-utt-length true --min-utt-length 300 --max-utt-length 3000 \ + --feat-config $feat_config \ + $nnet data/${name} \ + $xvector_dir/${name}_aug \ + data/${name}_aug + done +fi + + +if [ $stage -le 3 ]; then + # Extracts x-vectors for dev and eval + for name in lre22_dev lre22_eval + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 100 ? $num_spk:100)) + steps_xvec/extract_xvectors_from_wav.sh \ + --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ + --feat-config $feat_config \ + $nnet data/$name \ + $xvector_dir/$name + done +fi + +if [ $stage -le 4 ]; then + for name in lre22_dev + do + if [ "$do_tsne" == "true" ] || [ "$split_dev" == "true" ];then + $train_cmd \ + $xvector_dir/$name/tsne/tsne.log \ + hyp_utils/conda_env.sh \ + plot_embedding_tsne.py \ + --train-list data/$name/utt2lang \ + --train-v-file scp:$xvector_dir/$name/xvector.scp \ + --output-dir $xvector_dir/$name/tsne \ + --pca-var-r 0.975 \ + --lnorm \ + --prob-plot 1. \ + --tsne.metric cosine \ + --tsne.early-exaggeration 12 --tsne.perplexity 30 + + $train_cmd \ + $xvector_dir/$name/tsne_per_class/tsne.log \ + hyp_utils/conda_env.sh \ + plot_embedding_tsne_per_class.py \ + --train-list data/$name/utt2lang \ + --train-v-file scp:$xvector_dir/$name/xvector.scp \ + --output-dir $xvector_dir/$name/tsne_per_class \ + --pca-var-r 0.975 \ + --lnorm \ + --prob-plot 1. \ + --tsne.metric cosine \ + --tsne.early-exaggeration 12 --tsne.perplexity 30 \ + --do-ahc --cluster-tsne --ahc-thr -5 + + if [ "$split_dev" == "true" ];then + hyp_utils/conda_env.sh \ + local/split_dev.py \ + --segs-file $xvector_dir/$name/tsne_per_class/segments.csv \ + --output-dir ./resources/dev_splits \ + --num-folds 2 + + # delete the split data dirs so they are regenerated later + rm -rf data/lre22_dev_p{1,2} + + fi + fi + done +fi + +if [ $stage -le 5 ]; then + if [ ! 
-d data/lre22_dev_p1 ];then
+    awk -F "," '$1!="id" { print $1}' \
+      ./resources/dev_splits/fold_0/train_segments.csv \
+      > p1.lst
+    awk -F "," '$1!="id" { print $1}' \
+      ./resources/dev_splits/fold_0/test_segments.csv \
+      > p2.lst
+
+    for p in p1 p2
+    do
+      utils/subset_data_dir.sh \
+        --utt-list $p.lst \
+        data/lre22_dev data/lre22_dev_$p
+    done
+  fi
+fi
+
+if [ $stage -le 6 ]; then
+  if [ -d data/lre22_dev_aug ] && [ ! -d data/lre22_dev_aug_p1 ];then
+    awk -v fsegs=./resources/dev_splits/fold_0/train_segments.csv '
+BEGIN{FS=",";
+getline;
+while(getline < fsegs)
+{
+  segs[$1]=1;
+}
+FS=" ";
+}
+{ if($2 in segs){ print $1}}' data/lre22_dev_aug/augm2clean \
+      > p1.lst
+
+    awk -v fsegs=./resources/dev_splits/fold_0/test_segments.csv '
+BEGIN{FS=",";
+getline;
+while(getline < fsegs)
+{
+  segs[$1]=1;
+}
+FS=" ";
+}
+{ if($2 in segs){ print $1}}' data/lre22_dev_aug/augm2clean \
+      > p2.lst
+
+    for p in p1 p2
+    do
+      utils/subset_data_dir.sh \
+        --utt-list $p.lst \
+        data/lre22_dev_aug data/lre22_dev_aug_$p
+    done
+  fi
+fi
+
+if [ $stage -le 7 ];then
+  if [ -f $xvector_dir/lre22_dev_aug/xvector.scp ];then
+    mkdir -p $xvector_dir/lre22_dev_aug_clean
+    cat $xvector_dir/lre22_dev/xvector.scp \
+      $xvector_dir/lre22_dev_aug/xvector.scp \
+      > $xvector_dir/lre22_dev_aug_clean/xvector.scp
+
+    for p in "" _p1 _p2
+    do
+      if [ ! -d data/lre22_dev_aug_clean$p ]; then
+        utils/combine_data.sh \
+          data/lre22_dev_aug_clean$p \
+          data/lre22_dev$p \
+          data/lre22_dev_aug$p
+      fi
+    done
+  fi
+fi
+
+exit
diff --git a/egs/lre22/fixed.v1.8k/run_040_be_final.sh b/egs/lre22/fixed.v1.8k/run_040_be_final.sh
new file mode 100755
index 00000000..fe5b6f18
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/run_040_be_final.sh
@@ -0,0 +1,434 @@
+#!/bin/bash
+# Copyright
+#  2020  Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+stage=1
+nnet_stage=2
+config_file=default_config.sh
+. parse_options.sh || exit 1;
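+# Back-end protocol: the LRE22 dev set is split into two folds (p1/p2); a
+# back-end trained on one fold scores the other, the two half-scores are
+# merged, and the calibration is trained on the merged dev scores. That
+# calibration is then applied to the scores of the back-end trained on the
+# full dev set to produce the final dev and eval results.
+. 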
$config_file + +if [ $nnet_stages -lt $nnet_stage ];then + nnet_stage=$nnet_stages +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +fi + +xvector_dir=exp/xvectors/$nnet_name +be_base_dir=exp/be/$nnet_name +score_base_dir=exp/scores/$nnet_name + +if [ $stage -le 1 ];then + for r in 1 #0.9999 0.999 #0.99 0.975 0.95 + do + be_name=pca${r}_cw_lnorm_lgbe_lre22_aug + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd \ + $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --pca.pca-var-r $r \ + --do-lnorm --whiten \ + --output-dir $be_dir_p + + $train_cmd \ + ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + + + ) & + + done + + ( + $train_cmd \ + $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --pca.pca-var-r $r \ + --do-lnorm --whiten \ + --output-dir $be_dir + + $train_cmd \ + ${score_dir}_p12/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd \ + ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + local/score_lre22.sh dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + ) & + + + done + wait + +fi + +exit +# Back-ends 
below over-fitted + +if [ $stage -le 2 ];then + for r in 1 + do + for penalty in l2 #l1 + do + for c in 1 #0.1 1 + do + for ary_thr in 0.975 #0.85 0.7 #0.99 0.95 0.9 #15 ##1 5 10 20 + do + be_name=pca${r}_cw_lnorm_lsvm_${penalty}_c${c}_sqhinge_lre22_aug_lre17_aryt${ary_thr} + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd \ + $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v3.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.penalty $penalty --svm.c $c --svm.dual false \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir_p + + $train_cmd \ + ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + ) & + done + ( + $train_cmd \ + $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v3.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.penalty $penalty --svm.c $c --svm.dual false \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir + + $train_cmd \ + ${score_dir}/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd \ + ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + local/score_lre22.sh \ + dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh \ + dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + ) & + done + done + done + done + wait + +fi + +if [ 
$stage -le 3 ];then + for r in 1 # 0.9999 0.99 0.975 0.95 0.9 0.8 + do + for shrinking in true #false + do + for c in 1 10 #0.1 1 10 #0.01 0.1 1 10 # 0.0001 + do + for vl in false #true #false + do + if [ "$vl" == "true" ];then + do_vl="--do-vl" + else + do_vl="--no_do-vl" + fi + ary_thr=0.975 + be_name=pca${r}_cw_lnorm_gsvm_shrinking_${shrinking}_c${c}_lre17_aryt${ary_thr}_vl${vl}_aug_clean + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + #score_dir=$score_base_dir/${be_name}_logpost + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --voxlingua-v-file scp:$xvector_dir/voxlingua107_codecs_proc_audio_no_sil/xvector.scp \ + --voxlingua-list data/voxlingua107_codecs_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.shrinking $shrinking --svm.c $c --svm.break_ties false --svm.max-iter 500\ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir_p \ + --do-lre17 $do_vl + + $train_cmd ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --svm.eval-type cat-log-post \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + ) & + done + ( + $train_cmd $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --voxlingua-v-file scp:$xvector_dir/voxlingua107_codecs_proc_audio_no_sil/xvector.scp \ + --voxlingua-list data/voxlingua107_codecs_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.shrinking $shrinking --svm.c $c --svm.break_ties false --svm.max-iter 500 \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir \ + --do-lre17 $do_vl + + $train_cmd ${score_dir}/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --svm.eval-type cat-log-post \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --svm.eval-type cat-log-post \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + 
local/score_lre22.sh \ + dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh \ + dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + + ) & + done + done + done + done + wait + +fi diff --git a/egs/lre22/fixed.v1.8k/run_050_fusion_v1.sh b/egs/lre22/fixed.v1.8k/run_050_fusion_v1.sh new file mode 100755 index 00000000..ffe3d6c6 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/run_050_fusion_v1.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +score_dir_0=exp/scores +nnet_1=fbank64_stmn_ecapatdnn2048x4_v1.0.s2 +nnet_2=fbank64_stmn_fwseres2net50s8_v1.0.s2 +be_1=pca1_cw_lnorm_lgbe_lre22_aug +score_dirs="$score_dir_0/$nnet_1/$be_1 +$score_dir_0/$nnet_2/$be_1" + +train_score_dirs=$(echo $score_dirs | awk '{ for(i=1;i<=NF;i++){ $i=$i"_p12/cal_v1" }; print $0}') +test_score_dirs=$(echo $score_dirs | awk '{ for(i=1;i<=NF;i++){ $i=$i"/cal_v1" }; print $0}') + +output_dir=exp/fusion/fus_v1.0 + +local/train_fusion_lre22.sh "$train_score_dirs" $output_dir/train +local/score_lre22.sh \ + dev \ + ${output_dir}/train/lre22_dev_scores.tsv \ + ${output_dir}/train/lre22_dev_results + +local/eval_fusion_lre22.sh "$test_score_dirs" $output_dir/train/fus.mat $output_dir/test + +local/score_lre22.sh \ + dev \ + ${output_dir}/test/lre22_dev_scores.tsv \ + ${output_dir}/test/lre22_dev_results + +local/score_lre22.sh eval \ + ${output_dir}/test/lre22_eval_scores.tsv \ + ${output_dir}/test/lre22_eval_results + + + + + + diff --git a/egs/lre22/fixed.v1.8k/steps b/egs/lre22/fixed.v1.8k/steps new file mode 120000 index 00000000..aede39fe --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps @@ -0,0 +1 @@ +hyp_utils/kaldi/steps \ No newline at end of file diff --git a/egs/lre22/fixed.v1.8k/steps_be/eval_be_v1.py b/egs/lre22/fixed.v1.8k/steps_be/eval_be_v1.py new file mode 100755 index 00000000..85fee18c --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/eval_be_v1.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.helpers import VectorClassReader as VCR +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("test acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, 
normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels) + + +def train_be( + v_file, + trial_list, + class_name, + has_labels, + gbe, + model_dir, + score_file, + verbose, +): + config_logger(verbose) + model_dir = Path(model_dir) + output_dir = Path(score_file).parent + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + segs = SegmentSet.load(trial_list) + reader = DRF.create(v_file) + x = reader.read(segs["id"], squeeze=True) + del reader + logging.info("loaded %d samples", x.shape[0]) + + trans_file = model_dir / "transforms.h5" + if trans_file.is_file(): + logging.info("loading transform file %s", trans_file) + trans = TransformList.load(trans_file) + logging.info("applies transform") + x = trans(x) + + gbe_file = model_dir / "model_gbe.h5" + logging.info("loading GBE file %s", gbe_file) + gbe_model = GBE.load(gbe_file) + logging.info("GBE args=%s", str(gbe)) + logging.info("evals GBE") + scores = gbe_model(x, **gbe) + + if has_labels: + class_ids = segs[class_name] + y_true = np.asarray([gbe_model.labels.index(l) for l in class_ids]) + # labels, y_true = np.unique(class_ids, return_inverse=True) + y_pred = np.argmax(scores, axis=-1) + compute_metrics(y_true, y_pred, gbe_model.labels) + + logging.info("Saving scores to %s", score_file) + score_table = {"segmentid": segs["id"]} + for i, key in enumerate(gbe_model.labels): + score_table[key] = scores[:, i] + + score_table = pd.DataFrame(score_table) + score_table.to_csv(score_file, sep="\t", index=False) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Evals linear GBE", + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--trial-list", required=True) + GBE.add_eval_args(parser, prefix="gbe") + parser.add_argument("--class-name", default="class_id") + parser.add_argument("--has-labels", default=False, action=ActionYesNo) + parser.add_argument("--model-dir", required=True) + parser.add_argument("--score-file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/eval_be_v2.py b/egs/lre22/fixed.v1.8k/steps_be/eval_be_v2.py new file mode 100755 index 00000000..78b50935 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/eval_be_v2.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.helpers import VectorClassReader as VCR +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import LinearSVMC as SVM +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("test acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion 
matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels, fmt=".2f") + + +def train_be( + v_file, + trial_list, + class_name, + has_labels, + svm, + model_dir, + score_file, + verbose, +): + config_logger(verbose) + model_dir = Path(model_dir) + output_dir = Path(score_file).parent + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + segs = SegmentSet.load(trial_list) + reader = DRF.create(v_file) + x = reader.read(segs["id"], squeeze=True) + del reader + logging.info("loaded %d samples", x.shape[0]) + + trans_file = model_dir / "transforms.h5" + if trans_file.is_file(): + logging.info("loading transform file %s", trans_file) + trans = TransformList.load(trans_file) + logging.info("applies transform") + x = trans(x) + + svm_file = model_dir / "model_svm.h5" + logging.info("loading SVM file %s", svm_file) + svm_model = SVM.load(svm_file) + logging.info("SVM args=%s", str(svm)) + logging.info("evals SVM") + scores = svm_model(x, **svm) + + if has_labels: + class_ids = segs[class_name] + y_true = np.asarray([svm_model.labels.index(l) for l in class_ids]) + # labels, y_true = np.unique(class_ids, return_inverse=True) + y_pred = np.argmax(scores, axis=-1) + compute_metrics(y_true, y_pred, svm_model.labels) + + logging.info("Saving scores to %s", score_file) + score_table = {"segmentid": segs["id"]} + for i, key in enumerate(svm_model.labels): + score_table[key] = scores[:, i] + + score_table = pd.DataFrame(score_table) + score_table.to_csv(score_file, sep="\t", index=False) + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Evals linear SVM", + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--trial-list", required=True) + SVM.add_eval_args(parser, prefix="svm") + parser.add_argument("--class-name", default="class_id") + parser.add_argument("--has-labels", default=False, action=ActionYesNo) + parser.add_argument("--model-dir", required=True) + parser.add_argument("--score-file", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/eval_be_v5.py b/egs/lre22/fixed.v1.8k/steps_be/eval_be_v5.py new file mode 100755 index 00000000..ad11a667 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/eval_be_v5.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np +import pandas as pd + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.helpers import VectorClassReader as VCR +from hyperion.np.transforms import TransformList +from hyperion.np.classifiers import GaussianSVMC as SVM +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("test 
acc: %.2f %%", acc * 100)
+    logging.info("non-normalized confusion matrix:")
+    label_idxs = [i for i in range(len(labels))]
+    C = compute_confusion_matrix(y_true, y_pred, label_idxs, normalize=False)
+    print_confusion_matrix(C, labels)
+    logging.info("normalized confusion matrix:")
+    C = compute_confusion_matrix(y_true, y_pred, label_idxs, normalize=True)
+    print_confusion_matrix(C * 100, labels, fmt=".2f")
+
+
+def eval_be(
+    v_file,
+    trial_list,
+    class_name,
+    has_labels,
+    svm,
+    model_dir,
+    score_file,
+    verbose,
+):
+    config_logger(verbose)
+    model_dir = Path(model_dir)
+    output_dir = Path(score_file).parent
+    output_dir.mkdir(parents=True, exist_ok=True)
+    logging.info("loading data")
+    segs = SegmentSet.load(trial_list)
+    reader = DRF.create(v_file)
+    x = reader.read(segs["id"], squeeze=True)
+    del reader
+    logging.info("loaded %d samples", x.shape[0])
+
+    trans_file = model_dir / "transforms.h5"
+    if trans_file.is_file():
+        logging.info("loading transform file %s", trans_file)
+        trans = TransformList.load(trans_file)
+        logging.info("applying transform")
+        x = trans(x)
+
+    svm_file = model_dir / "model_svm.h5"
+    logging.info("loading SVM file %s", svm_file)
+    svm_model = SVM.load(svm_file)
+    if not isinstance(svm_model, SVM):
+        raise TypeError("failed to load GaussianSVMC model from %s" % svm_file)
+
+    logging.info("SVM args=%s", str(svm))
+    logging.info("evals SVM")
+    scores = svm_model(x, **svm)
+
+    if has_labels:
+        class_ids = segs[class_name]
+        # keep only segments whose label the model knows, so that y_true and
+        # y_pred stay aligned
+        mask = np.asarray([l in svm_model.labels for l in class_ids])
+        y_true = np.asarray(
+            [svm_model.labels.index(l) for l in class_ids if l in svm_model.labels]
+        )
+        y_pred = np.argmax(scores, axis=-1)[mask]
+        compute_metrics(y_true, y_pred, svm_model.labels)
+
+    logging.info("Saving scores to %s", score_file)
+    score_table = {"segmentid": segs["id"]}
+    for i, key in enumerate(svm_model.labels):
+        score_table[key] = scores[:, i]
+
+    score_table = pd.DataFrame(score_table)
+    score_table.to_csv(score_file, sep="\t", index=False)
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(
+        description="Evals gaussian SVM",
+    )
+
+    parser.add_argument("--v-file", required=True)
+    parser.add_argument("--trial-list", required=True)
+    SVM.add_eval_args(parser, prefix="svm")
+    parser.add_argument("--class-name", default="class_id")
+    parser.add_argument("--has-labels", default=False, action=ActionYesNo)
+    parser.add_argument("--model-dir", required=True)
+    parser.add_argument("--score-file", required=True)
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    eval_be(**namespace_to_dict(args))
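Note: apply_nary_lin_fusion is not defined in this patch; it appears to come
from the FoCal tooling fetched by run_001 (local/download_focal.sh). As a rough
sketch of the operation the MATLAB wrapper below applies, assuming K per-system
score matrices of shape (num_classes, num_segments), a length-K weight vector
alpha, and a per-class offset beta:

    import numpy as np

    def apply_nary_lin_fusion(scores, alpha, beta):
        # linear n-ary fusion: weighted sum of the K systems' score matrices
        # plus a per-class offset broadcast over segments
        fused = sum(a * s for a, s in zip(alpha, scores))
        return fused + np.asarray(beta).reshape(-1, 1)

With alpha = [0.6, 0.4] and beta = 0 this reduces to a weighted average of two
systems' scores.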
diff --git a/egs/lre22/fixed.v1.8k/steps_be/eval_fusion.m b/egs/lre22/fixed.v1.8k/steps_be/eval_fusion.m
new file mode 100644
index 00000000..830ee6c8
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/steps_be/eval_fusion.m
@@ -0,0 +1,17 @@
+function eval_fusion(in_files, out_file, model_file)
+
+  load(model_file, 'alpha', 'beta', 'labels');
+  n_files = length(in_files);
+  scores={};
+  for i=1:n_files
+    T_i = readtable(in_files{i}, 'FileType', 'delimitedtext', 'Delimiter','tab', 'ReadRowNames', true, 'VariableNamingRule', 'preserve');
+    T_i = sortrows(T_i, 'RowNames');
+    s_i = T_i.Variables';
+    scores{i}=s_i;
+  end
+  scores = apply_nary_lin_fusion(scores, alpha, beta);
+  T_i.Variables = scores';
+  %T_i.Properties.VariableNames = T_i.Properties.VariableDescriptions;
+  writetable(T_i, out_file, 'FileType', 'text', 'Delimiter','tab', 'WriteRowNames', true)
+
\ No newline at end of file
diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v1.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v1.py
new file mode 100755
index 00000000..983d903d
--- /dev/null
+++ b/egs/lre22/fixed.v1.8k/steps_be/train_be_v1.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+"""
+ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import sys
+import os
+import logging
+from jsonargparse import (
+    ArgumentParser,
+    ActionConfigFile,
+    ActionParser,
+    namespace_to_dict,
+    ActionYesNo,
+)
+import time
+from pathlib import Path
+
+import numpy as np
+
+from hyperion.hyp_defs import config_logger
+from hyperion.utils import SegmentSet
+from hyperion.io import RandomAccessDataReaderFactory as DRF
+from hyperion.helpers import VectorClassReader as VCR
+from hyperion.np.transforms import TransformList, PCA, LNorm
+from hyperion.np.classifiers import LinearGBE as GBE
+from hyperion.np.metrics import (
+    compute_accuracy,
+    compute_confusion_matrix,
+    print_confusion_matrix,
+)
+
+
+def compute_metrics(y_true, y_pred, labels):
+
+    acc = compute_accuracy(y_true, y_pred)
+    logging.info("training acc: %.2f %%", acc * 100)
+    logging.info("non-normalized confusion matrix:")
+    C = compute_confusion_matrix(y_true, y_pred, normalize=False)
+    print_confusion_matrix(C, labels)
+    logging.info("normalized confusion matrix:")
+    C = compute_confusion_matrix(y_true, y_pred, normalize=True)
+    print_confusion_matrix(C * 100, labels)
+
+
+def train_be(
+    v_file,
+    train_list,
+    class_name,
+    do_lnorm,
+    whiten,
+    pca,
+    gbe,
+    output_dir,
+    verbose,
+):
+    config_logger(verbose)
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    logging.info("loading data")
+    train_segs = SegmentSet.load(train_list)
+    train_reader = DRF.create(v_file)
+    x_trn = train_reader.read(train_segs["id"], squeeze=True)
+    del train_reader
+    class_ids = train_segs[class_name]
+    labels, y_true = np.unique(class_ids, return_inverse=True)
+    logging.info("loaded %d samples", x_trn.shape[0])
+
+    logging.info("PCA args=%s", str(pca))
+    pca_var_r = pca["pca_var_r"]
+    pca_dim = pca["pca_dim"]
+    if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None:
+        logging.info("training PCA")
+        pca = PCA(**pca)
+        pca.fit(x_trn)
+        logging.info("PCA dimension: %d", pca.pca_dim)
+        logging.info("apply PCA")
+        x_trn = pca(x_trn)
+    else:
+        pca = None
+
+    if do_lnorm:
+        lnorm = LNorm()
+        if whiten:
+            logging.info("training whitening")
+            lnorm.fit(x_trn)
+
+        logging.info("apply lnorm")
+        x_trn = lnorm(x_trn)
+    else:
+        lnorm = None
+
+    logging.info("GBE args=%s", str(gbe))
+    gbe = GBE(labels=labels, **gbe)
+    gbe.fit(x_trn, y_true)
+    logging.info("trained GBE")
+    scores = gbe.eval_linear(x_trn)
+    y_pred = np.argmax(scores, axis=-1)
+
+    compute_metrics(y_true, y_pred, labels)
+
+    logging.info("Saving transforms and Gaussian BE")
+    transforms = []
+    if pca is not None:
+        transforms.append(pca)
+    if lnorm is not None:
+        transforms.append(lnorm)
+
+    if transforms:
+        transforms = TransformList(transforms)
+        transforms.save(output_dir / "transforms.h5")
+
+    gbe.save(output_dir / 
"model_gbe.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Train linear GBE", + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + PCA.add_class_args(parser, prefix="pca") + GBE.add_class_args(parser, prefix="gbe") + parser.add_argument("--class-name", default="class_id") + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--output-dir", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v2.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v2.py new file mode 100755 index 00000000..599b55c4 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/train_be_v2.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.helpers import VectorClassReader as VCR +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.classifiers import LinearSVMC as SVM +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("training acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels) + + +def train_be( + v_file, + train_list, + class_name, + do_lnorm, + whiten, + pca, + svm, + output_dir, + verbose, +): + config_logger(verbose) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + train_segs = SegmentSet.load(train_list) + train_reader = DRF.create(v_file) + x_trn = train_reader.read(train_segs["id"], squeeze=True) + del train_reader + class_ids = train_segs[class_name] + labels, y_true = np.unique(class_ids, return_inverse=True) + logging.info("loaded %d samples", x_trn.shape[0]) + + logging.info("PCA args=%s", str(pca)) + pca_var_r = pca["pca_var_r"] + pca_dim = pca["pca_dim"] + if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None: + logging.info("training PCA") + pca = PCA(**pca) + pca.fit(x_trn) + logging.info("PCA dimension: %d", pca.pca_dim) + logging.info("apply PCA") + x_trn = pca(x_trn) + else: + pca = None + + if do_lnorm: + lnorm = LNorm() + if whiten: + logging.info("training whitening") + lnorm.fit(x_trn) + + logging.info("apply lnorm") + x_trn = lnorm(x_trn) + else: + lnorm = None + + logging.info("SVM args=%s", str(svm)) + model = SVM(labels=labels, **svm) + model.fit(x_trn, y_true) + logging.info("trained SVM") + scores = model(x_trn) + y_pred = np.argmax(scores, axis=-1) + + 
compute_metrics(y_true, y_pred, labels) + + logging.info("Saving transforms and SVM") + transforms = [] + if pca is not None: + transforms.append(pca) + if lnorm is not None: + transforms.append(lnorm) + + if transforms: + transforms = TransformList(transforms) + transforms.save(output_dir / "transforms.h5") + + model.save(output_dir / "model_svm.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Train linear SVM Classifier", + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + PCA.add_class_args(parser, prefix="pca") + SVM.add_class_args(parser, prefix="svm") + parser.add_argument("--class-name", default="class_id") + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--output-dir", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v3.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v3.py new file mode 100755 index 00000000..87009212 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/train_be_v3.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.utils.math import softmax +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.helpers import VectorClassReader as VCR +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.classifiers import LinearSVMC as SVM +from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("training acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels, fmt=".2f") + + +def train_be( + v_file, + train_list, + lre17_v_file, + lre17_list, + class_name, + do_lnorm, + whiten, + ary_thr, + num_nons, + pca, + svm, + output_dir, + verbose, +): + config_logger(verbose) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + train_segs = SegmentSet.load(train_list) + v_reader = DRF.create(v_file) + x_trn = v_reader.read(train_segs["id"], squeeze=True) + del v_reader + logging.info("loaded %d train samples", x_trn.shape[0]) + + segs_lre17 = SegmentSet.load(lre17_list) + ary_idx = segs_lre17[class_name] == "ara-ary" + # lre17_segs.loc[ara_ary_idx, class_name] = "ara-ayl" # "ara-arq" # "ara-aeb" + segs_ary = segs_lre17.loc[ary_idx] + + logging.info("label maghrebi arabic samples") + v_reader = DRF.create(lre17_v_file) + x_ary = v_reader.read(segs_ary["id"], squeeze=True) + 
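The block below pseudo-labels these LRE17 "ara-ary" (Maghrebi Arabic) segments: a GBE trained on the three LRE22 Maghrebi dialects scores them, and only segments whose top softmax posterior exceeds --ary-thr are relabeled and kept. A minimal sketch of that selection with stand-in scores (note that, despite the name `logp_ary`, the script thresholds a posterior in (0, 1], so the default --ary-thr of 10 keeps nothing unless a smaller value is passed):

```python
import numpy as np

def softmax(x, axis=-1):
    x = x - np.max(x, axis=axis, keepdims=True)  # shift for numerical stability
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

rng = np.random.default_rng(0)
scores = rng.normal(size=(5, 3))   # 5 segments x 3 Maghrebi dialect classes
p = softmax(scores, axis=-1)       # class posteriors per segment
y_pred = scores.argmax(axis=-1)    # most likely dialect per segment
p_max = p.max(axis=-1)             # confidence of that decision
sel = p_max > 0.5                  # keep only confident segments
print(y_pred[sel], p_max[sel])
```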
logging.info("loaded %d lre17 ara-ary samples", x_ary.shape[0]) + + ara_idx = train_segs[class_name].isin(["ara-ayl", "ara-arq", "ara-aeb"]) + x_ara = x_trn[ara_idx] + class_ids_ara = train_segs.loc[ara_idx, class_name].values + + gbe_ara = GBE() + labels_ara, y_true_ara = np.unique(class_ids_ara, return_inverse=True) + gbe_ara.fit(x_ara, y_true_ara) + scores_ary = gbe_ara(x_ary) + y_pred_ary = np.argmax(scores_ary, axis=-1) + logp_ary = np.max(softmax(scores_ary, axis=-1), axis=-1) + print(logp_ary, y_pred_ary) + # dscores_ary = np.diff(np.sort(scores_ary, axis=-1), axis=-1)[:, -1] + # sel_ary = dscores_ary > ary_thr + sel_ary = logp_ary > ary_thr + segs_ary = segs_ary.loc[sel_ary] + y_pred_ary = y_pred_ary[sel_ary] + x_ary = x_ary[sel_ary] + segs_ary[class_name] = [labels_ara[c] for c in y_pred_ary] + logging.info("selected %d ara-ary segments", x_ary.shape[0]) + segs_ary["logp"] = logp_ary[sel_ary] + SegmentSet(segs_ary).save(output_dir / "segs_ary.csv") + + logging.info("selecting non-target segments") + segs_non = segs_lre17.loc[~ary_idx].copy() + segs_non[class_name] = "zzzzzz" + x_non = v_reader.read(segs_non["id"], squeeze=True) + logging.info("loaded %d lre17 non-tar samples", x_non.shape[0]) + + class_ids = train_segs[class_name].values + labels, y_true = np.unique(class_ids, return_inverse=True) + gbe = GBE() + gbe.fit(x_trn, y_true) + scores_non = np.max(gbe(x_non), axis=1) + sel_non = np.argsort(scores_non)[-num_nons:] + segs_non = segs_non.iloc[sel_non] + x_non = x_non[sel_non] + logging.info("selected %d non-tar segments", x_non.shape[0]) + + class_ids = ( + list(train_segs[class_name].values) + + list(segs_ary[class_name].values) + + list(segs_non[class_name].values) + ) + x_trn = np.concatenate((x_trn, x_ary, x_non), axis=0) + labels, y_true = np.unique(class_ids, return_inverse=True) + logging.info("%d training samples", x_trn.shape[0]) + + logging.info("PCA args=%s", str(pca)) + pca_var_r = pca["pca_var_r"] + pca_dim = pca["pca_dim"] + if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None: + logging.info("training PCA") + pca = PCA(**pca) + pca.fit(x_trn) + logging.info("PCA dimension: %d", pca.pca_dim) + logging.info("apply PCA") + x_trn = pca(x_trn) + else: + pca = None + + if do_lnorm: + lnorm = LNorm() + if whiten: + logging.info("training whitening") + lnorm.fit(x_trn) + + logging.info("apply lnorm") + x_trn = lnorm(x_trn) + else: + lnorm = None + + logging.info("SVM args=%s", str(svm)) + model = SVM(labels=labels, **svm) + model.fit(x_trn, y_true) + logging.info("trained SVM") + scores = model(x_trn) + y_pred = np.argmax(scores, axis=-1) + + compute_metrics(y_true, y_pred, labels) + + logging.info("Saving transforms and SVM") + transforms = [] + if pca is not None: + transforms.append(pca) + if lnorm is not None: + transforms.append(lnorm) + + if transforms: + transforms = TransformList(transforms) + transforms.save(output_dir / "transforms.h5") + + model.svm.coef_ = model.svm.coef_[:-1] + model.svm.intercept_ = model.svm.intercept_[:-1] + model.labels = model.labels[:-1] + model.save(output_dir / "model_svm.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Train linear SVM Classifier", + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + parser.add_argument("--lre17-v-file", required=True) + parser.add_argument("--lre17-list", required=True) + PCA.add_class_args(parser, prefix="pca") + SVM.add_class_args(parser, prefix="svm") + parser.add_argument("--class-name", 
default="class_id") + parser.add_argument("--ary-thr", default=10, type=float) + parser.add_argument("--num-nons", default=10000, type=int) + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--output-dir", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v4.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v4.py new file mode 100755 index 00000000..986393a8 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/train_be_v4.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.utils.math import softmax +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.helpers import VectorClassReader as VCR +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("training acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels, fmt=".2f") + + +def train_be( + v_file, + train_list, + lre17_v_file, + lre17_list, + class_name, + do_lnorm, + whiten, + ary_thr, + # num_nons, + pca, + gbe, + output_dir, + verbose, +): + config_logger(verbose) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + train_segs = SegmentSet.load(train_list) + v_reader = DRF.create(v_file) + x_trn = v_reader.read(train_segs["id"], squeeze=True) + del v_reader + logging.info("loaded %d train samples", x_trn.shape[0]) + + segs_lre17 = SegmentSet.load(lre17_list) + ary_idx = segs_lre17[class_name] == "ara-ary" + # lre17_segs.loc[ara_ary_idx, class_name] = "ara-ayl" # "ara-arq" # "ara-aeb" + segs_ary = segs_lre17.loc[ary_idx] + + logging.info("label maghrebi arabic samples") + v_reader = DRF.create(lre17_v_file) + x_ary = v_reader.read(segs_ary["id"], squeeze=True) + logging.info("loaded %d lre17 ara-ary samples", x_ary.shape[0]) + + ara_idx = train_segs[class_name].isin(["ara-ayl", "ara-arq", "ara-aeb"]) + x_ara = x_trn[ara_idx] + class_ids_ara = train_segs.loc[ara_idx, class_name].values + + gbe_ara = GBE() + labels_ara, y_true_ara = np.unique(class_ids_ara, return_inverse=True) + gbe_ara.fit(x_ara, y_true_ara) + scores_ary = gbe_ara(x_ary) + y_pred_ary = np.argmax(scores_ary, axis=-1) + p_ary = np.max(softmax(scores_ary, axis=-1), axis=-1) + sel_ary = p_ary > ary_thr + segs_ary = segs_ary.loc[sel_ary] + y_pred_ary = y_pred_ary[sel_ary] + x_ary = x_ary[sel_ary] + 
segs_ary[class_name] = [labels_ara[c] for c in y_pred_ary] + logging.info("selected %d ara-ary segments", x_ary.shape[0]) + segs_ary["p"] = p_ary[sel_ary] + SegmentSet(segs_ary).save(output_dir / "segs_ary.csv") + + # logging.info("selecting non-target segments") + # segs_non = segs_lre17.loc[~ary_idx].copy() + # segs_non[class_name] = "zzzzzz" + # x_non = v_reader.read(segs_non["id"], squeeze=True) + # logging.info("loaded %d lre17 non-tar samples", x_non.shape[0]) + + # class_ids = train_segs[class_name].values + # labels, y_true = np.unique(class_ids, return_inverse=True) + # gbe = GBE() + # gbe.fit(x_trn, y_true) + # scores_non = np.max(gbe(x_non), axis=1) + # sel_non = np.argsort(scores_non)[-num_nons:] + # segs_non = segs_non.iloc[sel_non] + # x_non = x_non[sel_non] + # logging.info("selected %d non-tar segments", x_non.shape[0]) + + # class_ids = ( + # list(train_segs[class_name].values) + # + list(segs_ary[class_name].values) + # + list(segs_non[class_name].values) + # ) + # x_trn = np.concatenate((x_trn, x_ary, x_non), axis=0) + class_ids = list(train_segs[class_name].values) + list(segs_ary[class_name].values) + x_trn = np.concatenate((x_trn, x_ary), axis=0) + labels, y_true = np.unique(class_ids, return_inverse=True) + logging.info("%d training samples", x_trn.shape[0]) + + logging.info("PCA args=%s", str(pca)) + pca_var_r = pca["pca_var_r"] + pca_dim = pca["pca_dim"] + if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None: + logging.info("training PCA") + pca = PCA(**pca) + pca.fit(x_trn) + logging.info("PCA dimension: %d", pca.pca_dim) + logging.info("apply PCA") + x_trn = pca(x_trn) + else: + pca = None + + if do_lnorm: + lnorm = LNorm() + if whiten: + logging.info("training whitening") + lnorm.fit(x_trn) + + logging.info("apply lnorm") + x_trn = lnorm(x_trn) + else: + lnorm = None + + logging.info("GBE args=%s", str(gbe)) + model = GBE(labels=labels, **gbe) + model.fit(x_trn, y_true) + logging.info("trained GBE") + scores = model(x_trn) + y_pred = np.argmax(scores, axis=-1) + + compute_metrics(y_true, y_pred, labels) + + logging.info("Saving transforms and GBE") + transforms = [] + if pca is not None: + transforms.append(pca) + if lnorm is not None: + transforms.append(lnorm) + + if transforms: + transforms = TransformList(transforms) + transforms.save(output_dir / "transforms.h5") + + model.save(output_dir / "model_gbe.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser( + description="Train linear GBE Classifier", + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + parser.add_argument("--lre17-v-file", required=True) + parser.add_argument("--lre17-list", required=True) + PCA.add_class_args(parser, prefix="pca") + GBE.add_class_args(parser, prefix="gbe") + parser.add_argument("--class-name", default="class_id") + parser.add_argument("--ary-thr", default=10, type=float) + # parser.add_argument("--num-nons", default=10000, type=int) + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--output-dir", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v5.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v5.py new file mode 100755 index 00000000..32cfd6c9 --- /dev/null +++ 
b/egs/lre22/fixed.v1.8k/steps_be/train_be_v5.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.utils.math import softmax +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.helpers import VectorClassReader as VCR +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.classifiers import LinearSVMC as SVM +from hyperion.np.classifiers import GaussianSVMC as GSVM +from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("training acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels, fmt=".2f") + + +def train_be( + v_file, + train_list, + lre17_v_file, + lre17_list, + voxlingua_v_file, + voxlingua_list, + class_name, + do_lnorm, + whiten, + ary_thr, + num_nons, + pca, + svm, + output_dir, + verbose, + do_vl, + do_lre17, +): + print(locals(), flush=True) + config_logger(verbose) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + train_segs = SegmentSet.load(train_list) + v_reader = DRF.create(v_file) + x_trn = v_reader.read(train_segs["id"], squeeze=True) + del v_reader + logging.info("loaded %d train samples", x_trn.shape[0]) + + x_ary = [] + x_non = [] + y_ary = [] + y_non = [] + + if do_lre17: + segs_lre17 = SegmentSet.load(lre17_list) + ary_idx = segs_lre17[class_name] == "ara-ary" + # lre17_segs.loc[ara_ary_idx, class_name] = "ara-ayl" # "ara-arq" # "ara-aeb" + segs_ary = segs_lre17.loc[ary_idx] + + logging.info("label maghrebi arabic samples") + v_reader = DRF.create(lre17_v_file) + x_ary = v_reader.read(segs_ary["id"], squeeze=True) + logging.info("loaded %d lre17 ara-ary samples", x_ary.shape[0]) + + ara_idx = train_segs[class_name].isin(["ara-ayl", "ara-arq", "ara-aeb"]) + x_ara = x_trn[ara_idx] + class_ids_ara = train_segs.loc[ara_idx, class_name].values + + gbe_ara = GBE() + labels_ara, y_true_ara = np.unique(class_ids_ara, return_inverse=True) + gbe_ara.fit(x_ara, y_true_ara) + scores_ary = gbe_ara(x_ary) + y_pred_ary = np.argmax(scores_ary, axis=-1) + logp_ary = np.max(softmax(scores_ary, axis=-1), axis=-1) + print(logp_ary, y_pred_ary) + # dscores_ary = np.diff(np.sort(scores_ary, axis=-1), axis=-1)[:, -1] + # sel_ary = dscores_ary > ary_thr + sel_ary = logp_ary > ary_thr + segs_ary = segs_ary.loc[sel_ary] + y_pred_ary = y_pred_ary[sel_ary] + x_ary = x_ary[sel_ary] + segs_ary[class_name] = [labels_ara[c] for c in y_pred_ary] + logging.info("selected %d ara-ary segments", x_ary.shape[0]) + segs_ary["logp"] = logp_ary[sel_ary] + SegmentSet(segs_ary).save(output_dir / "segs_ary.csv") + + logging.info("selecting non-target segments") + 
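The lines that follow pool confusable LRE17 languages (and, further down, VoxLingua107 ones) into a single filler class named "zzzzzz", train the classifier with it, and strip it before saving so that evaluation scores cover only the LRE22 targets. The name is load-bearing: `np.unique` sorts the labels, so the filler lands in the last class slot and its parameters can be dropped by slicing, as train_be_v3.py/v7.py do with `svm.coef_` and `svm.intercept_`. A sketch of the idea with a stand-in weight matrix (not hyperion's SVM object):

```python
import numpy as np

labels = np.unique(["afr-afr", "ara-aeb", "zzzzzz"])  # sorted, filler is last
rng = np.random.default_rng(0)
W = rng.normal(size=(len(labels), 4))  # one weight row per class (stand-in)
b = np.zeros(len(labels))              # one bias per class (stand-in)

# Drop the filler class before saving; scoring then covers targets only.
W, b, labels = W[:-1], b[:-1], labels[:-1]
assert "zzzzzz" not in labels
```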
lre17_close_idx = segs_lre17[class_name].isin( + ["ara-acm", "ara-apc", "eng-usg", "por-brz"] + ) + segs_non = segs_lre17.loc[lre17_close_idx].copy() + segs_non[class_name] = "zzzzzz" + x_non = v_reader.read(segs_non["id"], squeeze=True) + logging.info("loaded %d lre17 non-tar samples", x_non.shape[0]) + + y_ary = list(segs_ary[class_name].values) + y_non = list(segs_non[class_name].values) + + # class_ids = train_segs[class_name].values + # labels, y_true = np.unique(class_ids, return_inverse=True) + # gbe = GBE() + # gbe.fit(x_trn, y_true) + # scores_non = np.max(gbe(x_non), axis=1) + # sel_non = np.argsort(scores_non)[-num_nons:] + # segs_non = segs_non.iloc[sel_non] + # x_non = x_non[sel_non] + # logging.info("selected %d non-tar segments", x_non.shape[0]) + + if do_vl: + v_reader_vl = DRF.create(voxlingua_v_file) + segs_voxlingua = SegmentSet.load(voxlingua_list) + vl_close_idx = segs_voxlingua[class_name].isin( + [ + "en-en", + "am-am", + "sn-sn", + "fra-mix", + "haw-haw", + "zho-cmn", + "ia-ia", + "ceb-ceb", + "sa-sa", + "su-su", + "te-te", + "yo-yo", + "sw-sw", + "pt-pt", + "war-war", + "km-km", + "tr-tr", + "gn-gn", + ] + ) + segs_vl_close = segs_voxlingua.loc[vl_close_idx].copy() + segs_vl_close[class_name] = "zzzzzz" + x_non_vl = v_reader_vl.read(segs_vl_close["id"], squeeze=True) + + vl_afk_idx = segs_voxlingua[class_name] == "afr-afr" + if not np.any(vl_afk_idx): + vl_afk_idx = segs_voxlingua[class_name] == "af-af" + segs_vl_afk = segs_voxlingua.loc[vl_afk_idx].copy() + segs_vl_afk[class_name] = "afr-afr" + x_trn_vl = v_reader_vl.read(segs_vl_afk["id"], squeeze=True) + + y_trn_vl = list(segs_vl_afk[class_name].values) + y_non_vl = list(segs_vl_close[class_name].values) + + del v_reader_vl + else: + x_trn_vl = np.zeros((0, x_trn.shape[1])) + x_non_vl = np.zeros((0, x_trn.shape[1])) + y_trn_vl = [] + y_non_vl = [] + + class_ids = ( + list(train_segs[class_name].values) + y_trn_vl + y_ary + y_non + y_non_vl + ) + x_trn = np.concatenate((x_trn, x_trn_vl, x_ary, x_non, x_non_vl), axis=0) + labels, y_true = np.unique(class_ids, return_inverse=True) + logging.info("%d training samples", x_trn.shape[0]) + + logging.info("PCA args=%s", str(pca)) + pca_var_r = pca["pca_var_r"] + pca_dim = pca["pca_dim"] + if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None: + logging.info("training PCA") + pca = PCA(**pca) + pca.fit(x_trn) + logging.info("PCA dimension: %d", pca.pca_dim) + logging.info("apply PCA") + x_trn = pca(x_trn) + else: + pca = None + + if do_lnorm: + lnorm = LNorm() + if whiten: + logging.info("training whitening") + lnorm.fit(x_trn) + + logging.info("apply lnorm") + x_trn = lnorm(x_trn) + else: + lnorm = None + + logging.info("Gaussian SVM args=%s", str(svm)) + model = GSVM(labels=labels, **svm) + model.fit(x_trn, y_true) + logging.info("trained SVM") + scores = model(x_trn) + y_pred = np.argmax(scores, axis=-1) + + compute_metrics(y_true, y_pred, labels) + + logging.info("Saving transforms and SVM") + transforms = [] + if pca is not None: + transforms.append(pca) + if lnorm is not None: + transforms.append(lnorm) + + if transforms: + transforms = TransformList(transforms) + transforms.save(output_dir / "transforms.h5") + + # model.svm.coef_ = model.svm.coef_[:-1] + # model.svm.intercept_ = model.svm.intercept_[:-1] + model_labels = list(np.copy(model.labels)) + if "zzzzzz" in model_labels: + model_labels.remove("zzzzzz") + model.labels = model_labels + print("model.labels before save", np.shape(model.labels)) + model.save(output_dir / "model_svm.h5") + + +if 
__name__ == "__main__": + parser = ArgumentParser( + description="Train gaussian SVM Classifier", + ) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + parser.add_argument("--lre17-v-file", required=True) + parser.add_argument("--lre17-list", required=True) + parser.add_argument("--voxlingua-v-file", required=True) + parser.add_argument("--voxlingua-list", required=True) + PCA.add_class_args(parser, prefix="pca") + GSVM.add_class_args(parser, prefix="svm") + parser.add_argument("--class-name", default="class_id") + parser.add_argument("--ary-thr", default=10, type=float) + parser.add_argument("--num-nons", default=10000, type=int) + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--output-dir", required=True) + parser.add_argument("--do-vl", default=True, action=ActionYesNo) + parser.add_argument("--do-lre17", default=True, action=ActionYesNo) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + # parser.add_argument("--classifier", default="lsvm", choices=["lsvm", "gsvm", "rf"], required=False) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v6.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v6.py new file mode 100755 index 00000000..d481a18d --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/train_be_v6.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.utils.math import softmax +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + +tar_langs = ( + "afr-afr", + "ara-aeb", + "ara-arq", + "ara-ayl", + "eng-ens", + "eng-iaf", + "fra-ntf", + "nbl-nbl", + "orm-orm", + "tir-tir", + "tso-tso", + "ven-ven", + "xho-xho", + "zul-zul", +) + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("training acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels, fmt=".2f") + + +def train_be( + v_file, + train_list, + cv_v_file, + cv_list, + afr_v_file, + afr_list, + class_name, + do_lnorm, + whiten, + pca, + gbe, + output_dir, + verbose, +): + config_logger(verbose) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + train_segs = SegmentSet.load(train_list) + v_reader = DRF.create(v_file) + x_trn = v_reader.read(train_segs["id"], squeeze=True) + del v_reader + logging.info("loaded %d train samples", x_trn.shape[0]) + + segs_cv = SegmentSet.load(cv_list) + # 
ary_idx = segs_lre17[class_name] == "ara-ary" + # segs_ary = segs_lre17.loc[ary_idx] + + segs_cv = SegmentSet.load(cv_list) + cv_idx = np.zeros((len(segs_cv),), dtype=bool) + for lang in tar_langs: + cv_idx_i = segs_cv[class_name] == lang + cv_idx = np.logical_or(cv_idx, cv_idx_i) + + segs_cv = segs_cv.loc[cv_idx] + # segs_cv.loc[segs_cv[class_name] == "eng-ine", class_name] = "eng-iaf" + + # v_reader = DRF.create(cv_v_file) + # x_cv = v_reader.read(segs_cv["id"], squeeze=True) + # logging.info("loaded %d cv samples", x_cv.shape[0]) + + segs_afr = SegmentSet.load(afr_list) + afr_idx = np.zeros((len(segs_afr),), dtype=bool) + for lang in tar_langs: + afr_idx_i = segs_afr[class_name] == lang + afr_idx = np.logical_or(afr_idx, afr_idx_i) + + segs_afr = segs_afr.loc[afr_idx] + + v_reader = DRF.create(afr_v_file) + x_afr = v_reader.read(segs_afr["id"], squeeze=True) + logging.info("loaded %d afr samples", x_afr.shape[0]) + + class_ids = ( + list(train_segs[class_name].values) + # + list(segs_cv[class_name].values) + + list(segs_afr[class_name].values) + ) + # x_trn = np.concatenate((x_trn, x_cv, x_afr), axis=0) + x_trn = np.concatenate((x_trn, x_afr), axis=0) + labels, y_true = np.unique(class_ids, return_inverse=True) + logging.info("%d training samples", x_trn.shape[0]) + + logging.info("PCA args=%s", str(pca)) + pca_var_r = pca["pca_var_r"] + pca_dim = pca["pca_dim"] + if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None: + logging.info("training PCA") + pca = PCA(**pca) + pca.fit(x_trn) + logging.info("PCA dimension: %d", pca.pca_dim) + logging.info("apply PCA") + x_trn = pca(x_trn) + else: + pca = None + + if do_lnorm: + lnorm = LNorm() + if whiten: + logging.info("training whitening") + lnorm.fit(x_trn) + + logging.info("apply lnorm") + x_trn = lnorm(x_trn) + else: + lnorm = None + + logging.info("GBE args=%s", str(gbe)) + model = GBE(labels=labels, **gbe) + model.fit(x_trn, y_true) + logging.info("trained GBE") + scores = model(x_trn) + y_pred = np.argmax(scores, axis=-1) + + compute_metrics(y_true, y_pred, labels) + + logging.info("Saving transforms and GBE") + transforms = [] + if pca is not None: + transforms.append(pca) + if lnorm is not None: + transforms.append(lnorm) + + if transforms: + transforms = TransformList(transforms) + transforms.save(output_dir / "transforms.h5") + + model.save(output_dir / "model_gbe.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train linear GBE Classifier",) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + parser.add_argument("--cv-v-file", required=True) + parser.add_argument("--cv-list", required=True) + parser.add_argument("--afr-v-file", required=True) + parser.add_argument("--afr-list", required=True) + PCA.add_class_args(parser, prefix="pca") + GBE.add_class_args(parser, prefix="gbe") + parser.add_argument("--class-name", default="class_id") + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--output-dir", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v7.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v7.py new file mode 100755 index 00000000..1b37d92e --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/train_be_v7.py @@ -0,0 +1,315 @@ 
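train_be_v7.py, added below, pools the LRE22 training set with out-of-domain SRE, LRE17, CommonVoice, and African-corpus segments, and down-weights most OOD sources by --ood-weight when fitting the SVM (LRE17 non-targets keep weight 1). A minimal sketch of how that weight vector lines up with the stacked x-vector matrix; the sizes here are invented for illustration:

```python
import numpy as np

n_lre22, n_sre, n_lre17_non, n_cv = 1000, 200, 300, 150  # invented sizes
ood_weight = 0.1

x = np.zeros((n_lre22 + n_sre + n_lre17_non + n_cv, 256))  # stacked x-vectors
sample_weight = np.concatenate((
    np.ones(n_lre22),                 # in-domain data keeps full weight
    ood_weight * np.ones(n_sre),      # down-weighted OOD source
    np.ones(n_lre17_non),             # LRE17 non-targets keep weight 1 in v7
    ood_weight * np.ones(n_cv),       # down-weighted OOD source
))
assert len(sample_weight) == len(x)   # one weight per stacked sample
```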
+#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.utils.math import softmax +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.classifiers import LinearSVMC as SVM +from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + +tar_langs = ( + "afr-afr", + "ara-aeb", + "ara-arq", + "ara-ayl", + "eng-ens", + "eng-iaf", + "fra-ntf", + "nbl-nbl", + "orm-orm", + "tir-tir", + "tso-tso", + "ven-ven", + "xho-xho", + "zul-zul", +) + +non_langs = ( + "fra-can", + "fra-fra", + "fra-ntf", + "afr-afr", + "ara-acm", + "ara-arz", + "ara-jor", + "ara-ksa", + "ara-kuw", + "ara-leb", + "ara-mau", + "ara-mor", + "ara-oma", + "ara-pal", + "ara-qat", + "ara-sud", + "ara-syr", + "ara-uae", + "ara-yem", + "ara-apc", + "eng-gbr", + "eng-usg", +) + + +def read_ood_data(train_list, v_file, class_name): + v_reader = DRF.create(v_file) + + segs = SegmentSet.load(train_list) + idx = np.zeros((len(segs),), dtype=bool) + for lang in tar_langs: + idx_i = segs[class_name] == lang + idx = np.logical_or(idx, idx_i) + + segs_tar = segs.loc[idx].copy() + if len(segs_tar) > 0: + x_tar = v_reader.read(segs_tar["id"], squeeze=True) + else: + x_tar = None + + idx = np.zeros((len(segs),), dtype=bool) + for lang in non_langs: + idx_i = segs[class_name] == lang + idx = np.logical_or(idx, idx_i) + + segs_non = segs.loc[idx].copy() + segs_non[class_name] = "zzzzzzz" + if len(segs_non) > 0: + x_non = v_reader.read(segs_non["id"], squeeze=True) + else: + x_non = None + + logging.info( + "read %s got ntar: %d nnon: %d", train_list, len(segs_tar), len(segs_non) + ) + return segs_tar, x_tar, segs_non, x_non + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("training acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels, fmt=".2f") + + +def train_be( + v_file, + train_list, + sre_v_file, + sre_list, + lre17_v_file, + lre17_list, + cv_v_file, + cv_list, + afr_v_file, + afr_list, + class_name, + do_lnorm, + whiten, + pca, + svm, + output_dir, + ood_weight, + verbose, +): + config_logger(verbose) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + train_segs = SegmentSet.load(train_list) + v_reader = DRF.create(v_file) + x_trn = v_reader.read(train_segs["id"], squeeze=True) + del v_reader + logging.info("loaded %d train samples", x_trn.shape[0]) + + segs_sre_tar, x_sre_tar, segs_sre_non, x_sre_non = read_ood_data( + sre_list, sre_v_file, class_name, + ) + _, _, segs_lre17_non, x_lre17_non = read_ood_data( + lre17_list, lre17_v_file, class_name, + ) + segs_cv_tar, x_cv_tar, segs_cv_non, x_cv_non = read_ood_data( + cv_list, cv_v_file, 
class_name + ) + segs_afr_tar, x_afr_tar, segs_afr_non, x_afr_non = read_ood_data( + afr_list, afr_v_file, class_name, + ) + + # class_ids = train_segs[class_name].values + # labels, y_true = np.unique(class_ids, return_inverse=True) + # gbe = GBE() + # gbe.fit(x_trn, y_true) + # scores_non = np.max(gbe(x_non), axis=1) + # sel_non = np.argsort(scores_non)[-num_nons:] + # segs_non = segs_non.iloc[sel_non] + # x_non = x_non[sel_non] + # logging.info("selected %d non-tar segments", x_non.shape[0]) + + # class_ids = ( + # list(train_segs[class_name].values) + # + list(segs_sre_tar[class_name].values) + # + list(segs_cv_tar[class_name].values) + # + list(segs_afr_tar[class_name].values) + # + list(segs_sre_non[class_name].values) + # + list(segs_lre17_non[class_name].values) + # + list(segs_cv_non[class_name].values) + # + list(segs_afr_non[class_name].values) + # ) + # x_trn = np.concatenate( + # ( + # x_trn, + # x_sre_tar, + # x_cv_tar, + # x_afr_tar, + # x_sre_non, + # x_lre17_non, + # x_cv_non, + # x_afr_non, + # ), + # axis=0, + # ) + class_ids = ( + list(train_segs[class_name].values) + + list(segs_sre_tar[class_name].values) + + list(segs_cv_tar[class_name].values) + + list(segs_afr_tar[class_name].values) + + list(segs_sre_non[class_name].values) + + list(segs_lre17_non[class_name].values) + + list(segs_cv_non[class_name].values) + + list(segs_afr_non[class_name].values) + ) + x = np.concatenate( + ( + x_trn, + x_sre_tar, + x_cv_tar, + x_afr_tar, + x_sre_non, + x_lre17_non, + x_cv_non, + x_afr_non, + ), + axis=0, + ) + sample_weight = np.concatenate( + ( + np.ones((len(train_segs),)), + ood_weight * np.ones((len(segs_sre_tar),)), + ood_weight * np.ones((len(segs_cv_tar),)), + ood_weight * np.ones((len(segs_afr_tar),)), + ood_weight * np.ones((len(segs_sre_non),)), + np.ones((len(segs_lre17_non),)), + ood_weight * np.ones((len(segs_cv_non),)), + ood_weight * np.ones((len(segs_afr_non),)), + ) + ) + + labels, y_true = np.unique(class_ids, return_inverse=True) + logging.info("%d training samples", x_trn.shape[0]) + + logging.info("PCA args=%s", str(pca)) + pca_var_r = pca["pca_var_r"] + pca_dim = pca["pca_dim"] + if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None: + logging.info("training PCA") + pca = PCA(**pca) + pca.fit(x_trn) + logging.info("PCA dimension: %d", pca.pca_dim) + logging.info("apply PCA") + x = pca(x) + else: + pca = None + + if do_lnorm: + lnorm = LNorm() + if whiten: + logging.info("training whitening") + lnorm.fit(x) + + logging.info("apply lnorm") + x = lnorm(x) + else: + lnorm = None + + logging.info("SVM args=%s", str(svm)) + model = SVM(labels=labels, **svm) + model.fit(x, y_true, sample_weight=sample_weight) + logging.info("trained SVM") + scores = model(x) + y_pred = np.argmax(scores, axis=-1) + + compute_metrics(y_true, y_pred, labels) + + logging.info("Saving transforms and SVM") + transforms = [] + if pca is not None: + transforms.append(pca) + if lnorm is not None: + transforms.append(lnorm) + + if transforms: + transforms = TransformList(transforms) + transforms.save(output_dir / "transforms.h5") + + model.svm.coef_ = model.svm.coef_[:-1] + model.svm.intercept_ = model.svm.intercept_[:-1] + model.labels = model.labels[:-1] + model.save(output_dir / "model_svm.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train linear SVM Classifier",) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + parser.add_argument("--sre-v-file", required=True) + 
parser.add_argument("--sre-list", required=True) + parser.add_argument("--lre17-v-file", required=True) + parser.add_argument("--lre17-list", required=True) + parser.add_argument("--cv-v-file", required=True) + parser.add_argument("--cv-list", required=True) + parser.add_argument("--afr-v-file", required=True) + parser.add_argument("--afr-list", required=True) + PCA.add_class_args(parser, prefix="pca") + SVM.add_class_args(parser, prefix="svm") + parser.add_argument("--class-name", default="class_id") + # parser.add_argument("--num-nons", default=10000, type=int) + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--ood-weight", default=0.1, type=float) + parser.add_argument("--output-dir", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v8.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v8.py new file mode 100755 index 00000000..ec9d5e56 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/train_be_v8.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.utils.math import softmax +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.classifiers import GaussianSVMC as GSVM +from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + +tar_langs = ( + "afr-afr", + "ara-aeb", + "ara-arq", + "ara-ayl", + "eng-ens", + "eng-iaf", + "fra-ntf", + "nbl-nbl", + "orm-orm", + "tir-tir", + "tso-tso", + "ven-ven", + "xho-xho", + "zul-zul", +) + +non_langs = ( + "fra-can", + "fra-fra", + "fra-ntf", + "afr-afr", + "ara-acm", + "ara-arz", + "ara-jor", + "ara-ksa", + "ara-kuw", + "ara-leb", + "ara-mau", + "ara-mor", + "ara-oma", + "ara-pal", + "ara-qat", + "ara-sud", + "ara-syr", + "ara-uae", + "ara-yem", + "ara-apc", + "eng-gbr", + "eng-usg", +) + + +def read_ood_data(train_list, v_file, class_name): + v_reader = DRF.create(v_file) + + segs = SegmentSet.load(train_list) + idx = np.zeros((len(segs),), dtype=bool) + for lang in tar_langs: + idx_i = segs[class_name] == lang + idx = np.logical_or(idx, idx_i) + + segs_tar = segs.loc[idx].copy() + if len(segs_tar) > 0: + x_tar = v_reader.read(segs_tar["id"], squeeze=True) + else: + x_tar = None + + idx = np.zeros((len(segs),), dtype=bool) + for lang in non_langs: + idx_i = segs[class_name] == lang + idx = np.logical_or(idx, idx_i) + + segs_non = segs.loc[idx].copy() + segs_non[class_name] = "zzzzzz" + if len(segs_non) > 0: + x_non = v_reader.read(segs_non["id"], squeeze=True) + else: + x_non = None + + logging.info( + "read %s got ntar: %d nnon: %d", train_list, len(segs_tar), len(segs_non) + ) + return segs_tar, x_tar, segs_non, x_non + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + 
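`compute_metrics`, repeated verbatim in each of these scripts, wraps only a few NumPy operations. An illustrative re-implementation of the two quantities it logs (this is not hyperion's actual code; normalizing each row by its true-class count is an assumption about `compute_confusion_matrix(normalize=True)`):

```python
import numpy as np

def toy_confusion_matrix(y_true, y_pred, num_classes, normalize=False):
    # C[i, j] counts samples of true class i predicted as class j
    C = np.zeros((num_classes, num_classes))
    for t, p in zip(y_true, y_pred):
        C[t, p] += 1.0
    if normalize:
        # each row sums to 1: the per-true-class decision pattern
        C = C / np.clip(C.sum(axis=1, keepdims=True), 1.0, None)
    return C

y_true = np.array([0, 0, 1, 2])
y_pred = np.array([0, 1, 1, 2])
acc = float((y_true == y_pred).mean())                       # 0.75
C = toy_confusion_matrix(y_true, y_pred, 3, normalize=True)
```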
logging.info("training acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels, fmt=".2f") + + +def train_be( + v_file, + train_list, + sre_v_file, + sre_list, + lre17_v_file, + lre17_list, + cv_v_file, + cv_list, + afr_v_file, + afr_list, + class_name, + do_lnorm, + whiten, + pca, + svm, + output_dir, + ood_weight, + verbose, +): + config_logger(verbose) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + train_segs = SegmentSet.load(train_list) + v_reader = DRF.create(v_file) + x_trn = v_reader.read(train_segs["id"], squeeze=True) + del v_reader + logging.info("loaded %d train samples", x_trn.shape[0]) + + segs_sre_tar, x_sre_tar, segs_sre_non, x_sre_non = read_ood_data( + sre_list, sre_v_file, class_name, + ) + _, _, segs_lre17_non, x_lre17_non = read_ood_data( + lre17_list, lre17_v_file, class_name, + ) + segs_cv_tar, x_cv_tar, segs_cv_non, x_cv_non = read_ood_data( + cv_list, cv_v_file, class_name + ) + segs_afr_tar, x_afr_tar, segs_afr_non, x_afr_non = read_ood_data( + afr_list, afr_v_file, class_name, + ) + + # class_ids = train_segs[class_name].values + # labels, y_true = np.unique(class_ids, return_inverse=True) + # gbe = GBE() + # gbe.fit(x_trn, y_true) + # scores_non = np.max(gbe(x_non), axis=1) + # sel_non = np.argsort(scores_non)[-num_nons:] + # segs_non = segs_non.iloc[sel_non] + # x_non = x_non[sel_non] + # logging.info("selected %d non-tar segments", x_non.shape[0]) + + # class_ids = ( + # list(train_segs[class_name].values) + # + list(segs_sre_tar[class_name].values) + # + list(segs_cv_tar[class_name].values) + # + list(segs_afr_tar[class_name].values) + # + list(segs_sre_non[class_name].values) + # + list(segs_lre17_non[class_name].values) + # + list(segs_cv_non[class_name].values) + # + list(segs_afr_non[class_name].values) + # ) + # x_trn = np.concatenate( + # ( + # x_trn, + # x_sre_tar, + # x_cv_tar, + # x_afr_tar, + # x_sre_non, + # x_lre17_non, + # x_cv_non, + # x_afr_non, + # ), + # axis=0, + # ) + class_ids = ( + list(train_segs[class_name].values) + + list(segs_sre_tar[class_name].values) + + list(segs_cv_tar[class_name].values) + + list(segs_afr_tar[class_name].values) + + list(segs_sre_non[class_name].values) + + list(segs_lre17_non[class_name].values) + + list(segs_cv_non[class_name].values) + + list(segs_afr_non[class_name].values) + ) + x = np.concatenate( + ( + x_trn, + x_sre_tar, + x_cv_tar, + x_afr_tar, + x_sre_non, + x_lre17_non, + x_cv_non, + x_afr_non, + ), + axis=0, + ) + sample_weight = np.concatenate( + ( + np.ones((len(train_segs),)), + ood_weight * np.ones((len(segs_sre_tar),)), + ood_weight * np.ones((len(segs_cv_tar),)), + ood_weight * np.ones((len(segs_afr_tar),)), + ood_weight * np.ones((len(segs_sre_non),)), + np.ones((len(segs_lre17_non),)), + ood_weight * np.ones((len(segs_cv_non),)), + ood_weight * np.ones((len(segs_afr_non),)), + ) + ) + + labels, y_true = np.unique(class_ids, return_inverse=True) + logging.info("%d training samples", x_trn.shape[0]) + + logging.info("PCA args=%s", str(pca)) + pca_var_r = pca["pca_var_r"] + pca_dim = pca["pca_dim"] + if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None: + logging.info("training PCA") + pca = PCA(**pca) + pca.fit(x_trn) + logging.info("PCA 
dimension: %d", pca.pca_dim) + logging.info("apply PCA") + x = pca(x) + else: + pca = None + + if do_lnorm: + lnorm = LNorm() + if whiten: + logging.info("training whitening") + lnorm.fit(x) + + logging.info("apply lnorm") + x = lnorm(x) + else: + lnorm = None + + logging.info("SVM args=%s", str(svm)) + model = GSVM(labels=labels, **svm) + model.fit(x, y_true, sample_weight=sample_weight) + logging.info("trained SVM") + scores = model(x) + y_pred = np.argmax(scores, axis=-1) + + compute_metrics(y_true, y_pred, labels) + + logging.info("Saving transforms and SVM") + transforms = [] + if pca is not None: + transforms.append(pca) + if lnorm is not None: + transforms.append(lnorm) + + if transforms: + transforms = TransformList(transforms) + transforms.save(output_dir / "transforms.h5") + + model_labels = list(np.copy(model.labels)) + if "zzzzzz" in model_labels: + model_labels.remove("zzzzzz") + model.labels = model_labels + print("model.labels before save", np.shape(model.labels)) + model.save(output_dir / "model_svm.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train linear SVM Classifier",) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + parser.add_argument("--sre-v-file", required=True) + parser.add_argument("--sre-list", required=True) + parser.add_argument("--lre17-v-file", required=True) + parser.add_argument("--lre17-list", required=True) + parser.add_argument("--cv-v-file", required=True) + parser.add_argument("--cv-list", required=True) + parser.add_argument("--afr-v-file", required=True) + parser.add_argument("--afr-list", required=True) + PCA.add_class_args(parser, prefix="pca") + GSVM.add_class_args(parser, prefix="svm") + parser.add_argument("--class-name", default="class_id") + # parser.add_argument("--num-nons", default=10000, type=int) + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--ood-weight", default=0.1, type=float) + parser.add_argument("--output-dir", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_be_v9.py b/egs/lre22/fixed.v1.8k/steps_be/train_be_v9.py new file mode 100755 index 00000000..5c174233 --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/train_be_v9.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import sys +import os +import logging +from jsonargparse import ( + ArgumentParser, + ActionConfigFile, + ActionParser, + namespace_to_dict, + ActionYesNo, +) +import time +from pathlib import Path + +import numpy as np + +from hyperion.hyp_defs import config_logger +from hyperion.utils import SegmentSet +from hyperion.utils.math import softmax +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.transforms import TransformList, PCA, LNorm +from hyperion.np.classifiers import LinearGBE as GBE +from hyperion.np.metrics import ( + compute_accuracy, + compute_confusion_matrix, + print_confusion_matrix, +) + +tar_langs = ( + "afr-afr", + "ara-aeb", + "ara-arq", + "ara-ayl", + "eng-ens", + "eng-iaf", + "fra-ntf", + "nbl-nbl", + "orm-orm", + "tir-tir", + "tso-tso", + "ven-ven", + "xho-xho", + "zul-zul", +) + + +def 
read_ood_data(train_list, v_file, class_name): + v_reader = DRF.create(v_file) + + segs = SegmentSet.load(train_list) + idx = np.zeros((len(segs),), dtype=bool) + for lang in tar_langs: + idx_i = segs[class_name] == lang + idx = np.logical_or(idx, idx_i) + + segs_tar = segs.loc[idx].copy() + if len(segs_tar) > 0: + x_tar = v_reader.read(segs_tar["id"], squeeze=True) + else: + x_tar = None + + logging.info( + "read %s got ntar: %d", train_list, len(segs_tar), + ) + return segs_tar, x_tar + + +def compute_metrics(y_true, y_pred, labels): + + acc = compute_accuracy(y_true, y_pred) + logging.info("training acc: %.2f %%", acc * 100) + logging.info("non-normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=False) + print_confusion_matrix(C, labels) + logging.info("normalized confusion matrix:") + C = compute_confusion_matrix(y_true, y_pred, normalize=True) + print_confusion_matrix(C * 100, labels, fmt=".2f") + + +def train_be( + v_file, + train_list, + sre_v_file, + sre_list, + cv_v_file, + cv_list, + afr_v_file, + afr_list, + class_name, + do_lnorm, + whiten, + pca, + gbe, + output_dir, + verbose, +): + config_logger(verbose) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + logging.info("loading data") + train_segs = SegmentSet.load(train_list) + v_reader = DRF.create(v_file) + x_trn = v_reader.read(train_segs["id"], squeeze=True) + del v_reader + logging.info("loaded %d train samples", x_trn.shape[0]) + + segs_sre, x_sre = read_ood_data(sre_list, sre_v_file, class_name,) + segs_cv, x_cv = read_ood_data(cv_list, cv_v_file, class_name) + segs_afr, x_afr = read_ood_data(afr_list, afr_v_file, class_name,) + + class_ids_trn = train_segs[class_name].values + x_ood = np.concatenate((x_sre, x_cv, x_afr), axis=0) + class_ids_ood = ( + list(segs_sre[class_name].values) + + list(segs_cv[class_name].values) + + list(segs_afr[class_name].values) + ) + + labels, y_true_trn = np.unique(class_ids_trn, return_inverse=True) + _, y_true_ood = np.unique( + np.concatenate((labels, class_ids_ood)), return_inverse=True + ) + y_true_ood = y_true_ood[len(labels) :] + + logging.info("%d ood samples", x_ood.shape[0]) + logging.info("%d training samples", x_trn.shape[0]) + + x_ood += np.mean(x_trn, axis=0, keepdims=True) - np.mean( + x_ood, axis=0, keepdims=True + ) + logging.info("PCA args=%s", str(pca)) + pca_var_r = pca["pca_var_r"] + pca_dim = pca["pca_dim"] + if pca_var_r is not None and pca_var_r < 1.0 or pca_dim is not None: + logging.info("training PCA") + pca = PCA(**pca) + pca.fit(x_trn) + logging.info("PCA dimension: %d", pca.pca_dim) + logging.info("apply PCA") + x_trn = pca(x_trn) + x_ood = pca(x_ood) + else: + pca = None + + if do_lnorm: + lnorm = LNorm() + if whiten: + logging.info("training whitening") + lnorm.fit(x_trn) + + logging.info("apply lnorm") + x_trn = lnorm(x_trn) + x_ood = lnorm(x_ood) + else: + lnorm = None + + prior_0 = GBE( + mu=np.zeros((len(labels), x_trn.shape[1])), + W=np.eye(x_trn.shape[1]), + beta=16, + nu=x_trn.shape[1], + ) + print(prior_0.__dict__) + prior = GBE(prior=prior_0) + prior.fit(x_ood, y_true_ood) + prior.nu = 0.1 * prior.nu + prior.beta = 0.01 * prior.beta + print(prior.__dict__) + model = GBE(labels=labels, prior=prior) + model.fit(x_trn, y_true_trn) + print(model.__dict__, flush=True) + logging.info("trained GBE") + scores = model(x_trn) + y_pred = np.argmax(scores, axis=-1) + + compute_metrics(y_true_trn, y_pred, labels) + + logging.info("Saving transforms and GBE") + transforms = [] + if pca is not 
None: + transforms.append(pca) + if lnorm is not None: + transforms.append(lnorm) + + if transforms: + transforms = TransformList(transforms) + transforms.save(output_dir / "transforms.h5") + + model.save(output_dir / "model_gbe.h5") + + +if __name__ == "__main__": + + parser = ArgumentParser(description="Train linear GBE Classifier",) + + parser.add_argument("--v-file", required=True) + parser.add_argument("--train-list", required=True) + parser.add_argument("--sre-v-file", required=True) + parser.add_argument("--sre-list", required=True) + parser.add_argument("--cv-v-file", required=True) + parser.add_argument("--cv-list", required=True) + parser.add_argument("--afr-v-file", required=True) + parser.add_argument("--afr-list", required=True) + PCA.add_class_args(parser, prefix="pca") + GBE.add_class_args(parser, prefix="gbe") + parser.add_argument("--class-name", default="class_id") + parser.add_argument("--do-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--whiten", default=True, action=ActionYesNo) + parser.add_argument("--output-dir", required=True) + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + args = parser.parse_args() + train_be(**namespace_to_dict(args)) diff --git a/egs/lre22/fixed.v1.8k/steps_be/train_fusion.m b/egs/lre22/fixed.v1.8k/steps_be/train_fusion.m new file mode 100644 index 00000000..8f1c3dda --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_be/train_fusion.m @@ -0,0 +1,16 @@ +function train_fusion(train_list, in_files, model_file) + + train_list = readtable(train_list, 'FileType', 'delimitedtext', 'Delimiter', ' ', 'ReadVariableNames', false, 'ReadRowNames', true); + train_list = sortrows(train_list, 'RowNames'); + [labels, ia, ic]=unique(train_list); + n_files = length(in_files); + scores={}; + for i=1:n_files + T_i = readtable(in_files{i}, 'FileType', 'delimitedtext', 'Delimiter','tab', 'ReadRowNames', true, 'VariableNamingRule', 'preserve'); + T_i = sortrows(T_i, 'RowNames'); + s_i = T_i.Variables'; + scores{i}=s_i; + end + [alpha, beta] = train_nary_llr_fusion(scores, ic, 0, 1e-6, [], ones(1,1)) + save(model_file, 'alpha', 'beta', 'labels'); + \ No newline at end of file diff --git a/egs/lre22/fixed.v1.8k/steps_xvec b/egs/lre22/fixed.v1.8k/steps_xvec new file mode 120000 index 00000000..af66a94d --- /dev/null +++ b/egs/lre22/fixed.v1.8k/steps_xvec @@ -0,0 +1 @@ +hyp_utils/xvectors \ No newline at end of file diff --git a/egs/lre22/fixed.v1.8k/utils b/egs/lre22/fixed.v1.8k/utils new file mode 120000 index 00000000..3d590a1d --- /dev/null +++ b/egs/lre22/fixed.v1.8k/utils @@ -0,0 +1 @@ +hyp_utils/kaldi/utils \ No newline at end of file diff --git a/egs/lre22/open.v1.8k/README.md b/egs/lre22/open.v1.8k/README.md new file mode 100644 index 00000000..877f99ca --- /dev/null +++ b/egs/lre22/open.v1.8k/README.md @@ -0,0 +1,43 @@ +# LRE22 Open Condition V1 + +Recipe for the NIST LRE22 open condition, based on the JHU-MIT Submission. + +## Citing +``` +@inproceedings{villalba23_interspeech, + author={Jesús Villalba and Jonas Borgstrom and Maliha Jahan and Saurabh Kataria and Leibny Paola Garcia and Pedro Torres-Carrasquillo and Najim Dehak}, + title={{Advances in Language Recognition in Low Resource African Languages: The JHU-MIT Submission for NIST LRE22}}, + year=2023, + booktitle={Proc. 
INTERSPEECH 2023}, + pages={521--525}, + doi={10.21437/Interspeech.2023-1094} +} +``` + +## Training Data + + - x-Vector networks trained on: + - VoxLingua107 + - NIST LRE17 Train + Dev + Eval / CTS + AfV + - Gaussian back-end trained on: + - NIST LRE22 dev with 2-fold cross-val + x10 augmentations + +## Usage + + - Run the run_0*.sh scripts in sequence + - By default, it uses an ECAPA-TDNN with 4 layers of dimension 2048. + - To change the default network, run the scripts with the --config-file argument: +```bash +run_011_train_xvector.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh +run_030_extract_xvectors.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh --use-gpu true +run_040_be_final.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh +``` + +## Results + +| Config | Model Type | Model Details | Back-end | Dev MinCp | Dev ActCp | Eval MinCp | Eval ActCp | | ------ | ---------- | ------------- | -------- | :-------: | :-------: | :--------: | :--------: | | config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh | ECAPA-TDNN 2048x4 | Stage-2 | GBE | 0.207 | 0.209 | 0.198 | 0.199 | | config_fbank64_stmn_fwseres2net50s8_v1.0.sh | fw-SE Res2Net50 scale=8 | Stage-2 | GBE | 0.227 | 0.229 | 0.213 | 0.215 | | Fusion ECAPA-TDNN + FwSE Res2Net50 | | | FoCal | 0.182 | 0.183 | 0.180 | 0.181 | diff --git a/egs/lre22/open.v1.8k/cmd.sh b/egs/lre22/open.v1.8k/cmd.sh new file mode 100755 index 00000000..f22c66b4 --- /dev/null +++ b/egs/lre22/open.v1.8k/cmd.sh @@ -0,0 +1,28 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
+ +if [ "$(hostname -d)" == "cm.gemini" ];then + #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 40G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01][234589]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" + export cuda_eval_cmd="$train_cmd" +fi + + + diff --git a/egs/lre22/open.v1.8k/conf/clsp.conf b/egs/lre22/open.v1.8k/conf/clsp.conf new file mode 100644 index 00000000..4ed38246 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/lre22/open.v1.8k/conf/coe_gpu_bigmem.conf b/egs/lre22/open.v1.8k/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/lre22/open.v1.8k/conf/coe_gpu_long.conf b/egs/lre22/open.v1.8k/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/lre22/open.v1.8k/conf/coe_gpu_rtx.conf b/egs/lre22/open.v1.8k/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/lre22/open.v1.8k/conf/coe_gpu_short.conf b/egs/lre22/open.v1.8k/conf/coe_gpu_short.conf new file mode 100644 index 
00000000..81de5cb7 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/lre22/open.v1.8k/conf/coe_gpu_v100.conf b/egs/lre22/open.v1.8k/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/lre22/open.v1.8k/conf/fbank64_specaug1_stmn_8k.yaml b/egs/lre22/open.v1.8k/conf/fbank64_specaug1_stmn_8k.yaml new file mode 100644 index 00000000..fce3804a --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/fbank64_specaug1_stmn_8k.yaml @@ -0,0 +1,24 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 8000 + frame_length: 25 + low_freq: 20 + high_freq: 3700 + num_filters: 64 + snip_edges: false + use_energy: false +spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. 
+ freq_mask_min_width: 0 + freq_mask_max_width: 8 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean +mvn: + context: 150 + norm_var: false diff --git a/egs/lre22/open.v1.8k/conf/fbank64_stmn_8k.yaml b/egs/lre22/open.v1.8k/conf/fbank64_stmn_8k.yaml new file mode 100644 index 00000000..dfd0d3e5 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/fbank64_stmn_8k.yaml @@ -0,0 +1,12 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 8000 + frame_length: 25 + low_freq: 20 + high_freq: 3700 + num_filters: 64 + snip_edges: false + use_energy: false +mvn: + context: 150 + norm_var: false diff --git a/egs/lre22/open.v1.8k/conf/other_conf/fbank64_stmn_8k.yaml b/egs/lre22/open.v1.8k/conf/other_conf/fbank64_stmn_8k.yaml new file mode 100644 index 00000000..dfd0d3e5 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/fbank64_stmn_8k.yaml @@ -0,0 +1,12 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 8000 + frame_length: 25 + low_freq: 20 + high_freq: 3700 + num_filters: 64 + snip_edges: false + use_energy: false +mvn: + context: 150 + norm_var: false diff --git a/egs/lre22/open.v1.8k/conf/other_conf/fbank80_stmn_16k.yaml b/egs/lre22/open.v1.8k/conf/other_conf/fbank80_stmn_16k.yaml new file mode 100644 index 00000000..f4091f5d --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/fbank80_stmn_16k.yaml @@ -0,0 +1,12 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 16000 + frame_length: 25 + low_freq: 20 + high_freq: 7600 + num_filters: 80 + snip_edges: false + use_energy: false +mvn: + context: 150 + norm_var: false diff --git a/egs/lre22/open.v1.8k/conf/other_conf/reverb_noise_aug.yaml b/egs/lre22/open.v1.8k/conf/other_conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage1_v2.4.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage1_v2.4.yaml new file mode 100644 index 00000000..16b17c08 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage1_v2.4.yaml @@ -0,0 +1,124 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + 
resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + - 2048 + - 3072 + - 3072 + - 4096 + - 4096 + - 5120 + - 5120 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 2 + - 3 + - 2 + - 3 + - 2 + - 3 + resb_strides: + - 1 + - 1 + - 2 + - 1 + - 2 + - 1 + - 2 + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 7168 + endpoint_layers: + - 2 + - 4 + - 6 + - 8 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + intertop_margin: 0. + margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 24000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage2_v2.4.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage2_v2.4.yaml new file mode 100644 index 00000000..2bc8675f --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage2_v2.4.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + #margin: 0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. 
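+  # note: this stage-2 config re-defines only the margin-loss head; the
+  # encoder architecture is assumed to be inherited from the stage-1
+  # checkpoint loaded by the fine-tuning script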
+trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage3_v2.4.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage3_v2.4.yaml new file mode 100644 index 00000000..2bc8675f --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048-5120x8_xvec_stage3_v2.4.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + #margin: 0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. 
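+  # stage-3 repeats the stage-2 settings unchanged (this file carries the
+  # same content as the stage-2 v2.4 config) for a further fine-tuning round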
+trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..f43b3712 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml @@ -0,0 +1,99 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + norm_before: false + dropout_rate: 0.1 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + intertop_margin: 0. 
+ margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 15000 + use_amp: true + swa_start: 9 + swa_lr: 5e-5 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 256 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.1.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.1.yaml new file mode 100644 index 00000000..5d98e662 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.1.yaml @@ -0,0 +1,95 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 256 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + cos_scale: 30.0 + margin: 0.2 + intertop_margin: 0.1 + margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 24000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.2.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.2.yaml new file mode 100644 index 00000000..038e7207 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.2.yaml @@ -0,0 +1,97 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 256 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + 
resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.2 + intertop_margin: 0.1 + margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 24000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.3.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.3.yaml new file mode 100644 index 00000000..f0200ad2 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.3.yaml @@ -0,0 +1,77 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + margin: 0. + margin_warmup_epochs: 0 + intertop_margin: 0. 
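+  # the trainer below updates only the embedding affine layer
+  # (train_mode: ft-embed-affine) with a small SGD learning rate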
+trainer: + optim: + opt_type: sgd + lr: 0.001 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + train_mode: ft-embed-affine diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.4.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.4.yaml new file mode 100644 index 00000000..3718b10b --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v2.4.yaml @@ -0,0 +1,97 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 256 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + intertop_margin: 0. 
+ margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 24000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml new file mode 100644 index 00000000..d1c87491 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml @@ -0,0 +1,98 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + dropout_rate: 0.3 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + intertop_margin: 0. 
+ margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.98 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 24000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml new file mode 100644 index 00000000..66c69e8e --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml @@ -0,0 +1,98 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + dropout_rate: 0.3 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + intertop_margin: 0. 
+ margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.98 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 24000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.2.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.2.yaml new file mode 100644 index 00000000..3a4a81a7 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.2.yaml @@ -0,0 +1,98 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + dropout_rate: 0.3 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + intertop_margin: 0. 
+ margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adamw + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.98 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 24000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.5.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.5.yaml new file mode 100644 index 00000000..17b1b6cf --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage1_v3.5.yaml @@ -0,0 +1,98 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + hid_act: swish + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + dropout_rate: 0.1 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0. + intertop_margin: 0. 
+ margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.98 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 24000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.1.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.1.yaml new file mode 100644 index 00000000..54f76200 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.1.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + #margin: 0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. 
+trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.2.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.2.yaml new file mode 100644 index 00000000..d68860be --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.2.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + #margin: 0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. 
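+  # same recipe as stage2_v2.1.1 above, but trained on 6 s chunks
+  # instead of 3 s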
+trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.yaml new file mode 100644 index 00000000..54f76200 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.1.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + #margin: 0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. 
+trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.3.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.3.yaml new file mode 100644 index 00000000..465d92eb --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.3.yaml @@ -0,0 +1,75 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.4 + margin_warmup_epochs: 2 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 0.001 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + #decay_steps: 8000 + #hold_steps: 10000 + decay_steps: 12000 + hold_steps: 12000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 4000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.4.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.4.yaml new file mode 100644 index 00000000..64e71f65 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage2_v2.4.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 
30.0 + #margin: 0.4 + #margin: 0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. +trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage3_v2.1.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage3_v2.1.yaml new file mode 100644 index 00000000..64e71f65 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage3_v2.1.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + #margin: 0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. 
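+  # relative to the stage-2 v2.1 config, this stage enables hard-prototype
+  # mining in the train sampler (num_hard_prototypes: 8 above)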
+trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage3_v2.4.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage3_v2.4.yaml new file mode 100644 index 00000000..64e71f65 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_ecapatdnn2048x4_xvec_stage3_v2.4.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + #margin: 0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. 
+trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.1.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.1.yaml new file mode 100644 index 00000000..fe0171d1 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.1.yaml @@ -0,0 +1,75 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_type: tseres2net50 + in_channels: 1 + in_feats: 64 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 3.25 + res2net_scale: 8 + se_r: 512 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + cos_scale: 30.0 + margin: 0.2 + intertop_margin: 0.1 + margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 16000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.2.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.2.yaml new file mode 100644 index 00000000..80925cc7 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.2.yaml @@ -0,0 +1,77 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + resnet_type: tseres2net50 + in_channels: 1 + in_feats: 64 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 3.25 + res2net_scale: 8 + se_r: 512 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 128 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.2 + intertop_margin: 0.1 + margin_warmup_epochs: 3.0 + 
dropout_rate: 0.0 + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 16000 + hold_steps: 40000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.3.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.3.yaml new file mode 100644 index 00000000..11997c55 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage1_v2.3.yaml @@ -0,0 +1,77 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + margin: 0. + margin_warmup_epochs: 0 + intertop_margin: 0. +trainer: + optim: + opt_type: sgd + lr: 0.001 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + train_mode: ft-embed-affine diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage2_v2.1.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage2_v2.1.yaml new file mode 100644 index 00000000..cde840fe --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage2_v2.1.yaml @@ -0,0 +1,79 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + #margin: 0.4 + #margin: 
0.2 + margin: 0. + margin_warmup_epochs: 2 + #intertop_margin: 0.1 + intertop_margin: 0. +trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + # decay_steps: 8000 + decay_steps: 4000 + # hold_steps: 10000 + hold_steps: 5000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + # warmup_steps: 10000 + warmup_steps: 5000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage2_v2.3.yaml b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage2_v2.3.yaml new file mode 100644 index 00000000..4f704b29 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/train_tseres2net50s8_xvec_stage2_v2.3.yaml @@ -0,0 +1,75 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 12 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 1 + class_name: class_id + seg_weight_mode: data-prior + num_hard_prototypes: 6 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 3 + class_name: class_id + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +feats: fbank64_stmn_8k.yaml +model: + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.4 + margin_warmup_epochs: 2 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 0.001 + momentum: 0.9 + # opt_type: adam + # lr: 0.001 + # amsgrad: true + # beta1: 0.9 + # beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + #decay_steps: 8000 + #hold_steps: 10000 + decay_steps: 12000 + hold_steps: 12000 + min_lr: 1.0e-05 + update_lr_on_opt_step: true + warmup_steps: 4000 + use_amp: true + swa_start: 9 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 11 + eff_batch_size: 512 + diff --git a/egs/lre22/open.v1.8k/conf/other_conf/vad_16k.yaml b/egs/lre22/open.v1.8k/conf/other_conf/vad_16k.yaml new file mode 100644 index 00000000..5fb0111c --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/vad_16k.yaml @@ -0,0 +1,8 @@ +sample_frequency: 16000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: 5.5 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 diff --git a/egs/lre22/open.v1.8k/conf/other_conf/vad_8k.yaml b/egs/lre22/open.v1.8k/conf/other_conf/vad_8k.yaml new file mode 100644 index 00000000..7592c9d1 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/other_conf/vad_8k.yaml @@ -0,0 +1,8 @@ +sample_frequency: 8000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: 5.5 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 diff --git a/egs/lre22/open.v1.8k/conf/reverb_noise_aug.yaml b/egs/lre22/open.v1.8k/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + 
max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/lre22/open.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml b/egs/lre22/open.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..1448df98 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml @@ -0,0 +1,105 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_specaug1_stmn_8k.yaml +model: + resnet_enc: + in_feats: 64 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 16 + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + norm_before: false + dropout_rate: 0.2 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.0 + intertop_margin: 0.0 + margin_warmup_epochs: 3.0 + dropout_rate: 0.2 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 65000 + warmup_steps: 15000 + min_lr: 1.0e-06 + #decay_steps: 16000 + #hold_steps: 40000 + #warmup_steps: 5000 + #min_lr: 1.0e-05 + update_lr_on_opt_step: true + use_amp: true + swa_start: 12 + swa_lr: 1e-5 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 15 + #eff_batch_size: 512 + eff_batch_size: 256 diff --git a/egs/lre22/open.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml b/egs/lre22/open.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml new file mode 100644 index 00000000..13ce9445 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml @@ -0,0 +1,82 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 
24 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +feats: fbank64_specaug1_stmn_8k.yaml +model: + resnet_type: fwseres2net50 + in_channels: 1 + in_feats: 64 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + res2net_width_factor: 3.25 + res2net_scale: 8 + se_r: 4 + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.0 + intertop_margin: 0.0 + margin_warmup_epochs: 3.0 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.95 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 65000 + warmup_steps: 15000 + min_lr: 1.0e-06 + #decay_steps: 16000 + #hold_steps: 40000 + #warmup_steps: 5000 + #min_lr: 1.0e-05 + update_lr_on_opt_step: true + use_amp: true + swa_start: 12 + swa_lr: 1e-5 + swa_anneal_epochs: 2 + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 diff --git a/egs/lre22/open.v1.8k/conf/vad_8k.yaml b/egs/lre22/open.v1.8k/conf/vad_8k.yaml new file mode 100644 index 00000000..1cfe34b0 --- /dev/null +++ b/egs/lre22/open.v1.8k/conf/vad_8k.yaml @@ -0,0 +1,9 @@ +sample_frequency: 8000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: -4.89 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 +wav_scale: 1 diff --git a/egs/lre22/open.v1.8k/datapath.sh b/egs/lre22/open.v1.8k/datapath.sh new file mode 100644 index 00000000..fec52329 --- /dev/null +++ b/egs/lre22/open.v1.8k/datapath.sh @@ -0,0 +1,87 @@ +# Copyright +# 2022 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + +#paths to databases + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + ldc_root3=/export/fs02/corpora3/LDC + ldc_root5=/export/corpora5/LDC + ldc_root=/export/corpora6/LDC + sre16_dev_root=$ldc_root/LDC2019S20/data/dev/R148_0_0 + sre16_eval_root=$ldc_root/LDC2019S20/data/eval/R149_0_1 + sre18_dev_root=$ldc_root5/LDC2018E46 + sre18_eval_root=$ldc_root3/LDC2018E51 + sre19cmn2_eval_root=$ldc_root3/LDC2019E58 + sre_superset_root=$ldc_root/LDC2021E08 + sre21_dev_root=$ldc_root/LDC2021E09 + sre21_eval_root=$ldc_root/LDC2021E10 + lre17_train_root=$ldc_root/LDC2022E16_2017_NIST_Language_Recognition_Evaluation_Training_and_Development_Sets + lre17_eval_root=$ldc_root/LDC2022E17_2017_NIST_Language_Recognition_Evaluation_Test_Set + lre22_dev_root=$ldc_root/LDC2022E14_2022_NIST_Language_Recognition_Evaluation_Development_Data + lre22_eval_root=/export/corpora6/lre22_test_data_v2 + voxlingua_root=/export/corpora6/voxlingua107 + musan_root=/export/corpora5/JHU/musan + babel_assamese_root=$ldc_root/LDC2016S06 + babel_bengali_root=$ldc_root/LDC2016S08 + babel_pashto_root=$ldc_root/LDC2016S09 + babel_turkish_root=$ldc_root/LDC2016S10 + babel_georgian_root=$ldc_root/LDC2016S12 + babel_vietnam_root=$ldc_root/LDC2017S01 + babel_haitian_root=$ldc_root/LDC2017S03 + babel_lao_root=$ldc_root/LDC2017S08 + babel_tamil_root=$ldc_root/LDC2017S13 + babel_zulu_root=$ldc_root/LDC2017S19 + babel_kurmanji_root=$ldc_root/LDC2017S22 + babel_tok_root=$ldc_root/LDC2018S02 + babel_kazakh_root=$ldc_root/LDC2018S13 + babel_telugu_root=$ldc_root/LDC2018S16 + babel_lithuanian_root=$ldc_root/LDC2019S03 + fleurs_root=/export/corpora6/LRE/FLEURS2022 + lwazi_root=/export/corpora6/LRE/Lwazi2009 + nchlt_root=/export/corpora6/LRE/NCHLT2014 + 
ammi_root=/export/corpora6/LRE/AMMI2020 + cv20_root=/export/corpora5/mozilla-common-voice/cv-corpus-5.1-2020-06-22 + cv22_root=/export/corpora6/LRE/CommonVoice2020/cv-corpus-11.0-2022-09-21 + adi_root=/export/corpora6/ADI17 + ast_root=/export/corpora6/LRE/AST2004 +elif [ "$(hostname --domain)" == "cm.gemini" ];then + ldc_root=/export/common/data/corpora/LDC + sre_root=/export/common/data/corpora/NIST/SRE + my_root=/exp/jvillalba/corpora + sre16_dev_root=/exp/jvillalba/corpora/LDC2019S20/data/dev/R148_0_0 + sre16_eval_root=/exp/jvillalba/corpora/LDC2019S20/data/eval/R149_0_1 + sre18_dev_root=$sre_root/SRE18/LDC2018E46_2018_NIST_Speaker_Recognition_Evaluation_Development_Set + sre18_eval_root=$sre_root/SRE18/Eval/LDC2018E51 + sre19cmn2_eval_root=/exp/jvillalba/corpora/LDC2019E58 + sre_superset_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E08 + sre21_dev_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E09 + sre21_eval_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E10 + lre17_train_root=$my_root/LDC2022E16_2017_NIST_Language_Recognition_Evaluation_Training_and_Development_Sets + lre17_eval_root=$my_root/LDC2022E17_2017_NIST_Language_Recognition_Evaluation_Test_Set + lre22_dev_root=$my_root/LDC2022E14_2022_NIST_Language_Recognition_Evaluation_Development_Data + lre22_eval_root=$my_root/lre22_test_data_v2 + voxlingua_root=$my_root/voxlingua107 + musan_root=/expscratch/dgromero/corpora/musan + babel_assamese_root=$ldc_root/LDC2016S06 + babel_bengali_root=$ldc_root/LDC2016S08 + babel_pashto_root=$ldc_root/LDC2016S09 + babel_turkish_root=$my_root/LDC2016S10 + babel_georgian_root=$my_root/LDC2016S12 + babel_vietnam_root=$my_root/LDC2017S01 + babel_haitian_root=$my_root/LDC2017S03 + babel_lao_root=$ldc_root/LDC2017S08 + babel_tamil_root=$ldc_root/LDC2017S13 + babel_zulu_root=$ldc_root/LDC2017S19 + babel_kurmanji_root=$ldc_root/LDC2017S22 + babel_tok_root=$my_root/LDC2018S02 + babel_kazakh_root=$ldc_root/LDC2018S13 + babel_telugu_root=$ldc_root/LDC2018S16 + babel_lithuanian_root=$my_root/LDC2019S03 + adi_root=/exp/jvillalba/corpora/ADI17 + +else + echo "Put your database paths here" + exit 1 +fi diff --git a/egs/lre22/open.v1.8k/default_config.sh b/egs/lre22/open.v1.8k/default_config.sh new file mode 120000 index 00000000..d1be989f --- /dev/null +++ b/egs/lre22/open.v1.8k/default_config.sh @@ -0,0 +1 @@ +global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh \ No newline at end of file diff --git a/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh b/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh new file mode 100644 index 00000000..1abb3d3f --- /dev/null +++ b/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh @@ -0,0 +1,20 @@ +# acoustic features +feat_config=conf/fbank64_stmn_8k.yaml +feat_type=fbank64_stmn + +#vad +vad_config=conf/vad_8k.yaml + +# x-vector training +nnet_data=open + +# x-vector cfg + +nnet_type=resnet1d +nnet_stages=1 +nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v1.0.yaml + +nnet_name=${feat_type}_ecapatdnn2048x4_v1.0 +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0012.pth diff --git a/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh b/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh new file mode 100644 index 00000000..6a735e4c --- /dev/null +++ b/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh @@ -0,0 +1,45 @@ +# acoustic features 
+feat_config=conf/fbank64_stmn_8k.yaml +feat_type=fbank64_stmn + +#vad +vad_config=conf/vad_8k.yaml + +# x-vector training +nnet_data=open + +# x-vector cfg + +nnet_type=resnet +nnet_stages=2 +nnet_s1_base_cfg=conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml + +nnet_name=${feat_type}_fwseres2net50s8_v1.0 +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/swa_model_ep0012.pth +#nnet_s1=$nnet_s1_dir/model_ep0001.pth +nnet_s1=$nnet_s1_dir/model_ep0008.pth +nnet_s1=$nnet_s1_dir/model_ep0011.pth +nnet_s1=$nnet_s1_dir/model_ep0015.pth +nnet_s1=$nnet_s1_dir/swa_model_ep0016.pth + +nnet_s2_base_cfg=conf/train_tseres2net50s8_xvec_stage2_v1.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +#nnet_s2=$nnet_s2_dir/swa_model_ep0013.pth +nnet_s2=$nnet_s2_dir/model_ep0001.pth +nnet_s2=$nnet_s2_dir/model_ep0002.pth +nnet_s2=$nnet_s2_dir/model_ep0004.pth +# nnet_s2=$nnet_s2_dir/model_ep0008.pth +# nnet_s2=$nnet_s2_dir/swa_model_ep0012.pth + +nnet_s3_base_cfg=conf/train_tseres2net50s8_xvec_stage3_v2.1.yaml +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +#nnet_s3=$nnet_s3_dir/swa_model_ep0013.pth +#nnet_s3=$nnet_s3_dir/model_ep0007.pth +nnet_s3=$nnet_s3_dir/model_ep0001.pth +nnet_s3=$nnet_s3_dir/model_ep0004.pth +nnet_s3=$nnet_s3_dir/model_ep0008.pth + diff --git a/egs/lre22/open.v1.8k/hyp_utils b/egs/lre22/open.v1.8k/hyp_utils new file mode 120000 index 00000000..f6d1eb7a --- /dev/null +++ b/egs/lre22/open.v1.8k/hyp_utils @@ -0,0 +1 @@ +../../../hyp_utils \ No newline at end of file diff --git a/egs/lre22/open.v1.8k/local b/egs/lre22/open.v1.8k/local new file mode 120000 index 00000000..c2a3fdea --- /dev/null +++ b/egs/lre22/open.v1.8k/local @@ -0,0 +1 @@ +../fixed.v1.8k/local \ No newline at end of file diff --git a/egs/lre22/open.v1.8k/path.sh b/egs/lre22/open.v1.8k/path.sh new file mode 100755 index 00000000..6994fdab --- /dev/null +++ b/egs/lre22/open.v1.8k/path.sh @@ -0,0 +1,5 @@ + +export HYP_ROOT=$(readlink -f `pwd -P`/../../..) +export TOOLS_ROOT=$HYP_ROOT/tools + +. $TOOLS_ROOT/path.sh diff --git a/egs/lre22/open.v1.8k/resources b/egs/lre22/open.v1.8k/resources new file mode 120000 index 00000000..113b3492 --- /dev/null +++ b/egs/lre22/open.v1.8k/resources @@ -0,0 +1 @@ +../fixed.v1.8k/resources \ No newline at end of file diff --git a/egs/lre22/open.v1.8k/run_001_prepare_data.sh b/egs/lre22/open.v1.8k/run_001_prepare_data.sh new file mode 100755 index 00000000..bb64cdbe --- /dev/null +++ b/egs/lre22/open.v1.8k/run_001_prepare_data.sh @@ -0,0 +1,342 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +config_file=default_config.sh +stage=1 + +. parse_options.sh || exit 1; +. 
datapath.sh + + +if [ $stage -le 1 ];then + # Prepares voxlingua 107 for training + hyp_utils/conda_env.sh \ + local/prepare_voxlingua107.py \ + --corpus-dir $voxlingua_root \ + --output-dir data/voxlingua107 \ + --remove-langs en-en es-es ar-ar pt-pt \ + --map-langs-to-lre-codes \ + --target-fs 8000 + +fi + +if [ $stage -le 2 ];then + # Prepare LRE17 Training data + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_train_root \ + --output-dir data/lre17_train \ + --subset train \ + --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_train_root \ + --output-dir data/lre17_dev_cts \ + --subset dev \ + --source mls14 \ + --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_train_root \ + --output-dir data/lre17_dev_afv \ + --subset dev \ + --source vast \ + --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_eval_root \ + --output-dir data/lre17_eval_cts \ + --subset eval \ + --source mls14 \ + --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_eval_root \ + --output-dir data/lre17_eval_afv \ + --subset eval \ + --source vast \ + --target-fs 8000 + +fi + +if [ $stage -le 3 ];then + hyp_utils/conda_env.sh \ + local/prepare_lre22_dev.py \ + --corpus-dir $lre22_dev_root \ + --output-dir data/lre22_dev \ + --target-fs 8000 + +fi + +if [ $stage -le 4 ];then + hyp_utils/conda_env.sh \ + local/prepare_lre22_eval.py \ + --corpus-dir $lre22_eval_root \ + --output-dir data/lre22_eval \ + --target-fs 8000 + +fi + +if [ $stage -le 5 ];then + local/make_sre16_train_dev.sh $sre16_dev_root 8 data + local/make_sre16_train_eval.sh $sre16_eval_root 8 data +fi + +if [ $stage -le 6 ];then + local/make_sre18_dev_unlabeled.sh $sre18_dev_root 8 data + local/make_sre18_train_dev.sh $sre18_dev_root 8 data + local/make_sre18_train_eval.sh $sre18_eval_root 8 data +fi + +if [ $stage -le 7 ];then + # Prepare sre19 + local/make_sre19cmn2_eval.sh $sre19cmn2_eval_root 8 data +fi + +if [ $stage -le 8 ];then + # Prepare SRE21 dev + hyp_utils/conda_env.sh \ + local/prepare_sre21av_dev_audio.py \ + --corpus-dir $sre21_dev_root \ + --target-fs 8000 \ + --output-path data/sre21_audio_dev \ + --av-output-path data/sre21_audio-visual_dev + # Prepare SRE21 eval + hyp_utils/conda_env.sh \ + local/prepare_sre21av_eval_audio.py \ + --corpus-dir $sre21_eval_root \ + --target-fs 8000 \ + --output-path data/sre21_audio_eval \ + --av-output-path data/sre21_audio-visual_eval + +fi + +if [ $stage -le 9 ];then + # Prepare SRE CTS superset + hyp_utils/conda_env.sh \ + local/prepare_sre_cts_superset.py \ + --corpus-dir $sre_superset_root \ + --target-fs 8000 \ + --output-dir data/sre_cts_superset +fi + +if [ $stage -le 10 ];then + # Prepare babel datasets + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_assamese_root \ + --target-fs 8000 \ + --lang-code as-as \ + --output-dir data/babel_assamese + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_bengali_root \ + --target-fs 8000 \ + --lang-code bn-bn \ + --output-dir data/babel_bengali + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_pashto_root \ + --target-fs 8000 \ + --lang-code ps-ps \ + --output-dir data/babel_pashto + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_turkish_root \ + --target-fs 8000 \ + --lang-code tr-tr \ + --output-dir data/babel_turkish + hyp_utils/conda_env.sh \ + 
local/prepare_babel.py \ + --corpus-dir $babel_georgian_root \ + --target-fs 8000 \ + --lang-code ka-ka \ + --output-dir data/babel_georgian + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_vietnam_root \ + --target-fs 8000 \ + --lang-code vi-vi \ + --output-dir data/babel_vietnam + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_haitian_root \ + --target-fs 8000 \ + --lang-code ht-ht \ + --output-dir data/babel_haitian + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_lao_root \ + --target-fs 8000 \ + --lang-code lo-lo \ + --output-dir data/babel_lao + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_tamil_root \ + --target-fs 8000 \ + --lang-code ta-ta \ + --output-dir data/babel_tamil + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_zulu_root \ + --target-fs 8000 \ + --lang-code zul-zul \ + --output-dir data/babel_zulu + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_kurmanji_root \ + --target-fs 8000 \ + --lang-code kur-kur \ + --output-dir data/babel_kurmanji + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_tok_root \ + --target-fs 8000 \ + --lang-code tok-tok \ + --output-dir data/babel_tok + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_kazakh_root \ + --target-fs 8000 \ + --lang-code kk-kk \ + --output-dir data/babel_kazakh + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_telugu_root \ + --target-fs 8000 \ + --lang-code te-te \ + --output-dir data/babel_telugu + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_lithuanian_root \ + --target-fs 8000 \ + --lang-code lt-lt \ + --output-dir data/babel_lithuanian + +fi + +if [ $stage -le 11 ];then + hyp_utils/conda_env.sh \ + local/prepare_some_data_for_lre.py \ + --corpus-dir $fleurs_root \ + --output-dir data/fleurs22 \ + --map-langs-to-lre-codes --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_some_data_for_lre_cat.py \ + --corpus-dir $lwazi_root \ + --output-dir data/lwazi09 \ + --map-langs-to-lre-codes --target-fs 8000 + hyp_utils/conda_env.sh \ + local/prepare_some_data_for_lre_cat.py \ + --corpus-dir $nchlt_root \ + --output-dir data/nchlt14 \ + --map-langs-to-lre-codes --target-fs 8000 + hyp_utils/conda_env.sh \ + local/prepare_some_data_for_lre_cat.py \ + --corpus-dir $ammi_root \ + --output-dir data/ammi20 \ + --map-langs-to-lre-codes --target-fs 8000 +fi + +if [ $stage -le 12 ];then + + hyp_utils/conda_env.sh \ + local/prepare_common_voice_cat.py \ + --corpus-dir $cv22_root \ + --output-dir data/cv22_tir \ + --keep-langs tir-tir \ + --map-langs-to-lre-codes --target-fs 8000 +fi + + +if [ $stage -le 13 ];then + hyp_utils/conda_env.sh \ + local/prepare_common_voice_accents_cat.py \ + --corpus-dir $cv20_root \ + --output-dir data/cv20_eng_ine \ + --lang en \ + --target-fs 8000 + hyp_utils/conda_env.sh \ + local/prepare_common_voice_accents_cat.py \ + --corpus-dir $cv20_root \ + --output-dir data/cv20_fra \ + --lang fr \ + --target-fs 8000 + +fi + +if [ $stage -le 14 ];then + hyp_utils/conda_env.sh \ + local/prepare_adi17.py \ + --corpus-dir $adi_root \ + --output-dir data/adi17 \ + --map-langs-to-lre-codes --target-fs 8000 +fi + +if [ $stage -le 15 ];then + hyp_utils/conda_env.sh \ + local/prepare_ast_cat.py \ + --corpus-dir $ast_root \ + --output-dir data/ast \ + --map-langs-to-lre-codes --target-fs 8000 +fi + +if [ $stage -le 16 ];then + #combine data + 
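+ # Merge the per-corpus data dirs into the grouped sets (babel, cv, sre16, sre18, sre19, sre21_cts, sre21_afv, sre16-21_cts) consumed by the later VAD and preprocessing stages.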
utils/combine_data.sh \ + data/babel \ + data/babel_{a*,b*,g*,k*,l*,p*,t*,v*,zulu} + + utils/combine_data.sh \ + data/cv \ + data/cv20_eng_ine data/cv20_fra data/cv22_tir + + utils/combine_data.sh \ + data/sre16 \ + data/sre16_train_{dev*,eval*} + + utils/combine_data.sh \ + data/sre18 \ + data/sre18_train_{dev*,eval*} data/sre18_dev_unlabeled + + utils/combine_data.sh \ + data/sre19 \ + data/sre19_eval_{enroll,test}_cmn2 + + utils/combine_data.sh \ + data/sre21_cts \ + data/sre21_*_cts + + utils/combine_data.sh \ + data/sre21_afv \ + data/sre21_audio*_{dev*,eval*}_afv + + utils/combine_data.sh \ + data/sre16-21_cts \ + data/sre1{6,8,9} data/sre21_cts + +fi + +if [ $stage -le 17 ];then + if [ -d ../fixed.v1.8k/lre-scorer ];then + ln -s ../fixed.v1.8k/lre-scorer + else + local/download_lre22_scorer.sh + fi + if [ -d ../fixed.v1.8k/focal_multiclass ];then + ln -s ../fixed.v1.8k/focal_multiclass + else + local/download_focal.sh + fi +fi diff --git a/egs/lre22/open.v1.8k/run_002_compute_evad.sh b/egs/lre22/open.v1.8k/run_002_compute_evad.sh new file mode 100755 index 00000000..f7ccdfa7 --- /dev/null +++ b/egs/lre22/open.v1.8k/run_002_compute_evad.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e +nodes=b1 +storage_name=$(date +'%m_%d_%H_%M') +vaddir=`pwd`/exp/vad_e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file + + +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $vaddir/storage ]; then + dir_name=$USER/hyp-data/lre22-fixed-v1.8k-$storage_name/vad/storage + if [ "$nodes" == "b0" ];then + utils/create_split_dir.pl \ + /export/b{04,05,06,07}/$dir_name $vaddir/storage + elif [ "$nodes" == "b1" ];then + utils/create_split_dir.pl \ + /export/b1{0,1,2,3,4,5,6,7,8,9}/$dir_name $vaddir/storage + elif [ "$nodes" == "c0" ];then + utils/create_split_dir.pl \ + /export/c{06,07,08,09}/$dir_name $vaddir/storage + elif [ "$nodes" == "fs01" ];then + utils/create_split_dir.pl \ + /export/fs01/$dir_name $vaddir/storage + elif [ "$nodes" == "fs05" ];then + utils/create_split_dir.pl \ + /export/fs05/$dir_name $vaddir/storage + else + echo "we don't distribute data between multiple machines" + fi + fi +fi + +# VAD Train/Test Datasets +if [ $stage -le 2 ];then + for name in voxlingua107 \ + lre17_train \ + lre17_dev_cts lre17_dev_afv \ + lre17_eval_cts lre17_eval_afv \ + lre22_dev lre22_eval \ + babel sre16-21_cts sre21_afv sre_cts_superset \ + lwazi09 nchlt14 adi17 fleurs22 ammi20 \ + ast cv + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 40 ? $num_spk:40)) + hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ + --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ + data/${name} exp/make_vad/$name $vaddir + utils/fix_data_dir.sh data/${name} + done +fi + diff --git a/egs/lre22/open.v1.8k/run_003_prepare_noises_rirs.sh b/egs/lre22/open.v1.8k/run_003_prepare_noises_rirs.sh new file mode 100755 index 00000000..638143f0 --- /dev/null +++ b/egs/lre22/open.v1.8k/run_003_prepare_noises_rirs.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. 
datapath.sh + +# We prepare the noise files and RIR for online speech augmentation +if [ $stage -le 1 ]; then + + # Prepare the MUSAN corpus, which consists of music, speech, and noise + # suitable for augmentation. + local/make_musan.sh $musan_root 8 data + + for name in musan_noise musan_music + do + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ + --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_proc_audio exp/${name}_proc_audio + utils/fix_data_dir.sh data/${name}_proc_audio + done + +fi + +if [ $stage -le 2 ]; then + + # Create Babble noise from MUSAN speech files + for name in musan_speech + do + steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ + --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_babble exp/${name}_babble + # utils/fix_data_dir.sh data/${name}_babble + done +fi + +if [ $stage -le 3 ]; then + if [ ! -d "RIRS_NOISES" ]; then + if [ -d ../v1.16k/RIRS_NOISES ];then + ln -s ../v1.16k/RIRS_NOISES + else + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + fi + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/smallroom 8 data/rirs_smallroom + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/mediumroom 8 data/rirs_mediumroom + local/make_rirs_data.sh RIRS_NOISES/real_rirs_isotropic_noises 8 data/rirs_real + for rirs in rirs_smallroom rirs_mediumroom rirs_real + do + #pack all rirs in h5 files + steps_xvec/pack_rirs_for_nnet_train.sh data/$rirs data/$rirs exp/rirs/$rirs + done + +fi + + diff --git a/egs/lre22/open.v1.8k/run_004_apply_codecs.sh b/egs/lre22/open.v1.8k/run_004_apply_codecs.sh new file mode 100755 index 00000000..6efc016b --- /dev/null +++ b/egs/lre22/open.v1.8k/run_004_apply_codecs.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file + +if [ $stage -le 1 ];then + + for data in voxlingua107 \ + lre17_dev_afv lre17_eval_afv \ + sre21_afv ast cv \ + lwazi09 nchlt14 adi17 fleurs22 ammi20 + do + hyp_utils/conda_env.sh \ + local/apply_tel_codecs_to_kaldi_datadir.py \ + --input-dir data/$data \ + --output-dir data/${data}_codecs + done + +fi diff --git a/egs/lre22/open.v1.8k/run_010_prepare_xvec_train_data.sh b/egs/lre22/open.v1.8k/run_010_prepare_xvec_train_data.sh new file mode 100755 index 00000000..d261a287 --- /dev/null +++ b/egs/lre22/open.v1.8k/run_010_prepare_xvec_train_data.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. 
$config_file + +if [ $stage -le 1 ]; then + # This script preprocess audio for x-vector training + for name in voxlingua107_codecs \ + lre17_train \ + lre17_{dev,eval}_{cts,afv,afv_codecs} \ + babel sre16-21_cts sre_cts_superset \ + sre21_afv_codecs cv_codecs adi17_codecs \ + lwazi09{,_codecs} nchlt14{,_codecs} fleurs22{,_codecs} ammi20{,_codecs} ast{,_codecs} + do + steps_xvec/preprocess_audios_for_nnet_train.sh \ + --nj 40 --cmd "$train_cmd" \ + --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') --use-bin-vad true \ + data/${name} data/${name}_proc_audio_no_sil exp/${name}_proc_audio_no_sil + utils/fix_data_dir.sh data/${name}_proc_audio_no_sil + done +fi + +if [ $stage -le 2 ];then + utils/combine_data.sh \ + data/lre17_proc_audio_no_sil \ + data/lre17_train_proc_audio_no_sil \ + data/lre17_{dev,eval}_{cts,afv,afv_codecs}_proc_audio_no_sil + + utils/combine_data.sh \ + data/babel_sre_proc_audio_no_sil \ + data/{babel,sre16-21_cts,sre21_afv_codecs,sre_cts_superset}_proc_audio_no_sil + + utils/combine_data.sh \ + data/others_afr_proc_audio_no_sil \ + data/adi17_proc_audio_no_sil \ + data/{lwazi09,nchlt14,fleurs22,ammi20,ast}{,_codecs}_proc_audio_no_sil +fi + +if [ $stage -le 3 ]; then + # Now, we remove files with less than 3s + hyp_utils/remove_short_audios.sh --min-len 3 data/voxlingua107_codecs_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/lre17_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/babel_sre_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/others_afr_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/cv_codecs_proc_audio_no_sil +fi + +if [ $stage -le 4 ];then + # merge all data + utils/combine_data.sh \ + data/open_proc_audio_no_sil \ + data/{voxlingua107_codecs,lre17,babel_sre,cv_codecs,others_afr}_proc_audio_no_sil +fi + + +if [ $stage -le 5 ]; then + for name in open_proc_audio_no_sil + do + hyp_utils/conda_env.sh \ + local/split_segments_train_val.py \ + --segments-file data/$name/utt2lang \ + --recordings-file data/$name/wav.scp \ + --durations-file data/$name/utt2dur \ + --val-percent 2. \ + --remove-langs fra-mix ara-ary en-en es-es pt-pt ar-ar \ + --output-dir data/$name/train_val_split + done +fi + +if [ $stage -le 6 ]; then + awk 'BEGIN{ +adapt_langs_list="ara-acm ara-aeb ara-apc ara-arq ara-ary ara-arz ara-ayl ara-jor ara-ksa ara-kuw ara-leb ara-mau ara-mor ara-oma ara-pal ara-qat ara-sud ara-syr ara-uae ara-yem fra-can fra-fra fra-ntf eng-ens eng-gbr eng-iaf eng-ine eng-usg eng-zho afr-afr nbl-nbl orm-orm tir-tir tso-tso ven-ven xho-xho zul-zul"; +nf=split(adapt_langs_list, f, " "); +for(i=1;i<=nf;i++){ adapt_langs[f[i]]=1;}; +FS=","; OFS=","; +getline; print $0; +} +{ if ($1 in adapt_langs) { $3="1."} else{ $3="0.01"}; print $0}' \ + data/open_proc_audio_no_sil/train_val_split/class_file.csv > \ + data/open_proc_audio_no_sil/train_val_split/class_file_adapt_1.csv +fi diff --git a/egs/lre22/open.v1.8k/run_011_train_xvector.sh b/egs/lre22/open.v1.8k/run_011_train_xvector.sh new file mode 100755 index 00000000..056a9754 --- /dev/null +++ b/egs/lre22/open.v1.8k/run_011_train_xvector.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh + +list_dir=data/${nnet_data}_proc_audio_no_sil + +# add extra args from the command-line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" +fi +if [ "$use_tb" == "true" ];then + extra_args="$extra_args --trainer.use-tensorboard" +fi +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project lre22-fixed-v1.8k --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" +fi + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +# Network Training +if [ $stage -le 1 ]; then + mkdir -p $nnet_s1_dir/log + if [ ! -f "$nnet_s0" ];then + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_xvector_from_wav.py $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \ + --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \ + --trainer.exp-path $nnet_s1_dir \ + --num-gpus $ngpu --master-port 3456 + else + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \ + --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \ + --in-model-file $nnet_s0 \ + --trainer.exp-path $nnet_s1_dir \ + --num-gpus $ngpu + + fi + +fi + + +# Class-balanced Fine-tuning, only for configs that define a second stage +if [ $stage -le 2 ] && [ "$nnet_stages" -ge 2 ]; then + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" + fi + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_xvector_from_wav.py $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \ + --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \ + --in-model-file $nnet_s1 \ + --trainer.exp-path $nnet_s2_dir \ + --num-gpus $ngpu + +fi diff --git a/egs/lre22/open.v1.8k/run_030_extract_xvectors.sh b/egs/lre22/open.v1.8k/run_030_extract_xvectors.sh new file mode 100755 index 00000000..ea2c59f6 --- /dev/null +++ b/egs/lre22/open.v1.8k/run_030_extract_xvectors.sh @@ -0,0 +1,219 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=2 +nnet_stage=2 +config_file=default_config.sh +use_gpu=false +do_tsne=true +split_dev=false +xvec_chunk_length=12800 +. parse_options.sh || exit 1; +. 
$config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu true --chunk-length $xvec_chunk_length" + xvec_cmd="$cuda_eval_cmd --mem 4G" +else + xvec_cmd="$train_cmd --mem 12G" +fi + +if [ $nnet_stages -lt $nnet_stage ];then + nnet_stage=$nnet_stages +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + +xvector_dir=exp/xvectors/$nnet_name + +# if [ $stage -le 1 ]; then +# # Extract xvectors for training +# for name in lre17_proc_audio_no_sil \ +# voxlingua107_codecs_proc_audio_no_sil \ +# babel_sre_proc_audio_no_sil \ +# cv_codecs_proc_audio_no_sil \ +# others_afr_proc_audio_no_sil +# do +# steps_xvec/extract_xvectors_from_wav.sh \ +# --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ +# --use-bin-vad false \ +# --random-utt-length true --min-utt-length 300 --max-utt-length 3000 \ +# --feat-config $feat_config \ +# $nnet data/${name} \ +# $xvector_dir/${name} +# done +# fi + +if [ $stage -le 2 ]; then + # Extract xvectors for training + for name in lre22_dev + do + steps_xvec/extract_xvectors_from_wav.sh \ + --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ + --use-bin-vad true --num-augs 10 --aug-config conf/reverb_noise_aug.yaml \ + --random-utt-length true --min-utt-length 300 --max-utt-length 3000 \ + --feat-config $feat_config \ + $nnet data/${name} \ + $xvector_dir/${name}_aug \ + data/${name}_aug + done +fi + + +if [ $stage -le 3 ]; then + # Extracts x-vectors for dev and eval + for name in lre22_dev lre22_eval + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 100 ? $num_spk:100)) + steps_xvec/extract_xvectors_from_wav.sh \ + --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ + --feat-config $feat_config \ + $nnet data/$name \ + $xvector_dir/$name + done +fi + +if [ $stage -le 4 ]; then + for name in lre22_dev + do + if [ "$do_tsne" == "true" ] || [ "$split_dev" == "true" ];then + $train_cmd \ + $xvector_dir/$name/tsne/tsne.log \ + hyp_utils/conda_env.sh \ + plot_embedding_tsne.py \ + --train-list data/$name/utt2lang \ + --train-v-file scp:$xvector_dir/$name/xvector.scp \ + --output-dir $xvector_dir/$name/tsne \ + --pca-var-r 0.975 \ + --lnorm \ + --prob-plot 1. \ + --tsne.metric cosine \ + --tsne.early-exaggeration 12 --tsne.perplexity 30 + + $train_cmd \ + $xvector_dir/$name/tsne_per_class/tsne.log \ + hyp_utils/conda_env.sh \ + plot_embedding_tsne_per_class.py \ + --train-list data/$name/utt2lang \ + --train-v-file scp:$xvector_dir/$name/xvector.scp \ + --output-dir $xvector_dir/$name/tsne_per_class \ + --pca-var-r 0.975 \ + --lnorm \ + --prob-plot 1. \ + --tsne.metric cosine \ + --tsne.early-exaggeration 12 --tsne.perplexity 30 \ + --do-ahc --cluster-tsne --ahc-thr -5 + + if [ "$split_dev" == "true" ];then + hyp_utils/conda_env.sh \ + local/split_dev.py \ + --segs-file $xvector_dir/$name/tsne_per_class/segments.csv \ + --output-dir ./resources/dev_splits \ + --num-folds 2 + + # delete the split data dirs so they are regenerated later + rm -rf data/lre22_dev_p{1,2} + + fi + fi + done +fi + +if [ $stage -le 5 ]; then + if [ ! 
-d data/lre22_dev_p1 ];then + awk -F "," '$1!="id" { print $1}' \ + ./resources/dev_splits/fold_0/train_segments.csv \ + > p1.lst + awk -F "," '$1!="id" { print $1}' \ + ./resources/dev_splits/fold_0/test_segments.csv \ + > p2.lst + + for p in p1 p2 + do + utils/subset_data_dir.sh \ + --utt-list $p.lst \ + data/lre22_dev data/lre22_dev_$p + done + fi +fi + +if [ $stage -le 6 ]; then + if [ -d data/lre22_dev_aug ] && [ ! -d data/lre22_dev_aug_p1 ];then + awk -v fsegs=./resources/dev_splits/fold_0/train_segments.csv ' +BEGIN{FS=","; +getline; +while(getline < fsegs) +{ + segs[$1] +} +FS=" "; +} +{ if($2 in segs){ print $1}}' data/lre22_dev_aug/augm2clean \ + > p1.lst + + awk -v fsegs=./resources/dev_splits/fold_0/test_segments.csv ' +BEGIN{FS=","; +getline; +while(getline < fsegs) +{ + segs[$1]=1; +} +FS=" "; +} +{ if($2 in segs){ print $1}}' data/lre22_dev_aug/augm2clean \ + > p2.lst + + for p in p1 p2 + do + utils/subset_data_dir.sh \ + --utt-list $p.lst \ + data/lre22_dev_aug data/lre22_dev_aug_$p + done + fi +fi + +if [ $stage -le 7 ];then + if [ -f $xvector_dir/lre22_dev_aug/xvector.scp ];then + mkdir -p $xvector_dir/lre22_dev_aug_clean + cat $xvector_dir/lre22_dev/xvector.scp \ + $xvector_dir/lre22_dev_aug/xvector.scp \ + > $xvector_dir/lre22_dev_aug_clean/xvector.scp + + for p in "" _p1 _p2 + do + if [ ! -d data/lre22_dev_aug_clean$p ]; then + utils/combine_data.sh \ + data/lre22_dev_aug_clean$p \ + data/lre22_dev$p \ + data/lre22_dev_aug$p + fi + done + fi +fi + +exit diff --git a/egs/lre22/open.v1.8k/run_040_be_final.sh b/egs/lre22/open.v1.8k/run_040_be_final.sh new file mode 100755 index 00000000..fe5b6f18 --- /dev/null +++ b/egs/lre22/open.v1.8k/run_040_be_final.sh @@ -0,0 +1,434 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=2 +config_file=default_config.sh +. parse_options.sh || exit 1; +. 
$config_file + +if [ $nnet_stages -lt $nnet_stage ];then + nnet_stage=$nnet_stages +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +fi + +xvector_dir=exp/xvectors/$nnet_name +be_base_dir=exp/be/$nnet_name +score_base_dir=exp/scores/$nnet_name + +if [ $stage -le 1 ];then + for r in 1 #0.9999 0.999 #0.99 0.975 0.95 + do + be_name=pca${r}_cw_lnorm_lgbe_lre22_aug + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd \ + $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --pca.pca-var-r $r \ + --do-lnorm --whiten \ + --output-dir $be_dir_p + + $train_cmd \ + ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + + + ) & + + done + + ( + $train_cmd \ + $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --pca.pca-var-r $r \ + --do-lnorm --whiten \ + --output-dir $be_dir + + $train_cmd \ + ${score_dir}_p12/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd \ + ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + local/score_lre22.sh dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + ) & + + + done + wait + +fi + +exit +# Back-ends 
below over-fitted + +if [ $stage -le 2 ];then + for r in 1 + do + for penalty in l2 #l1 + do + for c in 1 #0.1 1 + do + for ary_thr in 0.975 #0.85 0.7 #0.99 0.95 0.9 #15 ##1 5 10 20 + do + be_name=pca${r}_cw_lnorm_lsvm_${penalty}_c${c}_sqhinge_lre22_aug_lre17_aryt${ary_thr} + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd \ + $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v3.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.penalty $penalty --svm.c $c --svm.dual false \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir_p + + $train_cmd \ + ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + ) & + done + ( + $train_cmd \ + $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v3.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.penalty $penalty --svm.c $c --svm.dual false \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir + + $train_cmd \ + ${score_dir}/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd \ + ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + local/score_lre22.sh \ + dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh \ + dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + ) & + done + done + done + done + wait + +fi + +if [ 
$stage -le 3 ];then + for r in 1 # 0.9999 0.99 0.975 0.95 0.9 0.8 + do + for shrinking in true #false + do + for c in 1 10 #0.1 1 10 #0.01 0.1 1 10 # 0.0001 + do + for vl in false #true #false + do + if [ "$vl" == "true" ];then + do_vl="--do-vl" + else + do_vl="--no_do-vl" + fi + ary_thr=0.975 + be_name=pca${r}_cw_lnorm_gsvm_shrinking_${shrinking}_c${c}_lre17_aryt${ary_thr}_vl${vl}_aug_clean + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + #score_dir=$score_base_dir/${be_name}_logpost + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --voxlingua-v-file scp:$xvector_dir/voxlingua107_codecs_proc_audio_no_sil/xvector.scp \ + --voxlingua-list data/voxlingua107_codecs_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.shrinking $shrinking --svm.c $c --svm.break_ties false --svm.max-iter 500\ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir_p \ + --do-lre17 $do_vl + + $train_cmd ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --svm.eval-type cat-log-post \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + ) & + done + ( + $train_cmd $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --voxlingua-v-file scp:$xvector_dir/voxlingua107_codecs_proc_audio_no_sil/xvector.scp \ + --voxlingua-list data/voxlingua107_codecs_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.shrinking $shrinking --svm.c $c --svm.break_ties false --svm.max-iter 500 \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir \ + --do-lre17 $do_vl + + $train_cmd ${score_dir}/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --svm.eval-type cat-log-post \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --svm.eval-type cat-log-post \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + 
local/score_lre22.sh \ + dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh \ + dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + + ) & + done + done + done + done + wait + +fi diff --git a/egs/lre22/open.v1.8k/steps b/egs/lre22/open.v1.8k/steps new file mode 120000 index 00000000..aede39fe --- /dev/null +++ b/egs/lre22/open.v1.8k/steps @@ -0,0 +1 @@ +hyp_utils/kaldi/steps \ No newline at end of file diff --git a/egs/lre22/open.v1.8k/steps_be b/egs/lre22/open.v1.8k/steps_be new file mode 120000 index 00000000..48aedc5a --- /dev/null +++ b/egs/lre22/open.v1.8k/steps_be @@ -0,0 +1 @@ +../fixed.v1.8k/steps_be \ No newline at end of file diff --git a/egs/lre22/open.v1.8k/steps_xvec b/egs/lre22/open.v1.8k/steps_xvec new file mode 120000 index 00000000..af66a94d --- /dev/null +++ b/egs/lre22/open.v1.8k/steps_xvec @@ -0,0 +1 @@ +hyp_utils/xvectors \ No newline at end of file diff --git a/egs/lre22/open.v1.8k/utils b/egs/lre22/open.v1.8k/utils new file mode 120000 index 00000000..3d590a1d --- /dev/null +++ b/egs/lre22/open.v1.8k/utils @@ -0,0 +1 @@ +hyp_utils/kaldi/utils \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/cmd.sh b/egs/lre22/open.v2.8k/cmd.sh new file mode 100755 index 00000000..4efc96e1 --- /dev/null +++ b/egs/lre22/open.v2.8k/cmd.sh @@ -0,0 +1,28 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
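+# +# For example, on a single machine with no grid engine you could use (a sketch; +# jobs then run locally and sequentially, so watch memory usage): +# export train_cmd=run.pl +# export cuda_cmd=run.pl +# export cuda_eval_cmd=run.pl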
+ +if [ "$(hostname -d)" == "cm.gemini" ];then + #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 40G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" + export cuda_eval_cmd="$train_cmd" +fi + + + diff --git a/egs/lre22/open.v2.8k/conf/clsp.conf b/egs/lre22/open.v2.8k/conf/clsp.conf new file mode 100644 index 00000000..4ed38246 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/lre22/open.v2.8k/conf/coe_gpu_bigmem.conf b/egs/lre22/open.v2.8k/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/lre22/open.v2.8k/conf/coe_gpu_long.conf b/egs/lre22/open.v2.8k/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/lre22/open.v2.8k/conf/coe_gpu_rtx.conf b/egs/lre22/open.v2.8k/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/lre22/open.v2.8k/conf/coe_gpu_short.conf b/egs/lre22/open.v2.8k/conf/coe_gpu_short.conf new file mode 100644 index 
00000000..81de5cb7 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/lre22/open.v2.8k/conf/coe_gpu_v100.conf b/egs/lre22/open.v2.8k/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/lre22/open.v2.8k/conf/reverb_noise_aug.yaml b/egs/lre22/open.v2.8k/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..4fdf8068 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/reverb_noise_aug.yaml @@ -0,0 +1,35 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: scp:data/rirs_smallroom/rirs.scp + rir_norm: max + mediumroom: + weight: 1 + rir_path: scp:data/rirs_mediumroom/rirs.scp + rir_norm: max + realroom: + weight: 1 + rir_path: scp:data/rirs_real/rirs.scp + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/wav.scp + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/wav.scp + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/wav.scp + min_snr: 3 + max_snr: 18 + diff --git a/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml b/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml new file mode 100644 index 00000000..b8998830 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml @@ -0,0 +1,59 @@ +data: + train: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - class_id + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - class_id + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 128 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + data_loader: + num_workers: 8 +model: wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml +trainer: + optim: + opt_type: sgd + lr: 0.45 + momentum: 0.9 + weight_decay: 4e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + #decay_steps: 4200 + #hold_steps: 1500 + decay_steps: 16000 + hold_steps: 18000 + min_lr: 4e-4 + warmup_steps: 4000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 12 + eff_batch_size: 1024 + train_mode: hf-feats-frozen-nograd + + \ No newline at end of file 
diff --git a/egs/lre22/open.v2.8k/conf/wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml b/egs/lre22/open.v2.8k/conf/wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml new file mode 100644 index 00000000..d8193f59 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml @@ -0,0 +1,47 @@ +hf_feats: + pretrained_model_path: facebook/wav2vec2-xls-r-300m +xvector: + resnet_enc: + in_feats: 1024 + in_conv_channels: 1024 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 1024 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 8 + multilayer: true + multilayer_concat: true + endpoint_channels: 3072 + dropout_rate: 0.05 + norm_before: false + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 32.0 + margin: 0. + margin_warmup_epochs: 5 + intertop_margin: 0. + dropout_rate: 0.0 + norm_before: false + hid_act: swish +feat_fusion_method: weighted-avg +feat_fusion_start: 2 diff --git a/egs/lre22/open.v2.8k/datapath.sh b/egs/lre22/open.v2.8k/datapath.sh new file mode 100644 index 00000000..fec52329 --- /dev/null +++ b/egs/lre22/open.v2.8k/datapath.sh @@ -0,0 +1,87 @@ +# Copyright +# 2022 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + +#paths to databases + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + ldc_root3=/export/fs02/corpora3/LDC + ldc_root5=/export/corpora5/LDC + ldc_root=/export/corpora6/LDC + sre16_dev_root=$ldc_root/LDC2019S20/data/dev/R148_0_0 + sre16_eval_root=$ldc_root/LDC2019S20/data/eval/R149_0_1 + sre18_dev_root=$ldc_root5/LDC2018E46 + sre18_eval_root=$ldc_root3/LDC2018E51 + sre19cmn2_eval_root=$ldc_root3/LDC2019E58 + sre_superset_root=$ldc_root/LDC2021E08 + sre21_dev_root=$ldc_root/LDC2021E09 + sre21_eval_root=$ldc_root/LDC2021E10 + lre17_train_root=$ldc_root/LDC2022E16_2017_NIST_Language_Recognition_Evaluation_Training_and_Development_Sets + lre17_eval_root=$ldc_root/LDC2022E17_2017_NIST_Language_Recognition_Evaluation_Test_Set + lre22_dev_root=$ldc_root/LDC2022E14_2022_NIST_Language_Recognition_Evaluation_Development_Data + lre22_eval_root=/export/corpora6/lre22_test_data_v2 + voxlingua_root=/export/corpora6/voxlingua107 + musan_root=/export/corpora5/JHU/musan + babel_assamese_root=$ldc_root/LDC2016S06 + babel_bengali_root=$ldc_root/LDC2016S08 + babel_pashto_root=$ldc_root/LDC2016S09 + babel_turkish_root=$ldc_root/LDC2016S10 + babel_georgian_root=$ldc_root/LDC2016S12 + babel_vietnam_root=$ldc_root/LDC2017S01 + babel_haitian_root=$ldc_root/LDC2017S03 + babel_lao_root=$ldc_root/LDC2017S08 + babel_tamil_root=$ldc_root/LDC2017S13 + babel_zulu_root=$ldc_root/LDC2017S19 + babel_kurmanji_root=$ldc_root/LDC2017S22 + babel_tok_root=$ldc_root/LDC2018S02 + babel_kazakh_root=$ldc_root/LDC2018S13 + babel_telugu_root=$ldc_root/LDC2018S16 + babel_lithuanian_root=$ldc_root/LDC2019S03 + fleurs_root=/export/corpora6/LRE/FLEURS2022 + lwazi_root=/export/corpora6/LRE/Lwazi2009 + nchlt_root=/export/corpora6/LRE/NCHLT2014 + ammi_root=/export/corpora6/LRE/AMMI2020 + cv20_root=/export/corpora5/mozilla-common-voice/cv-corpus-5.1-2020-06-22 + cv22_root=/export/corpora6/LRE/CommonVoice2020/cv-corpus-11.0-2022-09-21 + adi_root=/export/corpora6/ADI17 + ast_root=/export/corpora6/LRE/AST2004 +elif [ "$(hostname --domain)" == "cm.gemini" ];then + 
ldc_root=/export/common/data/corpora/LDC + sre_root=/export/common/data/corpora/NIST/SRE + my_root=/exp/jvillalba/corpora + sre16_dev_root=/exp/jvillalba/corpora/LDC2019S20/data/dev/R148_0_0 + sre16_eval_root=/exp/jvillalba/corpora/LDC2019S20/data/eval/R149_0_1 + sre18_dev_root=$sre_root/SRE18/LDC2018E46_2018_NIST_Speaker_Recognition_Evaluation_Development_Set + sre18_eval_root=$sre_root/SRE18/Eval/LDC2018E51 + sre19cmn2_eval_root=/exp/jvillalba/corpora/LDC2019E58 + sre_superset_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E08 + sre21_dev_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E09 + sre21_eval_root=/exp/jvillalba/corpora/sre21/releases/LDC2021E10 + lre17_train_root=$my_root/LDC2022E16_2017_NIST_Language_Recognition_Evaluation_Training_and_Development_Sets + lre17_eval_root=$my_root/LDC2022E17_2017_NIST_Language_Recognition_Evaluation_Test_Set + lre22_dev_root=$my_root/LDC2022E14_2022_NIST_Language_Recognition_Evaluation_Development_Data + lre22_eval_root=$my_root/lre22_test_data_v2 + voxlingua_root=$my_root/voxlingua107 + musan_root=/expscratch/dgromero/corpora/musan + babel_assamese_root=$ldc_root/LDC2016S06 + babel_bengali_root=$ldc_root/LDC2016S08 + babel_pashto_root=$ldc_root/LDC2016S09 + babel_turkish_root=$my_root/LDC2016S10 + babel_georgian_root=$my_root/LDC2016S12 + babel_vietnam_root=$my_root/LDC2017S01 + babel_haitian_root=$my_root/LDC2017S03 + babel_lao_root=$ldc_root/LDC2017S08 + babel_tamil_root=$ldc_root/LDC2017S13 + babel_zulu_root=$ldc_root/LDC2017S19 + babel_kurmanji_root=$ldc_root/LDC2017S22 + babel_tok_root=$my_root/LDC2018S02 + babel_kazakh_root=$ldc_root/LDC2018S13 + babel_telugu_root=$ldc_root/LDC2018S16 + babel_lithuanian_root=$my_root/LDC2019S03 + adi_root=/exp/jvillalba/corpora/ADI17 + +else + echo "Put your database paths here" + exit 1 +fi diff --git a/egs/lre22/open.v2.8k/default_config.sh b/egs/lre22/open.v2.8k/default_config.sh new file mode 120000 index 00000000..94d038cf --- /dev/null +++ b/egs/lre22/open.v2.8k/default_config.sh @@ -0,0 +1 @@ +global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh b/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh new file mode 100644 index 00000000..b39d817b --- /dev/null +++ b/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh @@ -0,0 +1,36 @@ +# Wav2Vec2 XLS-R 300M (facebook/wav2vec2-xls-r-300m) + ECAPA-TDNN 1024x3 + +# hugging face model +hf_model_name=wav2vec2xlsr300m + +#vad +vad_config=conf/vad_8k.yaml + +# x-vector training +nnet_data=open + +# x-vector cfg + +nnet_type=hf_wav2vec2resnet1d + +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.2.yaml +nnet_s1_args="" + +nnet_name=${hf_model_name}_ecapatdnn1024x3_v2.2 +nnet_s1_name=$nnet_name.s1 + +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0011.pth + +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage2_v2.2.yaml +nnet_s2_args="" +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0008.pth + +nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage3_v2.2.yaml +nnet_s3_args="" +nnet_s3_name=${nnet_name}.s3 +nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name +nnet_s3=$nnet_s3_dir/model_ep0002.pth +nnet_s3=$nnet_s3_dir/model_ep0005.pth diff --git a/egs/lre22/open.v2.8k/hyp_utils b/egs/lre22/open.v2.8k/hyp_utils new file mode 120000 
index 00000000..f6d1eb7a --- /dev/null +++ b/egs/lre22/open.v2.8k/hyp_utils @@ -0,0 +1 @@ +../../../hyp_utils \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/local b/egs/lre22/open.v2.8k/local new file mode 120000 index 00000000..c2a3fdea --- /dev/null +++ b/egs/lre22/open.v2.8k/local @@ -0,0 +1 @@ +../fixed.v1.8k/local \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/path.sh b/egs/lre22/open.v2.8k/path.sh new file mode 100644 index 00000000..6994fdab --- /dev/null +++ b/egs/lre22/open.v2.8k/path.sh @@ -0,0 +1,5 @@ + +export HYP_ROOT=$(readlink -f `pwd -P`/../../..) +export TOOLS_ROOT=$HYP_ROOT/tools + +. $TOOLS_ROOT/path.sh diff --git a/egs/lre22/open.v2.8k/resources b/egs/lre22/open.v2.8k/resources new file mode 120000 index 00000000..113b3492 --- /dev/null +++ b/egs/lre22/open.v2.8k/resources @@ -0,0 +1 @@ +../fixed.v1.8k/resources \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/run_001_prepare_data.sh b/egs/lre22/open.v2.8k/run_001_prepare_data.sh new file mode 100755 index 00000000..99a72cab --- /dev/null +++ b/egs/lre22/open.v2.8k/run_001_prepare_data.sh @@ -0,0 +1,330 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +config_file=default_config.sh +stage=1 + +. parse_options.sh || exit 1; +. datapath.sh + + +if [ $stage -le 1 ];then + # Prepares voxlingua 107 for training + hyp_utils/conda_env.sh \ + local/prepare_voxlingua107.py \ + --corpus-dir $voxlingua_root \ + --output-dir data/voxlingua107 \ + --remove-langs en-en es-es ar-ar pt-pt \ + --map-langs-to-lre-codes \ + --target-fs 8000 + +fi + +if [ $stage -le 2 ];then + # Prepare LRE17 Training data + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_train_root \ + --output-dir data/lre17_train \ + --subset train \ + --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_train_root \ + --output-dir data/lre17_dev_cts \ + --subset dev \ + --source mls14 \ + --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_train_root \ + --output-dir data/lre17_dev_afv \ + --subset dev \ + --source vast \ + --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_eval_root \ + --output-dir data/lre17_eval_cts \ + --subset eval \ + --source mls14 \ + --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_lre17.py \ + --corpus-dir $lre17_eval_root \ + --output-dir data/lre17_eval_afv \ + --subset eval \ + --source vast \ + --target-fs 8000 + +fi + +if [ $stage -le 3 ];then + hyp_utils/conda_env.sh \ + local/prepare_lre22_dev.py \ + --corpus-dir $lre22_dev_root \ + --output-dir data/lre22_dev \ + --target-fs 8000 + +fi + +if [ $stage -le 4 ];then + hyp_utils/conda_env.sh \ + local/prepare_lre22_eval.py \ + --corpus-dir $lre22_eval_root \ + --output-dir data/lre22_eval \ + --target-fs 8000 + +fi + +if [ $stage -le 5 ];then + local/make_sre16_train_dev.sh $sre16_dev_root 8 data + local/make_sre16_train_eval.sh $sre16_eval_root 8 data +fi + +if [ $stage -le 6 ];then + local/make_sre18_dev_unlabeled.sh $sre18_dev_root 8 data + local/make_sre18_train_dev.sh $sre18_dev_root 8 data + local/make_sre18_train_eval.sh $sre18_eval_root 8 data +fi + +if [ $stage -le 7 ];then + # Prepare sre19 + local/make_sre19cmn2_eval.sh $sre19cmn2_eval_root 8 data +fi + +if [ $stage -le 8 ];then + # Prepare SRE21 dev + hyp_utils/conda_env.sh \ + local/prepare_sre21av_dev_audio.py \ + --corpus-dir 
$sre21_dev_root \ + --target-fs 8000 \ + --output-path data/sre21_audio_dev \ + --av-output-path data/sre21_audio-visual_dev + # Prepare SRE21 eval + hyp_utils/conda_env.sh \ + local/prepare_sre21av_eval_audio.py \ + --corpus-dir $sre21_eval_root \ + --target-fs 8000 \ + --output-path data/sre21_audio_eval \ + --av-output-path data/sre21_audio-visual_eval + +fi + +if [ $stage -le 9 ];then + # Prepare SRE CTS superset + hyp_utils/conda_env.sh \ + local/prepare_sre_cts_superset.py \ + --corpus-dir $sre_superset_root \ + --target-fs 8000 \ + --output-dir data/sre_cts_superset +fi + +if [ $stage -le 10 ];then + # Prepare babel datasets + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_assamese_root \ + --target-fs 8000 \ + --lang-code as-as \ + --output-dir data/babel_assamese + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_bengali_root \ + --target-fs 8000 \ + --lang-code bn-bn \ + --output-dir data/babel_bengali + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_pashto_root \ + --target-fs 8000 \ + --lang-code ps-ps \ + --output-dir data/babel_pashto + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_turkish_root \ + --target-fs 8000 \ + --lang-code tr-tr \ + --output-dir data/babel_turkish + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_georgian_root \ + --target-fs 8000 \ + --lang-code ka-ka \ + --output-dir data/babel_georgian + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_vietnam_root \ + --target-fs 8000 \ + --lang-code vi-vi \ + --output-dir data/babel_vietnam + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_haitian_root \ + --target-fs 8000 \ + --lang-code ht-ht \ + --output-dir data/babel_haitian + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_lao_root \ + --target-fs 8000 \ + --lang-code lo-lo \ + --output-dir data/babel_lao + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_tamil_root \ + --target-fs 8000 \ + --lang-code ta-ta \ + --output-dir data/babel_tamil + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_zulu_root \ + --target-fs 8000 \ + --lang-code zul-zul \ + --output-dir data/babel_zulu + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_kurmanji_root \ + --target-fs 8000 \ + --lang-code kur-kur \ + --output-dir data/babel_kurmanji + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_tok_root \ + --target-fs 8000 \ + --lang-code tok-tok \ + --output-dir data/babel_tok + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_kazakh_root \ + --target-fs 8000 \ + --lang-code kk-kk \ + --output-dir data/babel_kazakh + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_telugu_root \ + --target-fs 8000 \ + --lang-code te-te \ + --output-dir data/babel_telugu + hyp_utils/conda_env.sh \ + local/prepare_babel.py \ + --corpus-dir $babel_lithuanian_root \ + --target-fs 8000 \ + --lang-code lt-lt \ + --output-dir data/babel_lithuanian + +fi + +if [ $stage -le 11 ];then + hyp_utils/conda_env.sh \ + local/prepare_some_data_for_lre.py \ + --corpus-dir $fleurs_root \ + --output-dir data/fleurs22 \ + --map-langs-to-lre-codes --target-fs 8000 + + hyp_utils/conda_env.sh \ + local/prepare_some_data_for_lre.py \ + --corpus-dir $lwazi_root \ + --output-dir data/lwazi09 \ + --map-langs-to-lre-codes --target-fs 8000 + hyp_utils/conda_env.sh \ + 
local/prepare_some_data_for_lre.py \ + --corpus-dir $nchlt_root \ + --output-dir data/nchlt14 \ + --map-langs-to-lre-codes --target-fs 8000 + hyp_utils/conda_env.sh \ + local/prepare_some_data_for_lre.py \ + --corpus-dir $ammi_root \ + --output-dir data/ammi20 \ + --map-langs-to-lre-codes --target-fs 8000 +fi + +if [ $stage -le 12 ];then + + hyp_utils/conda_env.sh \ + local/prepare_common_voice_cat.py \ + --corpus-dir $cv22_root \ + --output-dir data/cv22_tir \ + --keep-langs tir-tir \ + --map-langs-to-lre-codes --target-fs 8000 +fi + + +if [ $stage -le 13 ];then + hyp_utils/conda_env.sh \ + local/prepare_common_voice_accents_cat.py \ + --corpus-dir $cv20_root \ + --output-dir data/cv20_eng_ine \ + --lang en \ + --target-fs 8000 + hyp_utils/conda_env.sh \ + local/prepare_common_voice_accents_cat.py \ + --corpus-dir $cv20_root \ + --output-dir data/cv20_fra \ + --lang fr \ + --target-fs 8000 + +fi + +if [ $stage -le 14 ];then + hyp_utils/conda_env.sh \ + local/prepare_adi17.py \ + --corpus-dir $adi_root \ + --output-dir data/adi17 \ + --map-langs-to-lre-codes --target-fs 8000 +fi + +if [ $stage -le 15 ];then + hyp_utils/conda_env.sh \ + local/prepare_ast.py \ + --corpus-dir $ast_root \ + --output-dir data/ast \ + --map-langs-to-lre-codes --target-fs 8000 +fi + +if [ $stage -le 16 ];then + #combine data + utils/combine_data.sh \ + data/babel \ + data/babel_{a*,b*,g*,k*,l*,p*,t*,v*,zulu} + + utils/combine_data.sh \ + data/cv \ + data/cv20_eng_ine data/cv20_fra data/cv22_tir + + utils/combine_data.sh \ + data/sre16 \ + data/sre16_train_{dev*,eval*} + + utils/combine_data.sh \ + data/sre18 \ + data/sre18_train_{dev*,eval*} data/sre18_dev_unlabeled + + utils/combine_data.sh \ + data/sre19 \ + data/sre19_eval_{enroll,test}_cmn2 + + utils/combine_data.sh \ + data/sre21_cts \ + data/sre21_*_cts + + utils/combine_data.sh \ + data/sre21_afv \ + data/sre21_audio*_{dev*,eval*}_afv + + utils/combine_data.sh \ + data/sre16-21_cts \ + data/sre1{6,8,9} data/sre21_cts + +fi + diff --git a/egs/lre22/open.v2.8k/run_002_compute_evad.sh b/egs/lre22/open.v2.8k/run_002_compute_evad.sh new file mode 100755 index 00000000..f7ccdfa7 --- /dev/null +++ b/egs/lre22/open.v2.8k/run_002_compute_evad.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e +nodes=b1 +storage_name=$(date +'%m_%d_%H_%M') +vaddir=`pwd`/exp/vad_e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file + + +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $vaddir/storage ]; then + dir_name=$USER/hyp-data/lre22-fixed-v1.8k-$storage_name/vad/storage + if [ "$nodes" == "b0" ];then + # split the storage across the b04-b07 nodes + utils/create_split_dir.pl \ + /export/b{04,05,06,07}/$dir_name $vaddir/storage + elif [ "$nodes" == "b1" ];then + utils/create_split_dir.pl \ + /export/b1{0,1,2,3,4,5,6,7,8,9}/$dir_name $vaddir/storage + elif [ "$nodes" == "c0" ];then + utils/create_split_dir.pl \ + /export/c{06,07,08,09}/$dir_name $vaddir/storage + elif [ "$nodes" == "fs01" ];then + utils/create_split_dir.pl \ + /export/fs01/$dir_name $vaddir/storage + elif [ "$nodes" == "fs05" ];then + utils/create_split_dir.pl \ + /export/fs05/$dir_name $vaddir/storage + else + echo "we don't distribute data between multiple machines" + fi + fi +fi + +# VAD Train/Test Datasets +if [ $stage -le 2 ];then + for name in voxlingua107 \ + lre17_train \ + lre17_dev_cts lre17_dev_afv \ + lre17_eval_cts lre17_eval_afv \ + lre22_dev lre22_eval \ + babel sre16-21_cts sre21_afv sre_cts_superset \ + lwazi09 nchlt14 adi17 fleurs22 ammi20 \ + ast cv + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 40 ? $num_spk:40)) + hyp_utils/feats/make_evad.sh --write-utt2num-frames true \ + --vad-config $vad_config --nj $nj --cmd "$train_cmd" \ + data/${name} exp/make_vad/$name $vaddir + utils/fix_data_dir.sh data/${name} + done +fi + diff --git a/egs/lre22/open.v2.8k/run_003_prepare_noises_rirs.sh b/egs/lre22/open.v2.8k/run_003_prepare_noises_rirs.sh new file mode 100755 index 00000000..08d4d910 --- /dev/null +++ b/egs/lre22/open.v2.8k/run_003_prepare_noises_rirs.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +# We prepare the noise files and RIRs for online speech augmentation +if [ $stage -le 1 ]; then + + # Prepare the MUSAN corpus, which consists of music, speech, and noise + # suitable for augmentation. + local/make_musan.sh $musan_root 16 data + + for name in musan_noise musan_music + do + steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ + --storage_name lre22-fixed-v2.8k-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_proc_audio exp/${name}_proc_audio + utils/fix_data_dir.sh data/${name}_proc_audio + done + +fi + +if [ $stage -le 2 ]; then + + # Create Babble noise from MUSAN speech files + for name in musan_speech + do + steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ + --storage_name lre22-fixed-v2.8k-$(date +'%m_%d_%H_%M') \ + data/${name} data/${name}_babble exp/${name}_babble + # utils/fix_data_dir.sh data/${name}_babble + done +fi + +if [ $stage -le 3 ]; then + if [ !
-d "RIRS_NOISES" ]; then + if [ -d ../v1.8k/RIRS_NOISES ];then + ln -s ../v1.8k/RIRS_NOISES + else + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + fi + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/smallroom 16 data/rirs_smallroom + local/make_rirs_data.sh RIRS_NOISES/simulated_rirs/mediumroom 16 data/rirs_mediumroom + local/make_rirs_data.sh RIRS_NOISES/real_rirs_isotropic_noises 16 data/rirs_real + for rirs in rirs_smallroom rirs_mediumroom rirs_real + do + #pack all rirs in h5 files + steps_xvec/pack_rirs_for_nnet_train.sh data/$rirs data/$rirs exp/rirs/$rirs + done + +fi + + diff --git a/egs/lre22/open.v2.8k/run_004_apply_codecs.sh b/egs/lre22/open.v2.8k/run_004_apply_codecs.sh new file mode 100755 index 00000000..6efc016b --- /dev/null +++ b/egs/lre22/open.v2.8k/run_004_apply_codecs.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file + +if [ $stage -le 1 ];then + + for data in voxlingua107 \ + lre17_dev_afv lre17_eval_afv \ + sre21_afv ast cv \ + lwazi09 nchlt14 adi17 fleurs22 ammi20 + do + hyp_utils/conda_env.sh \ + local/apply_tel_codecs_to_kaldi_datadir.py \ + --input-dir data/$data \ + --output-dir data/${data}_codecs + done + +fi diff --git a/egs/lre22/open.v2.8k/run_010_prepare_xvec_train_data.sh b/egs/lre22/open.v2.8k/run_010_prepare_xvec_train_data.sh new file mode 100755 index 00000000..98aa9a4c --- /dev/null +++ b/egs/lre22/open.v2.8k/run_010_prepare_xvec_train_data.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. 
$config_file + +if [ $stage -le 1 ]; then + # This script preprocesses the audio for x-vector training + for name in voxlingua107_codecs \ + lre17_train \ + lre17_{dev,eval}_{cts,afv,afv_codecs} \ + babel sre16-21_cts sre_cts_superset \ + sre21_afv_codecs cv_codecs adi17_codecs \ + lwazi09{,_codecs} nchlt14{,_codecs} fleurs22{,_codecs} ammi20{,_codecs} ast{,_codecs} + do + steps_xvec/preprocess_audios_for_nnet_train.sh \ + --nj 40 --cmd "$train_cmd" \ + --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') --use-bin-vad true \ + data/${name} data/${name}_proc_audio_no_sil exp/${name}_proc_audio_no_sil + utils/fix_data_dir.sh data/${name}_proc_audio_no_sil + done +fi + +if [ $stage -le 2 ];then + utils/combine_data.sh \ + data/lre17_proc_audio_no_sil \ + data/lre17_train_proc_audio_no_sil \ + data/lre17_{dev,eval}_{cts,afv,afv_codecs}_proc_audio_no_sil + + utils/combine_data.sh \ + data/babel_sre_proc_audio_no_sil \ + data/{babel,sre16-21_cts,sre21_afv_codecs,sre_cts_superset}_proc_audio_no_sil + + utils/combine_data.sh \ + data/others_afr_proc_audio_no_sil \ + data/adi17_proc_audio_no_sil \ + data/{lwazi09,nchlt14,fleurs22,ammi20,ast}{,_codecs}_proc_audio_no_sil +fi + +if [ $stage -le 3 ]; then + # Now, we remove files shorter than 3 seconds + hyp_utils/remove_short_audios.sh --min-len 3 data/voxlingua107_codecs_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/lre17_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/babel_sre_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/others_afr_proc_audio_no_sil + hyp_utils/remove_short_audios.sh --min-len 3 data/cv_codecs_proc_audio_no_sil +fi + +if [ $stage -le 4 ];then + # merge all data + utils/combine_data.sh \ + data/open_proc_audio_no_sil \ + data/{voxlingua107_codecs,lre17,babel_sre,cv_codecs,others_afr}_proc_audio_no_sil +fi + + +if [ $stage -le 5 ]; then + for name in open_proc_audio_no_sil + do + hyp_utils/conda_env.sh \ + local/split_segments_train_val.py \ + --segments-file data/$name/utt2lang \ + --recordings-file data/$name/wav.scp \ + --durations-file data/$name/utt2dur \ + --val-percent 2. \ + --remove-langs fra-mix ara-ary en-en es-es pt-pt ar-ar \ + --output-dir data/$name/train_val_split + done +fi diff --git a/egs/lre22/open.v2.8k/run_011_train_xvector.sh b/egs/lre22/open.v2.8k/run_011_train_xvector.sh new file mode 100755 index 00000000..3a7a47a4 --- /dev/null +++ b/egs/lre22/open.v2.8k/run_011_train_xvector.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh + +list_dir=data/${nnet_data}_proc_audio_no_sil + +#add extra args from the command line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" +fi +if [ "$use_tb" == "true" ];then + extra_args="$extra_args --trainer.use-tensorboard" +fi + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project lre22-open-v2.8k --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" +fi + + +# Network Training +if [ $stage -le 1 ]; then + + mkdir -p $nnet_s1_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + train_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \ + --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \ + --trainer.exp-path $nnet_s1_dir \ + --num-gpus $ngpu + +fi + +if [ $stage -le 2 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" + fi + + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \ + --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \ + --in-model-file $nnet_s1 \ + --trainer.exp-path $nnet_s2_dir $args \ + --num-gpus $ngpu \ + +fi +exit +if [ $stage -le 3 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)" + fi + + mkdir -p $nnet_s3_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s3_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \ + --data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \ + --in-model-file $nnet_s2 \ + --trainer.exp-path $nnet_s3_dir $args \ + --num-gpus $ngpu \ + +fi + +if [ $stage -le 4 ]; then + + if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.wandb.name $nnet_s4_name.$(date -Iminutes)" + fi + + mkdir -p $nnet_s4_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s4_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + finetune_wav2vec2xvector.py $nnet_type \ + --cfg $nnet_s4_base_cfg $nnet_s4_args $extra_args \ + --data.train.dataset.recordings-file $list_dir/wav.scp \ + --data.train.dataset.segments-file $list_dir/train_val_split/train_segments.csv \ + 
--data.train.dataset.class-files $list_dir/train_val_split/class_file.csv \ + --data.val.dataset.recordings-file $list_dir/wav.scp \ + --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \ + --in-model-file $nnet_s3 \ + --trainer.exp-path $nnet_s4_dir $args \ + --num-gpus $ngpu \ + +fi + diff --git a/egs/lre22/open.v2.8k/steps b/egs/lre22/open.v2.8k/steps new file mode 120000 index 00000000..aede39fe --- /dev/null +++ b/egs/lre22/open.v2.8k/steps @@ -0,0 +1 @@ +hyp_utils/kaldi/steps \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/steps_be b/egs/lre22/open.v2.8k/steps_be new file mode 120000 index 00000000..48aedc5a --- /dev/null +++ b/egs/lre22/open.v2.8k/steps_be @@ -0,0 +1 @@ +../fixed.v1.8k/steps_be \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/steps_xvec b/egs/lre22/open.v2.8k/steps_xvec new file mode 120000 index 00000000..af66a94d --- /dev/null +++ b/egs/lre22/open.v2.8k/steps_xvec @@ -0,0 +1 @@ +hyp_utils/xvectors \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/utils b/egs/lre22/open.v2.8k/utils new file mode 120000 index 00000000..3d590a1d --- /dev/null +++ b/egs/lre22/open.v2.8k/utils @@ -0,0 +1 @@ +hyp_utils/kaldi/utils \ No newline at end of file diff --git a/egs/sre21-av-a/v1.8k/run_011_train_xvector.sh b/egs/sre21-av-a/v1.8k/run_011_train_xvector.sh index d7ea8ed0..1ffd35a8 100755 --- a/egs/sre21-av-a/v1.8k/run_011_train_xvector.sh +++ b/egs/sre21-av-a/v1.8k/run_011_train_xvector.sh @@ -68,50 +68,3 @@ if [ $stage -le 2 ]; then --num-gpus $ngpu \ fi -exit - -# Network Training -if [ $stage -le 1 ]; then - - if [[ ${nnet_type} =~ resnet1d ]]; then - train_exec=torch-train-resnet1d-xvec-from-wav.py - elif [[ ${nnet_type} =~ resnet ]] || [[ ${nnet_type} =~ resnext ]] || [[ ${nnet_type} =~ res2net ]] || [[ ${nnet_type} =~ res2next ]]; then - train_exec=torch-train-resnet-xvec-from-wav.py - elif [[ ${nnet_type} =~ efficientnet ]]; then - train_exec=torch-train-efficientnet-xvec-from-wav.py - elif [[ ${nnet_type} =~ tdnn ]]; then - train_exec=torch-train-tdnn-xvec-from-wav.py - elif [[ ${nnet_type} =~ transformer ]]; then - train_exec=torch-train-transformer-xvec-v1-from-wav.py - else - echo "$nnet_type not supported" - exit 1 - fi - - mkdir -p $nnet_dir/log - $cuda_cmd \ - --gpu $ngpu $nnet_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - $train_exec --feats $feat_config $aug_opt \ - --audio-path $list_dir/wav.scp \ - --time-durs-file $list_dir/utt2dur \ - --train-list $list_dir/lists_xvec/train.scp \ - --val-list $list_dir/lists_xvec/val.scp \ - --class-file $list_dir/lists_xvec/class2int \ - --min-chunk-length $min_chunk --max-chunk-length $max_chunk \ - --iters-per-epoch $ipe \ - --batch-size $batch_size \ - --num-workers $num_workers \ - --grad-acc-steps $grad_acc_steps \ - --embed-dim $embed_dim $nnet_opt $opt_opt $lrs_opt \ - --epochs $nnet_num_epochs \ - --cos-scale $s --margin $margin --margin-warmup-epochs $margin_warmup \ - --dropout-rate $dropout \ - --num-gpus $ngpu \ - --log-interval $log_interval \ - --exp-path $nnet_dir $args - -fi - - -exit diff --git a/hyperion/np/transforms/skl_tsne.py b/hyperion/np/transforms/skl_tsne.py index ebabc6ec..fbff7df3 100644 --- a/hyperion/np/transforms/skl_tsne.py +++ b/hyperion/np/transforms/skl_tsne.py @@ -53,7 +53,8 @@ def __init__( super().__init__(**kwargs) self.rng_seed = rng_seed if rng is None: - rng = np.random.default_rng(seed=rng_seed) + #rng = np.random.default_rng(seed=rng_seed) + rng = 
np.random.RandomState(seed=rng_seed) self._tsne = TSNE( n_components=tsne_dim, diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index 57f3faf2..b94d9752 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -139,7 +139,7 @@ def load(cls, file_path, sep=None, name="class_id"): sep=" ", header=None, names=["id", name], - dtype={"id": np.str, name: np.str}, + dtype={"id": str, name: str}, ) else: if sep is None: diff --git a/hyperion/utils/scp_list.py b/hyperion/utils/scp_list.py index 070e4f53..3d8b5e9d 100644 --- a/hyperion/utils/scp_list.py +++ b/hyperion/utils/scp_list.py @@ -36,7 +36,7 @@ def __init__(self, key, file_path, offset=None, range_spec=None): def validate(self): """Validates the attributes of the SCPList object.""" self.key = list2ndarray(self.key) - self.file_path = list2ndarray(self.file_path, dtype=np.object) + self.file_path = list2ndarray(self.file_path, dtype=object) assert len(self.key) == len(self.file_path) if self.offset is not None: if isinstance(self.offset, list): From af6e26e1a4f64a02cf8612e412649dec55ba3926 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 1 Nov 2023 12:45:13 -0400 Subject: [PATCH 116/154] new recipes voxceleb/v1.2 and v2.1 --- egs/librispeech/v0/cmd.sh | 28 + egs/librispeech/v0/conf/clsp.conf | 11 + egs/librispeech/{v1 => v0}/conf/infer.yaml | 0 .../{v1 => v0}/conf/reverb_noise20dB_aug.yaml | 0 .../{v1 => v0}/conf/reverb_noise_aug.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml | 0 ...base_conf_rnnt_k2_pruned_stage1_v3.10.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml | 0 ...ase_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml | 0 ...ase_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml | 0 ...2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml | 0 ...2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml | 0 ...v2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml | 0 ...v2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml | 0 ...v2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml | 0 ...rain_wav2vec2base_rnnt_k2_stage1_v1.0.yaml | 0 ...n_wav2vec2base_transducer_stage1_v5.0.yaml | 0 ...n_wav2vec2base_transducer_stage1_v6.1.yaml | 0 ...n_wav2vec2base_transducer_stage1_v7.1.yaml | 0 ...v2vec2xlsr300m_transducer_stage1_v1.0.yaml | 0 ...v2vec2xlsr300m_transducer_stage1_v2.0.yaml | 0 ...v2vec2xlsr300m_transducer_stage1_v3.0.yaml | 0 ...v2vec2xlsr300m_transducer_stage1_v3.1.yaml | 0 ...v2vec2xlsr300m_transducer_stage1_v3.2.yaml | 0 ...v2vec2xlsr300m_transducer_stage1_v3.3.yaml | 0 ...v2vec2xlsr300m_transducer_stage1_v4.3.yaml | 0 ...v2vec2xlsr300m_transducer_stage1_v4.4.yaml | 0 ...v2vec2xlsr300m_transducer_stage2_v3.2.yaml | 0 .../wav2vec2base_rnn_transducer_do0.4.yaml | 0 .../conf/wav2vec2base_rnnt_ta_do0.4.yaml | 0 .../conf/wav2vec2base_transducer_do0.4.yaml | 0 .../conf/wav2vec2xlsr300m_transducer.yaml | 0 .../conf/wav2vec2xlsr300m_transducer_do.yaml | 0 .../wav2vec2xlsr300m_transducer_do0.2.yaml | 0 .../wav2vec2xlsr300m_transducer_do0.3.yaml | 0 .../wav2vec2xlsr300m_transducer_do0.4.yaml | 0 .../wav2vec2xlsr300m_transducer_enclast.yaml | 0 egs/librispeech/{v1 => v0}/datapath.sh | 0 egs/librispeech/{v1 => v0}/default_config.sh | 0 egs/librispeech/{v1 => 
v0}/feats | 0 .../global_conf/config_transducer_v1.sh | 0 .../global_conf/config_transducer_v2.sh | 0 .../global_conf/config_transducer_v3.1.sh | 0 .../global_conf/config_transducer_v3.2.sh | 0 .../global_conf/config_transducer_v3.3.sh | 0 .../global_conf/config_transducer_v3.sh | 0 .../global_conf/config_transducer_v4.3.sh | 0 .../global_conf/config_transducer_v4.4.sh | 0 .../global_conf/config_transducer_v5.0.sh | 0 .../global_conf/config_transducer_v6.1.sh | 0 .../global_conf/config_transducer_v7.1.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh | 0 ...wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh | 0 ...wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh | 0 ...g_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh | 0 ...g_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh | 0 ...config_wav2vec2base_rnnt_k2_pruned_v1.0.sh | 0 ...config_wav2vec2base_rnnt_k2_pruned_v1.2.sh | 0 ...config_wav2vec2base_rnnt_k2_pruned_v1.3.sh | 0 .../config_wav2vec2base_rnnt_k2_v1.0.sh | 0 egs/librispeech/v0/hyp_utils | 1 + egs/librispeech/{v1 => v0}/local/data_prep.sh | 0 .../{v1 => v0}/local/download_lm.py | 0 .../{v1 => v0}/local/make_musan.py | 0 .../{v1 => v0}/local/make_musan.sh | 0 .../{v1 => v0}/local/make_rirs_data.sh | 0 .../{v1 => v0}/local/prepare_lang.py | 0 .../{v1 => v0}/local/prepare_lang_bpe.py | 0 .../{v1 => v0}/local/train_bpe_model.py | 0 .../{v1 => v0}/local/validate_bpe_lexicon.py | 0 egs/librispeech/v0/path.sh | 5 + egs/librispeech/v0/run_001_prepare_data.sh | 54 ++ .../{v1 => v0}/run_003_prepare_noises_rirs.sh | 0 .../{v1 => v0}/run_004_compute_bpe.sh | 0 .../{v1 => v0}/run_011_train_asr.sh | 0 .../{v1 => v0}/run_011_train_asr_old.sh | 0 .../{v1 => v0}/run_030_inference.sh | 0 .../{v1 => v0}/run_030_inference_old.sh | 0 .../{v1 => v0}/run_040_eval_wer.sh | 0 egs/librispeech/{v1 => v0}/steps | 0 egs/librispeech/{v1 => v0}/steps_be | 0 egs/librispeech/{v1 => v0}/steps_pyfe | 0 .../decode_wav2vec2rnn_transducer.sh | 0 .../decode_wav2vec2transducer.sh | 0 egs/librispeech/{v1 => v0}/steps_xvec | 0 egs/librispeech/{v1 => v0}/utils | 0 egs/librispeech/{v1 => v0}/xvectors | 0 egs/librispeech/v1/cmd.sh | 8 +- egs/librispeech/v1/conf/clsp.conf | 2 +- egs/librispeech/v1/conf/coe_gpu_bigmem.conf | 11 + egs/librispeech/v1/conf/coe_gpu_long.conf | 13 + egs/librispeech/v1/conf/coe_gpu_rtx.conf | 11 + egs/librispeech/v1/conf/coe_gpu_short.conf | 11 + egs/librispeech/v1/conf/coe_gpu_v100.conf | 11 + .../v1/conf/fbank80_specaug1_mn_16k.yaml | 25 + .../conf/speed_reverb_noise10-20dB_aug.yaml | 39 + ...mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml | 70 ++ ...nk80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh | 18 + egs/librispeech/v1/run_001_prepare_data.sh | 51 +- egs/voxceleb/ssl.v1/cmd.sh | 28 + egs/voxceleb/ssl.v1/conf/clsp.conf | 11 + egs/voxceleb/ssl.v1/conf/coe_gpu_bigmem.conf | 11 + egs/voxceleb/ssl.v1/conf/coe_gpu_long.conf | 13 + egs/voxceleb/ssl.v1/conf/coe_gpu_rtx.conf | 11 + egs/voxceleb/ssl.v1/conf/coe_gpu_short.conf | 11 + egs/voxceleb/ssl.v1/conf/coe_gpu_v100.conf | 11 + .../conf/fbank80_specaug1_stmn_16k.yaml | 24 + .../ssl.v1/conf/reverb_noise_aug.yaml | 34 + egs/voxceleb/ssl.v1/datapath.sh | 23 + egs/voxceleb/ssl.v1/hyp_utils | 1 + egs/voxceleb/ssl.v1/path.sh | 5 + 
egs/voxceleb/ssl.v1/run_001_prepare_data.sh | 46 ++ egs/voxceleb/ssl.v1/run_002_compute_evad.sh | 66 ++ .../ssl.v1/run_003_prepare_noises_rirs.sh | 102 +++ .../ssl.v1/run_004_prepare_xvec_train_data.sh | 75 ++ egs/voxceleb/ssl.v1/run_005_train_dino.sh | 99 +++ ...wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml | 1 - ...wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml | 69 ++ ...wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml | 5 +- ...wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml | 78 ++ egs/voxceleb/v2.1/run_007_eval_be.sh | 2 +- hyperion/bin/extract_wav2vec2xvectors.py | 9 +- .../generate_adv_attacks_xvector_classif.py | 14 +- .../bin/generate_adv_attacks_xvector_verif.py | 4 +- hyperion/bin/train_wav2vec2xvector.py | 6 + hyperion/bin/train_wav2xvector.py | 2 + hyperion/bin/train_xvector_from_feats.py | 2 + hyperion/bin/train_xvector_from_wav.py | 2 + hyperion/torch/data/audio_dataset.py | 235 ++++-- hyperion/torch/layer_blocks/__init__.py | 58 +- .../layer_blocks/conformer_decoder_v1.py | 213 ++++++ .../layer_blocks/conformer_encoder_v1.py | 108 +-- .../transformer_conv2d_subsampler.py | 61 -- .../torch/layer_blocks/transformer_input.py | 151 ++++ hyperion/torch/layers/__init__.py | 7 + hyperion/torch/layers/audio_feats.py | 12 +- hyperion/torch/layers/audio_feats_factory.py | 23 +- hyperion/torch/layers/feat_fuser_factory.py | 101 +++ hyperion/torch/layers/feat_fusers.py | 86 +++ hyperion/torch/layers/mvn.py | 112 ++- hyperion/torch/lr_schedulers/triangular_lr.py | 7 +- hyperion/torch/models/__init__.py | 6 +- .../transducer/conformer_v1_rnn_transducer.py | 12 +- .../torch/models/wav2xvectors/__init__.py | 6 + .../hf_hubert2conformer_v1_xvector.py | 93 +++ .../hf_wav2vec2conformer_v1_xvector.py | 96 +++ .../models/wav2xvectors/hf_wav2xvector.py | 1 + .../hf_wavlm2conformer_v1_xvector.py | 93 +++ .../wav2xvectors/wav2conformer_v1_xvector.py | 70 ++ hyperion/torch/models/xvectors/__init__.py | 1 + .../models/xvectors/conformer_v1_xvector.py | 168 ++++ hyperion/torch/narchs/audio_feats_mvn.py | 5 +- hyperion/torch/narchs/conformer_decoder_v1.py | 724 ++++++++++++++++++ hyperion/torch/narchs/conformer_encoder_v1.py | 159 ++-- hyperion/torch/narchs/feat_fuser_mvn.py | 107 +++ hyperion/torch/torch_model.py | 120 ++- hyperion/torch/tpm/hf/hf_wav2vec_base.py | 36 +- hyperion/torch/trainers/torch_trainer.py | 86 ++- hyperion/torch/trainers/xvector_trainer.py | 42 +- .../trainers/xvector_trainer_from_wav.py | 7 +- hyperion/torch/utils/__init__.py | 11 +- hyperion/torch/utils/masking.py | 21 +- hyperion/utils/misc.py | 7 +- hyperion/utils/scp_list.py | 2 +- 179 files changed, 3703 insertions(+), 478 deletions(-) create mode 100755 egs/librispeech/v0/cmd.sh create mode 100644 egs/librispeech/v0/conf/clsp.conf rename egs/librispeech/{v1 => v0}/conf/infer.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/reverb_noise20dB_aug.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/reverb_noise_aug.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml (100%) rename 
egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2base_rnn_transducer_do0.4.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2base_rnnt_ta_do0.4.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2base_transducer_do0.4.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2xlsr300m_transducer.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2xlsr300m_transducer_do.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2xlsr300m_transducer_do0.2.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2xlsr300m_transducer_do0.3.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2xlsr300m_transducer_do0.4.yaml (100%) rename egs/librispeech/{v1 => v0}/conf/wav2vec2xlsr300m_transducer_enclast.yaml (100%) rename egs/librispeech/{v1 => v0}/datapath.sh (100%) rename egs/librispeech/{v1 => v0}/default_config.sh (100%) rename egs/librispeech/{v1 => v0}/feats (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v1.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v2.sh (100%) rename egs/librispeech/{v1 => 
v0}/global_conf/config_transducer_v3.1.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v3.2.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v3.3.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v3.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v4.3.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v4.4.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v5.0.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v6.1.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_transducer_v7.1.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh (100%) rename egs/librispeech/{v1 => v0}/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh (100%) create mode 120000 egs/librispeech/v0/hyp_utils rename egs/librispeech/{v1 => v0}/local/data_prep.sh (100%) rename egs/librispeech/{v1 => v0}/local/download_lm.py (100%) rename egs/librispeech/{v1 => v0}/local/make_musan.py (100%) rename egs/librispeech/{v1 => v0}/local/make_musan.sh (100%) rename egs/librispeech/{v1 => v0}/local/make_rirs_data.sh (100%) rename egs/librispeech/{v1 => v0}/local/prepare_lang.py (100%) rename egs/librispeech/{v1 => v0}/local/prepare_lang_bpe.py (100%) rename egs/librispeech/{v1 => v0}/local/train_bpe_model.py (100%) rename egs/librispeech/{v1 => v0}/local/validate_bpe_lexicon.py (100%) create mode 100755 egs/librispeech/v0/path.sh create mode 100755 egs/librispeech/v0/run_001_prepare_data.sh rename egs/librispeech/{v1 => v0}/run_003_prepare_noises_rirs.sh (100%) rename egs/librispeech/{v1 => v0}/run_004_compute_bpe.sh (100%) rename egs/librispeech/{v1 => v0}/run_011_train_asr.sh (100%) rename egs/librispeech/{v1 => v0}/run_011_train_asr_old.sh (100%) rename egs/librispeech/{v1 => v0}/run_030_inference.sh (100%) rename egs/librispeech/{v1 => v0}/run_030_inference_old.sh (100%) rename egs/librispeech/{v1 => v0}/run_040_eval_wer.sh (100%) rename egs/librispeech/{v1 => v0}/steps (100%) rename egs/librispeech/{v1 => v0}/steps_be (100%) 
rename egs/librispeech/{v1 => v0}/steps_pyfe (100%) rename egs/librispeech/{v1 => v0}/steps_transducer/decode_wav2vec2rnn_transducer.sh (100%) rename egs/librispeech/{v1 => v0}/steps_transducer/decode_wav2vec2transducer.sh (100%) rename egs/librispeech/{v1 => v0}/steps_xvec (100%) rename egs/librispeech/{v1 => v0}/utils (100%) rename egs/librispeech/{v1 => v0}/xvectors (100%) create mode 100644 egs/librispeech/v1/conf/coe_gpu_bigmem.conf create mode 100644 egs/librispeech/v1/conf/coe_gpu_long.conf create mode 100644 egs/librispeech/v1/conf/coe_gpu_rtx.conf create mode 100644 egs/librispeech/v1/conf/coe_gpu_short.conf create mode 100644 egs/librispeech/v1/conf/coe_gpu_v100.conf create mode 100644 egs/librispeech/v1/conf/fbank80_specaug1_mn_16k.yaml create mode 100644 egs/librispeech/v1/conf/speed_reverb_noise10-20dB_aug.yaml create mode 100644 egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml create mode 100644 egs/librispeech/v1/global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh create mode 100755 egs/voxceleb/ssl.v1/cmd.sh create mode 100644 egs/voxceleb/ssl.v1/conf/clsp.conf create mode 100644 egs/voxceleb/ssl.v1/conf/coe_gpu_bigmem.conf create mode 100644 egs/voxceleb/ssl.v1/conf/coe_gpu_long.conf create mode 100644 egs/voxceleb/ssl.v1/conf/coe_gpu_rtx.conf create mode 100644 egs/voxceleb/ssl.v1/conf/coe_gpu_short.conf create mode 100644 egs/voxceleb/ssl.v1/conf/coe_gpu_v100.conf create mode 100644 egs/voxceleb/ssl.v1/conf/fbank80_specaug1_stmn_16k.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/reverb_noise_aug.yaml create mode 100644 egs/voxceleb/ssl.v1/datapath.sh create mode 120000 egs/voxceleb/ssl.v1/hyp_utils create mode 100755 egs/voxceleb/ssl.v1/path.sh create mode 100755 egs/voxceleb/ssl.v1/run_001_prepare_data.sh create mode 100755 egs/voxceleb/ssl.v1/run_002_compute_evad.sh create mode 100755 egs/voxceleb/ssl.v1/run_003_prepare_noises_rirs.sh create mode 100755 egs/voxceleb/ssl.v1/run_004_prepare_xvec_train_data.sh create mode 100755 egs/voxceleb/ssl.v1/run_005_train_dino.sh create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml create mode 100644 egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml create mode 100644 hyperion/torch/layer_blocks/conformer_decoder_v1.py delete mode 100644 hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py create mode 100644 hyperion/torch/layer_blocks/transformer_input.py create mode 100644 hyperion/torch/layers/feat_fuser_factory.py create mode 100644 hyperion/torch/layers/feat_fusers.py create mode 100644 hyperion/torch/models/wav2xvectors/hf_hubert2conformer_v1_xvector.py create mode 100644 hyperion/torch/models/wav2xvectors/hf_wav2vec2conformer_v1_xvector.py create mode 100644 hyperion/torch/models/wav2xvectors/hf_wavlm2conformer_v1_xvector.py create mode 100644 hyperion/torch/models/wav2xvectors/wav2conformer_v1_xvector.py create mode 100644 hyperion/torch/models/xvectors/conformer_v1_xvector.py create mode 100644 hyperion/torch/narchs/conformer_decoder_v1.py create mode 100644 hyperion/torch/narchs/feat_fuser_mvn.py diff --git a/egs/librispeech/v0/cmd.sh b/egs/librispeech/v0/cmd.sh new file mode 100755 index 00000000..89dbb7d8 --- /dev/null +++ b/egs/librispeech/v0/cmd.sh @@ -0,0 +1,28 @@ +# you can change cmd.sh depending on what type of queue you are using. 
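+# This recipe defaults to queue.pl on the JHU grids (see the if/else below); a minimal +# grid-less sketch would be: export train_cmd=run.pl; export cuda_cmd=run.pl.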
+# If you have no queueing system and want to run on a local machine, you +# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +if [ "$(hostname -d)" == "cm.gemini" ];then + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 10G" + #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --config conf/clsp.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/clsp.conf --mem 20G" + export cuda_eval_cmd="$train_cmd" +fi + + + diff --git a/egs/librispeech/v0/conf/clsp.conf b/egs/librispeech/v0/conf/clsp.conf new file mode 100644 index 00000000..959c62a7 --- /dev/null +++ b/egs/librispeech/v0/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[1345679]*|c2[12357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/librispeech/v1/conf/infer.yaml b/egs/librispeech/v0/conf/infer.yaml similarity index 100% rename from egs/librispeech/v1/conf/infer.yaml rename to egs/librispeech/v0/conf/infer.yaml diff --git a/egs/librispeech/v1/conf/reverb_noise20dB_aug.yaml b/egs/librispeech/v0/conf/reverb_noise20dB_aug.yaml similarity index 100% rename from egs/librispeech/v1/conf/reverb_noise20dB_aug.yaml rename to egs/librispeech/v0/conf/reverb_noise20dB_aug.yaml diff --git a/egs/librispeech/v1/conf/reverb_noise_aug.yaml b/egs/librispeech/v0/conf/reverb_noise_aug.yaml similarity index 100% rename from egs/librispeech/v1/conf/reverb_noise_aug.yaml rename to egs/librispeech/v0/conf/reverb_noise_aug.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v1.2.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml rename to
egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.0.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.1.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.10.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.2.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.3.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.1.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.2.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.4.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.5.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.6.yaml diff --git 
a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.7.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.8.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_conf_rnnt_k2_pruned_stage1_v3.9.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_lstm_rnnt_k2_pruned_stage1_v1.2.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.0.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.2.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_rnnt_k2_pruned_stage1_v1.3.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_rnnt_k2_stage1_v1.0.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_transducer_stage1_v5.0.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml rename to 
egs/librispeech/v0/conf/train_wav2vec2base_transducer_stage1_v6.1.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml b/egs/librispeech/v0/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml rename to egs/librispeech/v0/conf/train_wav2vec2base_transducer_stage1_v7.1.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v1.0.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v2.0.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.0.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.1.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.2.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v3.3.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.3.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml similarity index 100% rename from egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage1_v4.4.yaml diff --git a/egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml b/egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml similarity index 100% rename from 
egs/librispeech/v1/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml rename to egs/librispeech/v0/conf/train_wav2vec2xlsr300m_transducer_stage2_v3.2.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2base_rnn_transducer_do0.4.yaml b/egs/librispeech/v0/conf/wav2vec2base_rnn_transducer_do0.4.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2base_rnn_transducer_do0.4.yaml rename to egs/librispeech/v0/conf/wav2vec2base_rnn_transducer_do0.4.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2base_rnnt_ta_do0.4.yaml b/egs/librispeech/v0/conf/wav2vec2base_rnnt_ta_do0.4.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2base_rnnt_ta_do0.4.yaml rename to egs/librispeech/v0/conf/wav2vec2base_rnnt_ta_do0.4.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2base_transducer_do0.4.yaml b/egs/librispeech/v0/conf/wav2vec2base_transducer_do0.4.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2base_transducer_do0.4.yaml rename to egs/librispeech/v0/conf/wav2vec2base_transducer_do0.4.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml b/egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer.yaml rename to egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do.yaml b/egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_do.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do.yaml rename to egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_do.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.2.yaml b/egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_do0.2.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.2.yaml rename to egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_do0.2.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.3.yaml b/egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_do0.3.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.3.yaml rename to egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_do0.3.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.4.yaml b/egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_do0.4.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_do0.4.yaml rename to egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_do0.4.yaml diff --git a/egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_enclast.yaml b/egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_enclast.yaml similarity index 100% rename from egs/librispeech/v1/conf/wav2vec2xlsr300m_transducer_enclast.yaml rename to egs/librispeech/v0/conf/wav2vec2xlsr300m_transducer_enclast.yaml diff --git a/egs/librispeech/v1/datapath.sh b/egs/librispeech/v0/datapath.sh similarity index 100% rename from egs/librispeech/v1/datapath.sh rename to egs/librispeech/v0/datapath.sh diff --git a/egs/librispeech/v1/default_config.sh b/egs/librispeech/v0/default_config.sh similarity index 100% rename from egs/librispeech/v1/default_config.sh rename to egs/librispeech/v0/default_config.sh diff --git a/egs/librispeech/v1/feats b/egs/librispeech/v0/feats similarity index 100% rename from egs/librispeech/v1/feats rename to egs/librispeech/v0/feats diff --git a/egs/librispeech/v1/global_conf/config_transducer_v1.sh 
b/egs/librispeech/v0/global_conf/config_transducer_v1.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v1.sh rename to egs/librispeech/v0/global_conf/config_transducer_v1.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v2.sh b/egs/librispeech/v0/global_conf/config_transducer_v2.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v2.sh rename to egs/librispeech/v0/global_conf/config_transducer_v2.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.1.sh b/egs/librispeech/v0/global_conf/config_transducer_v3.1.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v3.1.sh rename to egs/librispeech/v0/global_conf/config_transducer_v3.1.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.2.sh b/egs/librispeech/v0/global_conf/config_transducer_v3.2.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v3.2.sh rename to egs/librispeech/v0/global_conf/config_transducer_v3.2.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.3.sh b/egs/librispeech/v0/global_conf/config_transducer_v3.3.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v3.3.sh rename to egs/librispeech/v0/global_conf/config_transducer_v3.3.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v3.sh b/egs/librispeech/v0/global_conf/config_transducer_v3.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v3.sh rename to egs/librispeech/v0/global_conf/config_transducer_v3.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v4.3.sh b/egs/librispeech/v0/global_conf/config_transducer_v4.3.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v4.3.sh rename to egs/librispeech/v0/global_conf/config_transducer_v4.3.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v4.4.sh b/egs/librispeech/v0/global_conf/config_transducer_v4.4.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v4.4.sh rename to egs/librispeech/v0/global_conf/config_transducer_v4.4.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v5.0.sh b/egs/librispeech/v0/global_conf/config_transducer_v5.0.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v5.0.sh rename to egs/librispeech/v0/global_conf/config_transducer_v5.0.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v6.1.sh b/egs/librispeech/v0/global_conf/config_transducer_v6.1.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v6.1.sh rename to egs/librispeech/v0/global_conf/config_transducer_v6.1.sh diff --git a/egs/librispeech/v1/global_conf/config_transducer_v7.1.sh b/egs/librispeech/v0/global_conf/config_transducer_v7.1.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_transducer_v7.1.sh rename to egs/librispeech/v0/global_conf/config_transducer_v7.1.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v1.2.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh 
b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.0.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.2.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.3.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.1.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.2.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.4.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.5.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.6.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.7.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh similarity index 100% rename from 
egs/librispeech/v1/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_conf_rnnt_k2_pruned_v3.9.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_lstm_rnnt_k2_pruned_v1.2.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.0.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.2.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_rnnt_k2_pruned_v1.3.sh diff --git a/egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh b/egs/librispeech/v0/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh similarity index 100% rename from egs/librispeech/v1/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh rename to egs/librispeech/v0/global_conf/config_wav2vec2base_rnnt_k2_v1.0.sh diff --git a/egs/librispeech/v0/hyp_utils b/egs/librispeech/v0/hyp_utils new file mode 120000 index 00000000..f6d1eb7a --- /dev/null +++ b/egs/librispeech/v0/hyp_utils @@ -0,0 +1 @@ +../../../hyp_utils \ No newline at end of file diff --git a/egs/librispeech/v1/local/data_prep.sh b/egs/librispeech/v0/local/data_prep.sh similarity index 100% rename from egs/librispeech/v1/local/data_prep.sh rename to egs/librispeech/v0/local/data_prep.sh diff --git a/egs/librispeech/v1/local/download_lm.py b/egs/librispeech/v0/local/download_lm.py similarity index 100% rename from egs/librispeech/v1/local/download_lm.py rename to egs/librispeech/v0/local/download_lm.py diff --git a/egs/librispeech/v1/local/make_musan.py b/egs/librispeech/v0/local/make_musan.py similarity index 100% rename from egs/librispeech/v1/local/make_musan.py rename to egs/librispeech/v0/local/make_musan.py diff --git a/egs/librispeech/v1/local/make_musan.sh b/egs/librispeech/v0/local/make_musan.sh similarity index 100% rename from egs/librispeech/v1/local/make_musan.sh rename to egs/librispeech/v0/local/make_musan.sh diff --git a/egs/librispeech/v1/local/make_rirs_data.sh b/egs/librispeech/v0/local/make_rirs_data.sh similarity index 100% rename from egs/librispeech/v1/local/make_rirs_data.sh rename to egs/librispeech/v0/local/make_rirs_data.sh diff --git a/egs/librispeech/v1/local/prepare_lang.py b/egs/librispeech/v0/local/prepare_lang.py similarity index 100% rename from egs/librispeech/v1/local/prepare_lang.py rename to egs/librispeech/v0/local/prepare_lang.py diff --git a/egs/librispeech/v1/local/prepare_lang_bpe.py 
b/egs/librispeech/v0/local/prepare_lang_bpe.py
similarity index 100%
rename from egs/librispeech/v1/local/prepare_lang_bpe.py
rename to egs/librispeech/v0/local/prepare_lang_bpe.py
diff --git a/egs/librispeech/v1/local/train_bpe_model.py b/egs/librispeech/v0/local/train_bpe_model.py
similarity index 100%
rename from egs/librispeech/v1/local/train_bpe_model.py
rename to egs/librispeech/v0/local/train_bpe_model.py
diff --git a/egs/librispeech/v1/local/validate_bpe_lexicon.py b/egs/librispeech/v0/local/validate_bpe_lexicon.py
similarity index 100%
rename from egs/librispeech/v1/local/validate_bpe_lexicon.py
rename to egs/librispeech/v0/local/validate_bpe_lexicon.py
diff --git a/egs/librispeech/v0/path.sh b/egs/librispeech/v0/path.sh
new file mode 100755
index 00000000..6994fdab
--- /dev/null
+++ b/egs/librispeech/v0/path.sh
@@ -0,0 +1,5 @@
+
+export HYP_ROOT=$(readlink -f `pwd -P`/../../..)
+export TOOLS_ROOT=$HYP_ROOT/tools
+
+. $TOOLS_ROOT/path.sh
diff --git a/egs/librispeech/v0/run_001_prepare_data.sh b/egs/librispeech/v0/run_001_prepare_data.sh
new file mode 100755
index 00000000..0708e667
--- /dev/null
+++ b/egs/librispeech/v0/run_001_prepare_data.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright
+# 2018 Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+stage=1
+config_file=default_config.sh
+
+. parse_options.sh || exit 1;
+. ./datapath.sh
+
+
+nj=6
+
+mkdir -p data
+
+
+if [ ${stage} -le 1 ]; then
+  ### Task dependent. You have to prepare the data for this part yourself,
+  ### but in most cases you can reuse Kaldi recipes.
+  echo "stage 1: Data preparation"
+  for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500
+  do
+    # use underscore-separated names in data directories.
+    local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_}
+    steps_xvec/audio_to_duration.sh --cmd "$train_cmd" data/${part//-/_}
+  done
+fi
+
+# if [ $stage -le 1 ]; then
+#   echo "Stage 1: Prepare LibriSpeech manifest"
+#   # We assume that you have downloaded the LibriSpeech corpus
+#   # to $librispeech_root
+#   mkdir -p data/manifests
+#   if [ ! -e data/manifests/.librispeech.done ]; then
+#     lhotse prepare librispeech -j $nj $librispeech_root data/manifests
+#     touch data/manifests/.librispeech.done
+#   fi
+# fi
+
+# if [ $stage -le 2 ]; then
+#   echo "Stage 2: Prepare musan manifest"
+#   # We assume that you have downloaded the musan corpus
+#   # to $musan_root
+#   mkdir -p data/manifests
+#   if [ !
-e data/manifests/.musan.done ]; then +# lhotse prepare musan $musan_root data/manifests +# touch data/manifests/.musan.done +# fi +# fi diff --git a/egs/librispeech/v1/run_003_prepare_noises_rirs.sh b/egs/librispeech/v0/run_003_prepare_noises_rirs.sh similarity index 100% rename from egs/librispeech/v1/run_003_prepare_noises_rirs.sh rename to egs/librispeech/v0/run_003_prepare_noises_rirs.sh diff --git a/egs/librispeech/v1/run_004_compute_bpe.sh b/egs/librispeech/v0/run_004_compute_bpe.sh similarity index 100% rename from egs/librispeech/v1/run_004_compute_bpe.sh rename to egs/librispeech/v0/run_004_compute_bpe.sh diff --git a/egs/librispeech/v1/run_011_train_asr.sh b/egs/librispeech/v0/run_011_train_asr.sh similarity index 100% rename from egs/librispeech/v1/run_011_train_asr.sh rename to egs/librispeech/v0/run_011_train_asr.sh diff --git a/egs/librispeech/v1/run_011_train_asr_old.sh b/egs/librispeech/v0/run_011_train_asr_old.sh similarity index 100% rename from egs/librispeech/v1/run_011_train_asr_old.sh rename to egs/librispeech/v0/run_011_train_asr_old.sh diff --git a/egs/librispeech/v1/run_030_inference.sh b/egs/librispeech/v0/run_030_inference.sh similarity index 100% rename from egs/librispeech/v1/run_030_inference.sh rename to egs/librispeech/v0/run_030_inference.sh diff --git a/egs/librispeech/v1/run_030_inference_old.sh b/egs/librispeech/v0/run_030_inference_old.sh similarity index 100% rename from egs/librispeech/v1/run_030_inference_old.sh rename to egs/librispeech/v0/run_030_inference_old.sh diff --git a/egs/librispeech/v1/run_040_eval_wer.sh b/egs/librispeech/v0/run_040_eval_wer.sh similarity index 100% rename from egs/librispeech/v1/run_040_eval_wer.sh rename to egs/librispeech/v0/run_040_eval_wer.sh diff --git a/egs/librispeech/v1/steps b/egs/librispeech/v0/steps similarity index 100% rename from egs/librispeech/v1/steps rename to egs/librispeech/v0/steps diff --git a/egs/librispeech/v1/steps_be b/egs/librispeech/v0/steps_be similarity index 100% rename from egs/librispeech/v1/steps_be rename to egs/librispeech/v0/steps_be diff --git a/egs/librispeech/v1/steps_pyfe b/egs/librispeech/v0/steps_pyfe similarity index 100% rename from egs/librispeech/v1/steps_pyfe rename to egs/librispeech/v0/steps_pyfe diff --git a/egs/librispeech/v1/steps_transducer/decode_wav2vec2rnn_transducer.sh b/egs/librispeech/v0/steps_transducer/decode_wav2vec2rnn_transducer.sh similarity index 100% rename from egs/librispeech/v1/steps_transducer/decode_wav2vec2rnn_transducer.sh rename to egs/librispeech/v0/steps_transducer/decode_wav2vec2rnn_transducer.sh diff --git a/egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh b/egs/librispeech/v0/steps_transducer/decode_wav2vec2transducer.sh similarity index 100% rename from egs/librispeech/v1/steps_transducer/decode_wav2vec2transducer.sh rename to egs/librispeech/v0/steps_transducer/decode_wav2vec2transducer.sh diff --git a/egs/librispeech/v1/steps_xvec b/egs/librispeech/v0/steps_xvec similarity index 100% rename from egs/librispeech/v1/steps_xvec rename to egs/librispeech/v0/steps_xvec diff --git a/egs/librispeech/v1/utils b/egs/librispeech/v0/utils similarity index 100% rename from egs/librispeech/v1/utils rename to egs/librispeech/v0/utils diff --git a/egs/librispeech/v1/xvectors b/egs/librispeech/v0/xvectors similarity index 100% rename from egs/librispeech/v1/xvectors rename to egs/librispeech/v0/xvectors diff --git a/egs/librispeech/v1/cmd.sh b/egs/librispeech/v1/cmd.sh index 89dbb7d8..040f458b 100755 --- a/egs/librispeech/v1/cmd.sh +++ 
b/egs/librispeech/v1/cmd.sh @@ -11,16 +11,16 @@ # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. if [ "$(hostname -d)" == "cm.gemini" ];then + #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" - #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 10G" - #export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" else - export train_cmd="queue.pl --config conf/clsp.conf --mem 4G " - export cuda_cmd="queue.pl --config conf/clsp.conf --mem 20G" + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" export cuda_eval_cmd="$train_cmd" fi diff --git a/egs/librispeech/v1/conf/clsp.conf b/egs/librispeech/v1/conf/clsp.conf index 959c62a7..4ed38246 100644 --- a/egs/librispeech/v1/conf/clsp.conf +++ b/egs/librispeech/v1/conf/clsp.conf @@ -7,5 +7,5 @@ option num_threads=* -pe smp $0 option num_threads=1 # Do not add anything to qsub_opts option max_jobs_run=* -tc $0 default gpu=0 -option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[1345679]*|c2[12357]*' +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/librispeech/v1/conf/coe_gpu_bigmem.conf b/egs/librispeech/v1/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/librispeech/v1/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/librispeech/v1/conf/coe_gpu_long.conf b/egs/librispeech/v1/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/librispeech/v1/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/librispeech/v1/conf/coe_gpu_rtx.conf b/egs/librispeech/v1/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/librispeech/v1/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option 
max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/librispeech/v1/conf/coe_gpu_short.conf b/egs/librispeech/v1/conf/coe_gpu_short.conf new file mode 100644 index 00000000..81de5cb7 --- /dev/null +++ b/egs/librispeech/v1/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/librispeech/v1/conf/coe_gpu_v100.conf b/egs/librispeech/v1/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/librispeech/v1/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/librispeech/v1/conf/fbank80_specaug1_mn_16k.yaml b/egs/librispeech/v1/conf/fbank80_specaug1_mn_16k.yaml new file mode 100644 index 00000000..99f202bb --- /dev/null +++ b/egs/librispeech/v1/conf/fbank80_specaug1_mn_16k.yaml @@ -0,0 +1,25 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 16000 + frame_length: 25 + low_freq: 20 + high_freq: 7600 + num_filters: 80 + snip_edges: false + use_energy: false +spec_augment: + time_warp_prob: 0.66 + time_warp_window: 5 + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 40 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 2 + freq_mask_prob: 1. 
+  freq_mask_min_width: 0
+  freq_mask_max_width: 30
+  freq_mask_min_num_masks: 1
+  freq_mask_max_num_masks: 2
+  mask_method: mean
+mvn:
+  norm_var: false
diff --git a/egs/librispeech/v1/conf/speed_reverb_noise10-20dB_aug.yaml b/egs/librispeech/v1/conf/speed_reverb_noise10-20dB_aug.yaml
new file mode 100644
index 00000000..f9ecdd33
--- /dev/null
+++ b/egs/librispeech/v1/conf/speed_reverb_noise10-20dB_aug.yaml
@@ -0,0 +1,39 @@
+speed_aug:
+  speed_prob: 0.5
+  speed_ratios:
+  - 0.9
+  - 1.1
+reverb_aug:
+  reverb_prob: 0.45
+  max_reverb_context: 0.5
+  rir_types:
+    smallroom:
+      weight: 1
+      rir_path: csv:data/rirs_smallroom/rirs.csv
+      rir_norm: max
+    mediumroom:
+      weight: 1
+      rir_path: csv:data/rirs_mediumroom/rirs.csv
+      rir_norm: max
+    realroom:
+      weight: 1
+      rir_path: csv:data/rirs_real/rirs.csv
+      rir_norm: max
+noise_aug:
+  noise_prob: 0.7
+  noise_types:
+    noise:
+      weight: 1
+      noise_path: data/musan_noise_proc_audio/recordings.csv
+      min_snr: 10
+      max_snr: 20
+    music:
+      weight: 1
+      noise_path: data/musan_music_proc_audio/recordings.csv
+      min_snr: 10
+      max_snr: 20
+    babble:
+      weight: 1
+      noise_path: data/musan_speech_babble/recordings.csv
+      min_snr: 10
+      max_snr: 20
diff --git a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml
new file mode 100644
index 00000000..ed622adb
--- /dev/null
+++ b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml
@@ -0,0 +1,70 @@
+data:
+  train:
+    dataset:
+      wav_scale: 1
+      aug_cfgs:
+      - conf/speed_reverb_noise10-20dB_aug.yaml
+      return_segment_info:
+      - text
+    sampler:
+      sampler_type: bucketing_seg_sampler
+      max_batch_length: 625.
+      min_batch_size: 1
+      drop_last: false
+    data_loader:
+      num_workers: 8
+  val:
+    dataset:
+      wav_scale: 1
+      return_segment_info:
+      - text
+    sampler:
+      sampler_type: bucketing_seg_sampler
+      max_batch_length: 625
+      min_batch_size: 1
+      drop_last: true
+    data_loader:
+      num_workers: 8
+model:
+  feats: fbank80_specaug1_mn_16k.yaml
+  transducer:
+    encoder:
+      att_type: local-scaled-dot-prod-v1
+      att_context: 32
+      d_model: 144
+      num_heads: 4
+      num_blocks: 16
+      d_ff: 576
+      in_layer_type: conv2d-sub
+    decoder:
+      rnnt_loss: k2_pruned
+      simple_loss_scale: 0.2
+      predictor:
+        embed_dim: 1024
+        num_layers: 2
+        hid_feats: 320
+        embed_dropout_rate: 0.1
+        rnn_dropout_rate: 0.1
+        rnn_type: lstm
+      joiner:
+        hid_feats: 320
+trainer:
+  optim:
+    opt_type: adamw
+    lr: 0.001
+    beta1: 0.9
+    beta2: 0.98
+    weight_decay: 1e-6
+  lrsched:
+    lrsch_type: noam_lr
+    d_model: 144
+    lr_factor: 8.0
+    min_lr: 1e-6
+    warmup_steps: 25000
+    update_lr_on_opt_step: true
+  grad_clip: 100
+  use_amp: true
+  log_interval: 1000
+  epochs: 120
+  eff_batch_size: 128
+  train_mode: full
diff --git a/egs/librispeech/v1/global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh b/egs/librispeech/v1/global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh
new file mode 100644
index 00000000..ee8c2b55
--- /dev/null
+++ b/egs/librispeech/v1/global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh
@@ -0,0 +1,18 @@
+# Conformer + RNN-T
+
+# training data
+nnet_train_data=train_960h
+nnet_val_data=dev_all
+
+# tokenizer
+bpe_model=data/lang_bpe_1000/bpe.model
+
+# rnn-t cfg
+nnet_type=conformer_v1_rnn_transducer
+nnet_name=fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p
+nnet_s1_base_cfg=conf/train_${nnet_name}.s1.yaml
+nnet_s1_args=""
+nnet_s1_name=$nnet_name.s1
+
+nnet_s1_dir=exp/asr_nnets/$nnet_s1_name
+nnet_s1=$nnet_s1_dir/model_ep0115.pth
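(For reference: the nnet_* variables defined in the global_conf file above are the glue between the recipe stages; the numbered run scripts source this config and pass the variables to the training tools. A minimal sketch of that pattern, assuming a hypothetical hyperion-train-asr entry point; the actual command lives in run_011_train_asr.sh, which is not shown in this patch:)

    #!/bin/bash
    # Sketch only: how a run script consumes the global_conf variables above.
    . ./cmd.sh
    . ./path.sh
    config_file=global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh
    . $config_file   # defines nnet_type, nnet_s1_base_cfg, nnet_s1_args, nnet_s1_dir, ...
    ngpu=4
    mkdir -p $nnet_s1_dir/log
    # hyperion-train-asr is a placeholder name, not a verified CLI entry point.
    $cuda_cmd --gpu $ngpu $nnet_s1_dir/log/train.log \
        hyp_utils/conda_env.sh --num-gpus $ngpu \
        hyperion-train-asr $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args \
        --trainer.exp-path $nnet_s1_dir --num-gpus $ngpu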
diff --git a/egs/librispeech/v1/run_001_prepare_data.sh b/egs/librispeech/v1/run_001_prepare_data.sh
index 0708e667..3a4ef221 100755
--- a/egs/librispeech/v1/run_001_prepare_data.sh
+++ b/egs/librispeech/v1/run_001_prepare_data.sh
@@ -19,36 +19,25 @@ nj=6
 
 mkdir -p data
 
-if [ ${stage} -le 1 ]; then
-  ### Task dependent. You have to make data the following preparation part by yourself.
-  ### But you can utilize Kaldi recipes in most cases
-  echo "stage 0: Data preparation"
-  for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500
-  do
-    # use underscore-separated names in data directories.
-    local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_}
-    steps_xvec/audio_to_duration.sh --cmd "$train_cmd" data/${part//-/_}
-  done
-fi
-
-# if [ $stage -le 1 ]; then
-#   echo "Stage 1: Prepare LibriSpeech manifest"
-#   # We assume that you have downloaded the LibriSpeech corpus
-#   # to $librispeech_root
-#   mkdir -p data/manifests
-#   if [ ! -e data/manifests/.librispeech.done ]; then
-#     lhotse prepare librispeech -j $nj $librispeech_root data/manifests
-#     touch data/manifests/.librispeech.done
-#   fi
+# if [ ${stage} -le 1 ]; then
+#   ### Task dependent. You have to make data the following preparation part by yourself.
+#   ### But you can utilize Kaldi recipes in most cases
+#   echo "stage 0: Data preparation"
+#   for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500
+#   do
+#     # use underscore-separated names in data directories.
+#     local/data_prep.sh ${librispeech_root}/${part} data/${part//-/_}
+#     steps_xvec/audio_to_duration.sh --cmd "$train_cmd" data/${part//-/_}
+#   done
 # fi
 
-# if [ $stage -le 2 ]; then
-#   echo "Stage 2: Prepare musan manifest"
-#   # We assume that you have downloaded the musan corpus
-#   # to $musan_root
-#   mkdir -p data/manifests
-#   if [ ! -e data/manifests/.musan.done ]; then
-#     lhotse prepare musan $musan_root data/manifests
-#     touch data/manifests/.musan.done
-#   fi
-# fi
+if [ $stage -le 1 ]; then
+  echo "Stage 1: Prepare lhotse LibriSpeech manifest"
+  # We assume that you have downloaded the LibriSpeech corpus
+  # to $librispeech_root
+  mkdir -p data/lhotse_librispeech
+  if [ ! -e data/lhotse_librispeech/.librispeech.done ]; then
+    lhotse prepare librispeech -j $nj $librispeech_root data/lhotse_librispeech
+    touch data/lhotse_librispeech/.librispeech.done
+  fi
+fi
diff --git a/egs/voxceleb/ssl.v1/cmd.sh b/egs/voxceleb/ssl.v1/cmd.sh
new file mode 100755
index 00000000..040f458b
--- /dev/null
+++ b/egs/voxceleb/ssl.v1/cmd.sh
@@ -0,0 +1,28 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine). queue.pl works with GridEngine (qsub). slurm.pl works
+# with slurm. Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration. Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
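+#
+# Illustrative example (not part of the original file): a minimal
+# conf/queue.conf for a GridEngine cluster maps the generic options
+# to qsub flags like this:
+#   command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
+#   option mem=* -l mem_free=$0,ram_free=$0
+#   option mem=0          # Do not add anything to qsub_opts
+#   option num_threads=* -pe smp $0
+# On a machine without a queue, everything can run locally with:
+#   export train_cmd=run.pl
+#   export cuda_cmd=run.pl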
+ +if [ "$(hostname -d)" == "cm.gemini" ];then + #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" + export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" + # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" +else + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" + export cuda_eval_cmd="$train_cmd" +fi + + + diff --git a/egs/voxceleb/ssl.v1/conf/clsp.conf b/egs/voxceleb/ssl.v1/conf/clsp.conf new file mode 100644 index 00000000..4ed38246 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/clsp.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* -V +option mem=* -l mem_free=$0,ram_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -pe smp $0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -l 'hostname=b[1]*|c0[123456789]*|c1[134679]*|c2[1357]*' +option gpu=* -l 'hostname=c0[123456789]*|c1[1345679]*|c2[12357]*,gpu=$0' diff --git a/egs/voxceleb/ssl.v1/conf/coe_gpu_bigmem.conf b/egs/voxceleb/ssl.v1/conf/coe_gpu_bigmem.conf new file mode 100644 index 00000000..a7a2ce40 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/coe_gpu_bigmem.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[2-7]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[237]n[01][0123456789]* diff --git a/egs/voxceleb/ssl.v1/conf/coe_gpu_long.conf b/egs/voxceleb/ssl.v1/conf/coe_gpu_long.conf new file mode 100644 index 00000000..b31c167c --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/coe_gpu_long.conf @@ -0,0 +1,13 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q -l hostname=r[1-9]* + + diff --git a/egs/voxceleb/ssl.v1/conf/coe_gpu_rtx.conf b/egs/voxceleb/ssl.v1/conf/coe_gpu_rtx.conf new file mode 100644 index 00000000..ba6d9e56 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/coe_gpu_rtx.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@rtx diff --git a/egs/voxceleb/ssl.v1/conf/coe_gpu_short.conf b/egs/voxceleb/ssl.v1/conf/coe_gpu_short.conf new file mode 100644 index 00000000..81de5cb7 --- 
/dev/null +++ b/egs/voxceleb/ssl.v1/conf/coe_gpu_short.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 -l hostname=r[1-9]* +option gpu=* -l gpu=$0,h_rt=00:59:00 -q gpu_short.q -l hostname=r[17]* diff --git a/egs/voxceleb/ssl.v1/conf/coe_gpu_v100.conf b/egs/voxceleb/ssl.v1/conf/coe_gpu_v100.conf new file mode 100644 index 00000000..69326b82 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/coe_gpu_v100.conf @@ -0,0 +1,11 @@ + +# Default configuration +command qsub -v PATH -cwd -S /bin/bash -j y -sync y -l arch=*64* -V +option mem=* -l mem_free=$0 +option mem=0 # Do not add anything to qsub_opts +option num_threads=* -l num_proc=$0 +option num_threads=1 # Do not add anything to qsub_opts +option max_jobs_run=* -tc $0 +default gpu=0 +option gpu=0 -q all.q -l h_rt=100:00:00 +option gpu=* -l gpu=$0,h_rt=500:00:00 -q gpu.q@@v100 diff --git a/egs/voxceleb/ssl.v1/conf/fbank80_specaug1_stmn_16k.yaml b/egs/voxceleb/ssl.v1/conf/fbank80_specaug1_stmn_16k.yaml new file mode 100644 index 00000000..8df42fc6 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/fbank80_specaug1_stmn_16k.yaml @@ -0,0 +1,24 @@ +audio_feats: + audio_feat: logfb + sample_frequency: 16000 + frame_length: 25 + low_freq: 20 + high_freq: 7600 + num_filters: 80 + snip_edges: false + use_energy: false +spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. + freq_mask_min_width: 0 + freq_mask_max_width: 8 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean +mvn: + context: 150 + norm_var: false diff --git a/egs/voxceleb/ssl.v1/conf/reverb_noise_aug.yaml b/egs/voxceleb/ssl.v1/conf/reverb_noise_aug.yaml new file mode 100644 index 00000000..86f55073 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/reverb_noise_aug.yaml @@ -0,0 +1,34 @@ +reverb_aug: + reverb_prob: 0.45 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: csv:data/rirs_smallroom/rirs.csv + rir_norm: max + mediumroom: + weight: 1 + rir_path: csv:data/rirs_mediumroom/rirs.csv + rir_norm: max + realroom: + weight: 1 + rir_path: csv:data/rirs_real/rirs.csv + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/recordings.csv + min_snr: 0 + max_snr: 18 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/recordings.csv + min_snr: 3 + max_snr: 18 + babble: + weight: 1 + noise_path: data/musan_speech_babble/recordings.csv + min_snr: 3 + max_snr: 18 diff --git a/egs/voxceleb/ssl.v1/datapath.sh b/egs/voxceleb/ssl.v1/datapath.sh new file mode 100644 index 00000000..a7eb575c --- /dev/null +++ b/egs/voxceleb/ssl.v1/datapath.sh @@ -0,0 +1,23 @@ +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + # voxceleb1_root=/export/corpora5/VoxCeleb1_v1 #voxceleb1 v1 + voxceleb1_root=/export/corpora5/VoxCeleb1_v2 #voxceleb1 v2 + voxceleb2_root=/export/corpora5/VoxCeleb2 + musan_root=/export/corpora5/JHU/musan +elif [ "$(hostname --domain)" == "cm.gemini" ];then + # voxceleb1_root=/expscratch/dsnyder/VoxCeleb1 #voxceleb1 v1 + 
voxceleb1_root=/exp/jvillalba/corpora/voxceleb1 #voxceleb1 v2 + voxceleb2_root=/expscratch/dgromero/corpora-open/vox2 + voxsrc22_root=/exp/jvillalba/corpora/voxsrc22 + musan_root=/expscratch/dgromero/corpora-open/musan +else + echo "Put your database paths here" + exit 1 +fi + + diff --git a/egs/voxceleb/ssl.v1/hyp_utils b/egs/voxceleb/ssl.v1/hyp_utils new file mode 120000 index 00000000..f6d1eb7a --- /dev/null +++ b/egs/voxceleb/ssl.v1/hyp_utils @@ -0,0 +1 @@ +../../../hyp_utils \ No newline at end of file diff --git a/egs/voxceleb/ssl.v1/path.sh b/egs/voxceleb/ssl.v1/path.sh new file mode 100755 index 00000000..6994fdab --- /dev/null +++ b/egs/voxceleb/ssl.v1/path.sh @@ -0,0 +1,5 @@ + +export HYP_ROOT=$(readlink -f `pwd -P`/../../..) +export TOOLS_ROOT=$HYP_ROOT/tools + +. $TOOLS_ROOT/path.sh diff --git a/egs/voxceleb/ssl.v1/run_001_prepare_data.sh b/egs/voxceleb/ssl.v1/run_001_prepare_data.sh new file mode 100755 index 00000000..563d3c2d --- /dev/null +++ b/egs/voxceleb/ssl.v1/run_001_prepare_data.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. datapath.sh +. $config_file + +if [ $stage -le 1 ];then + # Prepare the VoxCeleb2 dataset for training. + hyperion-prepare-data voxceleb2 --subset dev --corpus-dir $voxceleb2_root \ + --cat-videos --use-kaldi-ids \ + --output-dir data/voxceleb2cat_train +fi + +if [ $stage -le 2 ];then + # prepare voxceleb1 for test + hyperion-prepare-data voxceleb1 --task test --corpus-dir $voxceleb1_root \ + --use-kaldi-ids \ + --output-dir data/voxceleb1_test +fi + +if [ $stage -le 3 ] && [ "$do_voxsrc22" == "true" ];then + hyperion-prepare-data voxsrc22 --subset dev --corpus-dir $voxsrc22_root \ + --vox1-corpus-dir $voxceleb1_root \ + --output-dir data/voxsrc22_dev +fi + +# if [ $stage -le 4 ] && [ "$do_voxsrc22" == "true" ];then + # hyperion-prepare-data voxsrc22 --subset test --corpus-dir $voxsrc22_root \ + # --vox1-corpus-dir $voxceleb1_root \ + # --output-dir data/voxsrc22_test +# fi + +if [ $stage -le 5 ] && [ "$do_qmf" == "true" ];then + # split vox2 into 2 parts, for cohort and qmf training + hyperion-split-dataset-into-trials-and-cohort --data-dir data/voxceleb2cat_train +fi diff --git a/egs/voxceleb/ssl.v1/run_002_compute_evad.sh b/egs/voxceleb/ssl.v1/run_002_compute_evad.sh new file mode 100755 index 00000000..acccace3 --- /dev/null +++ b/egs/voxceleb/ssl.v1/run_002_compute_evad.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e +nodes=fs01 +vad_dir=`pwd`/exp/vad_e +vad_config=conf/vad_16k.yaml +nj=40 + +stage=1 +config_file=default_config.sh + +. parse_options.sh || exit 1; +. 
$config_file
+
+if [ -z "$vad_config" ];then
+  echo "We are not using VAD in this configuration"
+  exit 0
+fi
+
+if [ "$do_voxsrc22" == "true" ];then
+  extra_data="voxsrc22_dev"
+fi
+
+
+if [ $stage -le 1 ]; then
+  # Prepare to distribute data over multiple machines
+  # This only does something at CLSP grid
+  for name in voxceleb2cat_train voxceleb1_test $extra_data
+  do
+    hyp_utils/create_data_split_dirs.sh \
+      $vad_dir/$name \
+      $USER/hyp-data/voxceleb/v1.2/vad $nodes
+  done
+fi
+
+# Compute energy VAD for the train and test datasets
+if [ $stage -le 2 ];then
+  for name in voxceleb2cat_train voxceleb1_test $extra_data
+  do
+    # This creates links to distribute data in CLSP grid
+    # If you are not at CLSP grid, it does nothing and can be deleted
+    hyp_utils/create_data_split_links.sh $vad_dir/$name/vad.JOB.ark $nj
+    echo "compute vad for $name"
+    $train_cmd JOB=1:$nj $vad_dir/$name/log/vad.JOB.log \
+      hyp_utils/conda_env.sh \
+      hyperion-compute-energy-vad --cfg $vad_config \
+      --recordings-file data/$name/recordings.csv \
+      --output-spec ark,csv:$vad_dir/$name/vad.JOB.ark,$vad_dir/$name/vad.JOB.csv \
+      --part-idx JOB --num-parts $nj || exit 1
+
+    hyperion-tables cat \
+      --table-type features \
+      --output-file $vad_dir/$name/vad.csv --num-tables $nj
+    hyperion-dataset add_features \
+      --dataset data/$name \
+      --features-name vad \
+      --features-file $vad_dir/$name/vad.csv
+  done
+fi
+
+
diff --git a/egs/voxceleb/ssl.v1/run_003_prepare_noises_rirs.sh b/egs/voxceleb/ssl.v1/run_003_prepare_noises_rirs.sh
new file mode 100755
index 00000000..73c7ed82
--- /dev/null
+++ b/egs/voxceleb/ssl.v1/run_003_prepare_noises_rirs.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Copyright
+# 2020 Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+stage=1
+nj=10
+config_file=default_config.sh
+. parse_options.sh || exit 1;
+. $config_file
+. 
datapath.sh
+
+# We prepare the noise files and RIRs for online speech augmentation
+if [ $stage -le 1 ]; then
+  for name in noise music speech
+  do
+    hyperion-prepare-data musan \
+      --corpus-dir $musan_root \
+      --subset $name \
+      --output-dir data/musan_$name
+  done
+fi
+
+if [ $stage -le 2 ]; then
+  # # Prepare to distribute data over multiple machines
+  # # This only does something at CLSP grid
+  # hyp_utils/create_data_split_dirs.sh $vad_dir $USER/hyp-data/voxceleb/v1.2/vad $nodes
+
+  for name in musan_noise musan_music
+  do
+    input_data_dir=data/$name
+    output_data_dir=data/${name}_proc_audio
+    output_dir=exp/proc_audio/$name
+    $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${name}.JOB.log \
+      hyp_utils/conda_env.sh \
+      hyperion-preprocess-audio-files \
+      --audio-format flac \
+      --part-idx JOB --num-parts $nj \
+      --recordings-file $input_data_dir/recordings.csv \
+      --output-path $output_dir \
+      --output-recordings-file $output_dir/recordings.JOB.csv
+
+    hyperion-tables cat \
+      --table-type recordings \
+      --output-file $output_dir/recordings.csv --num-tables $nj
+    hyperion-dataset set_recordings \
+      --dataset $input_data_dir \
+      --recordings-file $output_dir/recordings.csv \
+      --output-dataset $output_data_dir
+
+
+  done
+fi
+
+if [ $stage -le 3 ]; then
+  # Create Babble noise from MUSAN speech files
+  for name in musan_speech
+  do
+    input_data_dir=data/$name
+    output_data_dir=data/${name}_babble
+    output_dir=exp/proc_audio/${name}_babble
+    $train_cmd $output_dir/log/make_babble_noise_${name}.log \
+      hyp_utils/conda_env.sh \
+      hyperion-make-babble-noise-audio-files \
+      --audio-format flac \
+      --min-spks 3 --max-spks 10 --num-reuses 5 \
+      --recordings-file $input_data_dir/recordings.csv \
+      --output-path $output_dir \
+      --output-recordings-file $output_data_dir/recordings.csv
+    hyperion-dataset make_from_recordings \
+      --dataset $output_data_dir \
+      --recordings-file $output_data_dir/recordings.csv
+  done
+fi
+
+if [ $stage -le 4 ]; then
+  if [ ! -d "RIRS_NOISES" ]; then
+    # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises
+    wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+    unzip rirs_noises.zip
+  fi
+  hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/simulated_rirs/smallroom --output-dir data/rirs_smallroom
+  hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/simulated_rirs/mediumroom --output-dir data/rirs_mediumroom
+  hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/real_rirs_isotropic_noises --output-dir data/rirs_real
+  for rirs in rirs_smallroom rirs_mediumroom rirs_real
+  do
+    output_dir=exp/rirs/$rirs
+    data_dir=data/$rirs
+    $train_cmd $output_dir/log/pack_rirs_${rirs}.log \
+      hyp_utils/conda_env.sh \
+      hyperion-pack-wav-rirs ${args} --input $data_dir/recordings.csv \
+      --output h5,csv:$output_dir/rirs.h5,$output_dir/rirs.csv || exit 1;
+    hyperion-dataset add_features --dataset $data_dir \
+      --features-name rirs --features-file $output_dir/rirs.csv
+
+  done
+fi
+
diff --git a/egs/voxceleb/ssl.v1/run_004_prepare_xvec_train_data.sh b/egs/voxceleb/ssl.v1/run_004_prepare_xvec_train_data.sh
new file mode 100755
index 00000000..3b7b9083
--- /dev/null
+++ b/egs/voxceleb/ssl.v1/run_004_prepare_xvec_train_data.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Copyright
+# 2020 Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+nodes=b1
+nj=40
+stage=1
+config_file=default_config.sh
+
+. parse_options.sh || exit 1;
+. 
$config_file + +if [ $stage -le 1 ]; then + # Prepare to distribute data over multiple machines + # This only does something at CLSP grid + hyp_utils/create_data_split_dirs.sh \ + exp/xvector_audios/$nnet_data \ + $USER/hyp-data/voxceleb/v1.2/xvector_audios/$nnet_data $nodes +fi + +if [ $stage -le 2 ];then + output_dir=exp/proc_audio/$nnet_data + # This creates links to distribute data in CLSP grid + # If you are not at CLSP grid, it does nothing and can be deleted + hyp_utils/create_audios_split_links.sh $output_dir data/$nnet_data/recordings.csv flac + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$nnet_data/vad.csv" + update_durs="--update-seg-durs" + fi + + $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${nnet_data}.JOB.log \ + hyp_utils/conda_env.sh \ + hyperion-preprocess-audio-files \ + --audio-format flac --remove-dc-offset $vad_args \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$nnet_data/recordings.csv \ + --output-path $output_dir \ + --output-recordings-file $output_dir/recordings.JOB.csv + + hyperion-tables cat \ + --table-type recordings \ + --output-file $output_dir/recordings.csv --num-tables $nj + + hyperion-dataset set_recordings $update_durs \ + --dataset data/$nnet_data \ + --recordings-file $output_dir/recordings.csv \ + --output-dataset data/${nnet_data}_proc_audio \ + --remove-features vad +fi + +if [ $stage -le 3 ];then + hyperion-dataset remove_short_segments \ + --dataset data/${nnet_data}_proc_audio \ + --output-dataset data/${nnet_data}_filtered \ + --length-name duration --min-length 2.0 + + hyperion-dataset remove_classes_few_segments \ + --dataset data/${nnet_data}_filtered \ + --class-name speaker --min-segs 4 +fi + +if [ $stage -le 4 ];then + hyperion-dataset split_train_val \ + --dataset data/${nnet_data}_filtered \ + --val-prob 0.03 \ + --seed 1123581321 \ + --train-dataset data/${nnet_data}_xvector_train \ + --val-dataset data/${nnet_data}_xvector_val +fi + diff --git a/egs/voxceleb/ssl.v1/run_005_train_dino.sh b/egs/voxceleb/ssl.v1/run_005_train_dino.sh new file mode 100755 index 00000000..eb1c591e --- /dev/null +++ b/egs/voxceleb/ssl.v1/run_005_train_dino.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh
+
+train_data_dir=data/${nnet_data}_xvector_train
+val_data_dir=data/${nnet_data}_xvector_val
+
+# Add extra args from the command-line arguments
+if [ -n "$num_workers" ];then
+  extra_args="--data.train.data_loader.num-workers $num_workers"
+fi
+if [ "$use_tb" == "true" ];then
+  extra_args="$extra_args --trainer.use-tensorboard"
+fi
+if [ "$use_wandb" == "true" ];then
+  extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)"
+fi
+
+if [ "$interactive" == "true" ];then
+  export cuda_cmd=run.pl
+fi
+
+# Network Training
+if [ $stage -le 1 ]; then
+
+  mkdir -p $nnet_s1_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s1_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    hyperion-train-wav2vec2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \
+    --data.train.dataset.recordings-file $train_data_dir/recordings.csv \
+    --data.train.dataset.segments-file $train_data_dir/segments.csv \
+    --data.train.dataset.class-files $train_data_dir/speaker.csv \
+    --data.val.dataset.recordings-file $val_data_dir/recordings.csv \
+    --data.val.dataset.segments-file $val_data_dir/segments.csv \
+    --trainer.exp-path $nnet_s1_dir \
+    --num-gpus $ngpu \
+
+fi
+
+
+# Finetune full model, first round
+if [ $stage -le 2 ]; then
+  if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)"
+  fi
+  mkdir -p $nnet_s2_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s2_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    hyperion-finetune-wav2vec2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \
+    --data.train.dataset.recordings-file $train_data_dir/recordings.csv \
+    --data.train.dataset.segments-file $train_data_dir/segments.csv \
+    --data.train.dataset.class-files $train_data_dir/speaker.csv \
+    --data.val.dataset.recordings-file $val_data_dir/recordings.csv \
+    --data.val.dataset.segments-file $val_data_dir/segments.csv \
+    --in-model-file $nnet_s1 \
+    --trainer.exp-path $nnet_s2_dir \
+    --num-gpus $ngpu \
+
+fi
+
+# Finetune full model, second round
+if [ $stage -le 3 ]; then
+  if [ "$use_wandb" == "true" ];then
+    extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)"
+  fi
+  mkdir -p $nnet_s3_dir/log
+  $cuda_cmd \
+    --gpu $ngpu $nnet_s3_dir/log/train.log \
+    hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \
+    hyperion-finetune-wav2vec2xvector $nnet_type --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \
+    --data.train.dataset.recordings-file $train_data_dir/recordings.csv \
+    --data.train.dataset.segments-file $train_data_dir/segments.csv \
+    --data.train.dataset.class-files $train_data_dir/speaker.csv \
+    --data.val.dataset.recordings-file $val_data_dir/recordings.csv \
+    --data.val.dataset.segments-file $val_data_dir/segments.csv \
+    --in-model-file $nnet_s2 \
+    --trainer.exp-path $nnet_s3_dir \
+    --num-gpus $ngpu \
+
+fi
diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml
index 8504db9e..fc964f84 100644
--- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml
+++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml
@@ -60,5 +60,4 @@ trainer:
   log_interval: 1000
   epochs: 8
   eff_batch_size: 512
-  target_key: speaker
   train_mode: full
diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml 
b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml new file mode 100644 index 00000000..ab6b3f4e --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml @@ -0,0 +1,69 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: speaker + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 3.0 + min_chunk_length: 3.0 + class_name: speaker + data_loader: + num_workers: 8 +model: + hf_feats: + encoder_lr: 1e-2 + feat_extract_lr: 1e-2 + xvector: + cos_scale: 32.0 + margin: 0.2 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + #lr: 5e-2 + lr: 1e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 7500 + hold_steps: 20000 + #min_lr: 5e-4 + min_lr: 1e-6 + warmup_steps: 10000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 14 + eff_batch_size: 512 + target_key: speaker + train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml index ad56e80d..928779f5 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml @@ -35,8 +35,8 @@ data: sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 16 - max_chunk_length: 3.0 - min_chunk_length: 3.0 + max_chunk_length: 6.0 + min_chunk_length: 6.0 num_chunks_per_seg_epoch: 6 class_name: speaker weight_exponent: 0.5 @@ -70,5 +70,4 @@ trainer: log_interval: 1000 epochs: 4 eff_batch_size: 256 - target_key: speaker train_mode: full diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml new file mode 100644 index 00000000..7ab8cea7 --- /dev/null +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml @@ -0,0 +1,78 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + target_sample_freq: 16000 + wav_scale: 1 + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + weight_exponent: 0.5 + weight_mode: data-prior + seg_weight_mode: data-prior + num_hard_prototypes: 0 + data_loader: + num_workers: 8 +model: + #hf_feats: + # encoder_lr: 1e-2 + # 
feat_extract_lr: 1e-2 + xvector: + cos_scale: 32.0 + margin: 0.4 + margin_warmup_epochs: 0 + intertop_k: 5 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 1e-4 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 15000 + hold_steps: 10000 + min_lr: 1e-6 + warmup_steps: 5000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 14 + eff_batch_size: 256 + target_key: speaker + #train_mode: full + train_mode: hf-feats-frozen-nograd \ No newline at end of file diff --git a/egs/voxceleb/v2.1/run_007_eval_be.sh b/egs/voxceleb/v2.1/run_007_eval_be.sh index 53621488..a686b237 100755 --- a/egs/voxceleb/v2.1/run_007_eval_be.sh +++ b/egs/voxceleb/v2.1/run_007_eval_be.sh @@ -8,7 +8,7 @@ set -e stage=1 -nnet_stage=2 +nnet_stage=3 config_file=default_config.sh . parse_options.sh || exit 1; diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py index f2df9581..02a3b68e 100755 --- a/hyperion/bin/extract_wav2vec2xvectors.py +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -139,10 +139,12 @@ def extract_xvectors( aug_df = None num_augs = 1 + metadata_columns = ["speech_duration"] + ar_args = AR.filter_args(**kwargs) ar_args["wav_scale"] = 1.0 logging.info("opening output stream: %s", output_spec) - with DWF.create(output_spec) as writer: + with DWF.create(output_spec, metadata_columns=metadata_columns) as writer: logging.info(f"opening input stream: {recordings_file} with args={ar_args}") with AR(recordings_file, **ar_args) as reader: if vad_spec is not None: @@ -168,6 +170,7 @@ def extract_xvectors( logging.info("processing utt %s", key0) for aug_id in range(num_augs): + metadata = {} t3 = time.time() key, x = augment(key0, x0, augmenter, aug_df, aug_id) t4 = time.time() @@ -201,6 +204,8 @@ def extract_xvectors( key, x, fs, min_utt_length, max_utt_length, rng ) + metadata["speech_duration"] = x.shape[1] / fs + t6 = time.time() if x.shape[1] == 0: y = np.zeros((model.embed_dim,), dtype=float_cpu()) @@ -217,7 +222,7 @@ def extract_xvectors( ) t7 = time.time() - writer.write([key], [y]) + writer.write([key], [y], metadata=metadata) if write_speech_dur is not None: keys.append(key) info.append(str(x.shape[1] / fs)) diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py index 00452695..711c4194 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_classif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py @@ -54,7 +54,7 @@ def __init__(self, feat_extractor, xvector_model): self.vad = None def forward(self, s): - f = self.feat_extractor(s) + f, _ = self.feat_extractor(s) if self.vad is not None: n_vad_frames = len(self.vad) n_feat_frames = f.shape[1] @@ -161,16 +161,16 @@ def generate_attacks( model = init_model(model_path, **kwargs) model.to(device) - logging.info("opening audio read stream: %s" % (wav_file)) + logging.info("opening audio read stream: %s", wav_file) audio_args = AR.filter_args(**kwargs) - audio_reader = AR(wav_file**audio_args) + audio_reader = AR(wav_file, **audio_args) wav_scale = audio_reader.wav_scale - logging.info("opening audio write stream: %s" % (output_wav_dir)) + logging.info("opening audio write stream: %s", output_wav_dir) audio_writer = AW(output_wav_dir, audio_format="flac") if vad_spec is not None: - logging.info("opening VAD stream: %s" % (vad_spec)) + logging.info("opening VAD stream: %s", vad_spec) v_reader = VRF.create(vad_spec, path_prefix=vad_path_prefix) keys, 
class_names, class_ids = read_utt_list( @@ -190,9 +190,7 @@ def generate_attacks( s = s[0] fs = fs[0] - torch.manual_seed( - random_seed + int(s[0]) - ) # this is to make results reproducible + torch.manual_seed(random_seed + len(s)) # this is to make results reproducible p = torch.rand(1).item() if p > p_attack: logging.info("skipping attack for utt %s" % (key)) diff --git a/hyperion/bin/generate_adv_attacks_xvector_verif.py b/hyperion/bin/generate_adv_attacks_xvector_verif.py index ab7d907b..f858ea22 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_verif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_verif.py @@ -210,9 +210,7 @@ def generate_attacks( s, fs = audio_reader.read([key.seg_set[j]]) s = s[0] fs = fs[0] - torch.manual_seed( - random_seed + int(s[0]) - ) # this is to make results reproducible + torch.manual_seed(random_seed + len(s)) # this is to make results reproducible s = torch.as_tensor(s[None, :], dtype=torch.get_default_dtype()).to(device) if vad_spec is not None: diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index e6dd3d3e..63ac34a9 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -26,8 +26,11 @@ from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy from hyperion.torch.models import ( + HFHubert2ConformerV1XVector, HFHubert2ResNet1dXVector, + HFWav2Vec2ConformerV1XVector, HFWav2Vec2ResNet1dXVector, + HFWavLM2ConformerV1XVector, HFWavLM2ResNet1dXVector, ) from hyperion.torch.trainers import XVectorTrainer as Trainer @@ -37,6 +40,9 @@ "hf_wav2vec2resnet1d": HFWav2Vec2ResNet1dXVector, "hf_hubert2resnet1d": HFHubert2ResNet1dXVector, "hf_wavlm2resnet1d": HFWavLM2ResNet1dXVector, + "hf_wav2vec2conformer": HFWav2Vec2ConformerV1XVector, + "hf_hubert2conformer": HFHubert2ConformerV1XVector, + "hf_wavlm2conformer": HFWavLM2ConformerV1XVector, } diff --git a/hyperion/bin/train_wav2xvector.py b/hyperion/bin/train_wav2xvector.py index 7373a338..3138784d 100755 --- a/hyperion/bin/train_wav2xvector.py +++ b/hyperion/bin/train_wav2xvector.py @@ -22,6 +22,7 @@ from hyperion.torch.metrics import CategoricalAccuracy # from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import Wav2ConformerV1XVector as CXVec from hyperion.torch.models import Wav2ResNet1dXVector as R1dXVec from hyperion.torch.models import Wav2ResNetXVector as RXVec @@ -34,6 +35,7 @@ xvec_dict = { "resnet": RXVec, "resnet1d": R1dXVec, + "conformer": CXVec, # "efficientnet": EXVec, # "tdnn": TDXVec, # "transformer": TFXVec, diff --git a/hyperion/bin/train_xvector_from_feats.py b/hyperion/bin/train_xvector_from_feats.py index a2acdf4c..699aa410 100755 --- a/hyperion/bin/train_xvector_from_feats.py +++ b/hyperion/bin/train_xvector_from_feats.py @@ -24,6 +24,7 @@ from hyperion.torch.data import ClassWeightedSeqSampler as Sampler from hyperion.torch.data import FeatSeqDataset as SD from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import ConformerV1XVector as CXVec from hyperion.torch.models import EfficientNetXVector as EXVec from hyperion.torch.models import ResNet1dXVector as R1dXVec from hyperion.torch.models import ResNetXVector as RXVec @@ -40,6 +41,7 @@ "tdnn": TDXVec, "transformer": TFXVec, "spinenet": SpineXVec, + "conformer": CXVec, } diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py index c3f6170d..67075a5d 100755 --- 
a/hyperion/bin/train_xvector_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -20,6 +20,7 @@ from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory from hyperion.torch.metrics import CategoricalAccuracy +from hyperion.torch.models import ConformerV1XVector as CXVec from hyperion.torch.models import EfficientNetXVector as EXVec from hyperion.torch.models import ResNet1dXVector as R1dXVec from hyperion.torch.models import ResNetXVector as RXVec @@ -37,6 +38,7 @@ "tdnn": TDXVec, "transformer": TFXVec, "spinenet": SpineXVec, + "conformer": CXVec, } diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index f91d7d96..4644f141 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -6,22 +6,23 @@ import logging import math import time +from typing import Dict, List, Optional import numpy as np import pandas as pd # import k2 import sentencepiece as spm -import torchaudio.transforms as tat -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.distributed as dist +import torchaudio.transforms as tat +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.utils.data import Dataset from ...io import RandomAccessAudioReader as AR from ...np.augment import SpeechAugment from ...utils.class_info import ClassInfo +from ...utils.misc import filter_func_args from ...utils.segment_set import SegmentSet from ...utils.text import read_text from ..torch_defs import floatstr_torch @@ -30,22 +31,24 @@ class AudioDataset(Dataset): def __init__( self, - recordings_file, - segments_file, - class_names=None, - class_files=None, - bpe_model=None, - text_file=None, - time_durs_file=None, - aug_cfgs=None, - num_augs=1, - return_segment_info=None, - return_orig=False, - target_sample_freq=None, - wav_scale=2 ** 15 - 1, - is_val=False, + recordings_file: str, + segments_file: str, + class_names: Optional[List[str]] = None, + class_files: Optional[List[str]] = None, + bpe_model: Optional[str] = None, + text_file: Optional[str] = None, + time_durs_file: Optional[str] = None, + aug_cfgs: Optional[List[str]] = None, + num_augs: int = 1, + num_aug_mix: int = 0, + aug_mix_alpha: float = 0, + return_segment_info: Optional[List[str]] = None, + return_orig: bool = False, + target_sample_freq: Optional[float] = None, + wav_scale: float = 1, + is_val: bool = False, + seed: int = 112358, ): - super().__init__() try: rank = dist.get_rank() @@ -86,12 +89,17 @@ def __init__( if text_file is not None: logging.info("loading text files") self._load_text_infos(text_file, is_val) + self.return_segment_info = ( [] if return_segment_info is None else return_segment_info ) self.return_orig = return_orig self.num_augs = num_augs + self.num_aug_mix = num_aug_mix + self.aug_mix_alpha = aug_mix_alpha + self.seed = seed + self.rng = np.random.default_rng(seed + 1000 * rank) self._create_augmenters(aug_cfgs) self.target_sample_freq = target_sample_freq @@ -135,7 +143,7 @@ def _load_class_infos(self, class_names, class_files, is_val): name in self.seg_set ), f"class_name {name} not present in the segment set" if self.rank == 0: - logging.info("loading class-info file %s" % file) + logging.info("loading class-info file %s", file) table = ClassInfo.load(file) self.class_info[name] = table if not is_val: @@ -157,7 +165,7 @@ def _create_augmenters(self, aug_cfgs): for aug_cfg in aug_cfgs: logging.info(f"loading augmentation={aug_cfg}") augmenter = 
SpeechAugment.create(
-                aug_cfg, random_seed=112358 + 1000 * self.rank
+                aug_cfg, random_seed=self.seed + 1000 * self.rank
             )
             self.augmenters.append(augmenter)
             self.reverb_context = max(augmenter.max_reverb_context, self.reverb_context)
@@ -223,32 +231,64 @@ def _read_audio(self, seg_id, start, duration):
         x, fs = self.r.read([seg_id], time_offset=start, time_durs=read_duration)
         return x[0].astype(floatstr_torch(), copy=False), fs[0]

-    def _read_audio0(self, seg_id, start, duration):
-        # how much extra audio we need to load to
-        # calculate the reverb of the first part of the audio
-        reverb_context = min(self.reverb_context, start)
-        start -= reverb_context
-        read_duration = duration + reverb_context
-
-        # read audio
-        recording_id = self.seg_set.recording_ids(seg_id)
-        x, fs = self.r.read([recording_id], time_offset=start, time_durs=read_duration)
-        return x[0].astype(floatstr_torch(), copy=False), fs[0]
+    # def _read_audio0(self, seg_id, start, duration):
+    #     # how much extra audio we need to load to
+    #     # calculate the reverb of the first part of the audio
+    #     reverb_context = min(self.reverb_context, start)
+    #     start -= reverb_context
+    #     read_duration = duration + reverb_context
+    #     # read audio
+    #     recording_id = self.seg_set.recording_ids(seg_id)
+    #     x, fs = self.r.read([recording_id], time_offset=start, time_durs=read_duration)
+    #     return x[0].astype(floatstr_torch(), copy=False), fs[0]
+
+    def _apply_aug_mix(self, x, x_augs, aug_idx):
+        x_aug_mix = {}
+        alpha_d = (self.aug_mix_alpha,) * len(x_augs)
+        w = self.rng.dirichlet(alpha_d, self.num_aug_mix)
+        # m ~ Beta(alpha, alpha), one sample per mix (assumed AugMix semantics)
+        m = self.rng.beta(self.aug_mix_alpha, self.aug_mix_alpha, self.num_aug_mix)
+        for i in range(self.num_aug_mix):
+            x_mix = np.zeros_like(x)
+            for j, (_, x_aug_j) in enumerate(x_augs.items()):
+                x_mix += w[i, j] * x_aug_j
+
+            x_aug_mix[f"x_aug_{aug_idx}_{i}"] = m[i] * x + (1 - m[i]) * x_mix
+
+        return x_aug_mix
+
+    def _apply_augs(self, x, duration, fs):
+        if not self.augmenters:
+            return {"x": x}
+
+        if duration == 0:
+            num_samples = len(x)
+        else:
+            num_samples = int(duration * fs)

-    def _apply_augs(self, x, reverb_context_samples):
+        reverb_context_samples = len(x) - num_samples
+        x_orig = x[reverb_context_samples:]
         x_augs = {}
         # for each type of augmentation
         for i, augmenter in enumerate(self.augmenters):
             # we do n_augs per augmentation type
+            x_augs_i = {}
             for j in range(self.num_augs):
                 # augment x
                 x_aug, aug_info = augmenter(x)
                 # remove the extra left context used to compute the reverberation.
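+                # (x was read with reverb_context_samples of extra audio on the
+                # left, so the reverberation tail at the start of the segment is
+                # computed correctly; only the segment itself is kept)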
x_aug = x_aug[reverb_context_samples : len(x)] x_aug = x_aug.astype(floatstr_torch(), copy=False) - x_augs[f"x_aug_{i}_{j}"] = x_aug + x_augs_i[f"x_aug_{i}_{j}"] = x_aug + + if self.num_aug_mix > 0: + x_augs_i = self._apply_aug_mix(x_orig, x_augs_i, i) - if not self.return_orig and len(x_augs) == 1: + x_augs.update(x_augs_i) + + if self.return_orig: + x_augs["x"] = x_orig + elif len(x_augs) == 1: # if we just have one aug and we don't return the clean version, # we just call x to the aug version x_augs["x"] = x_augs.pop("x_aug_0_0") @@ -304,62 +344,70 @@ def __getitem__(self, segment): x, fs = self._read_audio(seg_id, start, duration) x, fs = self._resample(x, fs) data = {"seg_id": seg_id, "sample_freq": fs} + x_augs = self._apply_augs(x, duration, fs) + data.update(x_augs) - if self.augmenters: - # augmentations - if duration == 0: - num_samples = len(x) - else: - num_samples = int(duration * fs) - reverb_context_samples = len(x) - num_samples - x_augs = self._apply_augs(x, reverb_context_samples) - data.update(x_augs) - - # add original non augmented audio - if self.return_orig: - x_orig = x[reverb_context_samples:] - data["x"] = x_orig + # if self.augmenters: + # # augmentations + # if duration == 0: + # num_samples = len(x) + # else: + # num_samples = int(duration * fs) - else: - data["x"] = x + # reverb_context_samples = len(x) - num_samples + # x_augs = self._apply_augs(x, reverb_context_samples) + # data.update(x_augs) + + # # add original non augmented audio + # if self.return_orig: + # x_orig = x[reverb_context_samples:] + # data["x"] = x_orig + + # else: + # data["x"] = x seg_info = self._get_segment_info(seg_id) data.update(seg_info) - if np.any(~np.isfinite(data["x"])): - print( - "zzz", - x.max(), - x.min(), - x.mean(), - data["x"].max(), - data["x"].min(), - data["x"].mean(), - flush=True, - ) + # if np.any(~np.isfinite(data["x"])): + # print( + # "zzz", + # x.max(), + # x.min(), + # x.mean(), + # data["x"].max(), + # data["x"].min(), + # data["x"].mean(), + # flush=True, + # ) return data @staticmethod def filter_args(**kwargs): - - ar_args = AR.filter_args(**kwargs) - valid_args = ( - "recordings_file", - "segments_file", - "aug_cfgs", - "num_augs", - "class_names", - "class_files", - "bpe_model", - "text_file", - "return_segment_info", - "return_orig", - "time_durs_file", - "target_sample_freq", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - args.update(ar_args) + args = filter_func_args(AudioDataset.__init__, kwargs) return args + # @staticmethod + # def filter_args(**kwargs): + + # ar_args = AR.filter_args(**kwargs) + # valid_args = ( + # "recordings_file", + # "segments_file", + # "aug_cfgs", + # "num_augs", + # "class_names", + # "class_files", + # "bpe_model", + # "text_file", + # "return_segment_info", + # "return_orig", + # "time_durs_file", + # "target_sample_freq", + # ) + # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + # args.update(ar_args) + # return args + @staticmethod def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: @@ -390,7 +438,10 @@ def add_class_args(parser, prefix=None, skip=set()): ) parser.add_argument( - "--class-files", default=None, nargs="+", help=("list of class info files"), + "--class-files", + default=None, + nargs="+", + help=("list of class info files"), ) parser.add_argument( @@ -402,7 +453,9 @@ def add_class_args(parser, prefix=None, skip=set()): ) parser.add_argument( - "--bpe-model", default=None, help=("bpe model for the text label"), + "--bpe-model", + default=None, 
+ help=("bpe model for the text label"), ) parser.add_argument( @@ -421,8 +474,21 @@ def add_class_args(parser, prefix=None, skip=set()): parser.add_argument( "--num-augs", default=1, + type=int, help=("number of augmentations per segment and augmentation type"), ) + parser.add_argument( + "--num-aug-mix", + default=0, + type=int, + help=("number of AugMix augmentations per segment"), + ) + parser.add_argument( + "--aug-mix-alpha", + default=0.5, + type=float, + help=("number of AugMix augmentations per segment"), + ) parser.add_argument( "--return-segment-info", default=None, @@ -449,6 +515,13 @@ def add_class_args(parser, prefix=None, skip=set()): ), ) + parser.add_argument( + "--seed", + default=11235811, + type=int, + help="random seed", + ) + AR.add_class_args(parser) if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/layer_blocks/__init__.py b/hyperion/torch/layer_blocks/__init__.py index 22cc629d..735df21d 100644 --- a/hyperion/torch/layer_blocks/__init__.py +++ b/hyperion/torch/layer_blocks/__init__.py @@ -4,6 +4,7 @@ """ from .conformer_conv import ConformerConvBlock +from .conformer_decoder_v1 import ConformerDecoderBlockV1 from .conformer_encoder_v1 import ConformerEncoderBlockV1 from .dc1d_blocks import DC1dDecBlock, DC1dEncBlock from .dc2d_blocks import DC2dDecBlock, DC2dEncBlock @@ -14,26 +15,47 @@ from .res2net2d_blocks import Res2Net2dBasicBlock, Res2Net2dBNBlock from .res2net_blocks import Res2NetBasicBlock, Res2NetBNBlock from .resetdnn_blocks import ResETDNNBlock -from .resnet1d_blocks import (ResNet1dBasicBlock, ResNet1dBasicDecBlock, - ResNet1dBNBlock, ResNet1dBNDecBlock, - ResNet1dEndpoint, SEResNet1dBasicBlock, - SEResNet1dBasicDecBlock, SEResNet1dBNBlock, - SEResNet1dBNDecBlock) -from .resnet2d_blocks import (ResNet2dBasicBlock, ResNet2dBasicDecBlock, - ResNet2dBNBlock, ResNet2dBNDecBlock, - SEResNet2dBasicBlock, SEResNet2dBasicDecBlock, - SEResNet2dBNBlock, SEResNet2dBNDecBlock) -from .resnet_blocks import (ResNetBasicBlock, ResNetBNBlock, - ResNetEndpointBlock, ResNetInputBlock) -from .se_blocks import (CFwSEBlock2d, FwSEBlock2d, SEBlock1d, SEBlock2D, - SEBlock2d, TSEBlock2D, TSEBlock2d) +from .resnet1d_blocks import ( + ResNet1dBasicBlock, + ResNet1dBasicDecBlock, + ResNet1dBNBlock, + ResNet1dBNDecBlock, + ResNet1dEndpoint, + SEResNet1dBasicBlock, + SEResNet1dBasicDecBlock, + SEResNet1dBNBlock, + SEResNet1dBNDecBlock, +) +from .resnet2d_blocks import ( + ResNet2dBasicBlock, + ResNet2dBasicDecBlock, + ResNet2dBNBlock, + ResNet2dBNDecBlock, + SEResNet2dBasicBlock, + SEResNet2dBasicDecBlock, + SEResNet2dBNBlock, + SEResNet2dBNDecBlock, +) +from .resnet_blocks import ( + ResNetBasicBlock, + ResNetBNBlock, + ResNetEndpointBlock, + ResNetInputBlock, +) +from .se_blocks import ( + CFwSEBlock2d, + FwSEBlock2d, + SEBlock1d, + SEBlock2D, + SEBlock2d, + TSEBlock2D, + TSEBlock2d, +) from .seresnet_blocks import SEResNetBasicBlock, SEResNetBNBlock from .spine_blocks import BlockSpec, SpineConv, SpineEndpoints, SpineResample from .tdnn_blocks import TDNNBlock from .transducer_joiner import TransducerJoiner -from .transducer_predictor import (TransducerConvPredictor, - TransducerRNNPredictor) -from .transformer_conv2d_subsampler import TransformerConv2dSubsampler +from .transducer_predictor import TransducerConvPredictor, TransducerRNNPredictor from .transformer_encoder_v1 import TransformerEncoderBlockV1 -from .transformer_feedforward import (Conv1dLinear, Conv1dx2, - PositionwiseFeedForward) +from 
+from .transformer_input import TransformerConv1dSubsampler, TransformerConv2dSubsampler
diff --git a/hyperion/torch/layer_blocks/conformer_decoder_v1.py b/hyperion/torch/layer_blocks/conformer_decoder_v1.py
new file mode 100644
index 00000000..e3d0893a
--- /dev/null
+++ b/hyperion/torch/layer_blocks/conformer_decoder_v1.py
@@ -0,0 +1,213 @@
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+
+import torch
+import torch.nn as nn
+
+from ..layers.attention import *
+from .conformer_conv import ConformerConvBlock
+from .conformer_encoder_v1 import ConformerEncoderBlockV1
+from .transformer_feedforward import *
+
+
+class ConformerDecoderBlockV1(ConformerEncoderBlockV1):
+    """Building block for conformer decoder based on the conformer encoder introduced in
+    https://arxiv.org/pdf/2005.08100.pdf
+
+    This includes some optional extra features
+    not included in the original paper:
+       - Choose local-attention (attending only to close frames
+         instead of all the frames in the sequence)
+       - Choose number of conv blocks
+       - Squeeze-Excitation after depthwise-conv
+       - Allows downsampling in time dimension
+       - Allows choosing activation and layer normalization type
+    We call this Conformer+
+
+    Attributes:
+      num_feats: input/output feat. dimension (aka d_model)
+      self_attn: attention module in ['scaled-dot-prod-att-v1', 'local-scaled-dot-prod-att-v1']
+      cross_attn: cross-attention module over the encoder output, same options as self_attn
+      num_heads: number of heads
+      conv_repeats: number of conv blocks
+      conv_kernel_size: kernel size for conv blocks
+      conv_stride: stride for depth-wise conv in first conv block
+      feed_forward: position-wise feed-forward string in ['linear', 'conv1dx2', 'conv1d-linear']
+      d_ff: dimension of middle layer in feed_forward block
+      ff_kernel_size: kernel size for convolutional versions of ff block
+      hid_act: ff and conv block hidden activation
+      dropout_rate: dropout rate for ff and conv blocks
+      att_context: maximum context range for local attention
+      att_dropout_rate: dropout rate for attention block
+      causal_pos_enc: if True, use causal positional encodings (when rel_pos_enc=True), it assumes
+        that query q_i only attends to key k_j when j<=i
+      conv_norm_layer: norm layer constructor for conv block,
+        if None it uses BatchNorm
+      se_r: Squeeze-Excitation compression ratio,
+        if None it doesn't use Squeeze-Excitation
+      ff_macaron: if True, it uses macaron-net style ff layers, otherwise transformer style.
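+      src_lnorm: if True, applies layer norm to the encoder output (source sequence)
+        before computing the cross-attention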
+      out_lnorm: if True, use LNorm layer at the output as in the conformer paper,
+        we think that this layer is redundant and put it to False by default
+      concat_after: if True, it concatenates the attention input and output and applies a linear transform, i.e.,
+        y = x + linear(concat(x, att(x)))
+        if False, y = x + att(x)
+
+    """
+
+    def __init__(
+        self,
+        num_feats,
+        self_attn,
+        cross_attn,
+        num_heads,
+        conv_repeats=0,
+        conv_kernel_size=31,
+        conv_stride=1,
+        feed_forward="linear",
+        d_ff=2048,
+        ff_kernel_size=3,
+        hid_act="swish",
+        dropout_rate=0,
+        att_context=25,
+        att_dropout_rate=0,
+        pos_enc_type="rel",
+        causal_pos_enc=False,
+        conv_norm_layer=None,
+        se_r=None,
+        ff_macaron=True,
+        src_lnorm=False,
+        out_lnorm=False,
+        concat_after=False,
+    ):
+        super().__init__(
+            num_feats,
+            self_attn,
+            num_heads,
+            conv_repeats=conv_repeats,
+            conv_kernel_size=conv_kernel_size,
+            conv_stride=conv_stride,
+            feed_forward=feed_forward,
+            d_ff=d_ff,
+            ff_kernel_size=ff_kernel_size,
+            hid_act=hid_act,
+            dropout_rate=dropout_rate,
+            att_context=att_context,
+            att_dropout_rate=att_dropout_rate,
+            pos_enc_type=pos_enc_type,
+            causal_pos_enc=causal_pos_enc,
+            conv_norm_layer=conv_norm_layer,
+            se_r=se_r,
+            ff_macaron=ff_macaron,
+            out_lnorm=out_lnorm,
+            concat_after=concat_after,
+        )
+
+        self.cross_attn = self._make_att(
+            cross_attn,
+            num_feats,
+            num_heads,
+            0,
+            att_dropout_rate,
+            "no",
+            False,
+        )
+
+        self.norm_cross_att = nn.LayerNorm(num_feats)
+        self.src_lnorm = src_lnorm
+        if src_lnorm:
+            self.norm_src = nn.LayerNorm(num_feats)
+
+        if self.concat_after:
+            self.cross_concat_linear = nn.Linear(num_feats + num_feats, num_feats)
+
+    def _forward_self_attn(self, x, pos_emb=None, mask=None, cache=None):
+        residual = x
+        x = self.norm_att(x)
+
+        if cache is None:
+            x_q = x
+            mask_q = mask
+        else:
+            # compute only the last frame query keeping dim: max_time_out -> 1
+            assert_cache_shape = (x.size(0), x.size(1) - 1, x.size(2))
+            assert (
+                cache.shape == assert_cache_shape
+            ), f"{cache.shape} != {assert_cache_shape}"
+            x_q = x[:, -1:, :]
+            residual = residual[:, -1:, :]
+            mask_q = None if mask is None else mask[:, -1:, :]
+
+        if pos_emb is None:
+            x_att = self.self_attn(x_q, x, x, mask=mask_q)
+        else:
+            x_att = self.self_attn(x_q, x, x, pos_emb=pos_emb, mask=mask_q)
+
+        if self.concat_after:
+            x = torch.cat((x_q, x_att), dim=-1)
+            x = self.concat_linear(x)
+        else:
+            x = x_att
+
+        if self.dropout_rate > 0:
+            x = self.dropout(x)
+
+        x = residual + x
+        return x
+
+    def _forward_cross_attn(self, x, x_src, pos_emb=None, mask=None):
+        residual = x
+        x = self.norm_cross_att(x)
+        if self.src_lnorm:
+            x_src = self.norm_src(x_src)
+
+        if pos_emb is None:
+            x_att = self.cross_attn(x, x_src, x_src, mask=mask)
+        else:
+            x_att = self.cross_attn(x, x_src, x_src, pos_emb=pos_emb, mask=mask)
+
+        if self.concat_after:
+            x = torch.cat((x, x_att), dim=-1)
+            x = self.cross_concat_linear(x)
+        else:
+            x = x_att
+
+        if self.dropout_rate > 0:
+            x = self.dropout(x)
+
+        x = residual + x
+        return x
+
+    def forward(self, x, x_src, pos_emb=None, mask=None, mask_src=None, cache=None):
+        """Forward pass function
+
+        Args:
+          x: input tensor with size=(batch, time, num_feats)
+          x_src: encoder output tensor attended by the cross-attention, size=(batch, time_src, num_feats)
+          pos_emb: positional embedding size=(batch, time2, in_feats) as R_{L-1}, ..., R_0,
+            when using relative positional encoder, otherwise None
+          mask: mask to indicate valid time steps for x (batch, time)
+          mask_src: mask to indicate valid time steps for x_src (batch, time_src)
+          cache: cached outputs of previous decoding steps, so only the last frame query is computed
+
+        Returns:
+          Tensor with output features
+          Tensor with mask
+        """
+        # macaron feed forward
+        if self.ff_macaron:
+            x = self._forward_ff_macaron(x)
+
+        # multihead
attention + x = self._forward_self_attn(x, pos_emb, mask, cache=cache) + x = self._forward_cross_attn(x, x_src, mask=mask_src) + + # convolutional blocks + x = self._forward_convs(x) + + # feed-forward block + x = self._forward_ff(x) + + # output norm + if self.out_lnorm: + x = self.norm_out(x) + + return x, mask diff --git a/hyperion/torch/layer_blocks/conformer_encoder_v1.py b/hyperion/torch/layer_blocks/conformer_encoder_v1.py index 5764c85e..4f8e1b4d 100644 --- a/hyperion/torch/layer_blocks/conformer_encoder_v1.py +++ b/hyperion/torch/layer_blocks/conformer_encoder_v1.py @@ -78,7 +78,6 @@ def __init__( out_lnorm=False, concat_after=False, ): - super().__init__() self.self_attn = self._make_att( self_attn, @@ -94,14 +93,14 @@ def __init__( self.ff_macaron = ff_macaron if ff_macaron: self.ff_scale = 0.5 - self.feed_forward_macaron = self._make_ff(feed_forward, num_feats, - d_ff, ff_kernel_size, - hid_act, dropout_rate) + self.feed_forward_macaron = self._make_ff( + feed_forward, num_feats, d_ff, ff_kernel_size, hid_act, dropout_rate + ) self.norm_ff_macaron = nn.LayerNorm(num_feats) - self.feed_forward = self._make_ff(feed_forward, num_feats, d_ff, - ff_kernel_size, hid_act, - dropout_rate) + self.feed_forward = self._make_ff( + feed_forward, num_feats, d_ff, ff_kernel_size, hid_act, dropout_rate + ) conv_blocks = [] for i in range(conv_repeats): @@ -148,6 +147,7 @@ def _make_att( att_type: string in ['scaled-dot-prod-att-v1', 'local-scaled-dot-prod-att-v1', 'block-scaled-dot-prod-att-v1'] num_feats: input/output feat. dimension (aka d_model) num_heads: number of heads + context: block attention receptive field dropout_rate: dropout rate for attention block pos_enc_type: type of positional encoder causal_pos_enc: if True, use causal positional encodings (when rel_pos_enc=True), it assumes @@ -228,8 +228,7 @@ def _make_att( ) @staticmethod - def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, - dropout_rate): + def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, dropout_rate): """Creates position-wise feed forward block from ff_type string Args: @@ -245,58 +244,38 @@ def _make_ff(ff_type, num_feats, hid_feats, kernel_size, activation, """ if ff_type == "linear": - return PositionwiseFeedForward(num_feats, - hid_feats, - activation, - dropout_rate, - time_dim=1) + return PositionwiseFeedForward( + num_feats, hid_feats, activation, dropout_rate, time_dim=1 + ) if ff_type == "conv1dx2": - return Conv1dx2(num_feats, - hid_feats, - kernel_size, - activation, - dropout_rate, - time_dim=1) + return Conv1dx2( + num_feats, hid_feats, kernel_size, activation, dropout_rate, time_dim=1 + ) if ff_type == "conv1d-linear": - return Conv1dLinear(num_feats, - hid_feats, - kernel_size, - activation, - dropout_rate, - time_dim=1) - - def forward(self, x, pos_emb=None, mask=None): - """Forward pass function - - Args: - x: input tensor with size=(batch, time, num_feats) - pos_emb: positional embedding size=(batch, time2, in_feats) as R_{L-1}, ..., R_0, - when using relative postional encoder, otherwise None - mask: mask to indicate valid time steps for x (batch, time) + return Conv1dLinear( + num_feats, hid_feats, kernel_size, activation, dropout_rate, time_dim=1 + ) - Returns: - Tensor with output features - Tensor with mask - """ + def _forward_ff_macaron(self, x): + residual = x + x = self.norm_ff_macaron(x) + x = self.feed_forward_macaron(x) + if self.dropout_rate > 0: + x = self.dropout(x) - # macaron feed forward - if self.ff_macaron: - residual = x - x = 
self.norm_ff_macaron(x) - x = self.feed_forward_macaron(x) - if self.dropout_rate > 0: - x = self.dropout(x) - x = residual + self.ff_scale * x + x = residual + self.ff_scale * x + return x - # multihead attention + def _forward_self_attn(self, x, pos_emb=None, mask=None): residual = x x = self.norm_att(x) if pos_emb is None: x_att = self.self_attn(x, x, x, mask=mask) else: x_att = self.self_attn(x, x, x, pos_emb=pos_emb, mask=mask) + if self.concat_after: x = torch.cat((x, x_att), dim=-1) x = self.concat_linear(x) @@ -307,15 +286,17 @@ def forward(self, x, pos_emb=None, mask=None): x = self.dropout(x) x = residual + x + return x - # convolutional blocks + def _forward_convs(self, x): x = x.transpose(1, 2) for block in range(len(self.conv_blocks)): x = self.conv_blocks[block](x) x = x.transpose(1, 2) + return x - # feed-forward block + def _forward_ff(self, x): residual = x x = self.norm_ff(x) x = self.feed_forward(x) @@ -323,6 +304,33 @@ def forward(self, x, pos_emb=None, mask=None): x = self.dropout(x) x = residual + self.ff_scale * x + return x + + def forward(self, x, pos_emb=None, mask=None): + """Forward pass function + + Args: + x: input tensor with size=(batch, time, num_feats) + pos_emb: positional embedding size=(batch, time2, in_feats) as R_{L-1}, ..., R_0, + when using relative postional encoder, otherwise None + mask: mask to indicate valid time steps for x (batch, time) + + Returns: + Tensor with output features + Tensor with mask + """ + # macaron feed forward + if self.ff_macaron: + x = self._forward_ff_macaron(x) + + # multihead attention + x = self._forward_self_attn(x, pos_emb, mask) + + # convolutional blocks + x = self._forward_convs(x) + + # feed-forward block + x = self._forward_ff(x) # output norm if self.out_lnorm: diff --git a/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py b/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py deleted file mode 100644 index 942e1313..00000000 --- a/hyperion/torch/layer_blocks/transformer_conv2d_subsampler.py +++ /dev/null @@ -1,61 +0,0 @@ -""" - Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" - -import torch -import torch.nn as nn - -from ..layers import ActivationFactory as AF - - -class TransformerConv2dSubsampler(nn.Module): - """Convolutional 2D subsampling (to 1/4 length) Tor transformer - - Attributes: - in_feats: input feature dimension - out_feats: Transformer d_model - hid_act: activation layer object - pos_enc: positional encoder layer - time_dim: indicates which is the time dimension in the input tensor - """ - - def __init__(self, in_feats, out_feats, hid_act, pos_enc=None, time_dim=1): - super().__init__() - self.time_dim = time_dim - hid_act = AF.create(hid_act) - self.conv = nn.Sequential( - nn.Conv2d(1, out_feats, 3, 2, padding=(0, 1)), - hid_act, - nn.Conv2d(out_feats, out_feats, 3, 2, padding=(0, 1)), - hid_act, - ) - - linear = nn.Linear(out_feats * (((in_feats - 1) // 2 - 1) // 2), - out_feats) - if pos_enc is None: - self.out = linear - else: - self.out = nn.Sequential(linear, pos_enc) - - def forward(self, x, x_mask=None): - """Forward function. - - Args: - x: input tensor with size=(batch, time, in_feats) - x_mask: mask to indicate valid time steps for x (batch, time1, time2) - - Returns: - Tensor with output features with shape = (batch, time//4, out_feats) - Tensor with subsampled mask x4. 
- """ - if self.time_dim == 1: - x = x.transpose(1, 2) - - x = x.unsqueeze(1) # (b, c, f, t) - x = self.conv(x) - b, c, f, t = x.size() - x = self.out(x.contiguous().view(b, c * f, t).transpose(1, 2)) - if x_mask is None: - return x, None - return x, x_mask[:, :, :-2:2][:, :, :-2:2] diff --git a/hyperion/torch/layer_blocks/transformer_input.py b/hyperion/torch/layer_blocks/transformer_input.py new file mode 100644 index 00000000..6c5de188 --- /dev/null +++ b/hyperion/torch/layer_blocks/transformer_input.py @@ -0,0 +1,151 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import math + +import torch +import torch.nn as nn + +from ..layers import ActivationFactory as AF + + +class TransformerConv2dSubsampler(nn.Module): + """Convolutional 2D subsampling (to 1//stride length) Tor transformer + + Attributes: + in_feats: input feature dimension + out_feats: Transformer d_model + hid_act: activation layer object + stride: total stride of the subsampler + pos_enc: positional encoder layer + time_dim: indicates which is the time dimension in the input tensor + """ + + def __init__( + self, in_feats, out_feats, hid_act, stride=4, pos_enc=None, time_dim=1 + ): + super().__init__() + self.time_dim = time_dim + hid_act = AF.create(hid_act) + self.stride = stride + if stride == 4: + stride_1 = 2 + stride_2 = 2 + hid_feats = out_feats * (((in_feats - 1) // 2 - 1) // 2) + elif stride == 2: + stride_1 = 2 + stride_2 = 1 + hid_feats = out_feats * ((in_feats - 1) // 2 - 2) + elif stride == 1: + stride_1 = 1 + stride_2 = 1 + hid_feats = out_feats * (in_feats - 4) + else: + raise NotImplementedError( + "Valid TransformerConv2dSubsampler stride==1,2,4 !={stride}" + ) + + self.conv = nn.Sequential( + nn.Conv2d(1, out_feats, 3, stride_1, padding=(0, 1)), + hid_act, + nn.Conv2d(out_feats, out_feats, 3, stride_2, padding=(0, 1)), + hid_act, + ) + + linear = nn.Linear(hid_feats, out_feats) + if pos_enc is None: + self.out = linear + else: + self.out = nn.Sequential(linear, pos_enc) + + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with size=(batch, time, in_feats) + x_mask: mask to indicate valid time steps for x (batch, time1, time2) + + Returns: + Tensor with output features with shape = (batch, time//stride, out_feats) + Tensor with subsampled mask // stride. 
+ """ + if self.time_dim == 1: + x = x.transpose(1, 2) + + x = x.unsqueeze(1) # (b, c, f, t) + x = self.conv(x) + b, c, f, t = x.size() + x = self.out(x.contiguous().view(b, c * f, t).transpose(1, 2)) + if x_mask is None: + return x, None + + return x, x_mask[:, :, :: self.stride] + + +class TransformerConv1dSubsampler(nn.Module): + """Convolutional 1D subsampling (to 1//stride length) Tor transformer + + Attributes: + in_feats: input feature dimension + out_feats: Transformer d_model + hid_act: activation layer object + stride: total stride of the subsampler + pos_enc: positional encoder layer + time_dim: indicates which is the time dimension in the input tensor + """ + + def __init__( + self, in_feats, out_feats, hid_act, stride=4, pos_enc=None, time_dim=1 + ): + super().__init__() + self.time_dim = time_dim + hid_act = AF.create(hid_act) + self.stride = stride + if stride == 4: + stride_1 = 2 + stride_2 = 2 + elif stride == 2: + stride_1 = 2 + stride_2 = 1 + elif stride == 1: + stride_1 = 1 + stride_2 = 1 + else: + raise NotImplementedError( + "Valid TransformerConv1dSubsampler stride==1,2,4 !={stride}" + ) + + self.conv = nn.Sequential( + nn.Conv1d(in_feats, out_feats, 3, stride_1, padding=1), + hid_act, + nn.Conv1d(out_feats, out_feats, 3, stride_2, padding=1), + hid_act, + ) + + linear = nn.Linear(out_feats, out_feats) + if pos_enc is None: + self.out = linear + else: + self.out = nn.Sequential(linear, pos_enc) + + def forward(self, x, x_mask=None): + """Forward function. + + Args: + x: input tensor with size=(batch, time, in_feats) + x_mask: mask to indicate valid time steps for x (batch, time1, time2) + + Returns: + Tensor with output features with shape = (batch, time//stride, out_feats) + Tensor with subsampled mask // stride. + """ + if self.time_dim == 1: + x = x.transpose(1, 2) + + x = self.conv(x) + x = self.out(x.transpose(1, 2)) + if x_mask is None: + return x, None + + return x, x_mask[:, :, :: self.stride] diff --git a/hyperion/torch/layers/__init__.py b/hyperion/torch/layers/__init__.py index bea52c95..b0b607e2 100644 --- a/hyperion/torch/layers/__init__.py +++ b/hyperion/torch/layers/__init__.py @@ -14,6 +14,13 @@ from .audio_feats_factory import AudioFeatsFactory from .calibrators import LinBinCalibrator from .dropout import DropConnect1d, DropConnect2d, Dropout1d +from .feat_fuser_factory import FeatFuserFactory +from .feat_fusers import ( + CatFeatFuser, + LastFeatFuser, + LinearFeatFuser, + WeightedAvgFeatFuser, +) from .global_pool import * from .interpolate import Interpolate from .lora import LoRAFactory diff --git a/hyperion/torch/layers/audio_feats.py b/hyperion/torch/layers/audio_feats.py index 3bc4add9..ed26b576 100644 --- a/hyperion/torch/layers/audio_feats.py +++ b/hyperion/torch/layers/audio_feats.py @@ -2,7 +2,6 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -# import logging import math @@ -158,7 +157,6 @@ def __init__( raw_energy=True, return_log_energy=False, ): - super().__init__() self.fs = fs self.frame_length = frame_length @@ -211,7 +209,6 @@ def __str__(self): return s def forward(self, x): - # Add dither if self.dither != 0.0: n = torch.randn(x.shape, device=x.device) @@ -308,13 +305,12 @@ def __init__( raw_energy=True, use_energy=True, ): - super().__init__() N = int(math.floor(frame_length * fs / 1000)) if N > fft_length: k = math.ceil(math.log(N) / math.log(2)) - self.fft_length = int(2 ** k) + self.fft_length = int(2**k) self.wav2win = Wav2Win( fs, @@ -432,7 
+428,6 @@ def __init__( raw_energy=True, use_energy=True, ): - super().__init__( fs, frame_length, @@ -526,7 +521,6 @@ def __init__( raw_energy=True, use_energy=True, ): - super().__init__( fs, frame_length, @@ -634,7 +628,6 @@ def __init__( raw_energy=True, use_energy=True, ): - super().__init__( fs, frame_length, @@ -768,7 +761,6 @@ def __init__( raw_energy=True, use_energy=True, ): - super().__init__( fs, frame_length, @@ -929,7 +921,6 @@ def __init__( snip_edges=False, center=True, ): - super().__init__( fs=fs, frame_length=frame_length, @@ -976,7 +967,6 @@ def __init__( num_filters=23, norm_filters=False, ): - super().__init__() self.fs = fs self.fft_length = fft_length diff --git a/hyperion/torch/layers/audio_feats_factory.py b/hyperion/torch/layers/audio_feats_factory.py index 6d0b4df4..1694e84e 100644 --- a/hyperion/torch/layers/audio_feats_factory.py +++ b/hyperion/torch/layers/audio_feats_factory.py @@ -4,10 +4,9 @@ """ import re -from jsonargparse import ActionParser, ArgumentParser +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ...np.feats.filter_banks import FilterBankFactory as FBF -from ...utils.misc import str2bool from .audio_feats import * FFT = "fft" @@ -20,7 +19,7 @@ FEAT_TYPES = [FFT, SPEC, LOG_SPEC, LOG_FB, MFCC, KAN_BAYASHI] -class AudioFeatsFactory(object): +class AudioFeatsFactory: """Factory class to create acoustic features layers like FFT, Spectrogram, log-Spectrogram, log-filter-bank, MFCC. """ @@ -213,6 +212,8 @@ def create( snip_edges=snip_edges, ) + raise ValueError(f"unknown feature type {audio_feat}") + @staticmethod def filter_args(**kwargs): """Filters feature extractor args from arguments dictionary. @@ -284,7 +285,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--remove-dc-offset", default=True, - type=str2bool, + action=ActionYesNo, help="Subtract mean from waveform on each frame", ) @@ -315,7 +316,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--dither", type=float, - default=1.0 / 2 ** 15, + default=1.0 / 2**15, help="Dithering constant (0.0 means no dither)", ) @@ -331,7 +332,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--snip-edges", default=True, - type=str2bool, + action=ActionYesNo, help=( "If true, end effects will be handled by outputting only " "frames that completely fit in the file, and the number of " @@ -344,7 +345,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--center", default=False, - type=str2bool, + action=ActionYesNo, help=( "If true, puts the center of the frame at t*frame_shift, " "it over-wrides snip-edges and set it to false" @@ -361,13 +362,13 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--raw-energy", default=True, - type=str2bool, + action=ActionYesNo, help="If true, compute energy before preemphasis and windowing", ) parser.add_argument( "--use-energy", default=True, - type=str2bool, + action=ActionYesNo, help="Use energy (not C0) in MFCC computation", ) @@ -380,10 +381,10 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--audio-feat", - default="cepstrum", + default="logfb", choices=FEAT_TYPES, help=( - "It can return intermediate result: fft, spec, log_spec, " "logfb, mfcc" + "It can return intermediate result: fft, spec, log_spec, logfb, mfcc" ), ) diff --git a/hyperion/torch/layers/feat_fuser_factory.py b/hyperion/torch/layers/feat_fuser_factory.py new file mode 100644 index 00000000..edc4d933 --- /dev/null +++ b/hyperion/torch/layers/feat_fuser_factory.py @@ -0,0 +1,101 
@@
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+from typing import Optional
+
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
+
+from ...utils.misc import filter_func_args
+from .feat_fusers import (
+    CatFeatFuser,
+    LastFeatFuser,
+    LinearFeatFuser,
+    WeightedAvgFeatFuser,
+)
+
+LAST_FUSER = "last"
+WAVG_FUSER = "weighted-avg"
+LINEAR_FUSER = "linear"
+CAT_FUSER = "cat"
+
+FUSER_TYPES = [LAST_FUSER, WAVG_FUSER, LINEAR_FUSER, CAT_FUSER]
+
+
+class FeatFuserFactory:
+    """Factory class to create feature fusers for Wav2Vec style hidden features."""
+
+    @staticmethod
+    def create(
+        fuser_type: str = WAVG_FUSER,
+        num_feats: Optional[int] = None,
+        feat_dim: Optional[int] = None,
+        proj_dim: Optional[int] = None,
+        proj_bias: bool = True,
+    ):
+        if fuser_type == WAVG_FUSER:
+            return WeightedAvgFeatFuser(
+                num_feats, feat_dim=feat_dim, proj_dim=proj_dim, proj_bias=proj_bias
+            )
+        elif fuser_type == LAST_FUSER:
+            return LastFeatFuser(
+                feat_dim=feat_dim, proj_dim=proj_dim, proj_bias=proj_bias
+            )
+        elif fuser_type == LINEAR_FUSER:
+            return LinearFeatFuser(
+                num_feats, feat_dim=feat_dim, proj_dim=proj_dim, proj_bias=proj_bias
+            )
+        elif fuser_type == CAT_FUSER:
+            return CatFeatFuser(
+                num_feats, feat_dim=feat_dim, proj_dim=proj_dim, proj_bias=proj_bias
+            )
+        else:
+            raise ValueError(f"unknown feature fuser type {fuser_type}")
+
+    @staticmethod
+    def filter_args(**kwargs):
+        """Filters the arguments corresponding to the feature fuser
+        from the args dictionary
+
+        Args:
+          kwargs: args dictionary
+
+        Returns:
+          args dictionary
+        """
+        args = filter_func_args(FeatFuserFactory.create, kwargs)
+        return args
+
+    @staticmethod
+    def add_class_args(parser, prefix=None):
+        """Adds feature fuser options to the parser.
+
+        Args:
+          parser: Arguments parser
+          prefix: Options prefix.
+ """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--fuser-type", + default=WAVG_FUSER, + choices=FUSER_TYPES, + help=f"One of {FUSER_TYPES}", + ) + parser.add_argument( + "--proj-dim", + default=None, + type=int, + help="project features after fusion to proj_dim", + ) + parser.add_argument( + "--proj-bias", + default=True, + action=ActionYesNo, + help="linear projection has bias", + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/layers/feat_fusers.py b/hyperion/torch/layers/feat_fusers.py new file mode 100644 index 00000000..44c72ffb --- /dev/null +++ b/hyperion/torch/layers/feat_fusers.py @@ -0,0 +1,86 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import math + +import torch +import torch.nn as nn + + +class FeatFuser(nn.Module): + def __init__(self): + super().__init__() + + +class _ProjFeatFuser(FeatFuser): + def __init__(self, feat_dim=None, proj_dim=None, proj_bias=True): + super().__init__() + self.feat_dim = feat_dim + self.proj_dim = proj_dim + self.feat_proj = None + if feat_dim is not None and proj_dim is not None: + self.feat_proj = nn.Linear(feat_dim, proj_dim, bias=proj_bias) + + +class LastFeatFuser(_ProjFeatFuser): + def __init__(self, feat_dim=None, proj_dim=None, proj_bias=True): + super().__init__(feat_dim, proj_dim, proj_bias) + + def forward(self, feats): + feats = feats[-1] + if self.feat_proj is not None: + feats = self.feat_proj(feats) + + return feats + + +class WeightedAvgFeatFuser(_ProjFeatFuser): + def __init__(self, num_feats, feat_dim=None, proj_dim=None, proj_bias=True): + super().__init__(feat_dim, proj_dim, proj_bias) + self.num_feats = num_feats + self.feat_fuser = nn.Parameter(torch.zeros(num_feats)) + + def forward(self, feats): + feats = torch.stack(feats, dim=-1) + norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) + feats = torch.sum(feats * norm_weights, dim=-1) + if self.feat_proj is not None: + feats = self.feat_proj(feats) + + return feats + + +class LinearFeatFuser(_ProjFeatFuser): + def __init__(self, num_feats, feat_dim=None, proj_dim=None, proj_bias=True): + super().__init__(feat_dim, proj_dim, proj_bias) + self.num_feats = num_feats + self.feat_fuser = nn.Linear(num_feats, 1, bias=False) + self.feat_fuser.weight.data = torch.ones(1, num_feats) / num_feats + + def forward(self, feats): + feats = torch.stack(feats, dim=-1) + feats = self.feat_fuser(feats).squeeze(dim=-1) + if self.feat_proj is not None: + feats = self.feat_proj(feats) + + return feats + + +class CatFeatFuser(FeatFuser): + def __init__(self, num_feats, feat_dim, proj_dim=None, proj_bias=True): + super().__init__() + self.num_feats = num_feats + self.feat_dim = feat_dim + if proj_dim is None: + proj_dim = feat_dim + self.proj_dim = proj_dim + self.proj_bias = proj_bias + self.feat_fuser = nn.Linear(num_feats * feat_dim, proj_dim, bias=proj_bias) + + def forward(self, feats): + feats = torch.cat(feats, dim=-1) + feats = self.feat_fuser(feats) + return feats diff --git a/hyperion/torch/layers/mvn.py b/hyperion/torch/layers/mvn.py index 4b4c5927..736b69c6 100644 --- a/hyperion/torch/layers/mvn.py +++ b/hyperion/torch/layers/mvn.py @@ -2,28 +2,31 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, 
ArgumentParser
-
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ArgumentParser
+
+from ..utils import seq_lengths_to_mask
+
+SQRT_EPS = 1e-5
 
 class MeanVarianceNorm(nn.Module):
     """Class to apply short-time mean-variance normalization to features.
-
+
     Attributes:
       norm_mean: if True, it normalizes the mean.
       norm_var: if True, it also normalizes the variance.
      left_context: left context for the window that computes the normalization stats.
      right_context: right context for the window that computes the normalization stats.
-      dim: normalization dimension (time dimension).
+      dim: normalization dimension (time dimension).
      If left_context = right_context = 0, it computes the stats on the whole utterance.
     """
+
     def __init__(
         self, norm_mean=True, norm_var=False, left_context=0, right_context=0, dim=1
     ):
-
         super().__init__()
         self.norm_mean = norm_mean
         self.norm_var = norm_var
@@ -45,62 +48,124 @@ def __str__(self):
         )
         return s
 
-    def forward(self, x):
+    def forward(self, x, x_lengths=None, x_mask=None):
         """Short-time mean-var normalizes feature tensor.
-
+
         Args:
           x: feature tensor.
 
         Returns:
           Normalized feature tensor.
         """
+        if not self.norm_mean and not self.norm_var:
+            return x
+
+        if self.dim != 1:
+            x = x.transpose(1, self.dim)
+
+        max_length = x.size(1)
+        if x_lengths is not None and x_mask is None:
+            x_mask = seq_lengths_to_mask(
+                x_lengths,
+                max_length,
+                dtype=x.dtype,
+                none_if_all_max=True,
+            )
 
-        T = x.shape[self.dim]
         if (self.left_context == 0 and self.right_context == 0) or (
-            T <= self.left_context + self.right_context + 1
+            max_length <= self.left_context + self.right_context + 1
         ):
-            return self.normalize_global(x)
+            x = self.normalize_global(x, x_mask)
+        else:
+            x = self.normalize_cumsum(x, x_mask)
+
+        if self.dim != 1:
+            x = x.transpose(1, self.dim).contiguous()
 
-        return self.normalize_cumsum(x)
+        return x
 
-    def normalize_global(self, x):
+    def _normalize_global_nomask(self, x):
         """Applies global mean-var normalization."""
         # Global mean/var norm.
+
         if self.norm_mean:
-            m_x = torch.mean(x, dim=self.dim, keepdim=True)
+            m_x = torch.mean(x, dim=1, keepdim=True)
             x = x - m_x
 
         if self.norm_var:
-            s_x = torch.std(x, dim=self.dim, keepdim=True).clamp(min=1e-5)
+            s_x = torch.std(x, dim=1, keepdim=True).clamp(min=1e-5)
             x = x / s_x
 
         return x
 
-    def normalize_cumsum(self, x):
-        """Applies short-time mean-var normalization using cumulative sums."""
+    def _normalize_global_mask(self, x, x_mask):
+        """Applies global mean-var normalization with masking."""
+        # Global mean/var norm.
+        den = torch.mean(x_mask, dim=1, keepdim=True)
+        x = x * x_mask
+        m_x = torch.mean(x, dim=1, keepdim=True) / den
         if self.norm_mean:
+            x = x - m_x
+            if self.norm_var:
+                s2_x = torch.mean(x**2, dim=1, keepdim=True) / den
+                s_x = torch.sqrt(s2_x.clamp(min=SQRT_EPS))
+                x = x / s_x
+        elif self.norm_var:
+            s2_x = torch.mean((x - m_x) ** 2, dim=1, keepdim=True) / den
+            s_x = torch.sqrt(s2_x.clamp(min=SQRT_EPS))
+            x = x / s_x
+
+        return x
+
+    def normalize_global(self, x, x_mask=None):
+        """Applies global mean-var normalization."""
+        # Global mean/var norm.
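+        # With a mask, statistics are computed over valid frames only, i.e.,
+        # m = sum(x * mask) / sum(mask); without a mask, plain mean/std are used.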
+        if x_mask is None:
+            return self._normalize_global_nomask(x)
+        else:
+            return self._normalize_global_mask(x, x_mask)
+
+    def _prenormalize_cumsum(self, x, x_mask):
+        """Subtracts the global mean first, which helps the numerical
+        stability of cumsum, and sets masked values to the global mean."""
+        if self.norm_mean or x_mask is not None:
             # subtract first global mean
             # it will help cumsum numerical stability
-            m_x = torch.mean(x, dim=self.dim, keepdim=True)
+            if x_mask is not None:
+                x = x * x_mask
+                den = torch.mean(x_mask, dim=1, keepdim=True)
+            else:
+                den = 1
+            m_x = torch.mean(x, dim=1, keepdim=True) / den
+
+        if self.norm_mean:
             x = x - m_x
+            if x_mask is not None:
+                x = x * x_mask
+        elif x_mask is not None:
+            x = x * x_mask + m_x * (1 - x_mask)
 
-        if self.dim != 1:
-            x = x.transpose(self.dim, 1)
+        return x
+
+    def normalize_cumsum(self, x, x_mask=None):
+        """Applies short-time mean-var normalization using cumulative sums."""
+        x = self._prenormalize_cumsum(x, x_mask)
 
         total_context = self.left_context + self.right_context + 1
 
         xx = nn.functional.pad(
             x.transpose(1, -1), (self.left_context, self.right_context), mode="reflect"
         ).transpose(1, -1)
 
-        if self.norm_mean:
+        if self.norm_mean or self.norm_var:
             c_x = torch.cumsum(xx, dim=1)
             m_x = (
                 c_x[:, total_context - 1 :] - c_x[:, : -total_context + 1]
             ) / total_context
 
         if self.norm_var:
-            c_x = torch.cumsum(xx ** 2, dim=1)
+            c_x = torch.cumsum(xx**2, dim=1)
             m_x2 = (
                 c_x[:, total_context - 1 :] - c_x[:, : -total_context + 1]
             ) / total_context
@@ -109,12 +174,9 @@ def normalize_cumsum(self, x):
             x = x - m_x
 
         if self.norm_var:
-            s_x = torch.sqrt((m_x2 - m_x ** 2).clamp(min=1e-5))
+            s_x = torch.sqrt((m_x2 - m_x**2).clamp(min=SQRT_EPS))
             x = x / s_x
 
-        if self.dim != 1:
-            x = x.transpose(self.dim, 1)
-
         return x.contiguous()
 
     @staticmethod
diff --git a/hyperion/torch/lr_schedulers/triangular_lr.py b/hyperion/torch/lr_schedulers/triangular_lr.py
index 10e3f83d..45704014 100644
--- a/hyperion/torch/lr_schedulers/triangular_lr.py
+++ b/hyperion/torch/lr_schedulers/triangular_lr.py
@@ -54,7 +54,6 @@ def __init__(
         step=0,
         update_lr_on_opt_step=False,
     ):
-
         super().__init__(optimizer, min_lr, 0, epoch, step, update_lr_on_opt_step)
         self.T = T
         self.T_mul = T_mul
@@ -68,7 +67,7 @@ def on_epoch_begin(self, epoch=None, epoch_updates=1, **kwargs):
         # T has to correspond to an integer number of epochs
         T = int(math.ceil(self.T / epoch_updates) * epoch_updates)
         if self.T != T:
-            logging.info("readjusting triangular_lr T %d -> %d" % (self.T, T))
+            logging.info("readjusting triangular_lr T %d -> %d", self.T, T)
             self.T = T
 
     def get_lr(self, step):
@@ -80,10 +79,10 @@ def get_lr(self, step):
             self.T *= self.T_mul
             self.num_restarts += 1
             logging.info(
-                "triangular_lr warm-restart=%d T=%d" % (self.num_restarts, self.T)
+                "triangular_lr warm-restart=%d T=%d", self.num_restarts, self.T
             )
 
-        alpha = self.gamma ** self.num_restarts
+        alpha = self.gamma**self.num_restarts
         x = abs(2 * x / self.T - 1)
         return [
diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py
index 29b6cdaa..fa4addcd 100644
--- a/hyperion/torch/models/__init__.py
+++ b/hyperion/torch/models/__init__.py
@@ -14,11 +14,15 @@
     HFWav2Vec2Transducer,
 )
 from .wav2xvectors import (
+    HFHubert2ConformerV1XVector,
     HFHubert2ResNet1dXVector,
+    HFWav2Vec2ConformerV1XVector,
     HFWav2Vec2ResNet1dXVector,
+    HFWavLM2ConformerV1XVector,
     HFWavLM2ResNet1dXVector,
-    Wav2ResNetXVector,
+    Wav2ConformerV1XVector,
     Wav2ResNet1dXVector,
+    Wav2ResNetXVector,
 )
 from .xvectors.efficient_net_xvector import EfficientNetXVector
from .xvectors.resnet1d_xvector import ResNet1dXVector diff --git a/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py b/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py index 05a82103..89173eff 100644 --- a/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py +++ b/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py @@ -32,7 +32,7 @@ def __init__(self, encoder, decoder): if isinstance(encoder, dict): encoder = ConformerEncoderV1(**encoder) else: - assert isinstance(encoder, RNNEncoder) + assert isinstance(encoder, ConformerEncoderV1) super().__init__(encoder, decoder) @@ -45,7 +45,6 @@ def filter_args(**kwargs): @staticmethod def add_class_args(parser, prefix=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") @@ -53,8 +52,7 @@ def add_class_args(parser, prefix=None, skip=set()): ConformerEncoderV1.add_class_args(parser, prefix="encoder", skip=skip) RNNTransducer.add_class_args(parser) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) def change_config( self, @@ -68,8 +66,7 @@ def change_config( @staticmethod def filter_finetune_args(**kwargs): args = RNNTransducer.filter_finetune_args(**kwargs) - encoder_args = ConformerEncoderV1.filter_finetune_args( - **kwargs["encoder"]) + encoder_args = ConformerEncoderV1.filter_finetune_args(**kwargs["encoder"]) args["encoder"] = encoder_args return args @@ -83,5 +80,4 @@ def add_finetune_args(parser, prefix=None): RNNTransducer.add_finetune_args(parser) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/__init__.py b/hyperion/torch/models/wav2xvectors/__init__.py index 62123d13..6bafd26d 100644 --- a/hyperion/torch/models/wav2xvectors/__init__.py +++ b/hyperion/torch/models/wav2xvectors/__init__.py @@ -4,12 +4,18 @@ """ +from .hf_hubert2conformer_v1_xvector import HFHubert2ConformerV1XVector from .hf_hubert2resnet1d_xvector import HFHubert2ResNet1dXVector +from .hf_wav2vec2conformer_v1_xvector import HFWav2Vec2ConformerV1XVector from .hf_wav2vec2resnet1d_xvector import HFWav2Vec2ResNet1dXVector +from .hf_wavlm2conformer_v1_xvector import HFWavLM2ConformerV1XVector from .hf_wavlm2resnet1d_xvector import HFWavLM2ResNet1dXVector +from .wav2conformer_v1_xvector import Wav2ConformerV1XVector + # from .wav2efficient_net_xvector import Wav2EfficientNetXVector # from .wav2transformer_xvector_v1 import Wav2TransformerXVectorV1 # from .wav2spinenet_xvector import Wav2SpineNetXVector from .wav2resnet1d_xvector import Wav2ResNet1dXVector + # from .wav2tdnn_xvector import Wav2TDNNXVector from .wav2resnet_xvector import Wav2ResNetXVector diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2conformer_v1_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2conformer_v1_xvector.py new file mode 100644 index 00000000..aeabd09e --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2conformer_v1_xvector.py @@ -0,0 +1,93 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from typing import Dict, Optional, Union + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser + +from ...tpm import HFHubert +from 
..xvectors import ConformerV1XVector +from .hf_wav2xvector import HFWav2XVector + + +class HFHubert2ConformerV1XVector(HFWav2XVector): + """Class extracting Hubert + ConformerV1 x-vectors from waveform. + + Attributes: + hf_feats: HFHubert configuration dictionary or object. + This is a wrapper over the Hugging Face Hubert model. + xvector: ConformerV1XVector configuration dictionary or object. + feat_fusion_start: the input to the x-vector model fuses the Hubert layers from "feat_fusion_start" to + the Hubert "num_layers". + feat_fusion_method: method to fuse the hidden layers from the Hubert model, when more + than one layer is used. + """ + + def __init__( + self, + hf_feats: Union[Dict, HFHubert], + xvector: Union[Dict, ConformerV1XVector], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): + if isinstance(hf_feats, dict): + hf_feats = HFHubert(**hf_feats) + else: + assert isinstance(hf_feats, HFHubert) + + if isinstance(xvector, dict): + xvector["encoder"]["in_feats"] = hf_feats.hidden_size + xvector = ConformerV1XVector(**xvector) + else: + assert isinstance(xvector, ConformerV1XVector) + assert xvector.encoder_net.in_feats == hf_feats.hidden_size + + super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + + @staticmethod + def filter_args(**kwargs): + base_args = HFWav2XVector.filter_args(**kwargs) + child_args = HFHubert.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ConformerV1XVector.filter_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFHubert.add_class_args(parser, prefix="hf_feats") + ConformerV1XVector.add_class_args(parser, prefix="xvector") + HFWav2XVector.add_class_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFHubert.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ConformerV1XVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFHubert.add_finetune_args(parser, prefix="hf_feats") + ConformerV1XVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2conformer_v1_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2conformer_v1_xvector.py new file mode 100644 index 00000000..3a670d1c --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2conformer_v1_xvector.py @@ -0,0 +1,96 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from typing import Dict, Optional, Union + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser + +from ...tpm import HFWav2Vec2 +from ..xvectors import ConformerV1XVector +from .hf_wav2xvector import HFWav2XVector + + +class HFWav2Vec2ConformerV1XVector(HFWav2XVector): + """Class extracting Wav2Vec2 + ConformerV1 x-vectors from waveform.
+ + Attributes: + hf_feats: HFWav2Vec configuration dictionary or object. + This is a wrapper over the Hugging Face Wav2Vec model. + xvector: ConformerV1XVector configuration dictionary or object. + feat_fusion_start: the input to the x-vector model fuses the wav2vec layers from "feat_fusion_start" to + the wav2vec "num_layers". + feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more + than one layer is used. + """ + + def __init__( + self, + hf_feats: Union[Dict, HFWav2Vec2], + xvector: Union[Dict, ConformerV1XVector], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): + if isinstance(hf_feats, dict): + if "class_name" in hf_feats: + del hf_feats["class_name"] + hf_feats = HFWav2Vec2(**hf_feats) + else: + assert isinstance(hf_feats, HFWav2Vec2) + + if isinstance(xvector, dict): + xvector["encoder"]["in_feats"] = hf_feats.hidden_size + if "class_name" in xvector: + del xvector["class_name"] + xvector = ConformerV1XVector(**xvector) + else: + assert isinstance(xvector, ConformerV1XVector) + assert xvector.encoder_net.in_feats == hf_feats.hidden_size + + super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + + @staticmethod + def filter_args(**kwargs): + base_args = HFWav2XVector.filter_args(**kwargs) + child_args = HFWav2Vec2.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ConformerV1XVector.filter_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_class_args(parser, prefix="hf_feats") + ConformerV1XVector.add_class_args(parser, prefix="xvector") + HFWav2XVector.add_class_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFWav2Vec2.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ConformerV1XVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWav2Vec2.add_finetune_args(parser, prefix="hf_feats") + ConformerV1XVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index 925f1172..d6be544a 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -9,6 +9,7 @@ import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser +from ...layers import MeanVarianceNorm from ...torch_model import TorchModel from ...utils import remove_silence diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2conformer_v1_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2conformer_v1_xvector.py new file mode 100644 index 00000000..30e450eb --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2conformer_v1_xvector.py @@ -0,0 +1,93 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +from
typing import Dict, Optional, Union + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser + +from ...tpm import HFWavLM +from ..xvectors import ConformerV1XVector +from .hf_wav2xvector import HFWav2XVector + + +class HFWavLM2ConformerV1XVector(HFWav2XVector): + """Class extracting WavLM + ConformerV1 x-vectors from waveform. + + Attributes: + hf_feats: HFWavLM configuration dictionary or object. + This is a wrapper over the Hugging Face WavLM model. + xvector: ConformerV1XVector configuration dictionary or object. + feat_fusion_start: the input to the x-vector model fuses the WavLM layers from "feat_fusion_start" to + the WavLM "num_layers". + feat_fusion_method: method to fuse the hidden layers from the WavLM model, when more + than one layer is used. + """ + + def __init__( + self, + hf_feats: Union[Dict, HFWavLM], + xvector: Union[Dict, ConformerV1XVector], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): + if isinstance(hf_feats, dict): + hf_feats = HFWavLM(**hf_feats) + else: + assert isinstance(hf_feats, HFWavLM) + + if isinstance(xvector, dict): + xvector["encoder"]["in_feats"] = hf_feats.hidden_size + xvector = ConformerV1XVector(**xvector) + else: + assert isinstance(xvector, ConformerV1XVector) + assert xvector.encoder_net.in_feats == hf_feats.hidden_size + + super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + + @staticmethod + def filter_args(**kwargs): + base_args = HFWav2XVector.filter_args(**kwargs) + child_args = HFWavLM.filter_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ConformerV1XVector.filter_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWavLM.add_class_args(parser, prefix="hf_feats") + ConformerV1XVector.add_class_args(parser, prefix="xvector") + HFWav2XVector.add_class_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = HFWavLM.filter_finetune_args(**kwargs["hf_feats"]) + base_args["hf_feats"] = child_args + child_args = ConformerV1XVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + HFWavLM.add_finetune_args(parser, prefix="hf_feats") + ConformerV1XVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2conformer_v1_xvector.py b/hyperion/torch/models/wav2xvectors/wav2conformer_v1_xvector.py new file mode 100644 index 00000000..ad6ae4c7 --- /dev/null +++ b/hyperion/torch/models/wav2xvectors/wav2conformer_v1_xvector.py @@ -0,0 +1,70 @@ +""" + Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser + +from ..xvectors import ConformerV1XVector +from .wav2xvector import Wav2XVector + + +class Wav2ConformerV1XVector(Wav2XVector): + """Class extracting ConformerV1
x-vectors from waveform. + It contains acoustic feature extraction, feature normalization and + ConformerV1XVector extractor. + + Attributes: + feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. + xvector: ConformerV1XVector configuration dictionary or object. + """ + + def __init__(self, feats, xvector): + if isinstance(xvector, dict): + xvector = ConformerV1XVector.filter_args(**xvector) + xvector = ConformerV1XVector(**xvector) + else: + assert isinstance(xvector, ConformerV1XVector) + + super().__init__(feats, xvector) + + @staticmethod + def add_class_args(parser, prefix=None): + """Adds Wav2ConformerV1XVector options to parser. + + Args: + parser: Arguments parser + prefix: Options prefix. + """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + Wav2XVector.add_class_args(parser) + ConformerV1XVector.add_class_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = ConformerV1XVector.filter_finetune_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ConformerV1XVector.add_finetune_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/__init__.py b/hyperion/torch/models/xvectors/__init__.py index 57819128..92e69a5d 100644 --- a/hyperion/torch/models/xvectors/__init__.py +++ b/hyperion/torch/models/xvectors/__init__.py @@ -4,6 +4,7 @@ """ +from .conformer_v1_xvector import ConformerV1XVector from .efficient_net_xvector import EfficientNetXVector from .resnet1d_xvector import ResNet1dXVector from .resnet_xvector import ResNetXVector diff --git a/hyperion/torch/models/xvectors/conformer_v1_xvector.py b/hyperion/torch/models/xvectors/conformer_v1_xvector.py new file mode 100644 index 00000000..323c22a9 --- /dev/null +++ b/hyperion/torch/models/xvectors/conformer_v1_xvector.py @@ -0,0 +1,168 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging + +from jsonargparse import ActionParser, ArgumentParser + +import torch +import torch.nn as nn + +from ...narchs import ConformerEncoderV1 as Encoder +from .xvector import XVector + + +class ConformerV1XVector(XVector): + def __init__( + self, + encoder, + num_classes, + pool_net="mean+stddev", + embed_dim=256, + num_embed_layers=1, + hid_act={"name": "relu", "inplace": True}, + loss_type="arc-softmax", + cos_scale=64, + margin=0.3, + margin_warmup_epochs=0, + intertop_k=5, + intertop_margin=0.0, + num_subcenters=2, + dropout_rate=0, + norm_layer=None, + head_norm_layer=None, + use_norm=True, + norm_before=True, + head_use_in_norm=False, + embed_layer=0, + proj_feats=None, + ): + if isinstance(encoder, dict): + logging.info("making conformer encoder network") + encoder = Encoder(**encoder) + + super().__init__( + encoder, + num_classes, + pool_net=pool_net, + embed_dim=embed_dim, + num_embed_layers=num_embed_layers, + hid_act=hid_act, + loss_type=loss_type, + cos_scale=cos_scale, + margin=margin, + margin_warmup_epochs=margin_warmup_epochs, +
intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, + norm_layer=norm_layer, + head_norm_layer=head_norm_layer, + use_norm=use_norm, + norm_before=norm_before, + head_use_in_norm=head_use_in_norm, + dropout_rate=dropout_rate, + embed_layer=embed_layer, + proj_feats=proj_feats, + ) + + def get_config(self): + base_config = super().get_config() + del base_config["encoder_cfg"] + del base_config["in_feats"] + + encoder_cfg = self.encoder_net.get_config() + del encoder_cfg["class_name"] + config = { + "encoder": encoder_cfg, + } + + config.update(base_config) + return config + + def change_config( + self, + encoder, + override_dropouts=False, + dropout_rate=0, + num_classes=None, + loss_type="arc-softmax", + cos_scale=64, + margin=0.3, + margin_warmup_epochs=10, + intertop_k=5, + intertop_margin=0, + num_subcenters=2, + ): + super().change_config( + False, + dropout_rate, + num_classes, + loss_type, + cos_scale, + margin, + margin_warmup_epochs, + intertop_k, + intertop_margin, + num_subcenters, + ) + if override_dropouts: + logging.info("changing x-vector head dropouts") + self.classif_net.change_dropouts(dropout_rate) + + self.encoder_net.change_config(**encoder) + + @classmethod + def load(cls, file_path=None, cfg=None, state_dict=None): + cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict) + try: + del cfg["in_feats"] + except KeyError: + pass + + model = cls(**cfg) + if state_dict is not None: + model.load_state_dict(state_dict) + + return model + + @staticmethod + def filter_args(**kwargs): + base_args = XVector.filter_args(**kwargs) + child_args = Encoder.filter_args(**kwargs["encoder"]) + + base_args["encoder"] = child_args + return base_args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_class_args(parser, skip=set(["in_feats"])) + Encoder.add_class_args(parser, prefix="encoder", skip=set()) + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + add_argparse_args = add_class_args + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = XVector.filter_finetune_args(**kwargs) + child_args = Encoder.filter_finetune_args(**kwargs["encoder"]) + base_args["encoder"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_finetune_args(parser) + Encoder.add_finetune_args(parser, prefix="encoder", skip=set()) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/audio_feats_mvn.py b/hyperion/torch/narchs/audio_feats_mvn.py index 440c22b6..b42f48f1 100644 --- a/hyperion/torch/narchs/audio_feats_mvn.py +++ b/hyperion/torch/narchs/audio_feats_mvn.py @@ -2,10 +2,9 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ..layers import AudioFeatsFactory as AFF from ..layers import MeanVarianceNorm as MVN @@ -80,7 +79,7 @@ def forward(self, x, x_lengths=None): f = self.spec_augment(f, f_lengths) if self.mvn is not None: - f = self.mvn(f) + f = self.mvn(f, f_lengths) if self.spec_augment is not None and self.aug_after_mvn: f =
self.spec_augment(f, f_lengths) diff --git a/hyperion/torch/narchs/conformer_decoder_v1.py b/hyperion/torch/narchs/conformer_decoder_v1.py new file mode 100644 index 00000000..ef55d6c3 --- /dev/null +++ b/hyperion/torch/narchs/conformer_decoder_v1.py @@ -0,0 +1,724 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba, Nanxin Chen) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from ...utils.misc import filter_func_args +from ..layer_blocks import ConformerDecoderBlockV1 as DBlock +from ..layer_blocks import TransformerConv1dSubsampler as Conv1dSubsampler +from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler +from ..layers import ActivationFactory as AF +from ..layers import ConvPosEncoder, NoPosEncoder +from ..layers import NormLayer1dFactory as NLF +from ..layers import PosEncoder, RelPosEncoder +from ..utils import make_attn_mask_causal, scale_seq_lengths, seq_lengths_to_mask +from .net_arch import NetArch + + +class ConformerDecoderV1(NetArch): + """Conformer decoder mixing Transformer Decoder with Conformer Encoder Conv blocks + + This becomes a standard Transformer Decoder by setting conv_repeats=0, pos_enc_type='abs', ff_macaron=False. + + Attributes: + in_feats: input features dimension + d_model: encoder blocks feature dimension + num_heads: number of heads + num_blocks: number of self attn blocks + self_att_type: string in ['scaled-dot-prod-att-v1', 'local-scaled-dot-prod-att-v1', 'block-scaled-dot-prod-att-v1'] + self_att_context: maximum context range for local attention + cross_att_type: string in ['scaled-dot-prod-att-v1', 'local-scaled-dot-prod-att-v1', 'block-scaled-dot-prod-att-v1'] + conv_repeats: number of conv blocks in each conformer block + conv_kernel_sizes: kernel size for conv blocks + conv_strides: stride for depth-wise conv in the first conv block of each conformer block + ff_type: string in ['linear', 'conv1dx2', 'conv1d-linear'] + d_ff: dimension of middle layer in feed_forward block + ff_kernel_size: kernel size for convolutional versions of ff block + dropout_rate: dropout rate for ff and conv blocks + pos_dropout_rate: dropout rate for positional encoder + att_dropout_rate: dropout rate for attention block + in_layer_type: input layer block type in ['linear','conv2d-sub', 'embed', None] + pos_enc_type: type of positional encoder ['no', 'abs', 'rel', 'conv'] + + causal_pos_enc: if True, use causal positional encodings (when rel_pos_enc=True), it assumes + that query q_i only attents to key k_j when j<=i + hid_act: hidden activations in ff and input blocks + conv_norm_layer: norm layer constructor or str for conv block, + if None it uses BatchNorm1d + se_r: Squeeze-Excitation compression ratio, + if None it doesn't use Squeeze-Excitation + ff_macaron: if True, it uses macaron-net style ff layers, otherwise transformer style. 
+ red_lnorms: it True, use redundant LNorm layers at the output of the conformer blocks as + in the paper + concat_after: if True, if concats attention input and output and apply linear transform, i.e., + y = x + linear(concat(x, att(x))) + if False, y = x + att(x) + padding_idx: padding idx for embed layer + in_time_dim: time dimension in the input Tensor + out_time_dim: dimension that we want to be time in the output tensor + """ + + def __init__( + self, + num_classes, + d_model=256, + num_heads=4, + num_blocks=6, + self_att_type="scaled-dot-prod-v1", + att_context=25, + cross_att_type="scaled-dot-prod-v1", + conv_repeats=0, + conv_kernel_sizes=31, + conv_strides=1, + ff_type="linear", + d_ff=2048, + ff_kernel_size=1, + dropout_rate=0.1, + pos_dropout_rate=0.1, + att_dropout_rate=0.0, + in_layer_type="embed", + in_stride=4, + pos_enc_type="abs", + causal_pos_enc=False, + pos_kernel_size=128, + pos_num_groups=16, + hid_act="swish", + conv_norm_layer=None, + se_r=None, + ff_macaron=True, + red_lnorms=True, + concat_after=False, + padding_idx=-1, + in_time_dim=1, + src_time_dim=1, + out_time_dim=1, + in_feats=None, + with_output=True, + ): + super().__init__() + self.num_classes = num_classes + self.with_output = with_output + if in_feats is None: + in_feats = num_classes + self.in_feats = in_feats + self.d_model = d_model + self.num_heads = num_heads + self.num_blocks = num_blocks + + self.self_att_type = self_att_type + self.cross_att_type = cross_att_type + self.att_context = att_context + + self.conv_repeats = self._standarize_cblocks_param( + conv_repeats, num_blocks, "conv_repeats" + ) + self.conv_kernel_sizes = self._standarize_cblocks_param( + conv_kernel_sizes, num_blocks, "conv_kernel_sizes" + ) + self.conv_strides = self._standarize_cblocks_param( + conv_strides, num_blocks, "conv_strides" + ) + + self.ff_type = ff_type + self.d_ff = d_ff + self.ff_kernel_size = ff_kernel_size + self.dropout_rate = dropout_rate + self.pos_enc_type = pos_enc_type + self.causal_pos_enc = causal_pos_enc + self.att_dropout_rate = att_dropout_rate + self.pos_dropout_rate = pos_dropout_rate + self.in_layer_type = in_layer_type + self.in_stride = in_stride + self.se_r = se_r + self.ff_macaron = ff_macaron + self.red_lnorms = red_lnorms + self.concat_after = concat_after + self.padding_idx = padding_idx + self.in_time_dim = in_time_dim + self.src_time_dim = src_time_dim + self.out_time_dim = out_time_dim + self.hid_act = hid_act + self.pos_kernel_size = pos_kernel_size + self.pos_num_groups = pos_num_groups + + self.conv_norm_layer = conv_norm_layer + norm_groups = None + if conv_norm_layer == "group-norm": + norm_groups = min(d_model // 2, 32) + self._conv_norm_layer = NLF.create(conv_norm_layer, norm_groups) + + self._make_in_layer() + + blocks = [] + for i in range(num_blocks): + blocks.append( + DBlock( + d_model, + self_att_type, + cross_att_type, + num_heads, + self.conv_repeats[i], + self.conv_kernel_sizes[i], + self.conv_strides[i], + ff_type, + d_ff, + ff_kernel_size, + hid_act=hid_act, + dropout_rate=dropout_rate, + att_context=att_context, + att_dropout_rate=att_dropout_rate, + pos_enc_type=pos_enc_type, + causal_pos_enc=causal_pos_enc, + conv_norm_layer=self._conv_norm_layer, + se_r=se_r, + ff_macaron=ff_macaron, + out_lnorm=self.red_lnorms, + concat_after=concat_after, + ) + ) + + self.blocks = nn.ModuleList(blocks) + if not self.red_lnorms: + self.norm_out = nn.LayerNorm(d_model) + + if with_output: + self.output_layer = nn.Linear(d_model, num_classes) + + @staticmethod + def 
_standarize_cblocks_param(p, num_blocks, p_name): + if isinstance(p, int): + p = [p] * num_blocks + elif isinstance(p, list): + if len(p) == 1: + p = p * num_blocks + + assert len(p) == num_blocks, "len(%s)(%d)!=%d" % ( + p_name, + len(p), + num_blocks, + ) + else: + raise TypeError("wrong type for param {}={}".format(p_name, p)) + + return p + + def _make_in_layer(self): + in_feats = self.in_feats + d_model = self.d_model + dropout_rate = self.dropout_rate + if self.pos_enc_type == "no": + pos_enc = NoPosEncoder() + elif self.pos_enc_type == "rel": + pos_enc = RelPosEncoder(d_model, self.pos_dropout_rate) + elif self.pos_enc_type == "abs": + pos_enc = PosEncoder(d_model, self.pos_dropout_rate) + elif self.pos_enc_type == "conv": + pos_enc = ConvPosEncoder( + d_model, self.pos_kernel_size, self.pos_num_groups, self.hid_act + ) + else: + raise Exception("wrong pos-enc-type={}".format(self.pos_enc_type)) + + hid_act = AF.create(self.hid_act) + + if self.in_layer_type == "linear": + self.in_layer = nn.Sequential( + nn.Linear(in_feats, d_model), + nn.LayerNorm(d_model), + nn.Dropout(dropout_rate), + hid_act, + pos_enc, + ) + elif self.in_layer_type == "conv2d-sub": + self.in_layer = Conv2dSubsampler( + in_feats, + d_model, + hid_act, + self.in_stride, + pos_enc, + time_dim=self.in_time_dim, + ) + elif self.in_layer_type == "conv1d-sub": + self.in_layer = Conv1dSubsampler( + in_feats, + d_model, + hid_act, + self.in_stride, + pos_enc, + time_dim=self.in_time_dim, + ) + elif self.in_layer_type == "embed": + self.in_layer = nn.Sequential( + nn.Embedding(in_feats, d_model, padding_idx=self.padding_idx), pos_enc + ) + elif isinstance(self.in_layer_type, nn.Module): + self.in_layer = nn.Sequential(self.in_layer_type, pos_enc) + elif self.in_layer_type is None: + self.in_layer = pos_enc + else: + raise ValueError(f"unknown in_layer_type: {self.in_layer_type}") + + def _make_masks( + self, + max_in_length, + x_lengths, + x_mask, + max_src_length, + x_src_lengths, + x_src_mask, + causal_mask, + ): + if x_mask is None: + if x_lengths is not None: + x_mask = seq_lengths_to_mask(x_lengths, max_in_length, time_dim=1) + if causal_mask: + x_mask = make_attn_mask_causal(x_mask) + + if x_src_mask is None and x_src_lengths is not None: + x_src_mask = seq_lengths_to_mask(x_src_lengths, max_src_length, time_dim=1) + + return x_mask, x_src_mask + + def _forward_input(self, x, x_mask): + if isinstance(self.in_layer, (Conv2dSubsampler, Conv1dSubsampler)): + x, x_mask = self.in_layer(x, x_mask) + else: + if self.in_time_dim != 1: + x = x.transpose(1, self.in_time_dim).contiguous() + x = self.in_layer(x) + + return x, x_mask + + def forward( + self, + x, + x_src, + x_lengths=None, + x_src_lengths=None, + x_mask=None, + x_src_mask=None, + causal_mask=True, + ): + """Forward pass function + + Args: + x: input tensor with size=(batch, time_out, num_feats) or (batch, time_out) + x_src: source tensor with size=(batch, time_in, num_feats) + x_lengths: lengths of the input sequences. + x_src_lengths: lengths of the source sequences + x_mask: mask to indicate valid time steps for x (batch, time_out). + It overwrites the mask of x_lengths. + x_src_mask: mask to indicate valid time steps for x_src (batch, time_in). + It overwrites the mask of x_src_lengths. 
+ + Returns: + Tensor with output logits + Tensor with output lengths + """ + if self.src_time_dim != 1: + x_src = x_src.transpose(1, 2) + + max_in_length = x.size(self.in_time_dim) + max_src_length = x_src.size(1) + x_mask, x_src_mask = self._make_masks( + max_in_length, + x_lengths, + x_mask, + max_src_length, + x_src_lengths, + x_src_mask, + causal_mask, + ) + x, x_mask = self._forward_input(x, x_mask) + + if isinstance(x, tuple): + x, pos_emb = x + b_args = {"pos_emb": pos_emb} + else: + b_args = {} + + for i in range(len(self.blocks)): + x, x_mask = self.blocks[i]( + x, x_src, mask=x_mask, mask_src=x_src_mask, **b_args + ) + + if not self.red_lnorms: + x = self.norm_out(x) + + if self.with_output: + x = self.output_layer(x) + + if self.out_time_dim != 1: + x = x.transpose(1, self.out_time_dim) + + return x, x_lengths + + def forward_1step( + self, + x, + x_src, + x_lengths=None, + x_mask=None, + cache=None, + ): + """Forward pass function for a single decoding step + + Args: + x: input tensor with size=(batch, time, num_feats) + x_lengths: lengths of the input sequences. + x_mask: mask to indicate valid time steps for x (batch, time). + It overwrites the mask of x_lengths. + cache: list of per-block cached states from the previous decoding step. + + Returns: + Tensor with output logits for the last time step + List with the updated per-block cache + """ + max_in_length = x.size(self.in_time_dim) + if x_mask is None and x_lengths is not None: + x_mask = seq_lengths_to_mask(x_lengths, max_in_length, time_dim=1) + + if self.src_time_dim != 1: + x_src = x_src.transpose(1, 2) + + max_src_length = x_src.size(1) + x, x_mask = self._forward_input(x, x_mask) + + if isinstance(x, tuple): + x, pos_emb = x + b_args = {"pos_emb": pos_emb} + else: + b_args = {} + + if cache is None: + cache = [None] * len(self.blocks) + + next_cache = [] + for i in range(len(self.blocks)): + x, x_mask = self.blocks[i](x, x_src, mask=x_mask, cache=cache[i], **b_args) + next_cache.append(x) + + if not self.red_lnorms: + x = self.norm_out(x[:, -1]) + else: + x = x[:, -1] + + if self.with_output: + x = self.output_layer(x) + + return x, next_cache + + def get_config(self): + """Gets network config + Returns: + dictionary with config params + """ + config = { + "num_classes": self.num_classes, + "in_feats": self.in_feats, + "d_model": self.d_model, + "num_heads": self.num_heads, + "num_blocks": self.num_blocks, + "self_att_type": self.self_att_type, + "cross_att_type": self.cross_att_type, + "att_context": self.att_context, + "conv_repeats": self.conv_repeats, + "conv_kernel_sizes": self.conv_kernel_sizes, + "conv_strides": self.conv_strides, + "ff_type": self.ff_type, + "d_ff": self.d_ff, + "ff_kernel_size": self.ff_kernel_size, + "dropout_rate": self.dropout_rate, + "att_dropout_rate": self.att_dropout_rate, + "pos_dropout_rate": self.pos_dropout_rate, + "in_layer_type": self.in_layer_type, + "in_stride": self.in_stride, + "pos_enc_type": self.pos_enc_type, + "causal_pos_enc": self.causal_pos_enc, + "pos_kernel_size": self.pos_kernel_size, + "pos_num_groups": self.pos_num_groups, + "hid_act": self.hid_act, + "se_r": self.se_r, + "ff_macaron": self.ff_macaron, + "red_lnorms": self.red_lnorms, + "conv_norm_layer": self.conv_norm_layer, + "concat_after": self.concat_after, + "padding_idx": self.padding_idx, + "in_time_dim": self.in_time_dim, + "out_time_dim": self.out_time_dim, + "with_output": self.with_output, + } + + base_config = super().get_config() + return
dict(list(base_config.items()) + list(config.items())) + + def in_context(self): + return (self.att_context, self.att_context) + + def in_shape(self): + """Input shape for network + + Returns: + Tuple describing input shape + """ + if self.in_time_dim == 1: + return (None, None, self.in_feats) + else: + return (None, self.in_feats, None) + + def out_shape(self, in_shape=None): + """Infers the network output shape given the input shape + + Args: + in_shape: input shape tuple + + Returns: + Tuple with the output shape + """ + if in_shape is None: + out_t = None + batch_size = None + else: + assert len(in_shape) == 3 + batch_size = in_shape[0] + in_t = in_shape[self.in_time_dim] + if in_t is None: + out_t = None + else: + if isinstance(self.in_layer, Conv2dSubsampler): + # out_t = in_t//4 + out_t = ((in_t - 1) // 2 - 1) // 2 + else: + out_t = in_t + + if self.out_time_dim == 1: + return (batch_size, out_t, self.d_model) + else: + return (batch_size, self.d_model, out_t) + + @staticmethod + def filter_args(**kwargs): + """Filters arguments corresponding to ConformerDecoder + from args dictionary + + Args: + kwargs: args dictionary + + Returns: + args dictionary + """ + args = filter_func_args(ConformerDecoderV1.__init__, kwargs) + return args + + @staticmethod + def add_class_args(parser, prefix=None, skip=set()): + """Adds Conformer config parameters to argparser + + Args: + parser: argparse object + prefix: prefix string to add to the argument names + """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + if "in_feats" not in skip: + parser.add_argument( + "--in-feats", type=int, default=None, help=("input feature dimension") + ) + + parser.add_argument( + "--num-blocks", default=6, type=int, help=("number of transformer blocks") + ) + + parser.add_argument( + "--d-model", default=512, type=int, help=("encoder layer sizes") + ) + + parser.add_argument( + "--num-heads", + default=4, + type=int, + help=("number of heads in self-attention layers"), + ) + + parser.add_argument( + "--self-att-type", + default="scaled-dot-prod-v1", + choices=[ + "scaled-dot-prod-v1", + "local-scaled-dot-prod-v1", + "block-scaled-dot-prod-v1", + ], + help=("type of self-attention"), + ) + + parser.add_argument( + "--cross-att-type", + default="scaled-dot-prod-v1", + choices=[ + "scaled-dot-prod-v1", + "local-scaled-dot-prod-v1", + "block-scaled-dot-prod-v1", + ], + help=("type of cross-attention"), + ) + + parser.add_argument( + "--att-context", + default=25, + type=int, + help=("context size when using local attention"), + ) + + parser.add_argument( + "--conv-repeats", + default=[0], + type=int, + nargs="+", + help=("number of conv blocks in each conformer block"), + ) + + parser.add_argument( + "--conv-kernel-sizes", + default=[31], + nargs="+", + type=int, + help=("kernel sizes for the depth-wise convs of each conformer block"), + ) + + parser.add_argument( + "--conv-strides", + default=[1], + nargs="+", + type=int, + help=("resb-blocks strides for each encoder stage"), + ) + + parser.add_argument( + "--ff-type", + default="linear", + choices=["linear", "conv1dx2", "conv1dlinear"], + help=("type of feed forward layers in transformer block"), + ) + + parser.add_argument( + "--d-ff", + default=2048, + type=int, + help=("size of middle layer in feed forward block"), + ) + + parser.add_argument( + "--ff-kernel-size", + default=3, + type=int, + help=("kernel size in convolutional feed forward block"), + ) + + parser.add_argument("--hid-act", default="swish", help="hidden activation") +
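+ # Usage sketch (illustrative assumption, not asserted by this patch): when + # add_class_args is called with, e.g., prefix="decoder", jsonargparse exposes + # the options above as --decoder.num-blocks, --decoder.d-model, etc., via the + # ActionParser registered at the end of this method; the prefix name "decoder" + # here is only an example.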
+ parser.add_argument( + "--pos-dropout-rate", + default=0.1, + type=float, + help="positional encoder dropout", + ) + parser.add_argument( + "--att-dropout-rate", default=0, type=float, help="self-att dropout" + ) + parser.add_argument( + "--dropout-rate", default=0.1, type=float, help="feed-forward layer dropout" + ) + + parser.add_argument( + "--in-layer-type", + default="linear", + choices=["embed", "linear", "conv2d-sub", "conv1d-sub"], + help=("type of input layer"), + ) + + parser.add_argument( + "--in-stride", + default=4, + type=int, + choices=[1, 2, 4], + help="stride of conformer input layer", + ) + + parser.add_argument( + "--pos-enc-type", + default="rel", + choices=["no", "rel", "abs", "conv"], + help=("type of positional encoder"), + ) + + parser.add_argument( + "--causal-pos-enc", + default=False, + action=ActionYesNo, + help="relative positional encodings are zero when attending to the future", + ) + parser.add_argument( + "--pos-kernel-size", + default=128, + type=int, + help="kernel size for conv positional encoder", + ) + parser.add_argument( + "--pos-num-groups", + default=16, + type=int, + help="number of conv groups for conv positional encoder", + ) + + parser.add_argument( + "--conv-norm-layer", + default=None, + choices=[ + "batch-norm", + "group-norm", + "instance-norm", + "instance-norm-affine", + "layer-norm", + ], + help="type of normalization layer for conv block in conformer", + ) + + parser.add_argument( + "--se-r", + default=None, + type=int, + help=("squeeze-excitation compression ratio"), + ) + + parser.add_argument( + "--ff-macaron", + default=True, + action=ActionYesNo, + help="do not use macaron style ff layers ", + ) + + parser.add_argument( + "--red-lnorms", + default=True, + action=ActionYesNo, + help="use redundant Lnorm at conformer blocks' outputs", + ) + + parser.add_argument( + "--concat-after", + default=False, + action=ActionYesNo, + help="concatenate attention input and output instead of adding", + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index 97cb6d5b..54c2f400 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -3,13 +3,13 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ...utils.misc import filter_func_args from ..layer_blocks import ConformerEncoderBlockV1 as EBlock +from ..layer_blocks import TransformerConv1dSubsampler as Conv1dSubsampler from ..layer_blocks import TransformerConv2dSubsampler as Conv2dSubsampler from ..layers import ActivationFactory as AF from ..layers import ConvPosEncoder, NoPosEncoder @@ -90,6 +90,7 @@ def __init__( pos_dropout_rate=0.1, att_dropout_rate=0.0, in_layer_type="conv2d-sub", + in_stride=4, pos_enc_type="rel", causal_pos_enc=False, pos_kernel_size=128, @@ -104,7 +105,6 @@ def __init__( in_time_dim=1, out_time_dim=1, ): - super().__init__() self.in_feats = in_feats self.d_model = d_model @@ -115,11 +115,14 @@ def __init__( self.att_context = att_context self.conv_repeats = self._standarize_cblocks_param( - conv_repeats, num_blocks, "conv_repeats") + conv_repeats, num_blocks, "conv_repeats" + ) self.conv_kernel_sizes = self._standarize_cblocks_param( - conv_kernel_sizes, num_blocks, 
"conv_kernel_sizes") + conv_kernel_sizes, num_blocks, "conv_kernel_sizes" + ) self.conv_strides = self._standarize_cblocks_param( - conv_strides, num_blocks, "conv_strides") + conv_strides, num_blocks, "conv_strides" + ) self.ff_type = ff_type self.d_ff = d_ff @@ -130,6 +133,7 @@ def __init__( self.att_dropout_rate = att_dropout_rate self.pos_dropout_rate = pos_dropout_rate self.in_layer_type = in_layer_type + self.in_stride = in_stride self.se_r = se_r self.ff_macaron = ff_macaron self.red_lnorms = red_lnorms @@ -173,7 +177,8 @@ def __init__( ff_macaron=ff_macaron, out_lnorm=self.red_lnorms, concat_after=concat_after, - )) + ) + ) self.blocks = nn.ModuleList(blocks) if not self.red_lnorms: @@ -198,7 +203,6 @@ def _standarize_cblocks_param(p, num_blocks, p_name): return p def _make_in_layer(self): - in_feats = self.in_feats d_model = self.d_model dropout_rate = self.dropout_rate @@ -209,8 +213,9 @@ def _make_in_layer(self): elif self.pos_enc_type == "abs": pos_enc = PosEncoder(d_model, self.pos_dropout_rate) elif self.pos_enc_type == "conv": - pos_enc = ConvPosEncoder(d_model, self.pos_kernel_size, - self.pos_num_groups, self.hid_act) + pos_enc = ConvPosEncoder( + d_model, self.pos_kernel_size, self.pos_num_groups, self.hid_act + ) else: raise Exception("wrong pos-enc-type={}".format(self.pos_enc_type)) @@ -225,28 +230,53 @@ def _make_in_layer(self): pos_enc, ) elif self.in_layer_type == "conv2d-sub": - self.in_layer = Conv2dSubsampler(in_feats, - d_model, - hid_act, - pos_enc, - time_dim=self.in_time_dim) + self.in_layer = Conv2dSubsampler( + in_feats, + d_model, + hid_act, + self.in_stride, + pos_enc, + time_dim=self.in_time_dim, + ) + elif self.in_layer_type == "conv1d-sub": + self.in_layer = Conv1dSubsampler( + in_feats, + d_model, + hid_act, + self.in_stride, + pos_enc, + time_dim=self.in_time_dim, + ) elif self.in_layer_type == "embed": self.in_layer = nn.Sequential( - nn.Embedding(in_feats, d_model, padding_idx=self.padding_idx), - pos_enc) + nn.Embedding(in_feats, d_model, padding_idx=self.padding_idx), pos_enc + ) elif isinstance(self.in_layer_type, nn.Module): self.in_layer = nn.Sequential(self.in_layer_type, pos_enc) elif self.in_layer_type is None: self.in_layer = pos_enc else: - raise ValueError("unknown in_layer_type: " + self.in_layer_type) - - def forward(self, - x, - x_lengths=None, - x_mask=None, - return_mask=False, - target_shape=None): + raise ValueError(f"unknown in_layer_type: {self.in_layer_type}") + + def _make_masks(self, max_in_length, x_lengths=None, x_mask=None): + if x_mask is None and x_lengths is not None: + x_mask = seq_lengths_to_mask(x_lengths, max_in_length, time_dim=1) + + return x_mask + + def _forward_input(self, x, x_mask): + if isinstance(self.in_layer, (Conv2dSubsampler, Conv1dSubsampler)): + x, x_mask = self.in_layer(x, x_mask) + else: + if self.in_time_dim != 1: + x = x.transpose(1, self.in_time_dim).contiguous() + x = self.in_layer(x) + + return x, x_mask + + def forward( + self, x, x_lengths=None, x_mask=None, return_mask=False, target_shape=None + ): """Forward pass function Args: @@ -263,16 +293,8 @@ def forward(self, Tensor with mask if return_mask is True """ max_in_length = x.size(self.in_time_dim) - if x_mask is None and x_lengths is not None: - x_mask = seq_lengths_to_mask(x_lengths, max_in_length, time_dim=1) - - if isinstance(self.in_layer, Conv2dSubsampler): - x, x_mask = self.in_layer(x, x_mask) - else: - if self.in_time_dim != 1: - x = x.transpose(1, self.in_time_dim).contiguous() - x = self.in_layer(x) - + x_mask = 
self._make_masks(max_in_length, x_lengths, x_mask) + x, x_mask = self._forward_input(x, x_mask) if isinstance(x, tuple): x, pos_emb = x b_args = {"pos_emb": pos_emb} @@ -318,6 +340,7 @@ def get_config(self): "att_dropout_rate": self.att_dropout_rate, "pos_dropout_rate": self.pos_dropout_rate, "in_layer_type": self.in_layer_type, + "in_stride": self.in_stride, "pos_enc_type": self.pos_enc_type, "causal_pos_enc": self.causal_pos_enc, "pos_kernel_size": self.pos_kernel_size, @@ -382,7 +405,7 @@ def out_shape(self, in_shape=None): @staticmethod def filter_args(**kwargs): - """Filters arguments correspondin to TransformerXVector + """Filters arguments corresponding to Conformer Encoder from args dictionary Args: @@ -407,20 +430,17 @@ def add_class_args(parser, prefix=None, skip=set()): parser = ArgumentParser(prog="") if "in_feats" not in skip: - parser.add_argument("--in-feats", - type=int, - default=80, - help=("input feature dimension")) + parser.add_argument( + "--in-feats", type=int, default=80, help=("input feature dimension") + ) - parser.add_argument("--num-blocks", - default=6, - type=int, - help=("number of tranformer blocks")) + parser.add_argument( + "--num-blocks", default=6, type=int, help=("number of transformer blocks") + ) - parser.add_argument("--d-model", - default=512, - type=int, - help=("encoder layer sizes")) + parser.add_argument( + "--d-model", default=512, type=int, help=("encoder layer sizes") + ) parser.add_argument( "--num-heads", @@ -433,8 +453,9 @@ def add_class_args(parser, prefix=None, skip=set()): "--att-type", default="scaled-dot-prod-v1", choices=[ - "scaled-dot-prod-v1", "local-scaled-dot-prod-v1", - "block-scaled-dot-prod-v1" + "scaled-dot-prod-v1", + "local-scaled-dot-prod-v1", + "block-scaled-dot-prod-v1", ], help=("type of self-attention"), ) @@ -459,9 +480,7 @@ def add_class_args(parser, prefix=None, skip=set()): default=[31], nargs="+", type=int, - help=( - "kernels sizes for the depth-wise convs of each conformer block" - ), + help=("kernel sizes for the depth-wise convs of each conformer block"), ) parser.add_argument( @@ -493,9 +512,7 @@ def add_class_args(parser, prefix=None, skip=set()): help=("kernel size in convolutional feed forward block"), ) - parser.add_argument("--hid-act", - default="swish", - help="hidden activation") + parser.add_argument("--hid-act", default="swish", help="hidden activation") parser.add_argument( "--pos-dropout-rate", @@ -503,22 +520,28 @@ def add_class_args(parser, prefix=None, skip=set()): help="positional encoder dropout", ) - parser.add_argument("--att-dropout-rate", - default=0, - type=float, - help="self-att dropout") - parser.add_argument("--dropout-rate", - default=0.1, - type=float, - help="feed-forward layer dropout") + parser.add_argument( + "--att-dropout-rate", default=0, type=float, help="self-att dropout" + ) + parser.add_argument( + "--dropout-rate", default=0.1, type=float, help="feed-forward layer dropout" + ) parser.add_argument( "--in-layer-type", default="linear", - choices=["linear", "conv2d-sub"], + choices=["linear", "conv2d-sub", "conv1d-sub"], help=("type of input layer"), ) + parser.add_argument( + "--in-stride", + default=4, + type=int, + choices=[1, 2, 4], + help="stride of conformer input layer", + ) + parser.add_argument( "--pos-enc-type", default="rel", @@ -530,8 +553,7 @@ def add_class_args(parser, prefix=None, skip=set()): "--causal-pos-enc", default=False, action=ActionYesNo, - help= - "relative positional encodings are zero when attending to the future", + help="relative positional
encodings are zero when attending to the future", ) parser.add_argument( "--pos-kernel-size", @@ -588,5 +610,4 @@ def add_class_args(parser, prefix=None, skip=set()): ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/feat_fuser_mvn.py b/hyperion/torch/narchs/feat_fuser_mvn.py new file mode 100644 index 00000000..17b396bc --- /dev/null +++ b/hyperion/torch/narchs/feat_fuser_mvn.py @@ -0,0 +1,107 @@ +""" + Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +from typing import Dict, Optional + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser + +from ..layers import FeatFuserFactory as FFF +from ..layers import MeanVarianceNorm as MVN +from ..layers import SpecAugment +from .net_arch import NetArch + + +class FeatFuserMVN(NetArch): + """FeatureFuser for Wav2Vec style hidden features + ST-MVN + Optional SpecAugment + """ + + def __init__( + self, + feat_fuser: Dict, + mvn: Optional[Dict] = None, + spec_augment: Optional[Dict] = None, + trans: bool = False, + aug_after_mvn: bool = False, + ): + super().__init__() + + feat_fuser = FFF.filter_args(**feat_fuser) + self.feat_fuser_cfg = feat_fuser + self.feat_fuser = FFF.create(**feat_fuser) + + self.mvn = None + self.mvn_cfg = None + if mvn is not None: + mvn = MVN.filter_args(**mvn) + self.mvn_cfg = mvn + if ("norm_mean" in mvn and mvn["norm_mean"]) or ( + "norm_var" in mvn and mvn["norm_var"] + ): + self.mvn = MVN(**mvn) + + self.spec_augment = None + self.spec_augment_cfg = None + if spec_augment is not None: + spec_augment = SpecAugment.filter_args(**spec_augment) + self.spec_augment_cfg = spec_augment + self.spec_augment = SpecAugment(**spec_augment) + + self.trans = trans + self.aug_after_mvn = aug_after_mvn + + def forward(self, feats, feats_lengths=None): + feats = self.feat_fuser(feats) + if self.spec_augment is not None and not self.aug_after_mvn: + feats = self.spec_augment(feats, feats_lengths) + + if self.mvn is not None: + feats = self.mvn(feats, feats_lengths) + + if self.spec_augment is not None and self.aug_after_mvn: + feats = self.spec_augment(feats, feats_lengths) + + if self.trans: + feats = feats.transpose(1, 2).contiguous() + + return feats, feats_lengths + + def get_config(self): + config = { + "feat_fuser": self.feat_fuser_cfg, + "mvn": self.mvn_cfg, + "spec_augment": self.spec_augment_cfg, + "trans": self.trans, + "aug_after_mvn": self.aug_after_mvn, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @staticmethod + def filter_args(**kwargs): + valid_args = ("feat_fuser", "mvn", "spec_augment", "trans", "aug_after_mvn") + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + FFF.add_class_args(parser, prefix="feat_fuser") + MVN.add_class_args(parser, prefix="mvn") + SpecAugment.add_class_args(parser, prefix="spec_augment") + parser.add_argument( + "--aug-after-mvn", + default=False, + action="store_true", + help=("do spec augment after st-mvn, instead of before"), + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/torch_model.py
b/hyperion/torch/torch_model.py index e7020e1d..912c2640 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -2,15 +2,17 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging from collections import OrderedDict as ODict from copy import deepcopy -from enum import Enum -from typing import Optional from pathlib import Path +from typing import Callable, Dict, Optional, Union import torch import torch.nn as nn +from ..utils.misc import PathLike + class TorchModel(nn.Module): """Base class for all Pytorch Models and NNet architectures""" @@ -45,6 +47,49 @@ def non_trainable_parameters(self, recurse: bool = True): if not param.requires_grad: yield param + def trainable_named_parameters(self, recurse: bool = True): + for name, param in self.named_parameters(recurse=recurse): + if param.requires_grad: + yield name, param + + def non_trainable_named_parameters(self, recurse: bool = True): + for name, param in self.named_parameters(recurse=recurse): + if not param.requires_grad: + yield name, param + + def parameter_summary(self, verbose: bool = False): + trainable_params = sum(p.numel() for p in self.trainable_parameters()) + non_trainable_params = sum(p.numel() for p in self.non_trainable_parameters()) + buffer_params = sum(p.numel() for p in self.buffers()) + non_trainable_total = non_trainable_params + buffer_params + total_params = trainable_params + non_trainable_total + if verbose: + logging.info( + "total-params=%d, trainable-params=%d, non-trainable-params+buffers=%d, non-trainable-params=%d, buffer-params=%d", + total_params, + trainable_params, + non_trainable_total, + non_trainable_params, + buffer_params, + ) + return ( + total_params, + trainable_params, + non_trainable_total, + non_trainable_params, + buffer_params, + ) + + def print_parameter_list(self): + for n, p in self.trainable_named_parameters(): + logging.info("trainable: %s", n) + + for n, p in self.non_trainable_named_parameters(): + logging.info("non_trainable: %s", n) + + for n, p in self.named_buffers(): + logging.info("buffers: %s", n) + def has_param_groups(self): return False @@ -65,7 +110,7 @@ def change_dropouts(self, dropout_rate): if isinstance(module, nn.modules.dropout._DropoutNd): module.p = dropout_rate if isinstance(module, nn.RNNBase): - module.dropout = dropout + module.dropout = dropout_rate if hasattr(self, "dropout_rate"): assert dropout_rate == 0 or self.dropout_rate > 0 @@ -184,7 +229,73 @@ def _fix_cfg_compatibility(class_obj, cfg): return cfg @staticmethod - def auto_load(file_path, extra_objs={}, map_location=None): + def _is_hf_path(file_path: Path): + # hf path can have only 2 dir levels + return len(file_path.parents) == 2 + + @staticmethod + def _get_from_hf( + file_path: Path, cache_dir: PathLike = None, local_dir: PathLike = None + ): + from huggingface_hub import hf_hub_download + + return hf_hub_download( + repo_id=str(file_path.parent), + filename=file_path.name, + cache_dir=cache_dir, + local_dir=local_dir, + ) + + @staticmethod + def _try_to_get_from_hf( + file_path: Path, cache_dir: PathLike = None, local_dir: PathLike = None + ): + if str(file_path)[:3] == "hf:": + # hf: prefix indicates to download from hub + file_path = Path(str(file_path)[3:]) + assert TorchModel._is_hf_path( + file_path + ), f"{file_path} is not a valid HF path" + file_path = TorchModel._get_from_hf( + file_path, cache_dir=cache_dir, local_dir=local_dir + ) + return Path(file_path) + elif not
file_path.is_file(): + # if no prefix but file not in local dir try to get it from hub + if not TorchModel._is_hf_path(file_path): + return file_path + + try: + file_path = TorchModel._get_from_hf(file_path) + return Path(file_path) + except: + return file_path + + else: + # file is local + return file_path + + @staticmethod + def auto_load( + file_path: PathLike, + extra_objs: dict = {}, + map_location: Optional[ + Union[ + Callable[[torch.Tensor, str], torch.Tensor], + torch.device, + str, + Dict[str, str], + ] + ] = None, + cache_dir: PathLike = None, + local_dir: PathLike = None, + ): + file_path = Path(file_path) + file_path = TorchModel._try_to_get_from_hf( + file_path, cache_dir=cache_dir, local_dir=local_dir + ) + + assert file_path.is_file(), f"TorchModel file: {file_path} not found" if map_location is None: map_location = torch.device("cpu") @@ -193,7 +304,6 @@ def auto_load(file_path, extra_objs={}, map_location=None): cfg = model_data["model_cfg"] class_name = cfg["class_name"] del cfg["class_name"] - print(TorchModel.registry) if class_name in TorchModel.registry: class_obj = TorchModel.registry[class_name] elif class_name in extra_objs: diff --git a/hyperion/torch/tpm/hf/hf_wav2vec_base.py b/hyperion/torch/tpm/hf/hf_wav2vec_base.py index e0bcee1c..2cb95a53 100644 --- a/hyperion/torch/tpm/hf/hf_wav2vec_base.py +++ b/hyperion/torch/tpm/hf/hf_wav2vec_base.py @@ -245,7 +245,7 @@ def change_config( self, override_dropouts: bool, override_spec_augment: bool, - override_lora: bool, + override_lora: bool = False, feat_extract_lr: Optional[float] = None, encoder_lr: Optional[float] = None, use_lora: bool = False, @@ -538,22 +538,22 @@ def forward_impl( x, x_mask = self._preprocess(x, x_lengths) # if ddp_get_rank() == 0: # lora_layer = self.hf_model.encoder.layers[0].attention.v_proj - # print( - # "lora\nw=", - # lora_layer.weight[:3, :3], - # "\na=", - # lora_layer.lora_A[:3, :3], - # "\nb=", - # lora_layer.lora_B[:3, :3], - # "\n", - # "merged=", - # lora_layer.merged, - # "training=", - # lora_layer.training, - # flush=True, - # ) - # assert self.training == lora_layer.training - # assert self.training == (not lora_layer.merged) + # print( + # "lora\nw=", + # lora_layer.weight[:3, :3], + # "\na=", + # lora_layer.lora_A[:3, :3], + # "\nb=", + # lora_layer.lora_B[:3, :3], + # "\n", + # "merged=", + # lora_layer.merged, + # "training=", + # lora_layer.training, + # flush=True, + # ) + # assert self.training == lora_layer.training + # assert self.training == (not lora_layer.merged) output = self.hf_model( x, x_mask, @@ -760,7 +760,7 @@ def filter_args(**kwargs): @staticmethod def _add_lr_args(parser): parser.add_argument( - "--feat-extractor-lr", + "--feat-extract-lr", default=None, type=float, help=( diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 5e41747c..7260595c 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -11,13 +11,12 @@ from enum import Enum from pathlib import Path -from fairscale.optim.grad_scaler import ShardedGradScaler -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.distributed as dist import torch.nn as nn +from fairscale.optim.grad_scaler import ShardedGradScaler +from jsonargparse import ActionParser, ArgumentParser from torch.optim.swa_utils import SWALR, AveragedModel from ...utils.misc import filter_func_args @@ -108,7 +107,6 @@ def __init__( input_key="x", target_key="class_id", ): - 
self.model = model self.loss = loss self.epochs = epochs @@ -139,6 +137,13 @@ def __init__( self.amp_args = {} self.input_key = input_key self.target_key = target_key + self.ddp = ddp + self.ddp_type = ddp_type + self.rank = 0 + self.world_size = 1 + if ddp: + self.rank = dist.get_rank() + self.world_size = dist.get_world_size() self.set_train_mode() @@ -147,13 +152,7 @@ def __init__( if loss is not None: self.loss.to(device) - self.ddp = ddp - self.ddp_type = ddp_type - self.rank = 0 - self.world_size = 1 if ddp: - self.rank = dist.get_rank() - self.world_size = dist.get_world_size() if ddp_type == DDPType.DDP or ddp_type == DDPType.OSS_DDP: self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) if self.rank == 0: @@ -288,6 +287,9 @@ def fit(self, train_data, val_data=None): def set_train_mode(self): self.model.set_train_mode(self.train_mode) + if self.rank == 0: + self.model.parameter_summary(verbose=True) + self.model.print_parameter_list() def train_epoch(self, data_loader): """Training epoch loop @@ -465,6 +467,20 @@ def _get_lr(self): lrs = [param_group["lr"] for param_group in self.optimizer.param_groups] return max(lrs) + def _get_lrs(self): + """Returns the current learning rates of all param groups to show in the loggers""" + lrs = [param_group["lr"] for param_group in self.optimizer.param_groups] + all_eq = True + for lr in lrs: + if lr != lrs[0]: + all_eq = False + break + + if all_eq: + return {"lr": lrs[0]} + + return {f"lr_{i}": lr for i, lr in enumerate(lrs)} + def _compute_grad_acc_steps(self, data_loader): if self.eff_batch_size is None: return @@ -505,6 +521,7 @@ def checkpoint(self, logs=None): Args: logs: logs containing the current value of the metrics. """ + self.model.train() checkpoint = { "epoch": self.cur_epoch, "rng_state": torch.get_rng_state(), @@ -545,6 +562,7 @@ def save_checkpoint(self, logs=None): if self.rank != 0: return + checkpoint = self.checkpoint(logs) file_path = "%s/model_ep%04d.pth" % (self.exp_path, self.cur_epoch) @@ -629,32 +647,34 @@ def load_last_checkpoint(self): return None + @staticmethod + def get_augs_keys(batch, base_key, skip={}): + keys = [] + if base_key in batch and base_key not in skip: + keys.append(base_key) + + aug_idx_1 = 0 + while True: + aug_idx_2 = 0 + while True: + aug_key = f"{base_key}_aug_{aug_idx_1}_{aug_idx_2}" + if aug_key in batch: + if aug_key not in skip: + keys.append(aug_key) + aug_idx_2 += 1 + else: + break + + if aug_idx_2 == 0: + break + + aug_idx_1 += 1 + + return keys + @staticmethod def filter_args(**kwargs): args = filter_func_args(TorchTrainer.__init__, kwargs) - - # valid_args = ( - # "grad_acc_steps", - # "eff_batch_size", - # "epochs", - # "log_interval", - # "use_amp", - # "ddp_type", - # "grad_clip", - # "grad_clip_norm", - # "swa_start", - # "swa_lr", - # "swa_anneal_epochs", - # "exp_path", - # "optim", - # "lrsched", - # "cpu_offload", - # "use_tensorboard", - # "use_wandb", - # "wandb", - # "train_mode", - # ) - # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) return args @staticmethod diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index a59cbe14..aedd5be0 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -81,7 +81,6 @@ def __init__( input_key="x", target_key="class_id", ): - if loss is None: loss = nn.CrossEntropyLoss() @@ -101,38 +100,53 @@ def train_epoch(self, data_loader): metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() 
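+ # Batch layout sketch (illustrative assumption): besides the base input key "x", + # a batch may carry augmented views named "x_aug_0_0", "x_aug_0_1", ..., which is + # the naming scheme get_augs_keys() scans for; each such view then contributes one + # forward/backward pass to the gradient accumulated in the loop below, with the + # loss rescaled by grad_acc_steps times the number of views.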
self.model.train() + for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) + # try: + # l1 = self.model.hf_feats.hf_model.encoder.layers[0].attention.v_proj + # # print(f"lora train {l1.training}") + # print(f"loraA {l1.lora_A}") + # print(f"loraB {l1.lora_B}", flush=True) + # except: + # pass + if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() - x, target = tensors_subset(data, batch_keys, self.device) - batch_size = x.size(0) - with amp.autocast(enabled=self.use_amp): - output = self.model(x, y=target) - loss = self.loss(output, target).mean() / self.grad_acc_steps - - if self.use_amp: - self.grad_scaler.scale(loss).backward() - else: - loss.backward() + input_keys = self.get_augs_keys(data, self.input_key) + loss_scale = self.grad_acc_steps * len(input_keys) + for aug_key in input_keys: + batch_keys = [aug_key, self.target_key] + x, target = tensors_subset(data, batch_keys, self.device) + batch_size = x.size(0) + with amp.autocast(enabled=self.use_amp): + output = self.model(x, y=target) + loss = self.loss(output, target) / loss_scale + + if self.use_amp: + self.grad_scaler.scale(loss).backward() + else: + loss.backward() if (batch + 1) % self.grad_acc_steps == 0: if self.lr_scheduler is not None and not self.in_swa: self.lr_scheduler.on_opt_step() self.update_model() - batch_metrics["loss"] = loss.item() * self.grad_acc_steps + batch_metrics["loss"] = loss.item() * loss_scale for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 0f6ccd9b..6d00806a 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -81,7 +81,6 @@ def __init__( input_key="x", target_key="class_id", ): - super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) self.feat_extractor = feat_extractor @@ -131,12 +130,14 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): diff --git a/hyperion/torch/utils/__init__.py b/hyperion/torch/utils/__init__.py index 0fee1bdb..610a43e9 100644 --- a/hyperion/torch/utils/__init__.py +++ b/hyperion/torch/utils/__init__.py @@ -6,9 +6,14 @@ from .collation import collate_seq_1d, collate_seq_2d, collate_seq_nd from .data_parallel import TorchDataParallel from .ddp import FairFullyShardedDDP, FairShardedDDP, TorchDDP -from .devices import (open_device, tensors_subset, tensors_to_cpu, - tensors_to_device, tensors_to_numpy) +from .devices import ( + open_device, + tensors_subset, + tensors_to_cpu, + tensors_to_device, + tensors_to_numpy, +) from .eval_utils import eval_nnet_by_chunks, eval_nnet_overlap_add -from .masking import 
scale_seq_lengths, seq_lengths_to_mask +from .masking import make_attn_mask_causal, scale_seq_lengths, seq_lengths_to_mask from .metric_acc import MetricAcc from .vad_utils import remove_silence diff --git a/hyperion/torch/utils/masking.py b/hyperion/torch/utils/masking.py index 934b4b90..c7095b31 100644 --- a/hyperion/torch/utils/masking.py +++ b/hyperion/torch/utils/masking.py @@ -20,7 +20,9 @@ def scale_seq_lengths(lengths, max_out_length, max_in_length=None): return torch.div(lengths * max_out_length, max_in_length, rounding_mode="floor") -def seq_lengths_to_mask(lengths, max_length=None, dtype=None, time_dim=1): +def seq_lengths_to_mask( + lengths, max_length=None, dtype=None, time_dim=1, none_if_all_max=False +): """Creates a binary masks indicating the valid values in a sequence. Args: @@ -43,6 +45,10 @@ def seq_lengths_to_mask(lengths, max_length=None, dtype=None, time_dim=1): if max_length is None: max_length = lengths.max() + + if none_if_all_max and torch.all(lengths == max_length): + return None + idx = torch.arange(max_length, dtype=lengths.dtype, device=lengths.device) # compute mask shape=(batch, max_length) @@ -60,3 +66,16 @@ def seq_lengths_to_mask(lengths, max_length=None, dtype=None, time_dim=1): mask = mask.to(dtype) return mask + + +def make_attn_mask_causal(mask: torch.Tensor): + """Make causal mask for decoder self-attention.""" + size = mask.size(-1) + causal_mask = torch.ones(size, size, device=mask.device, dtype=torch.bool) + torch.tril(causal_mask, out=causal_mask) + return mask & causal_mask + + +def make_dec_causal_att_mask(y: torch.Tensor, padding_idx: int): + mask = (y != padding_idx).unsqueeze(-2) + return make_attn_mask_causal(mask) diff --git a/hyperion/utils/misc.py b/hyperion/utils/misc.py index 6fb7d24b..6afd4a88 100644 --- a/hyperion/utils/misc.py +++ b/hyperion/utils/misc.py @@ -10,7 +10,7 @@ import numpy as np -PathLike = TypeVar("PathLike", str, Path, None) +PathLike = TypeVar("PathLike", str, Path, type(None)) def generate_data(g): @@ -77,9 +77,8 @@ def energy_vad(P): def compute_snr(x, n, axis=-1): - - P_x = 10 * np.log10(np.mean(x ** 2, axis=axis)) - P_n = 10 * np.log10(np.mean(n ** 2, axis=axis)) + P_x = 10 * np.log10(np.mean(x**2, axis=axis)) + P_n = 10 * np.log10(np.mean(n**2, axis=axis)) return P_x - P_n diff --git a/hyperion/utils/scp_list.py b/hyperion/utils/scp_list.py index 070e4f53..3d8b5e9d 100644 --- a/hyperion/utils/scp_list.py +++ b/hyperion/utils/scp_list.py @@ -36,7 +36,7 @@ def __init__(self, key, file_path, offset=None, range_spec=None): def validate(self): """Validates the attributes of the SCPList object.""" self.key = list2ndarray(self.key) - self.file_path = list2ndarray(self.file_path, dtype=np.object) + self.file_path = list2ndarray(self.file_path, dtype=object) assert len(self.key) == len(self.file_path) if self.offset is not None: if isinstance(self.offset, list): From c977186e3729441dcb52bf6a874d49a9d90ae338 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 3 Nov 2023 21:27:02 -0400 Subject: [PATCH 117/154] added feature fuser to hf w2v xvector --- ...v2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml | 18 +++- ...wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml | 18 +++- .../wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml | 18 +++- .../wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml | 18 +++- .../wavlmbaseplus_ecapatdnn512x3_v2.0.yaml | 18 +++- .../wavlmlarge12l_ecapatdnn512x3_v2.0.yaml | 18 +++- .../conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml | 18 +++- hyperion/bin/extract_wav2vec2xvectors.py | 7 +- 
.../generate_adv_attacks_xvector_classif.py | 8 +- hyperion/torch/layers/mvn.py | 12 +-- hyperion/torch/lr_schedulers/lr_scheduler.py | 4 +- hyperion/torch/lr_schedulers/triangular_lr.py | 6 ++ .../hf_hubert2conformer_v1_xvector.py | 7 +- .../hf_hubert2resnet1d_xvector.py | 9 +- .../hf_wav2vec2conformer_v1_xvector.py | 7 +- .../hf_wav2vec2resnet1d_xvector.py | 18 ++-- .../models/wav2xvectors/hf_wav2xvector.py | 99 ++++++++++++++----- .../hf_wavlm2conformer_v1_xvector.py | 7 +- .../wav2xvectors/hf_wavlm2resnet1d_xvector.py | 10 +- .../models/xvectors/conformer_v1_xvector.py | 12 ++- hyperion/torch/models/xvectors/xvector.py | 36 +------ hyperion/torch/narchs/__init__.py | 1 + hyperion/torch/narchs/feat_fuser_mvn.py | 14 ++- hyperion/torch/torch_model.py | 80 +++++++++++---- hyperion/torch/utils/eval_utils.py | 6 +- 25 files changed, 327 insertions(+), 142 deletions(-) diff --git a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml index c3466259..5d27b093 100644 --- a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.yaml @@ -1,6 +1,23 @@ hf_feats: pretrained_model_path: facebook/wav2vec2-xls-r-300m drop_layers_gt: 12 +feat_fuser: + feat_fuser: + fuser_type: weighted-avg + mvn: + norm_mean: false + spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. + freq_mask_min_width: 0 + freq_mask_max_width: 10 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean xvector: resnet_enc: in_feats: 765 @@ -41,5 +58,4 @@ xvector: dropout_rate: 0.0 norm_before: false hid_act: swish -feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml index d9c9b782..fe89d2fc 100644 --- a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn1024x3_v2.0.yaml @@ -1,5 +1,22 @@ hf_feats: pretrained_model_path: facebook/wav2vec2-xls-r-300m +feat_fuser: + feat_fuser: + fuser_type: weighted-avg + mvn: + norm_mean: false + spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. + freq_mask_min_width: 0 + freq_mask_max_width: 10 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean xvector: resnet_enc: in_feats: 1024 @@ -40,5 +57,4 @@ xvector: dropout_rate: 0.0 norm_before: false hid_act: swish -feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml index dc3737e3..63afdb58 100644 --- a/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/wav2vec2xlsr300m_ecapatdnn512x3_v2.0.yaml @@ -1,5 +1,22 @@ hf_feats: pretrained_model_path: facebook/wav2vec2-xls-r-300m +feat_fuser: + feat_fuser: + fuser_type: weighted-avg + mvn: + norm_mean: false + spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. 
+ freq_mask_min_width: 0 + freq_mask_max_width: 10 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean xvector: resnet_enc: in_feats: 765 @@ -40,5 +57,4 @@ xvector: dropout_rate: 0.0 norm_before: false hid_act: swish -feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml index d7e3388f..4de306e4 100644 --- a/egs/voxceleb/v2.1/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/wavlmbaseplus9l_ecapatdnn512x3_v2.0.yaml @@ -1,6 +1,23 @@ hf_feats: pretrained_model_path: microsoft/wavlm-base-plus drop_layers_gt: 9 +feat_fuser: + feat_fuser: + fuser_type: weighted-avg + mvn: + norm_mean: false + spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. + freq_mask_min_width: 0 + freq_mask_max_width: 10 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean xvector: resnet_enc: in_feats: 765 @@ -41,5 +58,4 @@ xvector: dropout_rate: 0.0 norm_before: false hid_act: swish -feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml index b2430d97..2c2c6db3 100644 --- a/egs/voxceleb/v2.1/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/wavlmbaseplus_ecapatdnn512x3_v2.0.yaml @@ -1,5 +1,22 @@ hf_feats: pretrained_model_path: microsoft/wavlm-base-plus +feat_fuser: + feat_fuser: + fuser_type: weighted-avg + mvn: + norm_mean: false + spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. + freq_mask_min_width: 0 + freq_mask_max_width: 10 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean xvector: resnet_enc: in_feats: 765 @@ -40,5 +57,4 @@ xvector: dropout_rate: 0.0 norm_before: false hid_act: swish -feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml index 5025f047..52246639 100644 --- a/egs/voxceleb/v2.1/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/wavlmlarge12l_ecapatdnn512x3_v2.0.yaml @@ -1,6 +1,23 @@ hf_feats: pretrained_model_path: microsoft/wavlm-large drop_layers_gt: 12 +feat_fuser: + feat_fuser: + fuser_type: weighted-avg + mvn: + norm_mean: false + spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. 
+ freq_mask_min_width: 0 + freq_mask_max_width: 10 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean xvector: resnet_enc: in_feats: 765 @@ -41,5 +58,4 @@ xvector: dropout_rate: 0.0 norm_before: false hid_act: swish -feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/egs/voxceleb/v2.1/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml b/egs/voxceleb/v2.1/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml index 0a6303f5..a05e82e1 100644 --- a/egs/voxceleb/v2.1/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/wavlmlarge_ecapatdnn512x3_v2.0.yaml @@ -1,5 +1,22 @@ hf_feats: pretrained_model_path: microsoft/wavlm-large +feat_fuser: + feat_fuser: + fuser_type: weighted-avg + mvn: + norm_mean: false + spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. + freq_mask_min_width: 0 + freq_mask_max_width: 10 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean xvector: resnet_enc: in_feats: 765 @@ -40,5 +57,4 @@ xvector: dropout_rate: 0.0 norm_before: false hid_act: swish -feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/hyperion/bin/extract_wav2vec2xvectors.py b/hyperion/bin/extract_wav2vec2xvectors.py index 02a3b68e..336ec818 100755 --- a/hyperion/bin/extract_wav2vec2xvectors.py +++ b/hyperion/bin/extract_wav2vec2xvectors.py @@ -25,7 +25,9 @@ from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.np.augment import SpeechAugment -from hyperion.torch import TorchModelLoader as TML + +# from hyperion.torch import TorchModelLoader as TML +from hyperion.torch import TorchModel from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info @@ -59,7 +61,8 @@ def init_device(use_gpu): def load_model(model_path, device): logging.info("loading model {}".format(model_path)) - model = TML.load(model_path) + # model = TML.load(model_path) + model = TorchModel.auto_load(model_path) logging.info("xvector-model={}".format(model)) model.to(device) model.eval() diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py index 711c4194..4d0e762a 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_classif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py @@ -34,10 +34,10 @@ def read_utt_list(list_file, class2int_file, part_idx, num_parts): - logging.info("reading utt list %s" % (list_file)) + logging.info("reading utt list %s", list_file) utt_list = Utt2Info.load(list_file) utt_list = utt_list.split(part_idx, num_parts) - logging.info("reading class2int-file %s" % (class2int_file)) + logging.info("reading class2int-file %s", class2int_file) class_info = pd.read_csv(class2int_file, header=None, sep=" ") class2idx = {str(k): i for i, k in enumerate(class_info[0])} class_idx = np.array([class2idx[k] for k in utt_list.info], dtype=int) @@ -193,7 +193,7 @@ def generate_attacks( torch.manual_seed(random_seed + len(s)) # this is to make results reproducible p = torch.rand(1).item() if p > p_attack: - logging.info("skipping attack for utt %s" % (key)) + logging.info("skipping attack for utt %s", key) continue if random_utt_length: @@ -228,7 +228,7 @@ def generate_attacks( _, pred = torch.max(score_benign, dim=1) if pred[0] != class_id: - logging.info("utt %s failed benign classification, skipping..." 
% (key)) + logging.info("utt %s failed benign classification, skipping...", key) continue t3 = time.time() diff --git a/hyperion/torch/layers/mvn.py b/hyperion/torch/layers/mvn.py index 736b69c6..a46ce20d 100644 --- a/hyperion/torch/layers/mvn.py +++ b/hyperion/torch/layers/mvn.py @@ -4,7 +4,7 @@ """ import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..utils import seq_lengths_to_mask @@ -225,16 +225,16 @@ def add_class_args(parser, prefix=None): parser = ArgumentParser(prog="") parser.add_argument( - "--no-norm-mean", - default=False, - action="store_true", - help="don't center the features", + "--norm-mean", + default=True, + action=ActionYesNo, + help="center the features", ) parser.add_argument( "--norm-var", default=False, - action="store_true", + action=ActionYesNo, help="normalize the variance of the features", ) diff --git a/hyperion/torch/lr_schedulers/lr_scheduler.py b/hyperion/torch/lr_schedulers/lr_scheduler.py index 5cbb3ff1..5008e1be 100644 --- a/hyperion/torch/lr_schedulers/lr_scheduler.py +++ b/hyperion/torch/lr_schedulers/lr_scheduler.py @@ -3,12 +3,11 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - import torch import torch.optim as optim -class LRScheduler(object): +class LRScheduler: """Base class for learning rate schedulers. Attributes: @@ -114,7 +113,6 @@ def on_epoch_end(self, metrics=None): self.epoch += 1 def on_opt_step(self): - if self.in_warmup: for param_group, lr in zip( self.optimizer.param_groups, self.get_warmup_lr() diff --git a/hyperion/torch/lr_schedulers/triangular_lr.py b/hyperion/torch/lr_schedulers/triangular_lr.py index 45704014..0a5efd38 100644 --- a/hyperion/torch/lr_schedulers/triangular_lr.py +++ b/hyperion/torch/lr_schedulers/triangular_lr.py @@ -61,6 +61,12 @@ def __init__( self.num_restarts = num_restarts self.gamma = gamma + def load_state_dict(self, state_dict): + # we want to be able to change gamma and T_mul in the middle of training + del state_dict["gamma"] + del state_dict["T_mul"] + super().load_state_dict(state_dict) + def on_epoch_begin(self, epoch=None, epoch_updates=1, **kwargs): super().on_epoch_begin(epoch) if self.update_lr_on_opt_step: diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2conformer_v1_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2conformer_v1_xvector.py index aeabd09e..2dc37052 100644 --- a/hyperion/torch/models/wav2xvectors/hf_hubert2conformer_v1_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2conformer_v1_xvector.py @@ -9,6 +9,7 @@ import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser +from ...narchs import FeatFuserMVN from ...tpm import HFHubert from ..xvectors import ConformerV1XVector from .hf_wav2xvector import HFWav2XVector @@ -31,9 +32,9 @@ class HFHubert2ConformerV1XVector(HFWav2XVector): def __init__( self, hf_feats: Union[Dict, HFHubert], + feat_fuser: Union[Dict, FeatFuserMVN], xvector: Union[Dict, ConformerV1XVector], feat_fusion_start: int = 0, - feat_fusion_method: str = "weighted-avg", ): if isinstance(hf_feats, dict): hf_feats = HFHubert(**hf_feats) @@ -41,13 +42,13 @@ def __init__( assert isinstance(hf_feats, HFHubert) if isinstance(xvector, dict): - xvector["resnet_enc"]["in_feats"] = hf_feats.hidden_size + xvector["encoder"]["in_feats"] = hf_feats.hidden_size xvector = ConformerV1XVector(**xvector) else: assert isinstance(xvector, ConformerV1XVector) assert xvector.encoder_net.in_feats == hf_feats.hidden_size - 
super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + super().__init__(hf_feats, feat_fuser, xvector, feat_fusion_start) @staticmethod def filter_args(**kwargs): diff --git a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py index b75ac53f..a9495ba5 100644 --- a/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_hubert2resnet1d_xvector.py @@ -5,11 +5,11 @@ import logging from typing import Dict, Optional, Union -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...narchs import FeatFuserMVN from ...tpm import HFHubert from ..xvectors import ResNet1dXVector from .hf_wav2xvector import HFWav2XVector @@ -32,11 +32,10 @@ class HFHubert2ResNet1dXVector(HFWav2XVector): def __init__( self, hf_feats: Union[Dict, HFHubert], + feat_fuser: Union[Dict, FeatFuserMVN], xvector: Union[Dict, ResNet1dXVector], feat_fusion_start: int = 0, - feat_fusion_method: str = "weighted-avg", ): - if isinstance(hf_feats, dict): hf_feats = HFHubert(**hf_feats) else: @@ -49,7 +48,7 @@ def __init__( assert isinstance(xvector, ResNet1dXVector) assert xvector.encoder_net.in_feats == hf_feats.hidden_size - super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + super().__init__(hf_feats, feat_fuser, xvector, feat_fusion_start) @staticmethod def filter_args(**kwargs): diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2conformer_v1_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2conformer_v1_xvector.py index 3a670d1c..1526c467 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2conformer_v1_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2conformer_v1_xvector.py @@ -9,6 +9,7 @@ import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser +from ...narchs import FeatFuserMVN from ...tpm import HFWav2Vec2 from ..xvectors import ConformerV1XVector from .hf_wav2xvector import HFWav2XVector @@ -30,9 +31,9 @@ class HFWav2Vec2ConformerV1XVector(HFWav2XVector): def __init__( self, hf_feats: Union[Dict, HFWav2Vec2], + feat_fuser: Union[Dict, FeatFuserMVN], xvector: Union[Dict, ConformerV1XVector], feat_fusion_start: int = 0, - feat_fusion_method: str = "weighted-avg", ): if isinstance(hf_feats, dict): if "class_name" in hf_feats: @@ -42,7 +43,7 @@ def __init__( assert isinstance(hf_feats, HFWav2Vec2) if isinstance(xvector, dict): - xvector["resnet_enc"]["in_feats"] = hf_feats.hidden_size + xvector["encoder"]["in_feats"] = hf_feats.hidden_size if "class_name" in xvector: del xvector["class_name"] xvector = ConformerV1XVector(**xvector) @@ -50,7 +51,7 @@ def __init__( assert isinstance(xvector, ConformerV1XVector) assert xvector.encoder_net.in_feats == hf_feats.hidden_size - super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + super().__init__(hf_feats, feat_fuser, xvector, feat_fusion_start) @staticmethod def filter_args(**kwargs): diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py index 8a17379c..3709e980 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2vec2resnet1d_xvector.py @@ -5,11 +5,11 @@ import logging from typing import Dict, Optional, Union -from jsonargparse import ActionParser, ArgumentParser - 
import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...narchs import FeatFuserMVN from ...tpm import HFWav2Vec2 from ..xvectors import ResNet1dXVector from .hf_wav2xvector import HFWav2XVector @@ -31,11 +31,10 @@ class HFWav2Vec2ResNet1dXVector(HFWav2XVector): def __init__( self, hf_feats: Union[Dict, HFWav2Vec2], + feat_fuser: Union[Dict, FeatFuserMVN], xvector: Union[Dict, ResNet1dXVector], feat_fusion_start: int = 0, - feat_fusion_method: str = "weighted-avg", ): - if isinstance(hf_feats, dict): if "class_name" in hf_feats: del hf_feats["class_name"] @@ -52,12 +51,11 @@ def __init__( assert isinstance(xvector, ResNet1dXVector) assert xvector.encoder_net.in_feats == hf_feats.hidden_size - super().__init__(hf_feats, xvector, feat_fusion_start, - feat_fusion_method) + super().__init__(hf_feats, feat_fuser, xvector, feat_fusion_start) + # feat_fusion_method) @staticmethod def filter_args(**kwargs): - base_args = HFWav2XVector.filter_args(**kwargs) child_args = HFWav2Vec2.filter_args(**kwargs["hf_feats"]) base_args["hf_feats"] = child_args @@ -76,8 +74,7 @@ def add_class_args(parser, prefix=None): HFWav2XVector.add_class_args(parser) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) @staticmethod def filter_finetune_args(**kwargs): @@ -98,5 +95,4 @@ def add_finetune_args(parser, prefix=None): ResNet1dXVector.add_finetune_args(parser, prefix="xvector") if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index d6be544a..2b4ef876 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -9,7 +9,7 @@ import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser -from ...layers import MeanVarianceNorm +from ...narchs import FeatFuserMVN from ...torch_model import TorchModel from ...utils import remove_silence @@ -19,25 +19,38 @@ class HFWav2XVector(TorchModel): Attributes: hf_feats: hugging face model wrapper object. + feat_fuser: Dictionary to build feature fuser object. xvector: x-vector model object. feat_fusion_start: the input to x-vector model will fuse the wav2vec layers from "feat_fusion_start" to the wav2vec "num_layers". feat_fusion_method: method to fuse the hidden layers from the wav2vec model, when more - than one layer is used. + than one layer is used (deprecated). 
""" def __init__( - self, hf_feats, xvector, feat_fusion_start=0, feat_fusion_method="weighted-avg" + self, + hf_feats, + feat_fuser, + xvector, + feat_fusion_start=0, + # feat_fusion_method="weighted-avg", ): super().__init__() self.hf_feats = hf_feats self.xvector = xvector self.feat_fusion_start = feat_fusion_start - self.feat_fusion_method = feat_fusion_method + # self.feat_fusion_method = feat_fusion_method self._hf_context = contextlib.nullcontext() - self._make_fuser() + self._make_fuser(feat_fuser) + + def _make_fuser(self, feat_fuser): + num_feats = self.hf_feats.num_encoder_layers + 1 - self.feat_fusion_start + feat_dim = self.hf_feats.hidden_size + feat_fuser["feat_fuser"]["num_feats"] = num_feats + feat_fuser["feat_fuser"]["feat_dim"] = feat_dim + self.feat_fuser = FeatFuserMVN(**feat_fuser) - def _make_fuser(self): + def _make_fuser_legacy(self): if self.feat_fusion_method == "last": self.feat_fuser = None return @@ -52,7 +65,7 @@ def _make_fuser(self): elif self.feat_fusion_method == "cat": self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) - def _fuse_hid_feats(self, hid_feats): + def _fuse_hid_feats_legacy(self, hid_feats): """Fuses the hidden features from the Wav2Vec model. Args: @@ -121,6 +134,44 @@ def rebuild_output_layer( def forward_feats( self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False + ): + return_hid_states = ( + False + if return_feat_layers is None and self.feat_fuser.fuser_type == "last" + else True + ) + with self._hf_context: + hf_output = self.hf_feats( + x, + x_lengths, + return_hid_states=return_hid_states, + chunk_length=chunk_length, + detach_chunks=detach_chunks, + ) + feat_lengths = hf_output["hidden_states_lengths"] + if return_hid_states: + hid_feats = hf_output["hidden_states"] + hid_feats = hid_feats[self.feat_fusion_start :] + else: + hid_feats = [hf_output["last_hidden_state"]] + + feats, feat_lengths = self.feat_fuser(hid_feats, feat_lengths) + feats = feats.transpose(1, 2) + if return_feat_layers is not None: + # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time) + # as the hidden features of the x-vector encoder. 
+ hid_feats = [ + f.transpose(1, 2) + for i, f in enumerate(hid_feats) + if i in return_feat_layers + ] + else: + hid_feats = None + + return feats, hid_feats, feat_lengths + + def forward_feats_legacy( + self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False ): return_hid_states = ( False @@ -360,23 +411,27 @@ def valid_train_modes(): def filter_args(**kwargs): valid_args = ( "hf_feats", + "feat_fuser", "xvector", "feat_fusion_start", - "feat_fusion_method", + # "feat_fusion_method", ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) return args def get_config(self): hf_cfg = self.hf_feats.get_config() + fuser_cfg = self.feat_fuser.get_config() xvec_cfg = self.xvector.get_config() del hf_cfg["class_name"] + del fuser_cfg["class_name"] del xvec_cfg["class_name"] config = { "hf_feats": hf_cfg, + "feat_fuser": fuser_cfg, "xvector": xvec_cfg, "feat_fusion_start": self.feat_fusion_start, - "feat_fusion_method": self.feat_fusion_method, + # "feat_fusion_method": self.feat_fusion_method, } base_config = super().get_config() @@ -393,6 +448,8 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") + FeatFuserMVN.add_class_args(parser, prefix="feat_fuser") + parser.add_argument( "--feat-fusion-start", default=0, @@ -402,19 +459,15 @@ def add_class_args(parser, prefix=None, skip=set()): "the wav2vec num_layers" ), ) - parser.add_argument( - "--feat-fusion-method", - default="weighted-avg", - choices=["weighted-avg", "linear", "cat", "last"], - help=( - "method to fuse the hidden layers from the wav2vec model " - "in [weighted-avg, cat]" - ), - ) + # parser.add_argument( + # "--feat-fusion-method", + # default="weighted-avg", + # choices=["weighted-avg", "linear", "cat", "last"], + # help=( + # "method to fuse the hidden layers from the wav2vec model " + # "in [weighted-avg, cat]" + # ), + # ) if prefix is not None: - outer_parser.add_argument( - "--" + prefix, - action=ActionParser(parser=parser), - help="xvector options", - ) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2conformer_v1_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2conformer_v1_xvector.py index 30e450eb..bcf82bba 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wavlm2conformer_v1_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2conformer_v1_xvector.py @@ -9,6 +9,7 @@ import torch.nn as nn from jsonargparse import ActionParser, ArgumentParser +from ...narchs import FeatFuserMVN from ...tpm import HFWavLM from ..xvectors import ConformerV1XVector from .hf_wav2xvector import HFWav2XVector @@ -31,9 +32,9 @@ class HFWavLM2ConformerV1XVector(HFWav2XVector): def __init__( self, hf_feats: Union[Dict, HFWavLM], + feat_fuser: Union[Dict, FeatFuserMVN], xvector: Union[Dict, ConformerV1XVector], feat_fusion_start: int = 0, - feat_fusion_method: str = "weighted-avg", ): if isinstance(hf_feats, dict): hf_feats = HFWavLM(**hf_feats) @@ -41,13 +42,13 @@ def __init__( assert isinstance(hf_feats, HFWavLM) if isinstance(xvector, dict): - xvector["resnet_enc"]["in_feats"] = hf_feats.hidden_size + xvector["encoder"]["in_feats"] = hf_feats.hidden_size xvector = ConformerV1XVector(**xvector) else: assert isinstance(xvector, ConformerV1XVector) assert xvector.encoder_net.in_feats == hf_feats.hidden_size - super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + super().__init__(hf_feats, feat_fuser, xvector, feat_fusion_start) 
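    # (Editor's note, illustrative only -- not part of the patch.) The new
    # feat_fuser argument replaces the old feat_fusion_method string and takes
    # a nested config like the ones added to the recipe YAMLs above;
    # HFWav2XVector._make_fuser fills in num_feats/feat_dim before building
    # the FeatFuserMVN. A hypothetical sketch (xvector_cfg is a placeholder
    # ConformerV1XVector config dict):
    #
    #     model = HFWavLM2ConformerV1XVector(
    #         hf_feats={"pretrained_model_path": "microsoft/wavlm-base-plus"},
    #         feat_fuser={
    #             "feat_fuser": {"fuser_type": "weighted-avg"},
    #             "mvn": {"norm_mean": False},
    #             "spec_augment": None,
    #         },
    #         xvector=xvector_cfg,
    #         feat_fusion_start=2,
    #     )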
@staticmethod def filter_args(**kwargs): diff --git a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py index 56a19130..30ace453 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wavlm2resnet1d_xvector.py @@ -5,11 +5,11 @@ import logging from typing import Dict, Optional, Union -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from ...narchs import FeatFuserMVN from ...tpm import HFWavLM from ..xvectors import ResNet1dXVector from .hf_wav2xvector import HFWav2XVector @@ -32,11 +32,10 @@ class HFWavLM2ResNet1dXVector(HFWav2XVector): def __init__( self, hf_feats: Union[Dict, HFWavLM], + feat_fuser: Union[Dict, FeatFuserMVN], xvector: Union[Dict, ResNet1dXVector], feat_fusion_start: int = 0, - feat_fusion_method: str = "weighted-avg", ): - if isinstance(hf_feats, dict): hf_feats = HFWavLM(**hf_feats) else: @@ -49,11 +48,10 @@ def __init__( assert isinstance(xvector, ResNet1dXVector) assert xvector.encoder_net.in_feats == hf_feats.hidden_size - super().__init__(hf_feats, xvector, feat_fusion_start, feat_fusion_method) + super().__init__(hf_feats, feat_fuser, xvector, feat_fusion_start) @staticmethod def filter_args(**kwargs): - base_args = HFWav2XVector.filter_args(**kwargs) child_args = HFWavLM.filter_args(**kwargs["hf_feats"]) base_args["hf_feats"] = child_args diff --git a/hyperion/torch/models/xvectors/conformer_v1_xvector.py b/hyperion/torch/models/xvectors/conformer_v1_xvector.py index 323c22a9..f52b8700 100644 --- a/hyperion/torch/models/xvectors/conformer_v1_xvector.py +++ b/hyperion/torch/models/xvectors/conformer_v1_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import ConformerEncoderV1 as Encoder from .xvector import XVector @@ -40,8 +39,13 @@ def __init__( proj_feats=None, ): if isinstance(encoder, dict): - logging.info("making %s conformer encoder network") + logging.info(f"making conformer encoder network={encoder}") + encoder["in_time_dim"] = 2 + encoder["out_time_dim"] = 2 encoder = Encoder(**encoder) + else: + encoder.in_time_dim = 2 + encoder.out_time_dim = 2 super().__init__( encoder, @@ -75,7 +79,7 @@ def get_config(self): encoder_cfg = self.encoder_net.get_config() del encoder_cfg["class_name"] config = { - "resnet_enc": encoder_cfg, + "encoder": encoder_cfg, } config.update(base_config) diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index 9ccd0d31..de28ccae 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -283,46 +283,14 @@ def forward_logits(self, x, x_lengths=None, y=None): Returns: class logits tensor with shape=(batch, num_classes). 
""" - f = x max_in_length = x.size(-1) x = self._pre_enc(x) x = self.encoder_net(x) + if isinstance(x, tuple): + x = x[0] x, x_lengths = self._post_enc(x, x_lengths, max_in_length) p = self.pool_net(x, x_lengths=x_lengths) y = self.classif_net(p, y) - # if not self.training: - # fnf = ( - # torch.any(torch.any(torch.logical_not(torch.isfinite(f)), dim=1), dim=1) - # .sum() - # .cpu() - # .item() - # ) - # xnf = ( - # torch.any(torch.any(torch.logical_not(torch.isfinite(x)), dim=1), dim=1) - # .sum() - # .cpu() - # .item() - # ) - # pnf = ( - # torch.any(torch.logical_not(torch.isfinite(p)), dim=1) - # .sum() - # .cpu() - # .item() - # ) - # ynf = ( - # torch.any(torch.logical_not(torch.isfinite(y)), dim=1) - # .sum() - # .cpu() - # .item() - # ) - # # if xnf + pnf + ynf > 0: - # logging.warning("ff %d xnf %d pnf %d ynf %d", fnf, xnf, pnf, ynf) - # if xnf > 0: - # ii = torch.any( - # torch.any(torch.logical_not(torch.isfinite(x)), dim=1), dim=1 - # ) - # xx = x[ii] - # logging.info(f"xx={xx}") return y diff --git a/hyperion/torch/narchs/__init__.py b/hyperion/torch/narchs/__init__.py index 4fe8b4ed..c46c87fa 100644 --- a/hyperion/torch/narchs/__init__.py +++ b/hyperion/torch/narchs/__init__.py @@ -13,6 +13,7 @@ from .efficient_net import EfficientNet from .etdnn import ETDNNV1 from .fcnet import FCNetV1, FCNetV2 +from .feat_fuser_mvn import FeatFuserMVN from .resetdnn import ResETDNNV1 from .resnet import * from .resnet1d_decoder import ResNet1dDecoder diff --git a/hyperion/torch/narchs/feat_fuser_mvn.py b/hyperion/torch/narchs/feat_fuser_mvn.py index 17b396bc..6fa4c6c0 100644 --- a/hyperion/torch/narchs/feat_fuser_mvn.py +++ b/hyperion/torch/narchs/feat_fuser_mvn.py @@ -2,7 +2,7 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from typing import Dict, Optional +from typing import Any, Dict, Optional import torch import torch.nn as nn @@ -21,9 +21,9 @@ class FeatFuserMVN(NetArch): def __init__( self, - feat_fuser: Dict[str], - mvn: Optional[Dict[str]] = None, - spec_augment: Optional[Dict[str]] = None, + feat_fuser: Dict[str, Any], + mvn: Optional[Dict[str, Any]] = None, + spec_augment: Optional[Dict[str, Any]] = None, trans: bool = False, aug_after_mvn: bool = False, ): @@ -56,6 +56,10 @@ def __init__( self.trans = trans self.aug_after_mvn = aug_after_mvn + @property + def fuser_type(self): + return self.feat_fuser_cfg["fuser_type"] + def forward(self, feats, feats_lengths=None): feats = self.feat_fuser(feats) if self.spec_augment is not None and not self.aug_after_mvn: @@ -74,7 +78,7 @@ def forward(self, feats, feats_lengths=None): def get_config(self): config = { - "feat_fuser": self.feat_feats_cfg, + "feat_fuser": self.feat_fuser_cfg, "mvn": self.mvn_cfg, "spec_augment": self.spec_augment_cfg, "trans": self.trans, diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index 912c2640..97be320c 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -208,7 +208,42 @@ def device(self): return next(iter(devices)) @staticmethod - def _fix_cfg_compatibility(class_obj, cfg): + def _remove_module_prefix(state_dict): + import re + + p = re.compile("^(module\.)+") + if p.match(list(state_dict.keys())[0]) is not None: + state_dict = ODict((p.sub("", k), v) for k, v in state_dict.items()) + + return state_dict + + @staticmethod + def _fix_xvector_cfg(cfg): + # We renamed AM-softmax scale parameer s to cos_scale + if "s" in cfg: + cfg["cos_scale"] = cfg.pop("s") + + return 
cfg
+
+    @staticmethod
+    def _fix_hf_wav2xvector(cfg, state_dict):
+        key = "feat_fusion_method"
+        if key in cfg:
+            fuser_type = cfg.pop(key)
+            feat_fuser = {
+                "feat_fuser": {"fuser_type": fuser_type},
+                "mvn": None,
+                "spec_augment": None,
+            }
+            cfg["feat_fuser"] = feat_fuser
+            state_dict["feat_fuser.feat_fuser.feat_fuser"] = state_dict.pop(
+                "feat_fuser"
+            )
+
+        return cfg, state_dict
+
+    @staticmethod
+    def _fix_model_compatibility(class_obj, cfg, state_dict):
         """Function that fixes compatibility issues with deprecated models
 
         Args:
@@ -221,12 +256,14 @@ def _fix_cfg_compatibility(class_obj, cfg):
         # for compatibility with older x-vector models
         XVector = TorchModel.registry["XVector"]
         if issubclass(class_obj, XVector):
-            # We renamed AM-softmax scale parameer s to cos_scale
-            if "s" in cfg:
-                cfg["cos_scale"] = cfg["s"]
-                del cfg["s"]
+            cfg = TorchModel._fix_xvector_cfg(cfg)
 
-        return cfg
+        # switch old feature fuser to new feature fuser in w2v x-vectors
+        HFWav2XVector = TorchModel.registry["HFWav2XVector"]
+        if issubclass(class_obj, HFWav2XVector):
+            cfg, state_dict = TorchModel._fix_hf_wav2xvector(cfg, state_dict)
+
+        return cfg, state_dict
 
     @staticmethod
     def _is_hf_path(file_path: Path):
@@ -316,19 +353,20 @@ def auto_load(
         if "n_averaged" in state_dict:
             del state_dict["n_averaged"]
 
-        cfg = TorchModel._fix_cfg_compatibility(class_obj, cfg)
-
-        import re
+        state_dict = TorchModel._remove_module_prefix(state_dict)
+        cfg, state_dict = TorchModel._fix_model_compatibility(
+            class_obj, cfg, state_dict
+        )
 
-        p = re.compile("^module\.")
-        num_tries = 3
-        for tries in range(num_tries):
-            try:
-                return class_obj.load(cfg=cfg, state_dict=state_dict)
-            except RuntimeError as err:
-                # remove module prefix when is trained with dataparallel
-                if tries == num_tries - 1:
-                    # if it failed the 3 trials raise exception
-                    raise err
-                # remove module prefix when is trained with dataparallel
-                state_dict = ODict((p.sub("", k), v) for k, v in state_dict.items())
+        return class_obj.load(cfg=cfg, state_dict=state_dict)
+        # num_tries = 3
+        # for tries in range(num_tries):
+        #     try:
+        #         return class_obj.load(cfg=cfg, state_dict=state_dict)
+        #     except RuntimeError as err:
+        #         # remove module prefix when is trained with dataparallel
+        #         if tries == num_tries - 1:
+        #             # if it failed the 3 trials raise exception
+        #             raise err
+        #         # remove module prefix when is trained with dataparallel
+        #         state_dict = ODict((p.sub("", k), v) for k, v in state_dict.items())
diff --git a/hyperion/torch/utils/eval_utils.py b/hyperion/torch/utils/eval_utils.py
index d74835f6..d6a9063a 100644
--- a/hyperion/torch/utils/eval_utils.py
+++ b/hyperion/torch/utils/eval_utils.py
@@ -9,7 +9,6 @@
 
 
 def eval_nnet_by_chunks(x, nnet, chunk_length=0, detach_chunks=True, time_dim=-1):
-
     device = None if nnet.device == x.device else nnet.device
     T = x.shape[time_dim]
     if T <= chunk_length or chunk_length == 0:
         x = x.to(device)
 
         y = nnet(x)
+        if isinstance(y, tuple):
+            y = y[0]
         if detach_chunks:
             y = y.detach()
         return y
@@ -50,6 +51,8 @@
 
             x_i = x_i.to(device)
 
             y_i = nnet(x_i)
+            if isinstance(y_i, tuple):
+                y_i = y_i[0]
             if detach_chunks:
                 y_i = y_i.detach()
 
@@ -99,7 +102,6 @@
 def eval_nnet_overlap_add(
     x, nnet, chunk_length=0, chunk_overlap=None, detach_chunks=True, time_dim=-1
 ):
-
     device = None if 
nnet.device == x.device else nnet.device
 
     # assume time is the last dimension
 
From 4c5c4fbfc335993f5598f793a522dc2fed6fd234 Mon Sep 17 00:00:00 2001
From: System User
Date: Mon, 6 Nov 2023 18:43:55 -0500
Subject: [PATCH 118/154] started lre22/open.v2.8k

---
 egs/lre22/open.v1.8k/README.md                | 26 ++++++++++++++-----
 .../open.v1.8k/run_003_prepare_noises_rirs.sh |  8 +++---
 .../run_010_prepare_xvec_train_data.sh        | 26 ++++++++++---------
 egs/lre22/open.v1.8k/run_011_train_xvector.sh |  3 +--
 egs/lre22/open.v2.8k/cmd.sh                   |  4 +--
 egs/lre22/open.v2.8k/datapath.sh              |  2 +-
 egs/lre22/open.v2.8k/run_001_prepare_data.sh  | 20 +++++++++++---
 .../open.v2.8k/run_003_prepare_noises_rirs.sh |  8 +++---
 egs/lre22/open.v2.8k/run_011_train_xvector.sh |  9 +++----
 .../preprocess_audios_for_nnet_train.sh       |  3 +--
 10 files changed, 65 insertions(+), 44 deletions(-)

diff --git a/egs/lre22/open.v1.8k/README.md b/egs/lre22/open.v1.8k/README.md
index 877f99ca..9ad41229 100644
--- a/egs/lre22/open.v1.8k/README.md
+++ b/egs/lre22/open.v1.8k/README.md
@@ -1,6 +1,6 @@
 # LRE22 Fixed Condition V1
 
-Recipe for the NIST LRE22 fixed condition based to the JHU-MIT Submission.
+Recipe for the NIST LRE22 open condition based on the JHU-MIT Submission.
 
 ## Citing
 ```
@@ -18,14 +18,27 @@ Recipe for the NIST LRE22 fixed condition based to the JHU-MIT Submission.
 
 - x-Vector networks trained on:
   - VoxLingua107
-  - NIST LRE17 Train + Dev + Eval / CTS + AfV
+  - NIST LRE17 Train + Dev + Eval / CTS + AfV without Maghrebi Arabic
+  - NIST SRE16
+  - NIST SRE18
+  - NIST SRE19 CMN2
+  - NIST SRE21
+  - NIST SRE CTS Superset
+  - IARPA Babel
+  - Fleurs
+  - LWAZI 2009
+  - NCHLT 2014
+  - AMMI 2020
+  - CommonVoice Tigrinya, Indian English, French
+  - ADI 2017
+  - AST
 
 - Gaussian back-end trained on:
   - NIST LRE22 dev with 2-fold cross-val + x10 augmentations
 
 ## Usage
 
 - Run the run_0*.sh scripts in sequence
-   - By default it uses ECAPA-TDNN 4 layers of 2048 dim. 
+ - By default it uses Res2Net50 - To change the default network run scripts with the config-file argument: ```bash run_011_train_xvector.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh @@ -37,7 +50,6 @@ run_040_be_final.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s | Config | Model Type | Model Details | Back-end | Dev MinCp | Dev ActCp | Eval MinCp | Eval ActCp | | ------ | ---------- | ------------- | -------- | :-------: | :-------: | :--------: | :--------: | -| config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh | ECAPA-TDNN 2048x4 | Stage-2 | GBE | 0.207 | 0.209 | 0.198 | 0.199 | -| config_fbank64_stmn_fwseres2net50s8_v1.0.sh | fw-SE Res2Net50 scale=8 | Stage-2 | GBE | 0.227 | 0.229 | 0.213 | 0.215 | -| Fusion ECAPA-TDNN + FwSE Res2Net50 | | | FoCal | 0.182 | 0.183 | 0.180 | 0.181 | - +| config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh | ECAPA-TDNN 2048x4 | Stage-1 | GBE | 0.100 | 0.101 | 0.105 | 0.106 | +| config_fbank64_stmn_fwseres2net50s8_v1.0.sh | fw-SE Res2Net50 scale=8 | Stage-1 | GBE | +| Fusion ECAPA-TDNN + FwSE Res2Net50 | | | FoCal | diff --git a/egs/lre22/open.v1.8k/run_003_prepare_noises_rirs.sh b/egs/lre22/open.v1.8k/run_003_prepare_noises_rirs.sh index 638143f0..09f01f4d 100755 --- a/egs/lre22/open.v1.8k/run_003_prepare_noises_rirs.sh +++ b/egs/lre22/open.v1.8k/run_003_prepare_noises_rirs.sh @@ -23,7 +23,7 @@ if [ $stage -le 1 ]; then for name in musan_noise musan_music do steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ - --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') \ + --storage_name lre22-open-v1.8k-$(date +'%m_%d_%H_%M') \ data/${name} data/${name}_proc_audio exp/${name}_proc_audio utils/fix_data_dir.sh data/${name}_proc_audio done @@ -36,7 +36,7 @@ if [ $stage -le 2 ]; then for name in musan_speech do steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ - --storage_name lre22-fixed-v1.8k-$(date +'%m_%d_%H_%M') \ + --storage_name lre22-open-v1.8k-$(date +'%m_%d_%H_%M') \ data/${name} data/${name}_babble exp/${name}_babble # utils/fix_data_dir.sh data/${name}_babble done @@ -44,8 +44,8 @@ fi if [ $stage -le 3 ]; then if [ ! 
-d "RIRS_NOISES" ]; then - if [ -d ../v1.16k/RIRS_NOISES ];then - ln -s ../v1.16k/RIRS_NOISES + if [ -d ../fixed.v1.8k/RIRS_NOISES ];then + ln -s ../fixed.v1.8k/RIRS_NOISES else # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip diff --git a/egs/lre22/open.v1.8k/run_010_prepare_xvec_train_data.sh b/egs/lre22/open.v1.8k/run_010_prepare_xvec_train_data.sh index d261a287..9f3eff6c 100755 --- a/egs/lre22/open.v1.8k/run_010_prepare_xvec_train_data.sh +++ b/egs/lre22/open.v1.8k/run_010_prepare_xvec_train_data.sh @@ -77,15 +77,17 @@ if [ $stage -le 5 ]; then done fi -if [ $stage -le 6 ]; then - awk 'BEGIN{ -adapt_langs_list="ara-acm ara-aeb ara-apc ara-arq ara-ary ara-arz ara-ayl ara-jor ara-ksa ara-kuw ara-leb ara-mau ara-mor ara-oma ara-pal ara-qat ara-sud ara-syr ara-uae ara-yem fra-can fra-fra fra-ntf eng-ens eng-gbr eng-iaf eng-ine eng-usg eng-zho afr-afr nbl-nbl orm-orm tir-tir tso-tso ven-ven xho-xho zul-zul"; -nf=split(adapt_langs_list, f, " "); -for(i=1;i<=nf;i++){ adapt_langs[f[i]]=1;}; -FS=","; OFS=","; -getline; print $0; -} -{ if ($1 in adapt_langs) { $3="1."} else{ $3="0.01"}; print $0}' \ - data/open_proc_audio_no_sil/train_val_split/class_file.csv > \ - data/open_proc_audio_no_sil/train_val_split/class_file_adapt_1.csv -fi +exit + +# if [ $stage -le 6 ]; then +# awk 'BEGIN{ +# adapt_langs_list="ara-acm ara-aeb ara-apc ara-arq ara-ary ara-arz ara-ayl ara-jor ara-ksa ara-kuw ara-leb ara-mau ara-mor ara-oma ara-pal ara-qat ara-sud ara-syr ara-uae ara-yem fra-can fra-fra fra-ntf eng-ens eng-gbr eng-iaf eng-ine eng-usg eng-zho afr-afr nbl-nbl orm-orm tir-tir tso-tso ven-ven xho-xho zul-zul"; +# nf=split(adapt_langs_list, f, " "); +# for(i=1;i<=nf;i++){ adapt_langs[f[i]]=1;}; +# FS=","; OFS=","; +# getline; print $0; +# } +# { if ($1 in adapt_langs) { $3="1."} else{ $3="0.01"}; print $0}' \ +# data/open_proc_audio_no_sil/train_val_split/class_file.csv > \ +# data/open_proc_audio_no_sil/train_val_split/class_file_adapt_1.csv +# fi diff --git a/egs/lre22/open.v1.8k/run_011_train_xvector.sh b/egs/lre22/open.v1.8k/run_011_train_xvector.sh index 056a9754..4b3f9642 100755 --- a/egs/lre22/open.v1.8k/run_011_train_xvector.sh +++ b/egs/lre22/open.v1.8k/run_011_train_xvector.sh @@ -31,7 +31,6 @@ fi if [ "$use_wandb" == "true" ];then extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project lre22-fixed-v1.8k --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" fi - if [ "$interactive" == "true" ];then export cuda_cmd=run.pl fi @@ -50,7 +49,7 @@ if [ $stage -le 1 ]; then --data.val.dataset.recordings-file $list_dir/wav.scp \ --data.val.dataset.segments-file $list_dir/train_val_split/val_segments.csv \ --trainer.exp-path $nnet_s1_dir \ - --num-gpus $ngpu --master-port 3456 + --num-gpus $ngpu #--master-port 3456 else $cuda_cmd \ --gpu $ngpu $nnet_s1_dir/log/train.log \ diff --git a/egs/lre22/open.v2.8k/cmd.sh b/egs/lre22/open.v2.8k/cmd.sh index 4efc96e1..f22c66b4 100755 --- a/egs/lre22/open.v2.8k/cmd.sh +++ b/egs/lre22/open.v2.8k/cmd.sh @@ -15,11 +15,11 @@ if [ "$(hostname -d)" == "cm.gemini" ];then export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 40G" - export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" export 
cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" else - export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01]*\" -V" + export train_cmd="queue.pl --mem 4G -l hostname=\"[bc][01][234589]*\" -V" export cuda_cmd="queue.pl --mem 20G -l hostname=\"c[01]*\" -V" export cuda_eval_cmd="$train_cmd" fi diff --git a/egs/lre22/open.v2.8k/datapath.sh b/egs/lre22/open.v2.8k/datapath.sh index fec52329..02e2ddd4 100644 --- a/egs/lre22/open.v2.8k/datapath.sh +++ b/egs/lre22/open.v2.8k/datapath.sh @@ -63,7 +63,7 @@ elif [ "$(hostname --domain)" == "cm.gemini" ];then lre22_dev_root=$my_root/LDC2022E14_2022_NIST_Language_Recognition_Evaluation_Development_Data lre22_eval_root=$my_root/lre22_test_data_v2 voxlingua_root=$my_root/voxlingua107 - musan_root=/expscratch/dgromero/corpora/musan + musan_root=/export/common/data/corpora/MUSAN/musan babel_assamese_root=$ldc_root/LDC2016S06 babel_bengali_root=$ldc_root/LDC2016S08 babel_pashto_root=$ldc_root/LDC2016S09 diff --git a/egs/lre22/open.v2.8k/run_001_prepare_data.sh b/egs/lre22/open.v2.8k/run_001_prepare_data.sh index 99a72cab..bb64cdbe 100755 --- a/egs/lre22/open.v2.8k/run_001_prepare_data.sh +++ b/egs/lre22/open.v2.8k/run_001_prepare_data.sh @@ -233,17 +233,17 @@ if [ $stage -le 11 ];then --map-langs-to-lre-codes --target-fs 8000 hyp_utils/conda_env.sh \ - local/prepare_some_data_for_lre.py \ + local/prepare_some_data_for_lre_cat.py \ --corpus-dir $lwazi_root \ --output-dir data/lwazi09 \ --map-langs-to-lre-codes --target-fs 8000 hyp_utils/conda_env.sh \ - local/prepare_some_data_for_lre.py \ + local/prepare_some_data_for_lre_cat.py \ --corpus-dir $nchlt_root \ --output-dir data/nchlt14 \ --map-langs-to-lre-codes --target-fs 8000 hyp_utils/conda_env.sh \ - local/prepare_some_data_for_lre.py \ + local/prepare_some_data_for_lre_cat.py \ --corpus-dir $ammi_root \ --output-dir data/ammi20 \ --map-langs-to-lre-codes --target-fs 8000 @@ -286,7 +286,7 @@ fi if [ $stage -le 15 ];then hyp_utils/conda_env.sh \ - local/prepare_ast.py \ + local/prepare_ast_cat.py \ --corpus-dir $ast_root \ --output-dir data/ast \ --map-langs-to-lre-codes --target-fs 8000 @@ -328,3 +328,15 @@ if [ $stage -le 16 ];then fi +if [ $stage -le 5 ];then + if [ -d ../fixed.v1.8k/lre-scorer ];then + ln -s ../fixed.v1.8k/lre-scorer + else + local/download_lre22_scorer.sh + fi + if [ -d ../fixed.v1.8k/focal_multiclass ];then + ln -s ../fixed.v1.8k/focal_multiclass + else + local/download_focal.sh + fi +fi diff --git a/egs/lre22/open.v2.8k/run_003_prepare_noises_rirs.sh b/egs/lre22/open.v2.8k/run_003_prepare_noises_rirs.sh index 08d4d910..55da7f2a 100755 --- a/egs/lre22/open.v2.8k/run_003_prepare_noises_rirs.sh +++ b/egs/lre22/open.v2.8k/run_003_prepare_noises_rirs.sh @@ -23,7 +23,7 @@ if [ $stage -le 1 ]; then for name in musan_noise musan_music do steps_xvec/preprocess_audios_for_nnet_train.sh --nj 10 --cmd "$train_cmd" \ - --storage_name lre22-fixed-v2.8k-$(date +'%m_%d_%H_%M') \ + --storage_name lre22-open-v2.8k-$(date +'%m_%d_%H_%M') \ data/${name} data/${name}_proc_audio exp/${name}_proc_audio utils/fix_data_dir.sh data/${name}_proc_audio done @@ -36,7 +36,7 @@ if [ $stage -le 2 ]; then for name in musan_speech do steps_xvec/make_babble_noise_for_nnet_train.sh --cmd "$train_cmd" \ - --storage_name lre22-fixed-v2.8k-$(date +'%m_%d_%H_%M') \ + --storage_name lre22-open-v2.8k-$(date +'%m_%d_%H_%M') \ data/${name} data/${name}_babble exp/${name}_babble # utils/fix_data_dir.sh data/${name}_babble done @@ 
-44,8 +44,8 @@ fi if [ $stage -le 3 ]; then if [ ! -d "RIRS_NOISES" ]; then - if [ -d ../v1.8k/RIRS_NOISES ];then - ln -s ../v1.8k/RIRS_NOISES + if [ -d ../fixed.v1.8k/RIRS_NOISES ];then + ln -s ../fixed.v1.8k/RIRS_NOISES else # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip diff --git a/egs/lre22/open.v2.8k/run_011_train_xvector.sh b/egs/lre22/open.v2.8k/run_011_train_xvector.sh index 3a7a47a4..611a33ca 100755 --- a/egs/lre22/open.v2.8k/run_011_train_xvector.sh +++ b/egs/lre22/open.v2.8k/run_011_train_xvector.sh @@ -28,15 +28,12 @@ fi if [ "$use_tb" == "true" ];then extra_args="$extra_args --trainer.use-tensorboard" fi - -if [ "$interactive" == "true" ];then - export cuda_cmd=run.pl -fi - if [ "$use_wandb" == "true" ];then extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project lre22-open-v2.8k --trainer.wandb.name $nnet_s1_name.$(date -Iminutes)" fi - +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi # Network Training if [ $stage -le 1 ]; then diff --git a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh index aed40672..afd13d74 100755 --- a/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh +++ b/hyp_utils/xvectors/preprocess_audios_for_nnet_train.sh @@ -95,11 +95,10 @@ $cmd JOB=1:$nj $dir/log/preproc_audios_${name}.JOB.log \ preprocess_audio_files.py ${args} --audio-format $file_format $args $proc_opts \ --write-time-durs $output_dir/utt2dur.${name}.JOB \ --part-idx JOB --num-parts $nj \ - # --input $data_in/wav.scp \ --recordings-file $data_in/wav.scp \ --output-path $output_dir \ --output-recordings-file $output_dir/wav.${name}.JOB.scp - #--output-script $output_dir/wav.${name}.JOB.scp + for n in $(seq $nj); do cat $output_dir/wav.${name}.$n.scp || exit 1; From c2f0602f625f2e37dc4769a9953f2d113f31b625 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 8 Nov 2023 10:29:46 -0500 Subject: [PATCH 119/154] fix bug missing conformer xvector in __init__ --- hyperion/torch/models/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index fa4addcd..7292dbad 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -24,6 +24,7 @@ Wav2ResNet1dXVector, Wav2ResNetXVector, ) +from .xvectors.conformer_v1_xvector import ConformerV1XVector from .xvectors.efficient_net_xvector import EfficientNetXVector from .xvectors.resnet1d_xvector import ResNet1dXVector from .xvectors.resnet_xvector import ResNetXVector From 9fd2141bbdb956b28705dafd69f22a0605224361 Mon Sep 17 00:00:00 2001 From: System User Date: Wed, 8 Nov 2023 10:32:30 -0500 Subject: [PATCH 120/154] started to clean lre22/open.v2.8k --- egs/lre22/open.v2.8k/cmd.sh | 2 +- ...c2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml | 9 +++------ egs/lre22/open.v2.8k/conf/vad_8k.yaml | 9 +++++++++ ...vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml | 18 +++++++++++++++++- ...fig_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh | 6 +++--- 5 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 egs/lre22/open.v2.8k/conf/vad_8k.yaml diff --git a/egs/lre22/open.v2.8k/cmd.sh b/egs/lre22/open.v2.8k/cmd.sh index f22c66b4..15e4a015 100755 --- a/egs/lre22/open.v2.8k/cmd.sh +++ b/egs/lre22/open.v2.8k/cmd.sh @@ -15,7 +15,7 @@ if [ "$(hostname -d)" == "cm.gemini" ];then export train_cmd="queue.pl --config 
conf/coe_gpu_long.conf --mem 4G" export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 40G" - #export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" else diff --git a/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml b/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml index b8998830..a7f3b111 100644 --- a/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml +++ b/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml @@ -37,17 +37,16 @@ model: wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml trainer: optim: opt_type: sgd - lr: 0.45 + lr: 0.4 momentum: 0.9 weight_decay: 4e-4 lrsched: lrsch_type: exp_lr decay_rate: 0.5 - #decay_steps: 4200 - #hold_steps: 1500 decay_steps: 16000 hold_steps: 18000 - min_lr: 4e-4 + #min_lr: 4e-4 + min_lr: 1e-6 warmup_steps: 4000 update_lr_on_opt_step: true use_amp: true @@ -55,5 +54,3 @@ trainer: epochs: 12 eff_batch_size: 1024 train_mode: hf-feats-frozen-nograd - - \ No newline at end of file diff --git a/egs/lre22/open.v2.8k/conf/vad_8k.yaml b/egs/lre22/open.v2.8k/conf/vad_8k.yaml new file mode 100644 index 00000000..1cfe34b0 --- /dev/null +++ b/egs/lre22/open.v2.8k/conf/vad_8k.yaml @@ -0,0 +1,9 @@ +sample_frequency: 8000 +frame_shift: 10 +frame_length: 25 +snip_edges: false +vad_energy_threshold: -4.89 +vad_energy_mean_scale: 0.5 +vad_proportion_threshold: 0.12 +vad_frames_context: 2 +wav_scale: 1 diff --git a/egs/lre22/open.v2.8k/conf/wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml b/egs/lre22/open.v2.8k/conf/wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml index d8193f59..beb687d2 100644 --- a/egs/lre22/open.v2.8k/conf/wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml +++ b/egs/lre22/open.v2.8k/conf/wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml @@ -1,5 +1,22 @@ hf_feats: pretrained_model_path: facebook/wav2vec2-xls-r-300m +feat_fuser: + feat_fuser: + fuser_type: weighted-avg + mvn: + norm_mean: false + spec_augment: + time_mask_prob: 1. + time_mask_min_width: 0 + time_mask_max_width: 5 + time_mask_min_num_masks: 1 + time_mask_max_num_masks: 1 + freq_mask_prob: 1. 
+ freq_mask_min_width: 0 + freq_mask_max_width: 10 + freq_mask_min_num_masks: 1 + freq_mask_max_num_masks: 1 + mask_method: mean xvector: resnet_enc: in_feats: 1024 @@ -43,5 +60,4 @@ xvector: dropout_rate: 0.0 norm_before: false hid_act: swish -feat_fusion_method: weighted-avg feat_fusion_start: 2 diff --git a/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh b/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh index b39d817b..bf6c3528 100644 --- a/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh +++ b/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh @@ -13,14 +13,14 @@ nnet_data=open nnet_type=hf_wav2vec2resnet1d -nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v2.2.yaml +nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml nnet_s1_args="" -nnet_name=${hf_model_name}_ecapatdnn1024x3_v2.2 +nnet_name=${hf_model_name}_ecapatdnn1024x3_v1.0 nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/model_ep0011.pth +nnet_s1=$nnet_s1_dir/model_ep0012.pth nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage2_v2.2.yaml nnet_s2_args="" From 8446f7819284ea32c18ed34b6530f961f57a90b0 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 14 Nov 2023 12:13:22 -0500 Subject: [PATCH 121/154] fix param groups in hf w2vec2xvec --- egs/voxceleb/v2.1/README.md | 182 +++++++++++++++++ .../models/wav2xvectors/hf_wav2xvector.py | 186 +++++++++--------- 2 files changed, 276 insertions(+), 92 deletions(-) create mode 100644 egs/voxceleb/v2.1/README.md diff --git a/egs/voxceleb/v2.1/README.md b/egs/voxceleb/v2.1/README.md new file mode 100644 index 00000000..cb5b5368 --- /dev/null +++ b/egs/voxceleb/v2.1/README.md @@ -0,0 +1,182 @@ +# VoxCeleb V2.1 + +Recipe for the VoxCeleb Speaker Verification Task using Wav2Vec2, WavLM or Hubert models from HuggingFace as feature extractors + +## Differences w.r.t VoxCeleb V2 recipe + + - Kaldi format is replaced by new format based on pandas tables + - Kaldi style bash scripts are removed and replaced by python scripts + - Most python scripts are called using Hyperion entry points + +## Citing + +## Training Data + + - x-Vector network is trained on Voxceleb2 dev + test with augmentations + - MUSAN noise + - RIR reverberation + +## Test data + + - Test data is VoxCeleb 1 + - We evaluate the 3 conditions (with cleaned lists): + - VoxCeleb-O (Original): Original Voxceleb test set with 40 speakers + - VoxCeleb-E (Entire): List using all utterances of VoxCeleb1 + - VoxCeleb-H (Hard): List of hard trials between all utterances of VoxCeleb1, same gender and nationality trials. + +## Usage + + - Run the run_0*.sh scripts in sequence + - By default it will use config global_conf/config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh + - To use other configs: +```bash +run_005_train_xvector.sh --config-file global_conf/other_config.sh +run_006_extract_xvectors.sh --config-file global_conf/other_config.sh --use-gpu true +run_007_eval_be.sh --config-file global_conf/other_config.sh +``` + + +## Recipe Steps: + + - `run_001_prepare_data.sh` + - Data preparation script to generate Kaldi style data directories for + - VoxCeleb2 train+test + - VoxCeleb1 O/E/H eval sets + + - `run_002_compute_evad.sh` + - Computes Energy VAD for all datasets + + - `run_003_prepare_noises_rirs.sh` + - Prepares MUSAN noises, music to be used by SpeechAugment class. 
+ - Creates Babble noise from MUSAN speech to be used by SpeechAugment class.
+ - Prepares RIRs by compacting them into HDF5 files, to be used by SpeechAugment class.
+
+ - `run_004_prepare_xvec_train_data.sh`
+ - Transforms all the audios that we are going to use to train the x-vector into a common format, e.g., .flac.
+ - Removes silence from the audios
+ - Removes utterances shorter than 4 secs and speakers with fewer than 8 utterances.
+ - Creates training and validation lists for x-vector training
+
+ - `run_005_train_xvector.sh`
+ - Trains the x-vector model on frozen wav2vec features
+ - Finetunes wav2vec+x-vector model
+ - Large margin finetuning of wav2vec+x-vector model
+
+ - `run_006_extract_xvectors.sh`
+ - Extracts x-vectors for VoxCeleb2 or VoxCeleb2+augmentation for PLDA training
+ - Extracts x-vectors for VoxCeleb1 test sets
+
+ - `run_007_eval_be.sh`
+ - Trains PLDA and evals PLDA and cosine scoring back-ends
+
+
+## Results
+
+
+
+
+
+### VoxCeleb 1 Original-Clean trial list
+
+| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
+| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
+| config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.84 | 0.060 | 0.116 |
+| | | | Cosine + AS-Norm | 0.81 | 0.058 | 0.108 |
+| | | | Cosine + QMF | 0.75 | 0.054 | 0.086 |
+| config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | WavLM(layer=2-9)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.89 | 0.069 | 0.108 |
+| | | | Cosine + AS-Norm | 0.86 | 0.067 | 0.108 |
+| | | | Cosine + QMF | 0.77 | 0.066 | 0.105 |
+| config_wavlmlarge_ecapatdnn512x3_v2.0.sh | WavLM-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.74 | 0.057 | 0.085 |
+| | | | Cosine + AS-Norm | 0.73 | 0.055 | 0.093 |
+| | | | Cosine + QMF | 0.66 | 0.051 | 0.094 |
+| config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.74 | 0.053 | 0.080 |
+| | | | Cosine + AS-Norm | 0.71 | 0.050 | 0.087 |
+| | | | Cosine + QMF | 0.64 | 0.045 | 0.087 |
+| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.84 | 0.063 | 0.111 |
+| | | | Cosine + AS-Norm | 0.68 | 0.053 | 0.090 |
+| | | | Cosine + QMF | 0.63 | 0.048 | 0.071 |
+| config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.14 | 0.074 | 0.107 |
+| | | | Cosine + AS-Norm | 0.94 | 0.060 | 0.089 |
+| | | | Cosine + QMF | 0.89 | 0.054 | 0.076 |
+| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.1.sh | Wav2Vec2-XLSR300M(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.69 | 0.048 | 0.094 |
+| | | | Cosine + AS-Norm | 0.63 | 0.046 | 0.082 |
+| | | | Cosine + QMF | 0.57 | 0.041 | 0.076 |
+
+### VoxCeleb 1 Entire-Clean trial list
+
+| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
+| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
+| config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.81 | 0.051 | 0.087 |
+| | | | Cosine + AS-Norm | 0.78 | 0.047 | 0.083 |
+| | | | Cosine + QMF | 0.75 | 0.046 | 0.076 |
+| config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | 
WavLM(layer=2-9)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.89 | 0.056 | 0.099 |
+| | | | Cosine + AS-Norm | 0.86 | 0.053 | 0.090 |
+| | | | Cosine + QMF | 0.82 | 0.050 | 0.085 |
+| config_wavlmlarge_ecapatdnn512x3_v2.0.sh | WavLM-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.80 | 0.049 | 0.088 |
+| | | | Cosine + AS-Norm | 0.76 | 0.045 | 0.080 |
+| | | | Cosine + QMF | 0.73 | 0.043 | 0.078 |
+| config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.91 | 0.056 | 0.094 |
+| | | | Cosine + AS-Norm | 0.87 | 0.053 | 0.090 |
+| | | | Cosine + QMF | 0.83 | 0.050 | 0.086 |
+| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.80 | 0.050 | 0.086 |
+| | | | Cosine + AS-Norm | 0.73 | 0.045 | 0.074 |
+| | | | Cosine + QMF | 0.69 | 0.042 | 0.069 |
+| config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M(layer=2-12)-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.99 | 0.058 | 0.103 |
+| | | | Cosine + AS-Norm | 0.87 | 0.052 | 0.090 |
+| | | | Cosine + QMF | 0.83 | 0.050 | 0.085 |
+| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.1.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.72 | 0.044 | 0.079 |
+| | | | Cosine + AS-Norm | 0.68 | 0.040 | 0.068 |
+| | | | Cosine + QMF | 0.64 | 0.037 | 0.065 |
+
+### VoxCeleb 1 Hard-Clean trial list
+
+| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) |
+| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: |
+| config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.73 | 0.113 | 0.182 |
+| | | | Cosine + AS-Norm | 1.63 | 0.100 | 0.160 |
+| | | | Cosine + QMF | 1.56 | 0.096 | 0.155 |
+| config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | WavLM(layer=2-9)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.88 | 0.122 | 0.200 |
+| | | | Cosine + AS-Norm | 1.77 | 0.110 | 0.175 |
+| | | | Cosine + QMF | 1.66 | 0.104 | 0.168 |
+| config_wavlmlarge_ecapatdnn512x3_v2.0.sh | WavLM-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.67 | 0.103 | 0.165 |
+| | | | Cosine + AS-Norm | 1.54 | 0.093 | 0.152 |
+| | | | Cosine + QMF | 1.45 | 0.089 | 0.145 |
+| config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.78 | 0.106 | 0.174 |
+| | | | Cosine + AS-Norm | 1.70 | 0.099 | 0.162 |
+| | | | Cosine + QMF | 1.61 | 0.094 | 0.153 |
+| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.49 | 0.087 | 0.137 |
+| | | | Cosine + AS-Norm | 1.29 | 0.074 | 0.117 |
+| | | | Cosine + QMF | 1.22 | 0.069 | 0.111 |
+| config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M(layer=2-12)-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.84 | 0.107 | 0.172 |
+| | | | Cosine + AS-Norm | 1.47 | 0.083 | 0.128 |
+| | | | Cosine + QMF | 1.39 | 0.079 | 0.123 |
+| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.1.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.24 | 0.076 | 0.121 |
+| | | | Cosine + AS-Norm | 1.15 | 0.068 | 0.109 |
+| | | | Cosine + QMF | 1.09 | 0.065 | 0.107 |
+
+### 
VoxSRC2022 dev + +| Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | -------- | :----: | :------------: | :------------: | +| config_wavlmbaseplus_ecapatdnn512x3_v2.0.sh | WavLM+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.60 | 0.163 | 0.257 | +| | | | Cosine + AS-Norm | 2.43 | 0.150 | 0.244 | +| | | | Cosine + QMF | 2.31 | 0.143 | 0.232 | +| config_wavlmbaseplus9l_ecapatdnn512x3_v2.0.sh | WavLM(layer=2-9)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.82 | 0.183 | 0.286 | +| | | | Cosine + AS-Norm | 2.69 | 0.168 | 0.265 | +| | | | Cosine + QMF | 2.52 | 0.158 | 0.252 | +| config_wavlmlarge_ecapatdnn512x3_v2.0.sh | WavLM-Large+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.65 | 0.176 | 0.289 | +| | | | Cosine + AS-Norm | 2.55 | 0.171 | 0.292 | +| | | | Cosine + QMF | 2.38 | 0.159 | 0.266 | +| config_wavlmlarge12l_ecapatdnn512x3_v2.0.sh | WavLM-Large(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.62 | 0.153 | 0.251 | +| | | | Cosine + AS-Norm | 2.53 | 0.149 | 0.247 | +| | | | Cosine + QMF | 2.42 | 0.144 | 0.231 | +| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.25 | 0.136 | 0.225 | +| | | | Cosine + AS-Norm | 2.01 | 0.125 | 0.209 | +| | | | Cosine + QMF | 1.92 | 0.117 | 0.200 | +| config_wav2vec2xlsr300m12l_ecapatdnn512x3_v2.0.sh | Wav2Vec2-XLSR300M(layer=2-12)+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.83 | 0.175 | 0.276 | +| | | | Cosine + AS-Norm | 2.31 | 0.149 | 0.244 | +| | | | Cosine + QMF | 2.22 | 0.137 | 0.229 | +| config_wav2vec2xlsr300m_ecapatdnn512x3_v2.1.sh | Wav2Vec2-XLSR300M+ECAPA-TDNN 512x3 | Stage3: ArcFace m=0.4/intertop_m=0.1 | Cosine | 2.06 | 0.124 | 0.206 | +| | | | Cosine + AS-Norm | 1.97 | 0.125 | 0.212 | +| | | | Cosine + QMF | 1.87 | 0.120 | 0.204 | + diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index 2b4ef876..fc10f810 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -50,49 +50,49 @@ def _make_fuser(self, feat_fuser): feat_fuser["feat_fuser"]["feat_dim"] = feat_dim self.feat_fuser = FeatFuserMVN(**feat_fuser) - def _make_fuser_legacy(self): - if self.feat_fusion_method == "last": - self.feat_fuser = None - return - - num_layers = self.hf_feats.num_encoder_layers + 1 - self.feat_fusion_start - layer_dim = self.hf_feats.hidden_size - if self.feat_fusion_method == "weighted-avg": - self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) - elif self.feat_fusion_method == "linear": - self.feat_fuser = nn.Linear(num_layers, 1, bias=False) - self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers - elif self.feat_fusion_method == "cat": - self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) - - def _fuse_hid_feats_legacy(self, hid_feats): - """Fuses the hidden features from the Wav2Vec model. - - Args: - hid_feats: list of hidden features Tensors from Wav2Vec model. 
- - Returns: - Tensor of fused features (batch, channels, time) - """ - if len(hid_feats) == 1: - # There is only one layer of features - return hid_feats[0] - - hid_feats = hid_feats[self.feat_fusion_start :] - if self.feat_fusion_method == "weighted-avg": - hid_feats = torch.stack(hid_feats, dim=-1) - norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) - feats = torch.sum(hid_feats * norm_weights, dim=-1) - elif self.feat_fusion_method == "linear": - hid_feats = torch.stack(hid_feats, dim=-1) - feats = self.feat_fuser(hid_feats).squeeze(dim=-1) - elif self.feat_fusion_method == "cat": - hid_feats = torch.cat(hid_feats, dim=-1) - feats = self.feat_fuser(hid_feats) - elif self.feat_fusion_method == "last": - feats = hid_feats[-1] - - return feats + # def _make_fuser_legacy(self): + # if self.feat_fusion_method == "last": + # self.feat_fuser = None + # return + + # num_layers = self.hf_feats.num_encoder_layers + 1 - self.feat_fusion_start + # layer_dim = self.hf_feats.hidden_size + # if self.feat_fusion_method == "weighted-avg": + # self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) + # elif self.feat_fusion_method == "linear": + # self.feat_fuser = nn.Linear(num_layers, 1, bias=False) + # self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers + # elif self.feat_fusion_method == "cat": + # self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) + + # def _fuse_hid_feats_legacy(self, hid_feats): + # """Fuses the hidden features from the Wav2Vec model. + + # Args: + # hid_feats: list of hidden features Tensors from Wav2Vec model. + + # Returns: + # Tensor of fused features (batch, channels, time) + # """ + # if len(hid_feats) == 1: + # # There is only one layer of features + # return hid_feats[0] + + # hid_feats = hid_feats[self.feat_fusion_start :] + # if self.feat_fusion_method == "weighted-avg": + # hid_feats = torch.stack(hid_feats, dim=-1) + # norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) + # feats = torch.sum(hid_feats * norm_weights, dim=-1) + # elif self.feat_fusion_method == "linear": + # hid_feats = torch.stack(hid_feats, dim=-1) + # feats = self.feat_fuser(hid_feats).squeeze(dim=-1) + # elif self.feat_fusion_method == "cat": + # hid_feats = torch.cat(hid_feats, dim=-1) + # feats = self.feat_fuser(hid_feats) + # elif self.feat_fusion_method == "last": + # feats = hid_feats[-1] + + # return feats @property def sample_frequency(self): @@ -170,43 +170,43 @@ def forward_feats( return feats, hid_feats, feat_lengths - def forward_feats_legacy( - self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False - ): - return_hid_states = ( - False - if return_feat_layers is None and self.feat_fusion_method == "last" - else True - ) - with self._hf_context: - hf_output = self.hf_feats( - x, - x_lengths, - return_hid_states=return_hid_states, - chunk_length=chunk_length, - detach_chunks=detach_chunks, - ) - feat_lengths = hf_output["hidden_states_lengths"] - if return_hid_states: - hid_feats = hf_output["hidden_states"] - feats = self._fuse_hid_feats(hid_feats) - else: - hid_feats = None - feats = hf_output["last_hidden_state"] - - feats = feats.transpose(1, 2) - if return_feat_layers is not None: - # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time) - # as the hidden features of the x-vector encoder. 
- hid_feats = [
- f.transpose(1, 2)
- for i, f in enumerate(hid_feats)
- if i in return_feat_layers
- ]
- else:
- hid_feats = None
-
- return feats, hid_feats, feat_lengths
+ # def forward_feats_legacy(
+ # self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False
+ # ):
+ # return_hid_states = (
+ # False
+ # if return_feat_layers is None and self.feat_fusion_method == "last"
+ # else True
+ # )
+ # with self._hf_context:
+ # hf_output = self.hf_feats(
+ # x,
+ # x_lengths,
+ # return_hid_states=return_hid_states,
+ # chunk_length=chunk_length,
+ # detach_chunks=detach_chunks,
+ # )
+ # feat_lengths = hf_output["hidden_states_lengths"]
+ # if return_hid_states:
+ # hid_feats = hf_output["hidden_states"]
+ # feats = self._fuse_hid_feats(hid_feats)
+ # else:
+ # hid_feats = None
+ # feats = hf_output["last_hidden_state"]
+
+ # feats = feats.transpose(1, 2)
+ # if return_feat_layers is not None:
+ # # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time)
+ # # as the hidden features of the x-vector encoder.
+ # hid_feats = [
+ # f.transpose(1, 2)
+ # for i, f in enumerate(hid_feats)
+ # if i in return_feat_layers
+ # ]
+ # else:
+ # hid_feats = None
+
+ # return feats, hid_feats, feat_lengths
 def forward(
 self,
@@ -289,15 +289,16 @@ def extract_embed(
 )
 def freeze_feat_fuser(self):
- if self.feat_fuser is None:
- return
+ self.feat_fuser.freeze()
+ # if self.feat_fuser is None:
+ # return
- if self.feat_fusion_method == "weighted-avg":
- self.feat_fuser.requires_grad = False
- return
+ # if self.feat_fusion_method == "weighted-avg":
+ # self.feat_fuser.requires_grad = False
+ # return
- for param in self.feat_fuser.parameters():
- param.requires_grad = False
+ # for param in self.feat_fuser.parameters():
+ # param.requires_grad = False
 def freeze_hf_feats(self):
 self.hf_feats.freeze()
@@ -316,11 +317,12 @@ def trainable_param_groups(self):
 return self.trainable_parameters()
 param_groups = self.hf_feats.trainable_param_groups()
- if self.feat_fusion_method == "weighted-avg":
- if self.feat_fuser.requires_grad:
- param_groups.append({"params": self.feat_fuser})
- else:
- param_groups.append({"params": self.feat_fuser.parameters()})
+ param_groups.append({"params": self.feat_fuser.trainable_parameters()})
+ # if self.feat_fusion_method == "weighted-avg":
+ # if self.feat_fuser.requires_grad:
+ # param_groups.append({"params": self.feat_fuser})
+ # else:
+ # param_groups.append({"params": self.feat_fuser.parameters()})
 param_groups.append({"params": self.xvector.trainable_parameters()})
 return param_groups
From 398bf36b9afb6e8caaf369e27802db6d9061d36e Mon Sep 17 00:00:00 2001
From: System User
Date: Tue, 14 Nov 2023 12:18:10 -0500
Subject: [PATCH 122/154] update lre22 open.v1.8k fwseres2net50s8 configs, add open.v2.8k run_040_be_final.sh
---
 ...rain_fwseres2net50s8_xvec_stage1_v1.0.yaml | 2 +-
 ...onfig_fbank64_stmn_fwseres2net50s8_v1.0.sh | 31 +-
 .../open.v1.8k/run_030_extract_xvectors.sh | 4 +-
 egs/lre22/open.v2.8k/run_040_be_final.sh | 434 ++++++++++++++++++
 4 files changed, 440 insertions(+), 31 deletions(-)
 create mode 100755 egs/lre22/open.v2.8k/run_040_be_final.sh
diff --git a/egs/lre22/open.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml b/egs/lre22/open.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml
index 13ce9445..e501abdb 100644
--- a/egs/lre22/open.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml
+++ b/egs/lre22/open.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml
@@ -50,7 +50,7 @@ model:
 margin: 0.0
 intertop_margin: 0.0
 margin_warmup_epochs: 3.0
- dropout_rate: 0.0
+ dropout_rate: 0.05 
norm_before: false hid_act: swish trainer: diff --git a/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh b/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh index 6a735e4c..6f6bc98b 100644 --- a/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh +++ b/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh @@ -9,37 +9,12 @@ vad_config=conf/vad_8k.yaml nnet_data=open # x-vector cfg - nnet_type=resnet -nnet_stages=2 +nnet_stages=1 nnet_s1_base_cfg=conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml -nnet_name=${feat_type}_fwseres2net50s8_v1.0 +nnet_name=${feat_type}_fwseres2net50s8_v1.2 nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/swa_model_ep0012.pth -#nnet_s1=$nnet_s1_dir/model_ep0001.pth -nnet_s1=$nnet_s1_dir/model_ep0008.pth -nnet_s1=$nnet_s1_dir/model_ep0011.pth -nnet_s1=$nnet_s1_dir/model_ep0015.pth -nnet_s1=$nnet_s1_dir/swa_model_ep0016.pth - -nnet_s2_base_cfg=conf/train_tseres2net50s8_xvec_stage2_v1.0.yaml -nnet_s2_name=${nnet_name}.s2 -nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name -#nnet_s2=$nnet_s2_dir/swa_model_ep0013.pth -nnet_s2=$nnet_s2_dir/model_ep0001.pth -nnet_s2=$nnet_s2_dir/model_ep0002.pth -nnet_s2=$nnet_s2_dir/model_ep0004.pth -# nnet_s2=$nnet_s2_dir/model_ep0008.pth -# nnet_s2=$nnet_s2_dir/swa_model_ep0012.pth - -nnet_s3_base_cfg=conf/train_tseres2net50s8_xvec_stage3_v2.1.yaml -nnet_s3_name=${nnet_name}.s3 -nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name -#nnet_s3=$nnet_s3_dir/swa_model_ep0013.pth -#nnet_s3=$nnet_s3_dir/model_ep0007.pth -nnet_s3=$nnet_s3_dir/model_ep0001.pth -nnet_s3=$nnet_s3_dir/model_ep0004.pth -nnet_s3=$nnet_s3_dir/model_ep0008.pth +nnet_s1=$nnet_s1_dir/model_ep0012.pth diff --git a/egs/lre22/open.v1.8k/run_030_extract_xvectors.sh b/egs/lre22/open.v1.8k/run_030_extract_xvectors.sh index ea2c59f6..227d1047 100755 --- a/egs/lre22/open.v1.8k/run_030_extract_xvectors.sh +++ b/egs/lre22/open.v1.8k/run_030_extract_xvectors.sh @@ -8,10 +8,10 @@ set -e stage=2 -nnet_stage=2 +nnet_stage=1 config_file=default_config.sh use_gpu=false -do_tsne=true +do_tsne=false split_dev=false xvec_chunk_length=12800 . parse_options.sh || exit 1; diff --git a/egs/lre22/open.v2.8k/run_040_be_final.sh b/egs/lre22/open.v2.8k/run_040_be_final.sh new file mode 100755 index 00000000..fe5b6f18 --- /dev/null +++ b/egs/lre22/open.v2.8k/run_040_be_final.sh @@ -0,0 +1,434 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=2 +config_file=default_config.sh +. parse_options.sh || exit 1; +. 
$config_file + +if [ $nnet_stages -lt $nnet_stage ];then + nnet_stage=$nnet_stages +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +fi + +xvector_dir=exp/xvectors/$nnet_name +be_base_dir=exp/be/$nnet_name +score_base_dir=exp/scores/$nnet_name + +if [ $stage -le 1 ];then + for r in 1 #0.9999 0.999 #0.99 0.975 0.95 + do + be_name=pca${r}_cw_lnorm_lgbe_lre22_aug + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd \ + $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --pca.pca-var-r $r \ + --do-lnorm --whiten \ + --output-dir $be_dir_p + + $train_cmd \ + ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + + + ) & + + done + + ( + $train_cmd \ + $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --pca.pca-var-r $r \ + --do-lnorm --whiten \ + --output-dir $be_dir + + $train_cmd \ + ${score_dir}_p12/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd \ + ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v1.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + local/score_lre22.sh dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + ) & + + + done + wait + +fi + +exit +# Back-ends 
below over-fitted + +if [ $stage -le 2 ];then + for r in 1 + do + for penalty in l2 #l1 + do + for c in 1 #0.1 1 + do + for ary_thr in 0.975 #0.85 0.7 #0.99 0.95 0.9 #15 ##1 5 10 20 + do + be_name=pca${r}_cw_lnorm_lsvm_${penalty}_c${c}_sqhinge_lre22_aug_lre17_aryt${ary_thr} + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd \ + $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v3.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.penalty $penalty --svm.c $c --svm.dual false \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir_p + + $train_cmd \ + ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + ) & + done + ( + $train_cmd \ + $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v3.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.penalty $penalty --svm.c $c --svm.dual false \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir + + $train_cmd \ + ${score_dir}/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd \ + ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v2.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + local/score_lre22.sh \ + dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh \ + dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + ) & + done + done + done + done + wait + +fi + +if [ 
$stage -le 3 ];then + for r in 1 # 0.9999 0.99 0.975 0.95 0.9 0.8 + do + for shrinking in true #false + do + for c in 1 10 #0.1 1 10 #0.01 0.1 1 10 # 0.0001 + do + for vl in false #true #false + do + if [ "$vl" == "true" ];then + do_vl="--do-vl" + else + do_vl="--no_do-vl" + fi + ary_thr=0.975 + be_name=pca${r}_cw_lnorm_gsvm_shrinking_${shrinking}_c${c}_lre17_aryt${ary_thr}_vl${vl}_aug_clean + be_dir=$be_base_dir/$be_name + score_dir=$score_base_dir/$be_name + #score_dir=$score_base_dir/${be_name}_logpost + ( + for p_trn in p1 p2 + do + + if [ "$p_trn" == "p1" ];then + p_test="p2" + else + p_test="p1" + fi + + be_dir_p=${be_dir}_$p_trn + ( + $train_cmd $be_dir_p/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean_$p_trn/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --voxlingua-v-file scp:$xvector_dir/voxlingua107_codecs_proc_audio_no_sil/xvector.scp \ + --voxlingua-list data/voxlingua107_codecs_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.shrinking $shrinking --svm.c $c --svm.break_ties false --svm.max-iter 500\ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir_p \ + --do-lre17 $do_vl + + $train_cmd ${score_dir}_p12/test_${p_test}.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev_$p_test/utt2lang \ + --svm.eval-type cat-log-post \ + --has-labels \ + --model-dir $be_dir_p \ + --score-file ${score_dir}_p12/nocal/lre22_dev_${p_test}_scores.tsv + ) & + done + ( + $train_cmd $be_dir/train.log \ + hyp_utils/conda_env.sh \ + steps_be/train_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev_aug_clean/xvector.scp \ + --train-list data/lre22_dev_aug_clean/utt2lang \ + --lre17-v-file scp:$xvector_dir/lre17_proc_audio_no_sil/xvector.scp \ + --lre17-list data/lre17_proc_audio_no_sil/utt2lang \ + --voxlingua-v-file scp:$xvector_dir/voxlingua107_codecs_proc_audio_no_sil/xvector.scp \ + --voxlingua-list data/voxlingua107_codecs_proc_audio_no_sil/utt2lang \ + --pca.pca-var-r $r \ + --svm.shrinking $shrinking --svm.c $c --svm.break_ties false --svm.max-iter 500 \ + --do-lnorm --whiten --ary-thr $ary_thr \ + --output-dir $be_dir \ + --do-lre17 $do_vl + + $train_cmd ${score_dir}/test_dev.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_dev/xvector.scp \ + --trial-list data/lre22_dev/utt2lang \ + --svm.eval-type cat-log-post \ + --has-labels \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_dev_scores.tsv + + $train_cmd ${score_dir}/test_eval.log \ + hyp_utils/conda_env.sh \ + steps_be/eval_be_v5.py \ + --v-file scp:$xvector_dir/lre22_eval/xvector.scp \ + --trial-list data/lre22_eval/utt2spk \ + --svm.eval-type cat-log-post \ + --model-dir $be_dir \ + --score-file ${score_dir}/nocal/lre22_eval_scores.tsv + + ) & + + wait + hyp_utils/conda_env.sh \ + local/merge_scores.py \ + --in-score-files ${score_dir}_p12/nocal/lre22_dev_p{1,2}_scores.tsv \ + --out-score-file ${score_dir}_p12/nocal/lre22_dev_scores.tsv + + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/nocal/lre22_dev_scores.tsv \ + ${score_dir}_p12/nocal/lre22_dev_results + + local/train_calibration_lre22.sh ${score_dir}_p12 + local/score_lre22.sh \ + dev \ + ${score_dir}_p12/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}_p12/cal_v1/lre22_dev_results + + 
local/score_lre22.sh \ + dev \ + ${score_dir}/nocal/lre22_dev_scores.tsv \ + ${score_dir}/nocal/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/nocal/lre22_eval_scores.tsv \ + ${score_dir}/nocal/lre22_eval_results + + local/eval_calibration_lre22.sh $score_dir ${score_dir}_p12/cal_v1/cal.mat + local/score_lre22.sh \ + dev \ + ${score_dir}/cal_v1/lre22_dev_scores.tsv \ + ${score_dir}/cal_v1/lre22_dev_results + local/score_lre22.sh \ + eval \ + ${score_dir}/cal_v1/lre22_eval_scores.tsv \ + ${score_dir}/cal_v1/lre22_eval_results + + # local/validate_lre22.sh \ + # ${score_dir}/cal_v1/lre22_eval_scores.tsv + + + ) & + done + done + done + done + wait + +fi From e49157dfa1420091e603bf832abfe4e5acf1d158 Mon Sep 17 00:00:00 2001 From: System User Date: Thu, 30 Nov 2023 13:11:57 -0500 Subject: [PATCH 123/154] clean up recipe lre22/open.v2.8k --- egs/lre22/open.v1.8k/README.md | 6 +- ...onfig_fbank64_stmn_fwseres2net50s8_v1.0.sh | 2 +- egs/lre22/open.v1.8k/run_050_fusion_v1.sh | 43 +++++++++++++ egs/lre22/open.v2.8k/README.md | 58 +++++++++++++++++ ...2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml | 2 +- ...2xlsr300m_ecapatdnn1024x3_stage2_v1.0.yaml | 62 +++++++++++++++++++ ...ig_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh | 15 ++--- egs/lre22/open.v2.8k/run_050_fusion_v1.sh | 46 ++++++++++++++ 8 files changed, 219 insertions(+), 15 deletions(-) create mode 100755 egs/lre22/open.v1.8k/run_050_fusion_v1.sh create mode 100644 egs/lre22/open.v2.8k/README.md create mode 100644 egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage2_v1.0.yaml create mode 100755 egs/lre22/open.v2.8k/run_050_fusion_v1.sh diff --git a/egs/lre22/open.v1.8k/README.md b/egs/lre22/open.v1.8k/README.md index 9ad41229..d55ced4e 100644 --- a/egs/lre22/open.v1.8k/README.md +++ b/egs/lre22/open.v1.8k/README.md @@ -1,4 +1,4 @@ -# LRE22 Fixed Condition V1 +# LRE22 Open Condition V1 Recipe for the NIST LRE22 open condition based to the JHU-MIT Submission. 
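All three back-end stages in run_040_be_final.sh above follow the same two-fold scheme: a back-end trained on dev partition p1 scores p2 and vice versa, local/merge_scores.py merges the two held-out score sets, and the calibration fitted on that merge is applied to the eval scores produced by the back-end retrained on the full dev set. Below is a minimal NumPy/scikit-learn sketch of the held-out scoring step; LogisticRegression is only a stand-in for the recipe's Gaussian and SVM back-ends, the function name is illustrative rather than a Hyperion API, and it assumes every language occurs in both halves.

```python
# Sketch (not Hyperion code) of the 2-fold held-out scoring in run_040_be_final.sh:
# each half of lre22_dev is scored by a back-end trained on the other half, so the
# merged dev scores are unbiased when the calibration is trained on them.
import numpy as np
from sklearn.linear_model import LogisticRegression

def two_fold_dev_scores(x, y, is_p1):
    """x: (n, d) x-vectors, y: (n,) language labels, is_p1: (n,) boolean mask."""
    scores = np.zeros((x.shape[0], len(np.unique(y))))
    for train_mask in (is_p1, ~is_p1):
        # train on one half, score the complementary half it never saw
        backend = LogisticRegression(max_iter=1000).fit(x[train_mask], y[train_mask])
        scores[~train_mask] = backend.predict_log_proba(x[~train_mask])
    return scores  # merged held-out scores; the calibration is fitted on these
```

Fitting the calibration on these held-out scores, instead of on scores the back-end produced for its own training data, avoids an optimistically biased calibration; the final eval scores then come from the back-end retrained on the full dev set.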
@@ -51,5 +51,5 @@ run_040_be_final.sh --config-file global_conf/config_fbank64_stmn_fwseres2net50s
 | Config | Model Type | Model Details | Back-end | Dev MinCp | Dev ActCp | Eval MinCp | Eval ActCp |
 | ------ | ---------- | ------------- | -------- | :-------: | :-------: | :--------: | :--------: |
 | config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh | ECAPA-TDNN 2048x4 | Stage-1 | GBE | 0.100 | 0.101 | 0.105 | 0.106 |
-| config_fbank64_stmn_fwseres2net50s8_v1.0.sh | fw-SE Res2Net50 scale=8 | Stage-1 | GBE |
-| Fusion ECAPA-TDNN + FwSE Res2Net50 | | | FoCal |
+| config_fbank64_stmn_fwseres2net50s8_v1.0.sh | fw-SE Res2Net50 scale=8 | Stage-1 | GBE | 0.092 | 0.093 | 0.103 | 0.104 |
+| Fusion ECAPA-TDNN + FwSE Res2Net50 | | | FoCal | 0.082 | 0.083 | 0.089 | 0.090 |
diff --git a/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh b/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh
index 6f6bc98b..352cd1a6 100644
--- a/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh
+++ b/egs/lre22/open.v1.8k/global_conf/config_fbank64_stmn_fwseres2net50s8_v1.0.sh
@@ -13,7 +13,7 @@ nnet_type=resnet
 nnet_stages=1
 nnet_s1_base_cfg=conf/train_fwseres2net50s8_xvec_stage1_v1.0.yaml
-nnet_name=${feat_type}_fwseres2net50s8_v1.2
+nnet_name=${feat_type}_fwseres2net50s8_v1.0
 nnet_s1_name=$nnet_name.s1
 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name
 nnet_s1=$nnet_s1_dir/model_ep0012.pth
diff --git a/egs/lre22/open.v1.8k/run_050_fusion_v1.sh b/egs/lre22/open.v1.8k/run_050_fusion_v1.sh
new file mode 100755
index 00000000..5f9a1624
--- /dev/null
+++ b/egs/lre22/open.v1.8k/run_050_fusion_v1.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Copyright
+# 2020 Johns Hopkins University (Author: Jesus Villalba)
+# Apache 2.0.
+#
+. ./cmd.sh
+. ./path.sh
+set -e
+
+score_dir_0=exp/scores
+nnet_1=fbank64_stmn_ecapatdnn2048x4_v1.0.s1
+nnet_2=fbank64_stmn_fwseres2net50s8_v1.0.s1
+be_1=pca1_cw_lnorm_lgbe_lre22_aug
+score_dirs="$score_dir_0/$nnet_1/$be_1
+$score_dir_0/$nnet_2/$be_1"
+
+train_score_dirs=$(echo $score_dirs | awk '{ for(i=1;i<=NF;i++){ $i=$i"_p12/cal_v1" }; print $0}')
+test_score_dirs=$(echo $score_dirs | awk '{ for(i=1;i<=NF;i++){ $i=$i"/cal_v1" }; print $0}')
+
+output_dir=exp/fusion/fus_v1.0
+
+local/train_fusion_lre22.sh "$train_score_dirs" $output_dir/train
+local/score_lre22.sh \
+ dev \
+ ${output_dir}/train/lre22_dev_scores.tsv \
+ ${output_dir}/train/lre22_dev_results
+
+local/eval_fusion_lre22.sh "$test_score_dirs" $output_dir/train/fus.mat $output_dir/test
+
+local/score_lre22.sh \
+ dev \
+ ${output_dir}/test/lre22_dev_scores.tsv \
+ ${output_dir}/test/lre22_dev_results
+
+local/score_lre22.sh eval \
+ ${output_dir}/test/lre22_eval_scores.tsv \
+ ${output_dir}/test/lre22_eval_results
+
+
+
+
+
diff --git a/egs/lre22/open.v2.8k/README.md b/egs/lre22/open.v2.8k/README.md
new file mode 100644
index 00000000..c500d811
--- /dev/null
+++ b/egs/lre22/open.v2.8k/README.md
@@ -0,0 +1,58 @@
+# LRE22 Open Condition V2
+
+Recipe for the NIST LRE22 open condition based on the JHU-MIT Submission, using Hugging Face Wav2Vec + x-vector model.
+
+## Citing
+```
+@inproceedings{villalba23_interspeech,
+ author={Jesús Villalba and Jonas Borgstrom and Maliha Jahan and Saurabh Kataria and Leibny Paola Garcia and Pedro Torres-Carrasquillo and Najim Dehak},
+ title={{Advances in Language Recognition in Low Resource African Languages: The JHU-MIT Submission for NIST LRE22}},
+ year=2023,
+ booktitle={Proc. 
INTERSPEECH 2023},
+ pages={521--525},
+ doi={10.21437/Interspeech.2023-1094}
+}
+```
+
+## Training Data
+
+ - x-Vector networks trained on:
+ - VoxLingua107
+ - NIST LRE17 Train + Dev + Eval / CTS + AfV without Maghrebi Arabic
+ - NIST SRE16
+ - NIST SRE18
+ - NIST SRE19 CMN2
+ - NIST SRE21
+ - NIST SRE CTS Superset
+ - IARPA Babel
+ - Fleurs
+ - LWAZI 2009
+ - NCHLT 2014
+ - AMMI 2020
+ - CommonVoice Tigrinya, Indian English, French
+ - ADI 2017
+ - AST
+ - Gaussian back-end trained on:
+ - NIST LRE22 dev with 2-fold cross-val + x10 augmentations
+
+## Usage
+
+ - Run the run_0*.sh scripts in sequence
+ - By default it uses Wav2Vec2 XLSR 300M
+ - To change the default network, run the scripts with the config-file argument:
+```bash
+run_011_train_xvector.sh --config-file global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh
+run_030_extract_xvectors.sh --config-file global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh --use-gpu true
+run_040_be_final.sh --config-file global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh
+```
+
+## Results
+
+| Config | Model Type | Model Details | Back-end | Dev MinCp | Dev ActCp | Eval MinCp | Eval ActCp |
+| ------ | ---------- | ------------- | -------- | :-------: | :-------: | :--------: | :--------: |
+| config_fbank64_stmn_ecapatdnn2048x4_v1.0.sh | ECAPA-TDNN 2048x4 | Stage-1 | GBE | 0.100 | 0.101 | 0.105 | 0.106 |
+| config_fbank64_stmn_fwseres2net50s8_v1.0.sh | fw-SE Res2Net50 scale=8 | Stage-1 | GBE | 0.092 | 0.093 | 0.103 | 0.104 |
+| Fusion ECAPA-TDNN + FwSE Res2Net50 | | | FoCal | 0.082 | 0.083 | 0.089 | 0.090 |
+| config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh | Wav2Vec2 XLSR 300M + ECAPA-TDNN 1024x3 | Stage-1 | GBE | 0.088 | 0.089 | 0.106 | 0.107 |
+| " | " | Stage-2 | GBE | 0.083 | 0.085 | 0.089 | 0.090 |
+| Fusion ECAPA-TDNN + FwSE Res2Net50 + Wav2Vec2 | | | FoCal | 0.069 | 0.072 | 0.076 | 0.077 |
diff --git a/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml b/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml
index a7f3b111..d33e30f4 100644
--- a/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml
+++ b/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml
@@ -37,7 +37,7 @@ model: wav2vec2xlsr300m_ecapatdnn1024x3_subcenter.yaml
 trainer:
 optim:
 opt_type: sgd
- lr: 0.4
+ lr: 0.04
 momentum: 0.9
 weight_decay: 4e-4
 lrsched:
diff --git a/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage2_v1.0.yaml b/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage2_v1.0.yaml
new file mode 100644
index 00000000..090093b3
--- /dev/null
+++ b/egs/lre22/open.v2.8k/conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage2_v1.0.yaml
@@ -0,0 +1,62 @@
+data:
+ train:
+ dataset:
+ class_names:
+ - class_id
+ aug_cfgs:
+ - conf/reverb_noise_aug.yaml
+ return_segment_info:
+ - class_id
+ target_sample_freq: 16000
+ wav_scale: 1
+ sampler:
+ sampler_type: seg_chunk_sampler
+ min_batch_size: 32
+ max_chunk_length: 3.0
+ min_chunk_length: 3.0
+ data_loader:
+ num_workers: 8
+ val:
+ dataset:
+ class_names:
+ - class_id
+ aug_cfgs:
+ - conf/reverb_noise_aug.yaml
+ return_segment_info:
+ - class_id
+ target_sample_freq: 16000
+ wav_scale: 1
+ sampler:
+ sampler_type: seg_chunk_sampler
+ min_batch_size: 32
+ max_chunk_length: 3.0
+ min_chunk_length: 3.0
+ data_loader:
+ num_workers: 8
+model:
+ xvector:
+ loss_type: subcenter-arc-softmax
+ num_subcenters: 2
+ cos_scale: 32.0
+ margin: 0.0
+ 
margin_warmup_epochs: 0 + intertop_margin: 0. +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 1e-4 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 20000 + min_lr: 1e-6 + warmup_steps: 10000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 6 + eff_batch_size: 512 + train_mode: full diff --git a/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh b/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh index bf6c3528..910b4bad 100644 --- a/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh +++ b/egs/lre22/open.v2.8k/global_conf/config_wav2vec2xlr300m_ecapatdnn1024x3_v1.0.sh @@ -10,7 +10,7 @@ vad_config=conf/vad_8k.yaml nnet_data=open # x-vector cfg - +nnet_stages=2 nnet_type=hf_wav2vec2resnet1d nnet_s1_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage1_v1.0.yaml @@ -22,15 +22,10 @@ nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name nnet_s1=$nnet_s1_dir/model_ep0012.pth -nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage2_v2.2.yaml +nnet_s2_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage2_v1.0.yaml nnet_s2_args="" +nnet_name=${hf_model_name}_ecapatdnn1024x3_v1.0 nnet_s2_name=${nnet_name}.s2 nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name -nnet_s2=$nnet_s2_dir/model_ep0008.pth - -nnet_s3_base_cfg=conf/train_wav2vec2xlsr300m_ecapatdnn1024x3_stage3_v2.2.yaml -nnet_s3_args="" -nnet_s3_name=${nnet_name}.s3 -nnet_s3_dir=exp/xvector_nnets/$nnet_s3_name -nnet_s3=$nnet_s3_dir/model_ep0002.pth -nnet_s3=$nnet_s3_dir/model_ep0005.pth +nnet_s2=$nnet_s2_dir/model_ep0006.pth + diff --git a/egs/lre22/open.v2.8k/run_050_fusion_v1.sh b/egs/lre22/open.v2.8k/run_050_fusion_v1.sh new file mode 100755 index 00000000..056c2f0b --- /dev/null +++ b/egs/lre22/open.v2.8k/run_050_fusion_v1.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. 
./path.sh +set -e + +score_dir_fixed=../open.v1.8k/exp/scores +score_dir_0=exp/scores +nnet_1=fbank64_stmn_ecapatdnn2048x4_v1.0.s1 +nnet_2=fbank64_stmn_fwseres2net50s8_v1.0.s1 +nnet_3=wav2vec2xlsr300m_ecapatdnn1024x3_v1.0.s2 +be_1=pca1_cw_lnorm_lgbe_lre22_aug +score_dirs="$score_dir_fixed/$nnet_1/$be_1 +$score_dir_fixed/$nnet_2/$be_1 +$score_dir_0/$nnet_3/$be_1" + +train_score_dirs=$(echo $score_dirs | awk '{ for(i=1;i<=NF;i++){ $i=$i"_p12/cal_v1" }; print $0}') +test_score_dirs=$(echo $score_dirs | awk '{ for(i=1;i<=NF;i++){ $i=$i"/cal_v1" }; print $0}') + +output_dir=exp/fusion/fus_v1.0 + +local/train_fusion_lre22.sh "$train_score_dirs" $output_dir/train +local/score_lre22.sh \ + dev \ + ${output_dir}/train/lre22_dev_scores.tsv \ + ${output_dir}/train/lre22_dev_results + +local/eval_fusion_lre22.sh "$test_score_dirs" $output_dir/train/fus.mat $output_dir/test + +local/score_lre22.sh \ + dev \ + ${output_dir}/test/lre22_dev_scores.tsv \ + ${output_dir}/test/lre22_dev_results + +local/score_lre22.sh eval \ + ${output_dir}/test/lre22_eval_scores.tsv \ + ${output_dir}/test/lre22_eval_results + + + + + + From ae47ce6a5b51e64012743c62f4cd28582cd5b711 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 8 Dec 2023 19:48:38 -0500 Subject: [PATCH 124/154] dino seems to be working --- ...rain_ecapatdnn2048x4_xvec_stage2_v1.0.yaml | 1 + ...train_res2net50w26s4_xvec_stage2_v1.0.yaml | 1 + ...train_res2net50w26s8_xvec_stage2_v1.0.yaml | 1 + ...in_tseres2net50w26s4_xvec_stage2_v1.0.yaml | 1 + ...rain_ecapatdnn2048x4_xvec_stage2_v1.0.yaml | 1 + ...train_res2net50w26s8_xvec_stage2_v1.0.yaml | 1 + ...in_tseres2net50w26s4_xvec_stage2_v1.0.yaml | 1 + egs/voxceleb/ssl.v1/run_005_train_dino.sh | 81 ++-- .../ssl.v1/run_006_extract_dino_embeds.sh | 139 ++++++ .../train_cfwseresnet34_xvec_stage2_v3.0.yaml | 1 + .../train_cwseresnet34_xvec_stage2_v3.0.yaml | 1 + ...rain_ecapatdnn2048x4_xvec_stage2_v2.0.yaml | 6 +- ...rain_ecapatdnn2048x4_xvec_stage2_v3.0.yaml | 1 + ...train_ecapatdnn512x3_xvec_stage2_v2.0.yaml | 1 + ...train_ecapatdnn512x3_xvec_stage2_v3.0.yaml | 1 + .../train_fwseresnet34_xvec_stage2_v3.0.yaml | 1 + ...rain_idrnd_resnet100_xvec_stage2_v2.0.yaml | 1 + ...rain_idrnd_resnet100_xvec_stage2_v3.0.yaml | 1 + ...rain_idrnd_resnet202_xvec_stage2_v2.0.yaml | 1 + ...train_res2net50w26s4_xvec_stage2_v3.0.yaml | 1 + ...train_res2net50w26s8_xvec_stage2_v3.0.yaml | 1 + .../train_tseresnet34_xvec_stage2_v3.0.yaml | 1 + .../train_cfwseresnet34_xvec_stage2_v3.0.yaml | 1 + .../train_cwseresnet34_xvec_stage2_v3.0.yaml | 1 + ...rain_ecapatdnn2048x4_xvec_stage2_v3.0.yaml | 1 + ...train_ecapatdnn512x3_xvec_stage2_v3.0.yaml | 1 + .../train_fwseresnet34_xvec_stage2_v3.0.yaml | 1 + ...rain_idrnd_resnet100_xvec_stage2_v3.0.yaml | 1 + .../conf/train_resnet34_xvec_stage2_v3.0.yaml | 1 + .../train_tseresnet34_xvec_stage2_v3.0.yaml | 1 + .../config_fbank80_stmn_resnet34.v3.0.sh | 2 +- egs/voxceleb/v2.1/cmd.sh | 3 +- ...lmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml | 1 + ...lmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml | 1 + ...lmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml | 1 + ...lmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml | 1 + ...avlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml | 1 + ...avlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml | 1 + ...wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml | 1 + ...wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml | 1 + ...wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml | 1 + ...wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml | 4 +- ...rge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml | 1 + 
...rge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml | 1 + ...c2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml | 1 + ...c2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml | 1 + ...vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml | 1 + ...vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml | 1 + ...lmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml | 1 + ...lmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml | 1 + ...lmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml | 1 + ...lmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml | 1 + ...lmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml | 1 + ...lmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml | 1 + ...lmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml | 1 + ...wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml | 1 + ...wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml | 1 + ...wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml | 1 + ...wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml | 1 + hyperion/bin/extract_wav2xvectors.py | 6 +- hyperion/bin/train_dino_wav2xvector.py | 232 ++++++++++ hyperion/bin/train_wav2rnn_transducer.py | 4 +- hyperion/bin/train_wav2vec2rnn_transducer.py | 4 +- hyperion/bin/train_wav2vec2xvector.py | 4 +- hyperion/bin/train_wav2xvector.py | 4 +- hyperion/bin/train_xvector_from_feats.py | 4 +- hyperion/bin/train_xvector_from_wav.py | 4 +- hyperion/torch/data/__init__.py | 4 + hyperion/torch/data/audio_dataset.py | 51 ++- hyperion/torch/data/dino_audio_dataset.py | 352 +++++++++++++++ hyperion/torch/data/embed_dataset.py | 9 +- hyperion/torch/data/hyp_sampler.py | 12 +- hyperion/torch/data/seg_chunk_sampler.py | 17 +- .../layer_blocks/conformer_encoder_v1.py | 6 + hyperion/torch/layers/__init__.py | 8 +- hyperion/torch/layers/activation_factory.py | 4 + hyperion/torch/layers/global_pool.py | 49 ++- hyperion/torch/layers/norm_layer_factory.py | 2 +- hyperion/torch/layers/pos_encoder.py | 56 ++- hyperion/torch/layers/swish.py | 23 +- hyperion/torch/losses/__init__.py | 1 + hyperion/torch/losses/dino_loss.py | 164 +++++++ hyperion/torch/lr_schedulers/factory.py | 94 ++--- .../torch/models/transducer/rnn_transducer.py | 89 ++-- .../models/wav2xvectors/hf_wav2xvector.py | 17 +- .../wav2xvectors/wav2conformer_v1_xvector.py | 18 + .../wav2xvectors/wav2resnet1d_xvector.py | 22 +- .../models/wav2xvectors/wav2resnet_xvector.py | 22 +- .../torch/models/wav2xvectors/wav2xvector.py | 23 +- .../models/xvectors/conformer_v1_xvector.py | 35 ++ .../models/xvectors/efficient_net_xvector.py | 48 ++- .../torch/models/xvectors/resnet1d_xvector.py | 69 ++- .../torch/models/xvectors/resnet_xvector.py | 45 +- .../torch/models/xvectors/spinenet_xvector.py | 40 +- .../torch/models/xvectors/tdnn_xvector.py | 39 +- .../models/xvectors/transformer_xvector_v1.py | 66 ++- hyperion/torch/models/xvectors/xvector.py | 318 +++++++++++--- hyperion/torch/narchs/__init__.py | 2 + hyperion/torch/narchs/audio_feats_mvn.py | 4 +- hyperion/torch/narchs/classif_head.py | 25 +- hyperion/torch/narchs/conformer_encoder_v1.py | 87 +++- hyperion/torch/narchs/dino_head.py | 337 +++++++++++++++ hyperion/torch/narchs/feat_fuser_mvn.py | 4 +- hyperion/torch/narchs/proj_head.py | 149 +++++++ .../torch/narchs/rnn_transducer_decoder.py | 48 ++- hyperion/torch/optim/__init__.py | 1 + hyperion/torch/optim/factory.py | 54 ++- hyperion/torch/optim/radam.py | 3 - hyperion/torch/torch_model.py | 28 +- hyperion/torch/trainers/__init__.py | 4 +- hyperion/torch/trainers/ae_trainer.py | 50 +-- .../torch/trainers/dino_xvector_trainer.py | 385 +++++++++++++++++ hyperion/torch/trainers/dvae_trainer.py | 18 +- hyperion/torch/trainers/plda_trainer.py | 44 +- hyperion/torch/trainers/torch_trainer.py | 399 
++++++++++++++---- hyperion/torch/trainers/transducer_trainer.py | 58 ++- hyperion/torch/trainers/vae_trainer.py | 19 +- hyperion/torch/trainers/vq_dvae_trainer.py | 51 +-- hyperion/torch/trainers/vq_vae_trainer.py | 20 +- .../torch/trainers/xvector_adv_trainer.py | 50 +-- .../trainers/xvector_adv_trainer_from_wav.py | 49 +-- hyperion/torch/trainers/xvector_trainer.py | 16 +- .../trainers/xvector_trainer_deep_feat_reg.py | 47 +-- .../xvector_trainer_deep_feat_reg_from_wav.py | 48 +-- .../trainers/xvector_trainer_from_wav.py | 8 +- hyperion/torch/utils/ddp.py | 4 +- hyperion/utils/dataset.py | 24 +- 127 files changed, 3394 insertions(+), 895 deletions(-) create mode 100755 egs/voxceleb/ssl.v1/run_006_extract_dino_embeds.sh create mode 100755 hyperion/bin/train_dino_wav2xvector.py create mode 100644 hyperion/torch/data/dino_audio_dataset.py create mode 100644 hyperion/torch/losses/dino_loss.py create mode 100644 hyperion/torch/narchs/dino_head.py create mode 100644 hyperion/torch/narchs/proj_head.py create mode 100644 hyperion/torch/trainers/dino_xvector_trainer.py diff --git a/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml index e7f9969b..30483a8b 100644 --- a/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml +++ b/egs/sre21-av-a/v1.16k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.5 margin_warmup_epochs: 3 diff --git a/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml index 9884bb4c..49f84a6a 100644 --- a/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml +++ b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s4_xvec_stage2_v1.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.5 margin_warmup_epochs: 3 diff --git a/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml index f34b4896..c85c0e7b 100644 --- a/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml +++ b/egs/sre21-av-a/v1.16k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.5 margin_warmup_epochs: 3 diff --git a/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml index f34b4896..c85c0e7b 100644 --- a/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml +++ b/egs/sre21-av-a/v1.16k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.5 margin_warmup_epochs: 3 diff --git a/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml index 031e9ca3..72dec1b7 100644 --- a/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml +++ b/egs/sre21-av-a/v1.8k/conf/train_ecapatdnn2048x4_xvec_stage2_v1.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank64_stmn_8k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.5 margin_warmup_epochs: 3 diff 
--git a/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml index 16203033..1b917e2c 100644 --- a/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml +++ b/egs/sre21-av-a/v1.8k/conf/train_res2net50w26s8_xvec_stage2_v1.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank64_stmn_8k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.5 margin_warmup_epochs: 3 diff --git a/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml b/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml index f34b4896..c85c0e7b 100644 --- a/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml +++ b/egs/sre21-av-a/v1.8k/conf/train_tseres2net50w26s4_xvec_stage2_v1.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.5 margin_warmup_epochs: 3 diff --git a/egs/voxceleb/ssl.v1/run_005_train_dino.sh b/egs/voxceleb/ssl.v1/run_005_train_dino.sh index eb1c591e..58399159 100755 --- a/egs/voxceleb/ssl.v1/run_005_train_dino.sh +++ b/egs/voxceleb/ssl.v1/run_005_train_dino.sh @@ -37,17 +37,16 @@ if [ "$interactive" == "true" ];then export cuda_cmd=run.pl fi -# Network Training +# # Network Training if [ $stage -le 1 ]; then mkdir -p $nnet_s1_dir/log $cuda_cmd \ --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - hyperion-train-wav2vec2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + hyperion-train-dino-wav2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ --data.train.dataset.segments-file $train_data_dir/segments.csv \ - --data.train.dataset.class-files $train_data_dir/speaker.csv \ --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ --data.val.dataset.segments-file $val_data_dir/segments.csv \ --trainer.exp-path $nnet_s1_dir \ @@ -56,44 +55,44 @@ if [ $stage -le 1 ]; then fi -# Finetune full model -if [ $stage -le 2 ]; then - if [ "$use_wandb" == "true" ];then - extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" - fi - mkdir -p $nnet_s2_dir/log - $cuda_cmd \ - --gpu $ngpu $nnet_s2_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - hyperion-finetune-wav2vec2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ - --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ - --data.train.dataset.segments-file $train_data_dir/segments.csv \ - --data.train.dataset.class-files $train_data_dir/speaker.csv \ - --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ - --data.val.dataset.segments-file $val_data_dir/segments.csv \ - --in-model-file $nnet_s1 \ - --trainer.exp-path $nnet_s2_dir \ - --num-gpus $ngpu \ +# # Finetune full model +# if [ $stage -le 2 ]; then +# if [ "$use_wandb" == "true" ];then +# extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" +# fi +# mkdir -p $nnet_s2_dir/log +# $cuda_cmd \ +# --gpu $ngpu $nnet_s2_dir/log/train.log \ +# hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ +# hyperion-finetune-wav2vec2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ +# --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ +# --data.train.dataset.segments-file $train_data_dir/segments.csv \ +# 
--data.train.dataset.class-files $train_data_dir/speaker.csv \ +# --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ +# --data.val.dataset.segments-file $val_data_dir/segments.csv \ +# --in-model-file $nnet_s1 \ +# --trainer.exp-path $nnet_s2_dir \ +# --num-gpus $ngpu \ -fi +# fi -# Finetune full model -if [ $stage -le 3 ]; then - if [ "$use_wandb" == "true" ];then - extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)" - fi - mkdir -p $nnet_s3_dir/log - $cuda_cmd \ - --gpu $ngpu $nnet_s3_dir/log/train.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ - hyperion-finetune-wav2vec2xvector $nnet_type --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ - --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ - --data.train.dataset.segments-file $train_data_dir/segments.csv \ - --data.train.dataset.class-files $train_data_dir/speaker.csv \ - --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ - --data.val.dataset.segments-file $val_data_dir/segments.csv \ - --in-model-file $nnet_s2 \ - --trainer.exp-path $nnet_s3_dir \ - --num-gpus $ngpu \ +# # Finetune full model +# if [ $stage -le 3 ]; then +# if [ "$use_wandb" == "true" ];then +# extra_args="$extra_args --trainer.wandb.name $nnet_s3_name.$(date -Iminutes)" +# fi +# mkdir -p $nnet_s3_dir/log +# $cuda_cmd \ +# --gpu $ngpu $nnet_s3_dir/log/train.log \ +# hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ +# hyperion-finetune-wav2vec2xvector $nnet_type --cfg $nnet_s3_base_cfg $nnet_s3_args $extra_args \ +# --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ +# --data.train.dataset.segments-file $train_data_dir/segments.csv \ +# --data.train.dataset.class-files $train_data_dir/speaker.csv \ +# --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ +# --data.val.dataset.segments-file $val_data_dir/segments.csv \ +# --in-model-file $nnet_s2 \ +# --trainer.exp-path $nnet_s3_dir \ +# --num-gpus $ngpu \ -fi +# fi diff --git a/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds.sh b/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds.sh new file mode 100755 index 00000000..36ccd294 --- /dev/null +++ b/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds.sh @@ -0,0 +1,139 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=2 +nnet_stage=1 +config_file=default_config.sh +use_gpu=false +xvec_chunk_length=120.0 +. parse_options.sh || exit 1; +. 
$config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu --chunk-length $xvec_chunk_length" + xvec_cmd="$cuda_eval_cmd --gpu 1 --mem 6G" + num_gpus=1 +else + xvec_cmd="$train_cmd --mem 12G" + num_gpus=0 +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + +xvector_dir=exp/xvectors/$nnet_name +score_dir=exp/scores/$nnet_name +score_cosine_dir=$score_dir/cosine + +if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qmf" == "true" || "$do_pca" == "true") ]]; then + # Extract xvectors for training LDA/PLDA + nj=100 + for name in voxceleb2cat_train + do + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$name/vad.csv" + fi + output_dir=$xvector_dir/$name + echo "Extracting x-vectors for $name" + $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$name/recordings.csv \ + --random-utt-length --min-utt-length 2 --max-utt-length 30 \ + --model-path $nnet \ + --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv + hyperion-tables cat \ + --table-type features \ + --output-file $output_dir/xvector.csv --num-tables $nj + + done +fi + +if [ $stage -le 2 ]; then + # Extracts x-vectors for evaluation + nj=100 + if [ "$do_voxsrc22" == "true" ];then + extra_data="voxsrc22_dev" + fi + for name in voxceleb1_test $extra_data + do + num_segs=$(wc -l data/$name/segments.csv | awk '{ print $1-1}') + nj=$(($num_segs < 100 ? 
$num_segs:100)) + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$name/vad.csv" + fi + output_dir=$xvector_dir/$name + echo "Extracting x-vectors for $name" + $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$name/recordings.csv \ + --model-path $nnet \ + --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv + hyperion-tables cat \ + --table-type features \ + --output-file $output_dir/xvector.csv --num-tables $nj + + done +fi + +if [ $stage -le 3 ];then + + echo "Eval Voxceleb 1 with Cosine scoring" + num_parts=8 + for((i=1;i<=$num_parts;i++)); + do + for((j=1;j<=$num_parts;j++)); + do + $train_cmd $score_cosine_dir/log/voxceleb1_${i}_${j}.log \ + hyp_utils/conda_env.sh \ + hyperion-eval-cosine-scoring-backend \ + --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \ + --ndx-file data/voxceleb1_test/trials.csv \ + --enroll-map-file data/voxceleb1_test/enrollment.csv \ + --score-file $score_cosine_dir/voxceleb1_scores.csv \ + --enroll-part-idx $i --num-enroll-parts $num_parts \ + --test-part-idx $j --num-test-parts $num_parts & + done + done + wait + hyperion-merge-scores --output-file $score_cosine_dir/voxceleb1_scores.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts + + $train_cmd --mem 12G --num-threads 6 $score_cosine_dir/log/score_voxceleb1.log \ + hyperion-eval-verification-metrics \ + --score-files $score_cosine_dir/voxceleb1_scores.csv \ + --key-files data/voxceleb1_test/trials_{o,e,h}.csv \ + --score-names voxceleb1 \ + --key-names O E H \ + --sparse \ + --output-file $score_cosine_dir/voxceleb1_results.csv + + cat $score_cosine_dir/voxceleb1_results.csv +fi + diff --git a/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml index 04665cac..9a9dfc06 100644 --- a/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml index 04665cac..9a9dfc06 100644 --- a/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml index e7a94225..a2e63b54 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v2.0.yaml @@ -41,15 +41,11 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.4 margin_warmup_epochs: 0 intertop_margin: 0.1 - # override_dropouts: false - # dropout_rate: 0.1 - # resnet_enc: - # override_dropouts: true - # dropout_rate: 0.1 trainer: optim: opt_type: sgd diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml 
b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml index 877736b3..f3573b4a 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml index b6163f14..bb9c8c79 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v2.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml index 45e55d97..13f9cd9a 100644 --- a/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml index 04665cac..9a9dfc06 100644 --- a/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml index 2311b07b..65cd737c 100644 --- a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v2.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml index 469e166b..c7437e94 100644 --- a/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml b/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml index e4e6d97a..09a5345f 100644 --- a/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_idrnd_resnet202_xvec_stage2_v2.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage2_v3.0.yaml index 469e166b..c7437e94 100644 --- a/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage2_v3.0.yaml +++ 
b/egs/voxceleb/v1.1/conf/train_res2net50w26s4_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml index 5c9af011..63a5cb25 100644 --- a/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_res2net50w26s8_xvec_stage2_v3.0.yaml @@ -47,6 +47,7 @@ model: # dropout_rate: 0.0 dropout_rate: 0.2 + override_output: true trainer: optim: opt_type: sgd lr: 1e-3 diff --git a/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage2_v3.0.yaml index 04665cac..9a9dfc06 100644 --- a/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.1/conf/train_tseresnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 feats: fbank80_stmn_16k.yaml model: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml index 0923a608..c0bd44e5 100644 --- a/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml index 0923a608..c0bd44e5 100644 --- a/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml index 21f0db8b..c348e7c5 100644 --- a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml index 9788bb7c..9008a04c 100644 --- a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml index 0923a608..c0bd44e5 100644 --- a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml index 
11d33ae2..b14cfc75 100644 --- a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml index 0923a608..c0bd44e5 100644 --- a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml index 0923a608..c0bd44e5 100644 --- a/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml @@ -39,6 +39,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 30.0 margin: 0.3 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.0.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.0.sh index bb5d990c..cb1a172d 100644 --- a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.0.sh +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.0.sh @@ -16,7 +16,7 @@ nnet_name=${feat_type}_resnet34.v3.0 nnet_s1_base_cfg=conf/train_resnet34_xvec_stage1_v3.0.yaml nnet_s1_name=$nnet_name.s1 -nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name.kk2 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name nnet_s1=$nnet_s1_dir/model_ep0035.pth nnet_s2_base_cfg=conf/train_resnet34_xvec_stage2_v3.0.yaml diff --git a/egs/voxceleb/v2.1/cmd.sh b/egs/voxceleb/v2.1/cmd.sh index 040f458b..c95884ec 100755 --- a/egs/voxceleb/v2.1/cmd.sh +++ b/egs/voxceleb/v2.1/cmd.sh @@ -15,7 +15,8 @@ if [ "$(hostname -d)" == "cm.gemini" ];then export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" - export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_a100.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 30G" export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" else diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml index 8504db9e..07bf8e5a 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml @@ -37,6 +37,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml index dda0c632..c58797cf 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml @@ -47,6 +47,7 @@ data: num_workers: 8 model: xvector: + override_output: 
true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml index db36f8ee..5703104e 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml @@ -37,6 +37,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml index dda0c632..c58797cf 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml @@ -47,6 +47,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml index 8504db9e..07bf8e5a 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage2_v2.0.yaml @@ -37,6 +37,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml index ad56e80d..e9638704 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn1024x3_stage3_v2.0.yaml @@ -47,6 +47,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml index fc964f84..d1af05d8 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml @@ -37,6 +37,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml index ab6b3f4e..99002b45 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.1.yaml @@ -40,6 +40,7 @@ model: encoder_lr: 1e-2 feat_extract_lr: 1e-2 xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml index 928779f5..4a8c53d7 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml @@ -47,6 +47,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml 
b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml index 7ab8cea7..9c7652ce 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.1.yaml @@ -46,10 +46,8 @@ data: data_loader: num_workers: 8 model: - #hf_feats: - # encoder_lr: 1e-2 - # feat_extract_lr: 1e-2 xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml index b5b9b6b6..d1ed9300 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage2_v2.0.yaml @@ -44,6 +44,7 @@ model: - q_proj - v_proj xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml index a39445ff..fbea3f0f 100644 --- a/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2.1/conf/train_wavlmlarge_loraqv_ecapatdnn512x3_stage3_v2.0.yaml @@ -47,6 +47,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml index 90e3b14f..84ecfc04 100644 --- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage2_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml index 7a2f7bba..fdaff633 100644 --- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr300m_ecapatdnn512x3_stage3_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml index 90e3b14f..84ecfc04 100644 --- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage2_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml index 69bcc097..58fe1d49 100644 --- a/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wav2vec2xlsr53_ecapatdnn512x3_stage3_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml 
b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml index 90e3b14f..84ecfc04 100644 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml index 69a8322b..f8e620c1 100644 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage2_v2.0.yaml @@ -37,6 +37,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml index 69bcc097..58fe1d49 100644 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml index 3443591a..5013e5af 100644 --- a/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmbaseplus_ecapatdnn512x3_stage3_v2.0.yaml @@ -47,6 +47,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml index 7287188c..9fec8986 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage2_v2.0.yaml @@ -37,6 +37,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml index 69bcc097..58fe1d49 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml index 3443591a..5013e5af 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge12l_ecapatdnn512x3_stage3_v2.0.yaml @@ -47,6 +47,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml index 90e3b14f..84ecfc04 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml +++ 
b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml index 69a8322b..f8e620c1 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage2_v2.0.yaml @@ -37,6 +37,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.2 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml index 69bcc097..58fe1d49 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v1.0.yaml @@ -43,6 +43,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml index 5e1260ad..2ea1589d 100644 --- a/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml +++ b/egs/voxceleb/v2/conf/train_wavlmlarge_ecapatdnn512x3_stage3_v2.0.yaml @@ -47,6 +47,7 @@ data: num_workers: 8 model: xvector: + override_output: true cos_scale: 32.0 margin: 0.4 margin_warmup_epochs: 0 diff --git a/hyperion/bin/extract_wav2xvectors.py b/hyperion/bin/extract_wav2xvectors.py index 763df3fc..3cca3ede 100755 --- a/hyperion/bin/extract_wav2xvectors.py +++ b/hyperion/bin/extract_wav2xvectors.py @@ -25,7 +25,9 @@ from hyperion.io import SequentialAudioReader as AR from hyperion.io import VADReaderFactory as VRF from hyperion.np.augment import SpeechAugment -from hyperion.torch import TorchModelLoader as TML + +# from hyperion.torch import TorchModelLoader as TML +from hyperion.torch import TorchModel from hyperion.torch.utils import open_device from hyperion.utils import Utt2Info @@ -59,7 +61,7 @@ def init_device(use_gpu): def load_model(model_path, device): logging.info("loading model %s", model_path) - model = TML.load(model_path) + model = TorchModel.auto_load(model_path) logging.info(f"xvector-model={model}") model.to(device) model.eval() diff --git a/hyperion/bin/train_dino_wav2xvector.py b/hyperion/bin/train_dino_wav2xvector.py new file mode 100755 index 00000000..d1cd108e --- /dev/null +++ b/hyperion/bin/train_dino_wav2xvector.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import multiprocessing +import os +from pathlib import Path + +import torch +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ArgumentParser, + namespace_to_dict, +) + +from hyperion.hyp_defs import config_logger, set_float_cpu +from hyperion.torch.data import DINOAudioDataset as AD +from hyperion.torch.data import SegSamplerFactory +from hyperion.torch.losses import DINOLoss +from hyperion.torch.metrics import CategoricalAccuracy + +# from hyperion.torch.models import EfficientNetXVector as EXVec +from hyperion.torch.models import Wav2ConformerV1XVector as CXVec +from hyperion.torch.models import Wav2ResNet1dXVector as R1dXVec +from hyperion.torch.models 
import Wav2ResNetXVector as RXVec + +# from hyperion.torch.models import SpineNetXVector as SpineXVec +# from hyperion.torch.models import TDNNXVector as TDXVec +# from hyperion.torch.models import TransformerXVectorV1 as TFXVec +from hyperion.torch.trainers import DINOXVectorTrainer as Trainer +from hyperion.torch.utils import ddp + +xvec_dict = { + "resnet": RXVec, + "resnet1d": R1dXVec, + "conformer": CXVec, + # "efficientnet": EXVec, + # "tdnn": TDXVec, + # "transformer": TFXVec, + # "spinenet": SpineXVec, +} + + +def init_data(partition, rank, num_gpus, **kwargs): + kwargs = kwargs["data"][partition] + ad_args = AD.filter_args(**kwargs["dataset"]) + sampler_args = kwargs["sampler"] + if rank == 0: + logging.info("{} audio dataset args={}".format(partition, ad_args)) + logging.info("{} sampler args={}".format(partition, sampler_args)) + logging.info("init %s dataset", partition) + + is_val = partition == "val" + ad_args["is_val"] = is_val + sampler_args["shuffle"] = not is_val + dataset = AD(**ad_args) + + if rank == 0: + logging.info("init %s samplers", partition) + + sampler = SegSamplerFactory.create(dataset, **sampler_args) + + if rank == 0: + logging.info("init %s dataloader", partition) + + num_workers = kwargs["data_loader"]["num_workers"] + # guard against division by zero on CPU-only runs (num_gpus == 0) + num_workers_per_gpu = int((num_workers + num_gpus - 1) / num_gpus) if num_gpus > 0 else num_workers + largs = ( + {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} + ) + data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler, **largs) + return data_loader + + +def init_student_xvector(num_classes, rank, xvec_class, **kwargs): + xvec_args = xvec_class.filter_args(**kwargs["student_model"]) + if rank == 0: + logging.info(f"student xvector network args={xvec_args}") + xvec_args["xvector"]["num_classes"] = num_classes + model = xvec_class(**xvec_args) + if rank == 0: + logging.info(f"student-model={model}") + return model + + +def init_teacher_xvector(student_model, rank, xvec_class, **kwargs): + xvec_args = xvec_class.filter_args(**kwargs["teacher_model"]) + if rank == 0: + logging.info(f"teacher xvector network args={xvec_args}") + # xvec_args["xvector"]["num_classes"] = num_classes + model = student_model.clone() + model.change_config(**xvec_args) + if rank == 0: + logging.info(f"teacher-model={model}") + return model + + +def init_dino_loss(rank, **kwargs): + loss_args = kwargs["dino_loss"] + if rank == 0: + logging.info(f"dino loss args={loss_args}") + loss = DINOLoss(**loss_args) + if rank == 0: + logging.info(f"dino-loss={loss}") + + return loss + + +def train_xvec(gpu_id, args): + config_logger(args.verbose) + del args.verbose + logging.debug(args) + + kwargs = namespace_to_dict(args) + torch.manual_seed(args.seed) + set_float_cpu("float32") + + ddp_args = ddp.filter_ddp_args(**kwargs) + device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) + kwargs["rank"] = rank + + train_loader = init_data(partition="train", **kwargs) + val_loader = init_data(partition="val", **kwargs) + + dino_loss = init_dino_loss(**kwargs) + student_model = init_student_xvector(num_classes=dino_loss.num_classes, **kwargs) + kwargs["student_model"] = student_model + teacher_model = init_teacher_xvector(**kwargs) + + trn_args = Trainer.filter_args(**kwargs["trainer"]) + if rank == 0: + logging.info("trainer args={}".format(trn_args)) + metrics = {"acc": CategoricalAccuracy()} + trainer = Trainer( + student_model, + teacher_model, + dino_loss, + device=device, + metrics=metrics, + ddp=world_size > 1, + **trn_args, + ) + 
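+ # Editor's note (assumption, not stated in this patch): following the standard + # DINO recipe, only the student receives gradients from DINOLoss, and + # DINOXVectorTrainer is expected to update the teacher as an exponential moving + # average of the student weights; this is why init_teacher_xvector() builds the + # teacher by cloning the student rather than training it directly.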
trainer.load_last_checkpoint() + trainer.fit(train_loader, val_loader) + + ddp.ddp_cleanup() + + +def make_parser(xvec_class): + parser = ArgumentParser() + + parser.add_argument("--cfg", action=ActionConfigFile) + + train_parser = ArgumentParser(prog="") + + AD.add_class_args(train_parser, prefix="dataset") + SegSamplerFactory.add_class_args(train_parser, prefix="sampler") + train_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + + val_parser = ArgumentParser(prog="") + AD.add_class_args(val_parser, prefix="dataset") + SegSamplerFactory.add_class_args(val_parser, prefix="sampler") + val_parser.add_argument( + "--data_loader.num-workers", + type=int, + default=5, + help="num_workers of data loader", + ) + data_parser = ArgumentParser(prog="") + data_parser.add_argument("--train", action=ActionParser(parser=train_parser)) + data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) + parser.add_argument("--data", action=ActionParser(parser=data_parser)) + parser.link_arguments( + "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" + ) + + xvec_class.add_class_args(parser, prefix="student_model") + xvec_class.add_dino_teacher_args(parser, prefix="teacher_model") + DINOLoss.add_class_args(parser, prefix="dino_loss") + Trainer.add_class_args( + parser, prefix="trainer", train_modes=xvec_class.valid_train_modes() + ) + ddp.add_ddp_args(parser) + parser.add_argument("--seed", type=int, default=1123581321, help="random seed") + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + + return parser + + +def main(): + parser = ArgumentParser(description="Train DINO Wav2XVector from audio files") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + for k, v in xvec_dict.items(): + parser_k = make_parser(v) + subcommands.add_subcommand(k, parser_k) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except Exception: + gpu_id = 0 + + xvec_type = args.subcommand + args_sc = vars(args)[xvec_type] + + if gpu_id == 0: + try: + config_file = Path(args_sc.trainer.exp_path) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except Exception: + pass + + args_sc.xvec_class = xvec_dict[xvec_type] + # torch docs recommend using forkserver + multiprocessing.set_start_method("forkserver") + train_xvec(gpu_id, args_sc) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py index c00c4633..6d947d24 100755 --- a/hyperion/bin/train_wav2rnn_transducer.py +++ b/hyperion/bin/train_wav2rnn_transducer.py @@ -145,7 +145,7 @@ def make_parser(model_class): parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") - AD.add_class_args(train_parser, prefix="dataset", skip={}) + AD.add_class_args(train_parser, prefix="dataset") SegSamplerFactory.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", @@ -155,7 +155,7 @@ def make_parser(model_class): ) val_parser = ArgumentParser(prog="") - AD.add_class_args(val_parser, prefix="dataset", skip={}) + AD.add_class_args(val_parser, prefix="dataset") SegSamplerFactory.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", diff --git a/hyperion/bin/train_wav2vec2rnn_transducer.py b/hyperion/bin/train_wav2vec2rnn_transducer.py index 
5b802454..fd94f19d 100755 --- a/hyperion/bin/train_wav2vec2rnn_transducer.py +++ b/hyperion/bin/train_wav2vec2rnn_transducer.py @@ -168,7 +168,7 @@ def make_parser(model_class): parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") - AD.add_class_args(train_parser, prefix="dataset", skip={}) + AD.add_class_args(train_parser, prefix="dataset") SegSamplerFactory.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", @@ -178,7 +178,7 @@ def make_parser(model_class): ) val_parser = ArgumentParser(prog="") - AD.add_class_args(val_parser, prefix="dataset", skip={}) + AD.add_class_args(val_parser, prefix="dataset") SegSamplerFactory.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", diff --git a/hyperion/bin/train_wav2vec2xvector.py b/hyperion/bin/train_wav2vec2xvector.py index 63ac34a9..c772fe3c 100755 --- a/hyperion/bin/train_wav2vec2xvector.py +++ b/hyperion/bin/train_wav2vec2xvector.py @@ -128,7 +128,7 @@ def make_parser(model_class): parser.add_argument("--cfg", action=ActionConfigFile) train_parser = ArgumentParser(prog="") - AD.add_class_args(train_parser, prefix="dataset", skip={}) + AD.add_class_args(train_parser, prefix="dataset") SegSamplerFactory.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", @@ -138,7 +138,7 @@ def make_parser(model_class): ) val_parser = ArgumentParser(prog="") - AD.add_class_args(val_parser, prefix="dataset", skip={}) + AD.add_class_args(val_parser, prefix="dataset") SegSamplerFactory.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", diff --git a/hyperion/bin/train_wav2xvector.py b/hyperion/bin/train_wav2xvector.py index 3138784d..2c4684c3 100755 --- a/hyperion/bin/train_wav2xvector.py +++ b/hyperion/bin/train_wav2xvector.py @@ -127,7 +127,7 @@ def make_parser(xvec_class): train_parser = ArgumentParser(prog="") - AD.add_class_args(train_parser, prefix="dataset", skip={}) + AD.add_class_args(train_parser, prefix="dataset") SegSamplerFactory.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", @@ -137,7 +137,7 @@ def make_parser(xvec_class): ) val_parser = ArgumentParser(prog="") - AD.add_class_args(val_parser, prefix="dataset", skip={}) + AD.add_class_args(val_parser, prefix="dataset") SegSamplerFactory.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", diff --git a/hyperion/bin/train_xvector_from_feats.py b/hyperion/bin/train_xvector_from_feats.py index 699aa410..c79e444f 100755 --- a/hyperion/bin/train_xvector_from_feats.py +++ b/hyperion/bin/train_xvector_from_feats.py @@ -126,7 +126,7 @@ def make_parser(xvec_class): train_parser = ArgumentParser(prog="") - SD.add_class_args(train_parser, prefix="dataset", skip={}) + SD.add_class_args(train_parser, prefix="dataset") Sampler.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", @@ -136,7 +136,7 @@ def make_parser(xvec_class): ) val_parser = ArgumentParser(prog="") - SD.add_class_args(val_parser, prefix="dataset", skip={}) + SD.add_class_args(val_parser, prefix="dataset") Sampler.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", diff --git a/hyperion/bin/train_xvector_from_wav.py b/hyperion/bin/train_xvector_from_wav.py index 67075a5d..eb251ad9 100755 --- 
a/hyperion/bin/train_xvector_from_wav.py +++ b/hyperion/bin/train_xvector_from_wav.py @@ -138,7 +138,7 @@ def make_parser(xvec_class): train_parser = ArgumentParser(prog="") - AD.add_class_args(train_parser, prefix="dataset", skip={}) + AD.add_class_args(train_parser, prefix="dataset") SegSamplerFactory.add_class_args(train_parser, prefix="sampler") train_parser.add_argument( "--data_loader.num-workers", @@ -148,7 +148,7 @@ ) val_parser = ArgumentParser(prog="") - AD.add_class_args(val_parser, prefix="dataset", skip={}) + AD.add_class_args(val_parser, prefix="dataset") SegSamplerFactory.add_class_args(val_parser, prefix="sampler") val_parser.add_argument( "--data_loader.num-workers", diff --git a/hyperion/torch/data/__init__.py b/hyperion/torch/data/__init__.py index 959a635a..ee5a661d 100644 --- a/hyperion/torch/data/__init__.py +++ b/hyperion/torch/data/__init__.py @@ -4,11 +4,15 @@ """ from .audio_dataset import AudioDataset + # samplers from .bucketing_seg_sampler import BucketingSegSampler +from .dino_audio_dataset import DINOAudioDataset from .embed_sampler_factory import EmbedSamplerFactory + # datasets from .feat_seq_dataset import FeatSeqDataset from .paired_feat_seq_dataset import PairedFeatSeqDataset + # from .weighted_seq_sampler import ClassWeightedSeqSampler from .seg_sampler_factory import SegSamplerFactory diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 4644f141..2329d0b1 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -29,6 +29,28 @@ class AudioDataset(Dataset): + """AudioDataset class + + Args: + recordings_file: recordings manifest file (kaldi .scp or pandas .csv) + segments_file: segments manifest file (kaldi .scp or pandas .csv) + class_names: list with the names of the types of classes in the datasets, e.g., speaker, language + class_files: list of class info files + time_durs_file: (deprecated) segment to duration in secs file, if durations are not in segments_file + bpe_model: bpe model for the text label + text_file: text file with word labels for each utterance + aug_cfgs: list of augmentation configuration files + num_augs: number of augmentations per segment and augmentation type + num_aug_mix: number of AugMix augmentations per segment + aug_mix_alpha: AugMix Dirichlet distribution parameter + return_segment_info: list of columns of the segment file which should be returned as supervisions + return_orig: when using augmentation, whether to also return the original audio + target_sample_freq: target sampling frequency, if not None all audios are converted to this sample freq + wav_scale: scale waveforms to be in [-wav_scale, wav_scale] + is_val: whether this is a validation dataset 
+ seed: random seed", + """ + def __init__( self, recordings_file: str, @@ -418,14 +440,14 @@ def add_class_args(parser, prefix=None, skip=set()): parser.add_argument( "--recordings-file", required=True, - help=("recordings manifest file (kaldi .scp or pandas .csv)"), + help="recordings manifest file (kaldi .scp or pandas .csv)", ) if "segments_file" not in skip: parser.add_argument( "--segments-file", required=True, - help=("segments manifest file (kaldi .scp or pandas .csv)"), + help="segments manifest file (kaldi .scp or pandas .csv)", ) parser.add_argument( @@ -441,7 +463,7 @@ def add_class_args(parser, prefix=None, skip=set()): "--class-files", default=None, nargs="+", - help=("list of class info files"), + help="list of class info files", ) parser.add_argument( @@ -455,39 +477,40 @@ def add_class_args(parser, prefix=None, skip=set()): parser.add_argument( "--bpe-model", default=None, - help=("bpe model for the text label"), + help="bpe model for the text label", ) parser.add_argument( "--text-file", default=None, - help=("text file with words labels for each utterances"), + help="text file with words labels for each utterances", ) - parser.add_argument( - "--aug-cfgs", - default=None, - nargs="+", - help=("augmentation configuration file."), - ) + if "aug_cfgs" not in skip: + parser.add_argument( + "--aug-cfgs", + default=None, + nargs="+", + help="augmentation configuration file.", + ) parser.add_argument( "--num-augs", default=1, type=int, - help=("number of augmentations per segment and augmentation type"), + help="number of augmentations per segment and augmentation type", ) parser.add_argument( "--num-aug-mix", default=0, type=int, - help=("number of AugMix augmentations per segment"), + help="number of AugMix augmentations per segment", ) parser.add_argument( "--aug-mix-alpha", default=0.5, type=float, - help=("number of AugMix augmentations per segment"), + help="number of AugMix augmentations per segment", ) parser.add_argument( "--return-segment-info", diff --git a/hyperion/torch/data/dino_audio_dataset.py b/hyperion/torch/data/dino_audio_dataset.py new file mode 100644 index 00000000..bb0a93a5 --- /dev/null +++ b/hyperion/torch/data/dino_audio_dataset.py @@ -0,0 +1,352 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +import math +import time +from typing import Dict, List, Optional + +import numpy as np +import pandas as pd + +# import k2 +import sentencepiece as spm +import torch +import torch.distributed as dist +import torchaudio.transforms as tat +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from ...io import RandomAccessAudioReader as AR +from ...np.augment import SpeechAugment +from ...utils.class_info import ClassInfo +from ...utils.misc import filter_func_args +from ...utils.segment_set import SegmentSet +from ...utils.text import read_text +from ..torch_defs import floatstr_torch +from .audio_dataset import AudioDataset + + +class DINOAudioDataset(AudioDataset): + """AudioDataset class to train DINO for speech + + Args: + recordings_file: recordings manifest file (kaldi .scp or pandas .csv) + segments_file: segments manifest file (kaldi .scp or pandas .csv) + class_names: list with the names of the types of classes in the datasets, e.g., speaker, language + class_files: list of class info files + time_durs_file: (deprecated) segment to duration in secs file, if durations are not in segments_file + bpe_model: bpe model for the text 
label + text_file: text file with word labels for each utterance + teacher_aug_cfg: configuration for teacher augmentations + student_aug_cfg: configuration for student augmentations + aug_cfgs: list of augmentation configuration files + num_augs: number of augmentations per segment and augmentation type + num_aug_mix: number of AugMix augmentations per segment + aug_mix_alpha: AugMix Dirichlet distribution parameter + return_segment_info: list of columns of the segment file which should be returned as supervisions + return_orig: when using augmentation, whether to also return the original audio + target_sample_freq: target sampling frequency, if not None all audios are converted to this sample freq + wav_scale: scale waveforms to be in [-wav_scale, wav_scale] + is_val: whether this is a validation dataset + seed: random seed + teacher_chunk_length: chunk length for the teacher model + num_teacher_chunks: num teacher chunks in each batch + student_chunk_length: chunk length for the student model + num_student_chunks: num student chunks in each batch + same_teacher_student_chunks: True if teacher and student chunks overlap, False if they are disjoint + """ + + def __init__( + self, + recordings_file: str, + segments_file: str, + class_names: Optional[List[str]] = None, + class_files: Optional[List[str]] = None, + bpe_model: Optional[str] = None, + text_file: Optional[str] = None, + time_durs_file: Optional[str] = None, + teacher_aug_cfg: Optional[str] = None, + student_aug_cfg: Optional[str] = None, + num_augs: int = 1, + num_aug_mix: int = 0, + aug_mix_alpha: float = 0, + return_segment_info: Optional[List[str]] = None, + return_orig: bool = False, + target_sample_freq: Optional[float] = None, + wav_scale: float = 1, + is_val: bool = False, + seed: int = 112358, + teacher_chunk_length: float = 4, + num_teacher_chunks: int = 2, + student_chunk_length: float = 2, + num_student_chunks: int = 4, + same_teacher_student_chunks: bool = False, + ): + aug_cfgs = [] + student_aug_idx = -1 + teacher_aug_idx = -1 + if student_aug_cfg is not None: + aug_cfgs.append(student_aug_cfg) + student_aug_idx = 0 + if teacher_aug_cfg is not None: + # the index is -1 when unset, never None + assert student_aug_idx != -1, "teacher_aug_cfg requires student_aug_cfg" + if teacher_aug_cfg != student_aug_cfg: + aug_cfgs.append(teacher_aug_cfg) + teacher_aug_idx = 1 + else: + teacher_aug_idx = 0 + + super_args = filter_func_args(super().__init__, locals()) + super().__init__(**super_args) + self.teacher_chunk_length = teacher_chunk_length + self.num_teacher_chunks = num_teacher_chunks + self.student_chunk_length = student_chunk_length + self.num_student_chunks = num_student_chunks + self.same_teacher_student_chunks = same_teacher_student_chunks + if student_aug_idx != -1: + self.student_augmenter = self.augmenters[student_aug_idx] + if teacher_aug_idx != -1: + self.teacher_augmenter = self.augmenters[teacher_aug_idx] + + def _apply_chunk_augs(self, x, duration, fs, augmenter, tag): + if not augmenter: + return {f"x_{tag}": x} + + if duration == 0: + num_samples = len(x) + else: + num_samples = int(duration * fs) + + reverb_context_samples = len(x) - num_samples + x_orig = x[reverb_context_samples:] + x_augs = {} + for j in range(self.num_augs): + # augment x + x_aug, aug_info = augmenter(x) + # remove the extra left context used to compute the reverberation. 
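+ # Editor's note (inferred from this class, an assumption about the reader): + # _read_audio() is assumed to prepend reverb_context_samples of extra audio + # before the nominal chunk start so that reverberation augmentation has left + # context to convolve with; the slice below crops that context away so every + # augmented view aligns sample-by-sample with x_orig.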
+ x_aug = x_aug[reverb_context_samples : len(x)] + x_aug = x_aug.astype(floatstr_torch(), copy=False) + x_augs[f"x_{tag}_aug_{j}"] = x_aug + + if self.num_aug_mix > 0: + x_augs = self._apply_aug_mix(x_orig, x_augs, 0) + + if self.return_orig: + x_augs[f"x_{tag}"] = x_orig + elif len(x_augs) == 1: + # if we just have one aug and we don't return the clean version, + # we just call x to the aug version + x_augs[f"x_{tag}"] = x_augs.pop(f"x_{tag}_aug_0") + + return x_augs + + def _apply_augs(self, xs, duration, fs, augmenter, tag): + x_augs = {} + for i, x in enumerate(xs): + x_augs_i = self._apply_chunk_augs(x, duration, fs, augmenter, f"{tag}_{i}") + x_augs.update(x_augs_i) + + return x_augs + + def _split_audio_into_chunks(self, x, x_samples, chunk_samples, num_chunks): + reverb_context = len(x) - x_samples + chunk_shift = (x_samples - chunk_samples) // num_chunks + xs = [] + for i in range(num_chunks): + x_start = i * chunk_shift + x_end = x_start + chunk_samples + reverb_context + xs.append(x[x_start:x_end]) + + return xs + + def _split_audio_into_teacher_student_disjoint(self, x, duration, fs): + total_samples = int(duration * fs) + teacher_chunk_samples = int(fs * self.teacher_chunk_length) + student_chunk_samples = int(fs * self.student_chunk_length) + sum_chunk = teacher_chunk_samples + student_chunk_samples + assert total_samples >= sum_chunk, f"signal samples = {len(x)} < {sum_chunk}" + + teacher_crops_x_chunk = self.num_teacher_chunks * teacher_chunk_samples + student_crops_x_chunk = self.num_student_chunks * student_chunk_samples + sum_crops_x_chunk = teacher_crops_x_chunk + student_crops_x_chunk + teacher_samples = max( + teacher_crops_x_chunk * total_samples // sum_crops_x_chunk, + teacher_chunk_samples, + ) + student_samples = total_samples - teacher_samples + # here we decide if we split the audio in [teacher, student] or [student, teacher] + teacher_first = self.rng.random() < 0.5 + + if teacher_first: + x1_samples = teacher_samples + # x2_samples = student_samples + else: + x1_samples = student_samples + # x2_samples = teacher_samples + + max_reverb_context = int(self.reverb_context * fs) + x1_reverb_context = len(x) - total_samples + x1_end_sample = x1_reverb_context + x1_samples + x1 = x[:x1_end_sample] + if x1_end_sample >= max_reverb_context: + x2_reverb_context = max_reverb_context + else: + x2_reverb_context = x1_end_sample + + # print( + # "xxx", + # len(x), + # total_samples, + # teacher_first, + # teacher_samples, + # student_samples, + # x1_reverb_context, + # x1_end_sample, + # x2_reverb_context, + # flush=True, + # ) + x2 = x[x1_end_sample - x2_reverb_context :] + if teacher_first: + x_teacher = x1 + x_student = x2 + else: + x_teacher = x2 + x_student = x1 + + return x_teacher, teacher_samples, x_student, student_samples + + def _split_audio_into_teacher_student_same(self, x, duration, fs): + total_samples = int(duration * fs) + return x, total_samples, x, total_samples + + def _split_audio_into_teacher_student_chunks(self, x, duration, fs): + if self.same_teacher_student_chunks: + ( + x_teacher, + teacher_samples, + x_student, + student_samples, + ) = self._split_audio_into_teacher_student_same(x, duration, fs) + else: + ( + x_teacher, + teacher_samples, + x_student, + student_samples, + ) = self._split_audio_into_teacher_student_disjoint(x, duration, fs) + assert ( + len(x_teacher) >= 64000 and len(x_teacher) <= 136000 + ), f"{len(x_teacher)}, {len(x_student)} {len(x)} {duration*fs}, {teacher_samples}, {student_samples}" + assert ( + len(x_student) >= 32000 and 
len(x_student) <= 136000
+        ), f"{len(x_teacher)}, {len(x_student)}, {len(x)} {duration*fs}, {teacher_samples}, {student_samples}"
+        xs_teacher = self._split_audio_into_chunks(
+            x_teacher,
+            teacher_samples,
+            int(fs * self.teacher_chunk_length),
+            self.num_teacher_chunks,
+        )
+        xs_student = self._split_audio_into_chunks(
+            x_student,
+            student_samples,
+            int(fs * self.student_chunk_length),
+            self.num_student_chunks,
+        )
+        for xx in xs_teacher:
+            assert (
+                len(xx) >= 64000 and len(xx) <= 72000
+            ), f"{[len(t) for t in xs_teacher]} {len(x_teacher)} {len(x)}"
+        for xx in xs_student:
+            assert (
+                len(xx) >= 32000 and len(xx) <= 40000
+            ), f"{[len(t) for t in xs_student]} {len(x_student)} {len(x)}"
+
+        return xs_teacher, xs_student
+
+    def __getitem__(self, segment):
+        seg_id, start, duration = self._parse_segment_item(segment)
+        x, fs = self._read_audio(seg_id, start, duration)
+        x, fs = self._resample(x, fs)
+        assert len(x) >= int(
+            duration * fs
+        ), f"getitem {self.seg_set.loc[seg_id].duration}, {start}, {duration}, {len(x)}"
+        data = {"seg_id": seg_id, "sample_freq": fs}
+        xs_teacher, xs_student = self._split_audio_into_teacher_student_chunks(
+            x, duration, fs
+        )
+        x_augs_teacher = self._apply_augs(
+            xs_teacher, self.teacher_chunk_length, fs, self.teacher_augmenter, "teacher"
+        )
+        x_augs_student = self._apply_augs(
+            xs_student, self.student_chunk_length, fs, self.student_augmenter, "student"
+        )
+        data.update(x_augs_teacher)
+        data.update(x_augs_student)
+        # print(data, flush=True)
+        # for ll in [
+        #     "x_teacher_0",
+        #     "x_teacher_1",
+        #     "x_student_0",
+        #     "x_student_1",
+        #     "x_student_2",
+        #     "x_student_3",
+        # ]:
+        #     print("zzz ", ll, data[ll].shape, flush=True)
+        seg_info = self._get_segment_info(seg_id)
+        data.update(seg_info)
+        return data
+
+    @staticmethod
+    def filter_args(**kwargs):
+        args = filter_func_args(DINOAudioDataset.__init__, kwargs)
+        return args
+
+    @staticmethod
+    def add_class_args(parser, prefix=None, skip=set()):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        skip.add("aug_cfgs")
+        AudioDataset.add_class_args(parser, skip=skip)
+        parser.add_argument(
+            "--teacher-aug-cfg", default=None, help="config for teacher augmentations"
+        )
+        parser.add_argument(
+            "--student-aug-cfg", default=None, help="config for student augmentations"
+        )
+        parser.add_argument(
+            "--teacher-chunk-length",
+            default=4.0,
+            type=float,
+            help="chunk length for the teacher model",
+        )
+        parser.add_argument(
+            "--student-chunk-length",
+            default=4.0,
+            type=float,
+            help="chunk length for the student model",
+        )
+        parser.add_argument(
+            "--num-teacher-chunks",
+            default=2,
+            type=int,
+            help="num teacher chunks in each batch",
+        )
+        parser.add_argument(
+            "--num-student-chunks",
+            default=4,
+            type=int,
+            help="num student chunks in each batch",
+        )
+        parser.add_argument(
+            "--same-teacher-student-chunks",
+            default=False,
+            action=ActionYesNo,
+            help="teacher and student chunks overlap",
+        )
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
diff --git a/hyperion/torch/data/embed_dataset.py b/hyperion/torch/data/embed_dataset.py
index 519f498d..3c4433af 100644
--- a/hyperion/torch/data/embed_dataset.py
+++ b/hyperion/torch/data/embed_dataset.py
@@ -10,10 +10,9 @@
 import numpy as np
 import pandas as pd
-from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
-
 import torch
 import torch.distributed as dist
+from jsonargparse import ActionParser, ActionYesNo,
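For intuition, the disjoint split above divides the utterance between teacher and student in proportion to how many samples each side's crops will consume, never giving the teacher less than one chunk; each region is then cut into evenly shifted, possibly overlapping crops by _split_audio_into_chunks. A self-contained sketch of that arithmetic with hypothetical values (4 s teacher chunks, 2 s student chunks, a 10 s signal; none of these numbers come from the patch):

fs = 16000
total = int(10.0 * fs)                               # 160000 samples read
teacher_chunk, num_teacher = int(4.0 * fs), 2
student_chunk, num_student = int(2.0 * fs), 4

teacher_crops = num_teacher * teacher_chunk          # 128000 samples of teacher crops
student_crops = num_student * student_chunk          # 128000 samples of student crops
teacher_samples = max(teacher_crops * total // (teacher_crops + student_crops),
                      teacher_chunk)                 # 80000
student_samples = total - teacher_samples            # 80000
# evenly shifted crops inside each region, as in _split_audio_into_chunks
teacher_shift = (teacher_samples - teacher_chunk) // num_teacher   # 8000
student_shift = (student_samples - student_chunk) // num_student   # 12000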
ArgumentParser from torch.utils.data import Dataset from ...io import RandomAccessDataReaderFactory as RF @@ -38,7 +37,6 @@ def __init__( preload_embeds=False, is_val=False, ): - assert embeds is not None or embed_file is not None assert embed_info is not None or embed_info is not None assert class_info is not None or class_files is not None @@ -60,8 +58,8 @@ def __init__( logging.info("dataset contains %d embeddings", len(self.embed_info)) if embeds is None: - if rank == 0: - logging.info("opening dataset %s", rspecifier) + # if rank == 0: + # logging.info("opening dataset %s", rspecifier) self.r = RF.create(embed_file, path_prefix=path_prefix, scp_sep=" ") if self.preload_embeds: self.embeds = self.r.load(embed_info["id"], squeeze=True).astype( @@ -143,7 +141,6 @@ def _get_embed_info(self, embed_id): return embed_info def __getitem__(self, embed_id): - x = self._read_embed(embed_id) data = {"embed_id": embed_id, "x": x} diff --git a/hyperion/torch/data/hyp_sampler.py b/hyperion/torch/data/hyp_sampler.py index d1bcb0a8..f8d0862b 100644 --- a/hyperion/torch/data/hyp_sampler.py +++ b/hyperion/torch/data/hyp_sampler.py @@ -2,10 +2,9 @@ import math import numpy as np -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.distributed as dist +from jsonargparse import ActionParser, ArgumentParser from torch.utils.data import Sampler @@ -14,6 +13,7 @@ def __init__(self, shuffle=False, seed=1234): super().__init__(None) self.epoch = 0 self.batch = 0 + self.init_batch = 0 self.shuffle = shuffle self.seed = seed @@ -28,16 +28,18 @@ def __init__(self, shuffle=False, seed=1234): self.world_size = world_size self.rng = torch.Generator() - def set_epoch(self, epoch): + def set_epoch(self, epoch, batch=0): self.epoch = epoch + self.init_batch = batch def _set_seed(self): if self.shuffle: - self.rng.manual_seed(self.seed + 10 * self.epoch) + self.rng.manual_seed(self.seed + 10 * self.epoch + 100 * self.init_batch) else: self.rng.manual_seed(self.seed) def __iter__(self): - self.batch = 0 + self.batch = self.init_batch + self.init_batch = 0 self._set_seed() return self diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py index 2933dcc6..da47c8ac 100644 --- a/hyperion/torch/data/seg_chunk_sampler.py +++ b/hyperion/torch/data/seg_chunk_sampler.py @@ -8,10 +8,9 @@ import numpy as np import pandas as pd -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.distributed as dist +from jsonargparse import ActionParser, ArgumentParser from ...utils.segment_set import SegmentSet from .hyp_sampler import HypSampler @@ -30,7 +29,6 @@ def __init__( seed=1234, **base_kwargs, ): - super().__init__(shuffle=shuffle, seed=seed) self.seg_set = seg_set self.min_chunk_length = min_chunk_length @@ -80,7 +78,6 @@ def get_random_duration(self): return self.min_chunk_length def _create_chunks(self): - chunks = [] for id, len in zip(self.seg_set["id"], self.seg_set[self.length_name]): if len < self.min_chunk_length: @@ -91,7 +88,16 @@ def _create_chunks(self): num_chunks = math.ceil(len / self.avg_chunk_length) start = 0 for i in range(num_chunks - 1): - dur = self.get_random_duration() + remainder = len - start + if remainder < self.min_chunk_length: + remainder = self.min_chunk_length + dur = remainder + start = len - dur + else: + dur = self.get_random_duration() + if dur > remainder: + dur = remainder + chunk = (f"{id}-{i}", id, start, dur) chunks.append(chunk) start += dur @@ -135,7 +141,6 @@ def __next__(self): @staticmethod 
def filter_args(**kwargs): - valid_args = ( "min_chunk_length", "max_chunk_length", diff --git a/hyperion/torch/layer_blocks/conformer_encoder_v1.py b/hyperion/torch/layer_blocks/conformer_encoder_v1.py index 4f8e1b4d..349bef4b 100644 --- a/hyperion/torch/layer_blocks/conformer_encoder_v1.py +++ b/hyperion/torch/layer_blocks/conformer_encoder_v1.py @@ -131,6 +131,12 @@ def __init__( if self.concat_after: self.concat_linear = nn.Linear(num_feats + num_feats, num_feats) + def change_attn_dropout(self, att_dropout_rate): + attn = self.self_attn + if hasattr(attn, "dropout_rate"): + attn.dropout_rate = att_dropout_rate + attn.dropout.p = att_dropout_rate + @staticmethod def _make_att( att_type, diff --git a/hyperion/torch/layers/__init__.py b/hyperion/torch/layers/__init__.py index b0b607e2..d53646ed 100644 --- a/hyperion/torch/layers/__init__.py +++ b/hyperion/torch/layers/__init__.py @@ -28,6 +28,12 @@ from .mvn import MeanVarianceNorm from .norm_layer_factory import NormLayer1dFactory, NormLayer2dFactory from .pool_factory import GlobalPool1dFactory -from .pos_encoder import ConvPosEncoder, NoPosEncoder, PosEncoder, RelPosEncoder +from .pos_encoder import ( + ConvPosEncoder, + NoPosEncoder, + PosEncoder, + PosEncoderBase, + RelPosEncoder, +) from .spec_augment import AxisMasker, SpecAugment, SpecWarper from .subpixel_convs import ICNR1d, ICNR2d, SubPixelConv1d, SubPixelConv2d diff --git a/hyperion/torch/layers/activation_factory.py b/hyperion/torch/layers/activation_factory.py index f2467962..e656eff5 100644 --- a/hyperion/torch/layers/activation_factory.py +++ b/hyperion/torch/layers/activation_factory.py @@ -36,6 +36,7 @@ "double_swish": DoubleSwish, "swish6": Swish6, "double_swish6": DoubleSwish6, + "gelu": nn.GELU, } @@ -194,3 +195,6 @@ def get_config(activation): return {"name": "swish6"} if isinstance(activation, DoubleSwish6): return {"name": "double_swish6"} + + if isinstance(activation, nn.GELU): + return {"name": "gelu"} diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index 8fe67792..4587fbd2 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -6,7 +6,6 @@ import math import numpy as np - import torch import torch.nn as nn import torch.nn.functional as nnf @@ -64,7 +63,6 @@ def forward_slidwin(self, x, win_length, win_shift): raise NotImplementedError() def _slidwin_pad(self, x, win_length, win_shift, snip_edges): - if snip_edges: num_frames = int( math.floor((x.size(-1) - win_length + win_shift) / win_shift) @@ -145,7 +143,6 @@ def _post_slidwin(self, m_x, x_shape): return m_x def _forward_slidwin_int(self, x, win_length, win_shift, snip_edges): - c_x, out_shape = self._pre_slidwin(x, win_length, win_shift, snip_edges) m_x = (c_x[:, win_shift:] - c_x[:, :-win_shift]) / win_length @@ -204,7 +201,7 @@ def forward(self, x, x_lengths=None, weights=None): # this can produce slightly negative variance when relu6 saturates in all time steps # add 1e-5 for stability s = torch.sqrt( - torch.mean(delta ** 2, dim=self.dim, keepdim=False).clamp(min=SQRT_EPS) + torch.mean(delta**2, dim=self.dim, keepdim=False).clamp(min=SQRT_EPS) ) mus = torch.cat((mu, s), dim=1) @@ -217,7 +214,7 @@ def forward(self, x, x_lengths=None, weights=None): wbar = torch.mean(weights, dim=self.dim, keepdim=True) mu = xbar / wbar delta = x - mu - var = torch.mean(weights * delta ** 2, dim=self.dim, keepdim=True) / wbar + var = torch.mean(weights * delta**2, dim=self.dim, keepdim=True) / wbar s = torch.sqrt(var.clamp(min=SQRT_EPS)) mu 
= mu.squeeze(self.dim) s = s.squeeze(self.dim) @@ -257,19 +254,18 @@ def _forward_slidwin_int(self, x, win_length, win_shift, snip_edges): c_x = torch.cumsum(x, dim=-1).view(-1, x.shape[-1]) m_x = (c_x[:, win_shift:] - c_x[:, :-win_shift]) / win_length - c_x = torch.cumsum(x ** 2, dim=-1).view(-1, x.shape[-1]) + c_x = torch.cumsum(x**2, dim=-1).view(-1, x.shape[-1]) m_x2 = (c_x[:, win_shift:] - c_x[:, :-win_shift]) / win_length - s_x = torch.sqrt(m_x2 - m_x ** 2).clamp(min=SQRT_EPS) + s_x = torch.sqrt(m_x2 - m_x**2).clamp(min=SQRT_EPS) mus = self._post_slidwin(m_x, s_x, out_shape) return mus def _forward_slidwin_float(self, x, win_length, win_shift, snip_edges): - x, out_shape = self._pre_slidwin(x, win_length, win_shift, snip_edges) num_frames = out_shape[-1] c_x = torch.cumsum(x, dim=-1).view(-1, x.shape[-1]) - c_x2 = torch.cumsum(x ** 2, dim=-1).view(-1, x.shape[-1]) + c_x2 = torch.cumsum(x**2, dim=-1).view(-1, x.shape[-1]) # xx = x.view(-1, x.shape[-1]) # print(xx.shape[1]) @@ -313,7 +309,7 @@ def _forward_slidwin_float(self, x, win_length, win_shift, snip_edges): k += win_shift - var_x = (m_x2 - m_x ** 2).clamp(min=SQRT_EPS) + var_x = (m_x2 - m_x**2).clamp(min=SQRT_EPS) s_x = torch.sqrt(var_x) # idx = torch.isnan(s_x) #.any(dim=1) # if torch.sum(idx) > 0: @@ -404,14 +400,14 @@ def forward(self, x, x_lengths=None, weights=None): weights = self._standardize_weights(x, x_lengths, weights) if weights is None: mu = torch.mean(x, dim=self.dim, keepdim=self.keepdim) - x2bar = torch.mean(x ** 2, dim=self.dim, keepdim=self.keepdim) + x2bar = torch.mean(x**2, dim=self.dim, keepdim=self.keepdim) logvar = torch.log(x2bar - mu * mu + 1e-5) # for stability in case var=0 return torch.cat((mu, logvar), dim=-1) xbar = torch.mean(weights * x, dim=self.dim, keepdim=self.keepdim) wbar = torch.mean(weights, dim=self.dim, keepdim=self.keepdim) mu = xbar / wbar - x2bar = torch.mean(weights * x ** 2, dim=self.dim, keepdim=self.keepdim) / wbar + x2bar = torch.mean(weights * x**2, dim=self.dim, keepdim=self.keepdim) / wbar var = (x2bar - mu * mu).clamp(min=1e-5) logvar = torch.log(var) @@ -448,7 +444,7 @@ def __init__( if dist_pow == 1: self.dist_f = lambda x: torch.norm(x, p=2, dim=-1) else: - self.dist_f = lambda x: torch.sum(x ** 2, dim=-1) + self.dist_f = lambda x: torch.sum(x**2, dim=-1) self.size_multiplier = num_comp @@ -507,7 +503,7 @@ def forward(self, x, x_lengths=None, weights=None): delta = x - self.mu # (batch, time, num_comp, feat_dim) dist = self.dist_f(delta) # (batch, time, num_comp) - llk = -self.prec ** 2 * dist + self.bias + llk = -self.prec**2 * dist + self.bias r = nnf.softmax(llk, dim=-1) # (batch, time, num_comp) if weights is not None: r *= weights @@ -527,7 +523,6 @@ def forward(self, x, x_lengths=None, weights=None): return pool def get_config(self): - config = { "in_feats": self.in_feats, "num_comp": self.num_comp, @@ -783,10 +778,22 @@ def forward(self, x, x_lengths=None, weights=None): # x = (batch, feat_dim, time) weights = self._standardize_weights(x, x_lengths, weights) # (batch, 1, time) x_inner = self.conv1(x) # (batch, inner_dim, time) + assert not torch.any( + torch.isnan(x_inner) + ), f"xinner is nan {torch.sum(torch.isnan(x_inner))} {torch.sum(torch.isnan(x))} {torch.mean(x)} {torch.sum(torch.isinf(x))} {x.size()}" + assert not torch.any( + torch.isinf(x_inner) + ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(x))}" # logging.info('x_inner1={} {}'.format(torch.sum(torch.isnan(x_inner)), torch.sum(torch.isinf(x_inner)))) if 
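The sliding-window statistics in _forward_slidwin_int above come from the cumulative-sum identity: the sum over any window is the difference of two cumsum entries, so windowed means and standard deviations need no explicit loop. A standalone sketch of the identity (the patch's padding and win_shift handling differ; this shows the core trick only):

import torch

x = torch.randn(2, 100)                         # (batch, time)
win = 10
# prepend a zero so every window sum is a simple difference of cumsums
c1 = torch.nn.functional.pad(x, (1, 0)).cumsum(dim=-1)
c2 = torch.nn.functional.pad(x**2, (1, 0)).cumsum(dim=-1)
mean = (c1[:, win:] - c1[:, :-win]) / win       # mean of every length-10 window
mean2 = (c2[:, win:] - c2[:, :-win]) / win
std = (mean2 - mean**2).clamp(min=1e-5).sqrt()  # clamped for stability, like SQRT_EPS above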
self.use_global_context: global_mus = self.stats_pool(x, weights=weights) x_inner = x_inner + self.lin_global(global_mus).unsqueeze(-1) + assert not torch.any( + torch.isnan(x_inner) + ), f"xinner is nan {torch.sum(torch.isnan(x_inner))} {torch.sum(torch.isnan(global_mus))}" + assert not torch.any( + torch.isinf(x_inner) + ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(global_mus))}" # logging.info('x_inner2={} {}'.format(torch.sum(torch.isnan(x_inner)), torch.sum(torch.isinf(x_inner)))) attn = self.conv2( self.activation(self.norm_layer(x_inner)) @@ -807,11 +814,23 @@ def forward(self, x, x_lengths=None, weights=None): if weights is not None: attn = attn * weights + assert not torch.any( + torch.isnan(attn) + ), f"attn is nan {torch.sum(torch.isnan(attn))}" + assert not torch.any( + torch.isinf(attn) + ), f"attn is inf {torch.sum(torch.isinf(attn))}" mus = self.stats_pool(x, weights=attn) # logging.info('mus={} {}'.format(torch.sum(torch.isnan(mus)), torch.sum(torch.isinf(mus)))) if self.keepdim: mus = mus.unsqueeze(self.dim) + assert not torch.any( + torch.isnan(mus) + ), f"mus is nan {torch.sum(torch.isnan(mus))}" + assert not torch.any( + torch.isinf(mus) + ), f"mus is inf {torch.sum(torch.isinf(mus))}" return mus def get_config(self): diff --git a/hyperion/torch/layers/norm_layer_factory.py b/hyperion/torch/layers/norm_layer_factory.py index 8543b31b..8c0ebdeb 100644 --- a/hyperion/torch/layers/norm_layer_factory.py +++ b/hyperion/torch/layers/norm_layer_factory.py @@ -105,4 +105,4 @@ def create(norm_name, num_groups=None, momentum=0.1, eps=1e-5): if norm_name == "layer-norm": # it is equivalent to groupnorm with 1 group - return lambda x, momentum=momentum, eps=eps: nn.GroupNorm(1, x, eps=eps) + return lambda x, momentum=momentum, eps=eps: nn.LayerNorm(x, eps=eps) diff --git a/hyperion/torch/layers/pos_encoder.py b/hyperion/torch/layers/pos_encoder.py index b6f3672e..f18eb51f 100644 --- a/hyperion/torch/layers/pos_encoder.py +++ b/hyperion/torch/layers/pos_encoder.py @@ -11,7 +11,11 @@ from .activation_factory import ActivationFactory as AF -class PosEncoder(nn.Module): +class PosEncoderBase(nn.Module): + pass + + +class PosEncoder(PosEncoderBase): """Positional encoding. 
Attributes: @@ -32,9 +36,9 @@ def __repr__(self): return self.__str__() def __str__(self): - s = "{}(num_feats={}, dropout_rate={})".format(self.__class__.__name__, - self.num_feats, - self.dropout_rate) + s = "{}(num_feats={}, dropout_rate={})".format( + self.__class__.__name__, self.num_feats, self.dropout_rate + ) return s def _pe(self, x, relative=False): @@ -48,14 +52,15 @@ def _pe(self, x, relative=False): pe = torch.zeros(x.size(1), self.num_feats) if relative: # this is for relative positional encoders - position = torch.arange(x.size(1) - 1, -1, -1, - dtype=torch.float32).unsqueeze(1) + position = torch.arange( + x.size(1) - 1, -1, -1, dtype=torch.float32 + ).unsqueeze(1) else: - position = torch.arange(0, x.size(1), - dtype=torch.float32).unsqueeze(1) + position = torch.arange(0, x.size(1), dtype=torch.float32).unsqueeze(1) div_term = torch.exp( - torch.arange(0, self.num_feats, 2, dtype=torch.float32) * - -(math.log(10000.0) / self.num_feats)) + torch.arange(0, self.num_feats, 2, dtype=torch.float32) + * -(math.log(10000.0) / self.num_feats) + ) pe[:, 0::2] = torch.sin(position * div_term) pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0) @@ -72,7 +77,7 @@ def forward(self, x: torch.Tensor): x-scaled + pos-encoder """ pe = self._pe(x) - x = x * self.xscale + pe[:, :x.size(1)] + x = x * self.xscale + pe[:, : x.size(1)] if self.dropout_rate > 0: return self.dropout(x) return x @@ -107,7 +112,7 @@ def forward(self, x: torch.Tensor): x = x * self.xscale # we want embedding [R_L,..., R_0] # while in non relative we want [R_0, ..., R_L] - pos_emb = self.pe[:, -x.size(1):] + pos_emb = self.pe[:, -x.size(1) :] # this pos_emb is matrix Q in # https://arxiv.org/pdf/1901.02860.pdf Appendix B # I think it should have been denoted as R, @@ -119,7 +124,7 @@ def forward(self, x: torch.Tensor): return x, pos_emb -class NoPosEncoder(nn.Module): +class NoPosEncoder(PosEncoderBase): """This is a dummy class for the case where we deactivate the positional encoder @@ -140,7 +145,7 @@ def forward(self, x: torch.Tensor): return x -class ConvPosEncoder(nn.Module): +class ConvPosEncoder(PosEncoderBase): """Convolutional positional encoder like the one used in wav2vec2 Attributes: @@ -150,14 +155,21 @@ class ConvPosEncoder(nn.Module): activation: hidden activation """ - def __init__(self, num_feats: int, kernel_size: int, num_groups: int, - activation: Union[str, nn.Module]): + def __init__( + self, + num_feats: int, + kernel_size: int, + num_groups: int, + activation: Union[str, nn.Module], + ): super().__init__() - self.conv = nn.Conv1d(num_feats, - num_feats, - kernel_size=kernel_size, - padding=kernel_size // 2, - groups=num_groups) + self.conv = nn.Conv1d( + num_feats, + num_feats, + kernel_size=kernel_size, + padding=kernel_size // 2, + groups=num_groups, + ) self.activation = AF.create(activation) self.num_pad_remove = 1 if kernel_size % 2 == 0 else 0 @@ -165,7 +177,7 @@ def forward(self, x: torch.Tensor): x = x.transpose(1, 2) x = self.conv(x) if self.num_pad_remove > 0: - x = x[:, :, :-self.num_pad_remove] + x = x[:, :, : -self.num_pad_remove] x = self.activation(x).transpose(1, 2) diff --git a/hyperion/torch/layers/swish.py b/hyperion/torch/layers/swish.py index 62225ad9..9ba0a896 100644 --- a/hyperion/torch/layers/swish.py +++ b/hyperion/torch/layers/swish.py @@ -55,16 +55,16 @@ def __str__(self): class DoubleSwishImplementation(torch.autograd.Function): - """ Implementation for DoubleSwish Activation from - 
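The _pe table reflowed above is the standard sinusoidal encoding, PE(pos, 2i) = sin(pos / 10000^(2i/d)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d)); the relative variant only reverses the position order. A minimal standalone version of the same computation:

import math
import torch

def sinusoidal_pe(seq_len: int, num_feats: int, relative: bool = False) -> torch.Tensor:
    pe = torch.zeros(seq_len, num_feats)
    if relative:  # positions [L-1, ..., 0] so the table reads [R_{L-1}, ..., R_0]
        position = torch.arange(seq_len - 1, -1, -1, dtype=torch.float32).unsqueeze(1)
    else:
        position = torch.arange(0, seq_len, dtype=torch.float32).unsqueeze(1)
    div_term = torch.exp(
        torch.arange(0, num_feats, 2, dtype=torch.float32)
        * -(math.log(10000.0) / num_feats)
    )
    pe[:, 0::2] = torch.sin(position * div_term)  # even dims: sine
    pe[:, 1::2] = torch.cos(position * div_term)  # odd dims: cosine
    return pe.unsqueeze(0)                        # (1, seq_len, num_feats)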
https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
+    """Implementation for DoubleSwish Activation from
+    https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
 
-    f(x) = x * torch.sigmoid(x-1) = swish(swish(x)),
+    f(x) = x * torch.sigmoid(x-1) = swish(swish(x)),
     where swish(x) = x * sigmoid(x).
 
     Memory-efficient derivative computation:
     f'(x) = x * s'(x) + x' * s(x) = x * s'(x) + s(x).
     where s(x) = sigmoid(x-1), and s'(x) = s(x) * (1-s(x)).
-
+
     f'(x) = x * s(x) * (1-s(x)) + s(x) = f(x) * (1-s(x)) + s(x)
     """
@@ -108,15 +108,14 @@ def backward(ctx, y_grad: torch.Tensor) -> torch.Tensor:
 
 
 class DoubleSwish(torch.nn.Module):
-    """ DoubleSwish activation
-       f(x) = x * torch.sigmoid(x-1) = swish(swish(x)),
-       where swish(x) = x * sigmoid(x).
+    """DoubleSwish activation
+    f(x) = x * torch.sigmoid(x-1) = swish(swish(x)),
+    where swish(x) = x * sigmoid(x).
     """
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-
         if torch.jit.is_scripting() or torch.jit.is_tracing():
-            return (x * torch.sigmoid(x - 1.0)).clamp(max=6)
+            return x * torch.sigmoid(x - 1.0)
         return DoubleSwishImplementation.apply(x)
@@ -129,10 +128,10 @@ def __str__(self):
 
 
 class DoubleSwish6(torch.nn.Module):
-    """ DoubleSwish activation clamped to 6
+    """DoubleSwish activation clamped to 6
        x = min(x, 6)
-       f(x) = x * torch.sigmoid(x-1) = swish(swish(x)),
-       where swish(x) = x * sigmoid(x).
+    f(x) = x * torch.sigmoid(x-1) = swish(swish(x)),
+    where swish(x) = x * sigmoid(x).
     """
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
diff --git a/hyperion/torch/losses/__init__.py b/hyperion/torch/losses/__init__.py
index bf3ce279..6f68ad45 100644
--- a/hyperion/torch/losses/__init__.py
+++ b/hyperion/torch/losses/__init__.py
@@ -4,3 +4,4 @@
 """
 
 from .bce_with_llr import BCEWithLLR
+from .dino_loss import DINOLoss
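DoubleSwishImplementation's backward above exploits that the derivative can be rebuilt from the forward output: with s = sigmoid(x - 1) and f(x) = x * s, f'(x) = f(x) * (1 - s) + s, so only small quantized tensors need to be saved for backward. A quick numerical check of that identity against autograd (illustrative only, not part of the patch):

import torch

x = torch.randn(1000, dtype=torch.float64, requires_grad=True)
s = torch.sigmoid(x - 1.0)
f = x * s                                        # DoubleSwish forward
(g_autograd,) = torch.autograd.grad(f.sum(), x)
g_manual = f.detach() * (1 - s.detach()) + s.detach()   # f'(x) = f(x)*(1-s) + s
assert torch.allclose(g_autograd, g_manual)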
diff --git a/hyperion/torch/losses/dino_loss.py b/hyperion/torch/losses/dino_loss.py
new file mode 100644
index 00000000..b22489a3
--- /dev/null
+++ b/hyperion/torch/losses/dino_loss.py
@@ -0,0 +1,164 @@
+"""
+ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+from jsonargparse import ActionParser, ActionYesNo, ArgumentParser
+
+from ...utils.misc import filter_func_args
+
+
+class DINOLoss(nn.Module):
+    """Loss for Training DIstillation with NO labels.
+
+    Args:
+      num_classes: number of DINO classes
+      student_temp: temperature of student distribution
+      teacher_temp: final temperature of teacher distribution
+      teacher_warmup_temp: initial temperature of teacher distribution
+      temp_warmup_epochs: warmup epochs for the teacher temperature
+      center_momentum: momentum for centering of the teacher distribution
+    """
+
+    def __init__(
+        self,
+        num_classes: int,
+        student_temp: float = 0.1,
+        teacher_temp: float = 0.04,
+        teacher_warmup_temp: float = 0.04,
+        temp_warmup_epochs: int = 30,
+        center_momentum: float = 0.9,
+    ):
+        super().__init__()
+        self.num_classes = num_classes
+        self.student_temp = student_temp
+        self.teacher_temp = teacher_temp
+        self.teacher_warmup_temp = teacher_warmup_temp
+        self.temp_warmup_epochs = temp_warmup_epochs
+        self.center_momentum = center_momentum
+        self.cur_teacher_temp = teacher_warmup_temp
+        self.register_buffer("center", torch.zeros(1, num_classes))
+
+    def update_temp(self, epoch: int):
+        if epoch < self.temp_warmup_epochs:
+            self.cur_teacher_temp = (
+                self.teacher_warmup_temp
+                + (self.teacher_temp - self.teacher_warmup_temp)
+                * epoch
+                / self.temp_warmup_epochs
+            )
+            logging.info("updating dino-loss teacher temp=%.2f", self.cur_teacher_temp)
+        else:
+            self.cur_teacher_temp = self.teacher_temp
+
+    def forward(
+        self,
+        student_pred: torch.Tensor,
+        teacher_pred: torch.Tensor,
+        num_student_crops: int,
+        num_teacher_crops: int,
+    ):
+        """
+        Cross-entropy between softmax outputs of the teacher and student networks.
+        """
+        assert not torch.any(torch.isnan(student_pred)), f"loss/student is nan"
+        student_pred = student_pred / self.student_temp
+        assert not torch.any(torch.isnan(student_pred)), f"loss/p is nan"
+        student_pred = student_pred.chunk(num_student_crops)
+        teacher_pred = teacher_pred.detach()
+        center = self.center  # we take the center before updating it
+        if self.training:
+            self.update_center(teacher_pred)
+        assert not torch.any(torch.isnan(teacher_pred)), f"loss/teacher is nan"
+        teacher_pred = nn.functional.softmax(
+            (teacher_pred - center) / self.cur_teacher_temp, dim=-1
+        )
+        assert not torch.any(torch.isnan(teacher_pred)), f"loss/q is nan {center}"
+        teacher_pred = teacher_pred.chunk(num_teacher_crops)
+
+        total_loss = 0
+        n_loss_terms = 0
+        for iq, q in enumerate(teacher_pred):
+            for ip, p in enumerate(student_pred):
+                if ip == iq and num_teacher_crops > 1:
+                    # we skip cases where student and teacher operate on the same view
+                    continue
+                loss = torch.sum(-q * nn.functional.log_softmax(p, dim=-1), dim=-1)
+                assert not torch.any(
+                    torch.isnan(loss)
+                ), f"loss is nan {iq} {ip} {torch.mean(q)} {torch.mean(p)} {torch.mean(center)}"
+                total_loss += loss.mean()
+                n_loss_terms += 1
+        total_loss /= n_loss_terms
+        return total_loss
+
+    @torch.no_grad()
+    def update_center(self, teacher_pred: torch.Tensor):
+        """
+        Update center used for teacher output.
+        """
+        batch_acc = torch.sum(teacher_pred, dim=0, keepdim=True)
+        batch_size = torch.as_tensor(teacher_pred.size(0), device=batch_acc.device)
+        if dist.is_initialized():
+            dist.all_reduce(batch_size, op=dist.ReduceOp.SUM)
+            dist.all_reduce(batch_acc, op=dist.ReduceOp.SUM)
+
+        batch_center = batch_acc / batch_size
+        assert not torch.any(
+            torch.isnan(batch_center)
+        ), f"bc is nan {torch.mean(batch_acc)} {batch_size}"
+        # ema update
+        self.center = self.center * self.center_momentum + batch_center * (
+            1 - self.center_momentum
+        )
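Taken together, forward() and update_center() implement the usual DINO recipe: the teacher output is centered (an EMA over batches, all-reduced across workers) and sharpened with a low temperature, which jointly discourage collapse to a single class or to the uniform distribution, and the student is trained by cross-entropy against it. A condensed standalone sketch of the per-pair term (hypothetical sizes, outside the patch):

import torch
import torch.nn.functional as F

def dino_ce(student_pred, teacher_pred, center, t_s=0.1, t_t=0.04):
    # teacher: detach, center, sharpen with the low teacher temperature
    q = F.softmax((teacher_pred.detach() - center) / t_t, dim=-1)
    # student: temperature-scaled log-softmax
    log_p = F.log_softmax(student_pred / t_s, dim=-1)
    return torch.sum(-q * log_p, dim=-1).mean()

center = torch.zeros(1, 16)
loss = dino_ce(torch.randn(8, 16), torch.randn(8, 16), center)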
+ """ + batch_acc = torch.sum(teacher_pred, dim=0, keepdim=True) + batch_size = torch.as_tensor(teacher_pred.size(0), device=batch_acc.device) + if dist.is_initialized(): + dist.all_reduce(batch_size, op=dist.ReduceOp.SUM) + dist.all_reduce(batch_acc, op=dist.ReduceOp.SUM) + + batch_center = batch_acc / batch_size + assert not torch.any( + torch.isnan(batch_center) + ), f"bc is nan {torch.mean(batch_acc)} {batch_size}" + # ema update + self.center = self.center * self.center_momentum + batch_center * ( + 1 - self.center_momentum + ) + + @staticmethod + def filter_args(**kwargs): + return filter_func_args(DINOLoss.__init__, kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--num-classes", default=65536, type=int, help="number of DINO classes" + ) + parser.add_argument( + "--student-temp", + default=0.1, + type=float, + help="temperature of student distribution", + ) + parser.add_argument( + "--teacher-temp", + default=0.07, + type=float, + help="final temperature of teacher distribution", + ) + parser.add_argument( + "--teacher-warmup-temp", + default=0.04, + type=float, + help="initial temperature of teacher distribution", + ) + parser.add_argument( + "--temp-warmup-epochs", + default=30, + type=int, + help="warmup epochs for the teacher temperature", + ) + parser.add_argument( + "--center-momentum", + default=0.9, + type=float, + help="momumntum for centering of the teacher distribution", + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/lr_schedulers/factory.py b/hyperion/torch/lr_schedulers/factory.py index cf003ca7..f2886203 100644 --- a/hyperion/torch/lr_schedulers/factory.py +++ b/hyperion/torch/lr_schedulers/factory.py @@ -2,9 +2,8 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from jsonargparse import ActionParser, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from .cos_lr import AdamCosineLR, CosineLR from .exp_lr import ExponentialLR @@ -14,8 +13,7 @@ from .triangular_lr import TriangularLR -class LRSchedulerFactory(object): - +class LRSchedulerFactory: def create( optimizer, lrsch_type, @@ -174,7 +172,6 @@ def create( @staticmethod def filter_args(**kwargs): - valid_args = ( "lrsch_type", "decay_rate", @@ -222,9 +219,11 @@ def add_class_args(parser, prefix=None): "noam_lr", "triangular_lr", ], - help=("Learning rate schedulers: None, Exponential," - "Cosine Annealing, Cosine Annealing for Adam," - "Reduce on Plateau"), + help=( + "Learning rate schedulers: None, Exponential," + "Cosine Annealing, Cosine Annealing for Adam," + "Reduce on Plateau" + ), ) parser.add_argument( @@ -233,29 +232,22 @@ def add_class_args(parser, prefix=None): type=float, help=("LR decay rate in exp lr"), ) - parser.add_argument("--decay-steps", - default=100, - type=int, - help=("LR decay steps in exp lr")) - parser.add_argument("--power", - default=0.5, - type=float, - help=("power in inverse power lr")) - - parser.add_argument("--hold-steps", - default=10, - type=int, - help=("LR hold steps in exp lr")) - parser.add_argument("--t", - default=10, - type=int, - help=("Period in cos lr")) + parser.add_argument( + "--decay-steps", default=100, type=int, help=("LR decay steps in exp lr") + ) + parser.add_argument( + "--power", default=0.5, type=float, 
help=("power in inverse power lr") + ) + + parser.add_argument( + "--hold-steps", default=10, type=int, help=("LR hold steps in exp lr") + ) + parser.add_argument("--t", default=10, type=int, help=("Period in cos lr")) parser.add_argument( "--t-mul", default=1, type=int, - help=( - "Period multiplicator for each restart in cos/triangular lr"), + help=("Period multiplicator for each restart in cos/triangular lr"), ) parser.add_argument( "--gamma", @@ -267,13 +259,13 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--warm-restarts", default=False, - action="store_true", + action=ActionYesNo, help=("Do warm restarts in cos lr"), ) - parser.add_argument("--monitor", - default="val_loss", - help=("Monitor metric to reduce lr")) + parser.add_argument( + "--monitor", default="val_loss", help=("Monitor metric to reduce lr") + ) parser.add_argument( "--mode", default="min", @@ -285,24 +277,21 @@ def add_class_args(parser, prefix=None): "--factor", default=0.1, type=float, - help=( - "Factor by which the learning rate will be reduced on plateau" - ), + help=("Factor by which the learning rate will be reduced on plateau"), ) parser.add_argument( "--patience", default=10, type=int, - help= - ("Number of epochs with no improvement after which learning rate will be reduced" - ), + help=( + "Number of epochs with no improvement after which learning rate will be reduced" + ), ) - parser.add_argument("--threshold", - default=1e-4, - type=float, - help=("Minimum metric improvement")) + parser.add_argument( + "--threshold", default=1e-4, type=float, help=("Minimum metric improvement") + ) parser.add_argument( "--threshold_mode", @@ -315,20 +304,16 @@ def add_class_args(parser, prefix=None): "--cooldown", default=0, type=int, - help= - ("Number of epochs to wait before resuming normal operation after lr has been reduced" - ), + help=( + "Number of epochs to wait before resuming normal operation after lr has been reduced" + ), ) - parser.add_argument("--eps", - default=1e-8, - type=float, - help=("Minimum decay applied to lr")) + parser.add_argument( + "--eps", default=1e-8, type=float, help=("Minimum decay applied to lr") + ) - parser.add_argument("--min-lr", - default=0, - type=float, - help=("Minimum lr")) + parser.add_argument("--min-lr", default=0, type=float, help=("Minimum lr")) parser.add_argument( "--warmup-steps", @@ -352,13 +337,12 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--update-lr-on-opt-step", default=False, - action="store_true", + action=ActionYesNo, help=("Update lr based on batch number instead of epoch number"), ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) # help='learning rate scheduler options') add_argparse_args = add_class_args diff --git a/hyperion/torch/models/transducer/rnn_transducer.py b/hyperion/torch/models/transducer/rnn_transducer.py index 3326ef81..b8e7fe74 100644 --- a/hyperion/torch/models/transducer/rnn_transducer.py +++ b/hyperion/torch/models/transducer/rnn_transducer.py @@ -24,7 +24,6 @@ @dataclass class RNNTransducerOutput(HypDataClass): - loss: torch.Tensor loss_simple: Optional[torch.Tensor] = None loss_pruned: Optional[torch.Tensor] = None @@ -32,7 +31,7 @@ class RNNTransducerOutput(HypDataClass): class RNNTransducer(TorchModel): - """ Base-class for RNN-T in + """Base-class for RNN-T in "Sequence Transduction with Recurrent Neural Networks" https://arxiv.org/pdf/1211.3711.pdf @@ -92,13 
+91,15 @@ def forward(
         output = RNNTransducerOutput(*dec_output)
         return output
 
-    def infer(self,
-              x: torch.Tensor,
-              x_lengths: torch.Tensor,
-              decoding_method="time_sync_beam_search",
-              beam_width: int = 5,
-              max_sym_per_frame: int = 3,
-              max_sym_per_utt: int = 1000) -> List[List[int]]:
+    def infer(
+        self,
+        x: torch.Tensor,
+        x_lengths: torch.Tensor,
+        decoding_method="time_sync_beam_search",
+        beam_width: int = 5,
+        max_sym_per_frame: int = 3,
+        max_sym_per_utt: int = 1000,
+    ) -> List[List[int]]:
         """
         ASR tokens inference
         Args:
@@ -121,12 +122,14 @@ def infer(self,
         batch_size = x.size(0)
         y = []
         for i in range(batch_size):
-            x_i = x[i:i + 1, :x_lengths[i]]
-            y_i = self.decoder.decode(x_i,
-                                      method=decoding_method,
-                                      beam_width=beam_width,
-                                      max_sym_per_frame=max_sym_per_frame,
-                                      max_sym_per_utt=max_sym_per_utt)
+            x_i = x[i : i + 1, : x_lengths[i]]
+            y_i = self.decoder.decode(
+                x_i,
+                method=decoding_method,
+                beam_width=beam_width,
+                max_sym_per_frame=max_sym_per_frame,
+                max_sym_per_utt=max_sym_per_utt,
+            )
             y.append(y_i)
         return y
 
@@ -180,7 +183,6 @@ def filter_args(**kwargs):
 
     @staticmethod
     def add_class_args(parser, prefix=None, skip=set()):
-
         if prefix is not None:
             outer_parser = parser
             parser = ArgumentParser(prog="")
@@ -188,8 +190,7 @@ def add_class_args(parser, prefix=None, skip=set()):
         RNNTransducerDecoder.add_class_args(parser, prefix="decoder")
 
         if prefix is not None:
-            outer_parser.add_argument("--" + prefix,
-                                      action=ActionParser(parser=parser))
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
 
     def change_config(
         self,
@@ -201,7 +202,7 @@ def change_config(
 
     @staticmethod
     def filter_finetune_args(**kwargs):
         args = {}
-        decoder_args = Decoder.filter_finetune_args(**kwargs["decoder"])
+        decoder_args = RNNTransducerDecoder.filter_finetune_args(**kwargs["decoder"])
         args["decoder"] = decoder_args
         return args
 
@@ -214,8 +215,7 @@ def add_finetune_args(parser, prefix=None):
         RNNTransducerDecoder.add_finetune_args(parser, prefix="decoder")
 
         if prefix is not None:
-            outer_parser.add_argument("--" + prefix,
-                                      action=ActionParser(parser=parser))
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
 
     @staticmethod
     def add_infer_args(parser, prefix=None):
@@ -223,29 +223,34 @@ def add_infer_args(parser, prefix=None):
             outer_parser = parser
             parser = ArgumentParser(prog="")
 
-        parser.add_argument("--decoding-method",
-                            default="time_sync_beam_search",
-                            choices=[
-                                "greedy", "time_sync_beam_search",
-                                "align_length_sync_beam_search"
-                            ])
-
-        parser.add_argument("--beam-width",
-                            default=5,
-                            type=int,
-                            help="beam width for beam search")
-        parser.add_argument("--max-sym-per-frame",
-                            default=3,
-                            type=int,
-                            help="max symbols RNN-T can emit in 1 frame")
-        parser.add_argument("--max-sym-per-utt",
-                            default=1000,
-                            type=int,
-                            help="max symbols RNN-T can emit in 1 frame")
+        parser.add_argument(
+            "--decoding-method",
+            default="time_sync_beam_search",
+            choices=[
+                "greedy",
+                "time_sync_beam_search",
+                "align_length_sync_beam_search",
+            ],
+        )
+
+        parser.add_argument(
+            "--beam-width", default=5, type=int, help="beam width for beam search"
+        )
+        parser.add_argument(
+            "--max-sym-per-frame",
+            default=3,
+            type=int,
+            help="max symbols RNN-T can emit in 1 frame",
+        )
+        parser.add_argument(
+            "--max-sym-per-utt",
+            default=1000,
+            type=int,
+            help="max symbols RNN-T can emit in 1 utterance",
+        )
 
         if prefix is not None:
-            outer_parser.add_argument("--" + prefix,
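The infer() loop above decodes one utterance at a time, slicing the padded batch down to each sequence's true length before handing it to the decoder. A hypothetical call (variable names assumed) matching the flags registered by add_infer_args:

tokens = model.infer(
    x,                                        # (batch, time, feat_dim)
    x_lengths,
    decoding_method="time_sync_beam_search",  # or "greedy", "align_length_sync_beam_search"
    beam_width=5,
    max_sym_per_frame=3,
    max_sym_per_utt=1000,
)                                             # -> one list of token ids per utterance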
action=ActionParser(parser=parser)) @staticmethod def filter_infer_args(**kwargs): diff --git a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py index fc10f810..9a939346 100644 --- a/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/hf_wav2xvector.py @@ -310,21 +310,15 @@ def freeze_hf_except_lora(self, bias=None): self.hf_feats.freeze_except_lora(bias) def has_param_groups(self): - return self.hf_feats.has_param_groups() + return self.hf_feats.has_param_groups() or self.xvector.has_param_groups() def trainable_param_groups(self): if not self.has_param_groups(): - return self.trainable_parameters() + return [{"params": self.trainable_parameters()}] param_groups = self.hf_feats.trainable_param_groups() param_groups.append({"params": self.feat_fuser.trainable_parameters()}) - # if self.feat_fusion_method == "weighted-avg": - # if self.feat_fuser.requires_grad: - # param_groups.append({"params": self.feat_fuser}) - # else: - # param_groups.append({"params": self.feat_fuser.parameters()}) - - param_groups.append({"params": self.xvector.trainable_parameters()}) + param_groups.extend(self.xvector.trainable_param_groups()) return param_groups def set_train_mode(self, mode): @@ -362,6 +356,9 @@ def set_train_mode(self, mode): else: raise ValueError(f"invalid train_mode={mode}") + if self.xvector.head_type == "dino": + self.xvector.classif_net.freeze_output_g() + logging.info("train mode set to %s", mode) if "nograd" in mode or mode == "ft-embed-affine": @@ -377,6 +374,7 @@ def _train(self, train_mode: str): super()._train(train_mode) elif train_mode == "ft-embed-affine": self.hf_feats.train() + self.feat_fuser.train() self.xvector._train("ft-embed_affine") elif train_mode in [ "ft-xvector", @@ -389,6 +387,7 @@ def _train(self, train_mode: str): "hf-lora-with-bias", ]: self.hf_feats.train() + self.feat_fuser.train() self.xvector._train("full") else: raise ValueError(f"invalid train_mode={train_mode}") diff --git a/hyperion/torch/models/wav2xvectors/wav2conformer_v1_xvector.py b/hyperion/torch/models/wav2xvectors/wav2conformer_v1_xvector.py index ad6ae4c7..3f6acf02 100644 --- a/hyperion/torch/models/wav2xvectors/wav2conformer_v1_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2conformer_v1_xvector.py @@ -68,3 +68,21 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_dino_teacher_args(**kwargs): + base_args = {} + child_args = ConformerV1XVector.filter_dino_teacher_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ConformerV1XVector.add_dino_teacher_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py index 0e4faded..aa01850f 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ..xvectors import 
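trainable_param_groups() above returns the list-of-dicts format that torch optimizers accept, so each component (pretrained hf_feats, feature fuser, x-vector head) can carry its own options, such as a smaller learning rate for the pretrained front-end. A sketch of how such groups are consumed (hypothetical values, not from the patch):

import torch

param_groups = model.trainable_param_groups()
# e.g. [{"params": <hf_feats params>, "lr": 1e-5}, {"params": <fuser>}, {"params": <xvector>}]
optimizer = torch.optim.AdamW(param_groups, lr=1e-4, weight_decay=1e-2)
# groups without their own "lr" fall back to the optimizer-level default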
ResNet1dXVector from .wav2xvector import Wav2XVector @@ -26,7 +25,6 @@ class Wav2ResNet1dXVector(Wav2XVector): """ def __init__(self, feats, xvector): - if isinstance(xvector, dict): xvector = ResNet1dXVector.filter_args(**xvector) xvector = ResNet1dXVector(**xvector) @@ -70,3 +68,21 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_dino_teacher_args(**kwargs): + base_args = {} + child_args = ResNet1dXVector.filter_dino_teacher_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ResNet1dXVector.add_dino_teacher_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py index 11d643af..642c282d 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ..xvectors import ResNetXVector from .wav2xvector import Wav2XVector @@ -26,7 +25,6 @@ class Wav2ResNetXVector(Wav2XVector): """ def __init__(self, feats, xvector): - if isinstance(xvector, dict): xvector = ResNetXVector.filter_args(**xvector) xvector = ResNetXVector(**xvector) @@ -70,3 +68,21 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_dino_teacher_args(**kwargs): + base_args = {} + child_args = ResNetXVector.filter_dino_teacher_args(**kwargs["xvector"]) + base_args["xvector"] = child_args + return base_args + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ResNetXVector.add_dino_teacher_args(parser, prefix="xvector") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index 4bbc0c4c..501fa7f8 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -5,10 +5,9 @@ import contextlib import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import AudioFeatsMVN from ...torch_model import TorchModel @@ -24,7 +23,6 @@ class Wav2XVector(TorchModel): """ def __init__(self, feats, xvector): - super().__init__() if isinstance(feats, dict): @@ -42,6 +40,15 @@ def __init__(self, feats, xvector): def sample_frequency(self): return self.feats.sample_frequency + # def clone(self): + # # weight normalized layers cannot be copied with deepcopy, + # # we remove them to clone and put them back later + # modules, cloned_modules = self.xvector.before_cloning() + # new_self = super().clone() + # self.xvector.after_cloning(*modules) + # new_self.xvector.after_cloning(*cloned_modules) + # return new_self + def 
compute_prototype_affinity(self): return self.xvector.compute_prototype_affinity() @@ -80,6 +87,9 @@ def change_config(self, xvector): logging.info("changing wav2xvector config") self.xvector.change_config(**xvector) + def cancel_output_layer_grads(self): + self.xvector.cancel_output_layer_grads() + def forward( self, x, @@ -91,7 +101,6 @@ def forward( classif_layers=None, return_output=True, ): - with self._feats_context: if vad_samples is not None: x, x_lengths = remove_silence(x, vad_samples, x_lengths) @@ -125,7 +134,6 @@ def extract_embed( embed_layer=None, detach_chunks=False, ): - with self._feats_context: if vad_samples is not None: x, x_lengths = remove_silence(x, vad_samples, x_lengths) @@ -140,6 +148,10 @@ def extract_embed( feats, feat_lengths, chunk_length, embed_layer, detach_chunks ) + def trainable_param_groups(self): + param_groups = self.xvector.trainable_param_groups() + return param_groups + def set_train_mode(self, mode): if mode == self._train_mode: return @@ -155,7 +167,6 @@ def set_train_mode(self, mode): self._train_mode = mode def _train(self, train_mode: str): - self.feats.train() if train_mode in ["frozen"]: super()._train(train_mode) diff --git a/hyperion/torch/models/xvectors/conformer_v1_xvector.py b/hyperion/torch/models/xvectors/conformer_v1_xvector.py index f52b8700..896cad77 100644 --- a/hyperion/torch/models/xvectors/conformer_v1_xvector.py +++ b/hyperion/torch/models/xvectors/conformer_v1_xvector.py @@ -34,9 +34,16 @@ def __init__( head_norm_layer=None, use_norm=True, norm_before=True, + head_use_norm=True, head_use_in_norm=False, + head_hid_dim=2048, + head_bottleneck_dim=256, + proj_head_use_norm=True, + proj_head_norm_before=True, embed_layer=0, proj_feats=None, + head_type="x-vector", + bias_weight_decay=None, ): if isinstance(encoder, dict): logging.info(f"making conformer encoder network={encoder}") @@ -65,10 +72,17 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_norm=head_use_norm, head_use_in_norm=head_use_in_norm, + head_hid_dim=head_hid_dim, + head_bottleneck_dim=head_bottleneck_dim, + proj_head_use_norm=proj_head_use_norm, + proj_head_norm_before=proj_head_norm_before, dropout_rate=dropout_rate, embed_layer=embed_layer, proj_feats=proj_feats, + head_type=head_type, + bias_weight_decay=bias_weight_decay, ) def get_config(self): @@ -88,6 +102,7 @@ def get_config(self): def change_config( self, encoder, + override_output=False, override_dropouts=False, dropout_rate=0, num_classes=None, @@ -100,6 +115,7 @@ def change_config( num_subcenters=2, ): super().change_config( + override_output, False, dropout_rate, num_classes, @@ -170,3 +186,22 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_dino_teacher_args(**kwargs): + base_args = XVector.filter_dino_teacher_args(**kwargs) + child_args = Encoder.filter_finetune_args(**kwargs["encoder"]) + base_args["encoder"] = child_args + return base_args + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_dino_teacher_args(parser) + Encoder.add_finetune_args(parser, prefix="encoder", skip=set()) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/efficient_net_xvector.py 
b/hyperion/torch/models/xvectors/efficient_net_xvector.py
index 132bb51d..923be8eb 100644
--- a/hyperion/torch/models/xvectors/efficient_net_xvector.py
+++ b/hyperion/torch/models/xvectors/efficient_net_xvector.py
@@ -5,10 +5,9 @@
 
 import logging
 
-from jsonargparse import ActionParser, ArgumentParser
-
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ArgumentParser
 
 from ...narchs import EfficientNet as EN
 from .xvector import XVector
@@ -52,11 +51,17 @@ def __init__(
         head_norm_layer=None,
         use_norm=True,
         norm_before=True,
+        head_use_norm=True,
         head_use_in_norm=False,
+        head_hid_dim=2048,
+        head_bottleneck_dim=256,
+        proj_head_use_norm=True,
+        proj_head_norm_before=True,
         embed_layer=0,
         proj_feats=None,
+        head_type="x-vector",
+        bias_weight_decay=None,
     ):
-
         logging.info("making %s encoder network", effnet_type)
         encoder_net = EN(
             effnet_type,
@@ -99,11 +104,18 @@ def __init__(
             head_norm_layer=head_norm_layer,
             use_norm=use_norm,
             norm_before=norm_before,
+            head_use_norm=head_use_norm,
             head_use_in_norm=head_use_in_norm,
+            head_hid_dim=head_hid_dim,
+            head_bottleneck_dim=head_bottleneck_dim,
+            proj_head_use_norm=proj_head_use_norm,
+            proj_head_norm_before=proj_head_norm_before,
             dropout_rate=dropout_rate,
             embed_layer=embed_layer,
             in_feats=in_feats,
             proj_feats=proj_feats,
+            head_type=head_type,
+            bias_weight_decay=bias_weight_decay,
         )
 
     @property
@@ -179,7 +191,6 @@ def time_se(self):
         return self.encoder_net.time_se
 
     def get_config(self):
-
         base_config = super().get_config()
         del base_config["encoder_cfg"]
 
@@ -208,7 +219,12 @@ def get_config(self):
         return config
 
     def change_config(
-        self, override_dropouts=False, dropout_rate=0, drop_connect_rate=0, **kwargs
+        self,
+        override_output=False,
+        override_dropouts=False,
+        dropout_rate=0,
+        drop_connect_rate=0,
+        **kwargs
     ):
         xvec_args = XVector.filter_finetune_args(**kwargs)
         xvec_args["override_dropouts"] = False
@@ -220,7 +236,6 @@ def change_config(
 
     @classmethod
     def load(cls, file_path=None, cfg=None, state_dict=None):
-
         cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict)
 
         model = cls(**cfg)
@@ -231,7 +246,6 @@ def load(cls, file_path=None, cfg=None, state_dict=None):
 
     @staticmethod
     def filter_args(**kwargs):
-
         base_args = XVector.filter_args(**kwargs)
         child_args = EN.filter_args(**kwargs)
 
@@ -273,3 +287,23 @@ def add_finetune_args(parser, prefix=None):
 
         if prefix is not None:
             outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
+
+    @staticmethod
+    def filter_dino_teacher_args(**kwargs):
+        base_args = XVector.filter_dino_teacher_args(**kwargs)
+        child_args = EN.filter_finetune_args(**kwargs)
+
+        base_args.update(child_args)
+        return base_args
+
+    @staticmethod
+    def add_dino_teacher_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+        EN.add_finetune_args(parser)
+        XVector.add_dino_teacher_args(parser)
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
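The head_hid_dim / head_bottleneck_dim / head_type arguments threaded through these constructors describe a DINO-style projection head: an MLP with a wide hidden layer, an L2-normalized bottleneck, and a weight-normalized output layer whose magnitude is typically frozen, which is what the freeze_output_g() call earlier suggests. A generic sketch of such a head follows; this is an assumption about the shape of head_type="dino", not a copy of hyperion's implementation:

import torch.nn as nn
import torch.nn.functional as F

class DINOHeadSketch(nn.Module):
    def __init__(self, in_dim, num_classes, hid_dim=2048, bottleneck_dim=256):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(in_dim, hid_dim), nn.GELU(),
            nn.Linear(hid_dim, bottleneck_dim),
        )
        # weight-normalized output; freezing weight_g fixes the output row norms
        self.output = nn.utils.weight_norm(
            nn.Linear(bottleneck_dim, num_classes, bias=False)
        )
        self.output.weight_g.data.fill_(1.0)
        self.output.weight_g.requires_grad = False

    def forward(self, x):
        x = F.normalize(self.mlp(x), dim=-1)  # unit-norm bottleneck
        return self.output(x)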
diff --git a/hyperion/torch/models/xvectors/resnet1d_xvector.py b/hyperion/torch/models/xvectors/resnet1d_xvector.py
index 20865880..d305bb6a 100644
--- a/hyperion/torch/models/xvectors/resnet1d_xvector.py
+++ b/hyperion/torch/models/xvectors/resnet1d_xvector.py
@@ -5,17 +5,15 @@
 
 import logging
 
-from jsonargparse import ActionParser, ArgumentParser
-
 import torch
 import torch.nn as nn
+from jsonargparse import ActionParser, ArgumentParser
 
 from ...narchs import ResNet1dEncoder as Encoder
 from .xvector import XVector
 
 
 class ResNet1dXVector(XVector):
-
     def __init__(
         self,
         resnet_enc,
@@ -23,10 +21,7 @@ def __init__(
         pool_net="mean+stddev",
         embed_dim=256,
         num_embed_layers=1,
-        hid_act={
-            "name": "relu",
-            "inplace": True
-        },
+        hid_act={"name": "relu", "inplace": True},
         loss_type="arc-softmax",
         cos_scale=64,
         margin=0.3,
@@ -39,14 +34,19 @@ def __init__(
         head_norm_layer=None,
         use_norm=True,
         norm_before=True,
+        head_use_norm=True,
         head_use_in_norm=False,
+        head_hid_dim=2048,
+        head_bottleneck_dim=256,
+        proj_head_use_norm=True,
+        proj_head_norm_before=True,
         embed_layer=0,
         proj_feats=None,
+        head_type="x-vector",
+        bias_weight_decay=None,
     ):
-
         if isinstance(resnet_enc, dict):
-            logging.info("making %s resnet1d encoder network",
-                         resnet_enc["resb_type"])
+            logging.info("making %s resnet1d encoder network", resnet_enc["resb_type"])
             resnet_enc = Encoder(**resnet_enc)
 
         super().__init__(
@@ -67,14 +67,20 @@ def __init__(
             head_norm_layer=head_norm_layer,
             use_norm=use_norm,
             norm_before=norm_before,
+            head_use_norm=head_use_norm,
             head_use_in_norm=head_use_in_norm,
+            head_hid_dim=head_hid_dim,
+            head_bottleneck_dim=head_bottleneck_dim,
+            proj_head_use_norm=proj_head_use_norm,
+            proj_head_norm_before=proj_head_norm_before,
             dropout_rate=dropout_rate,
             embed_layer=embed_layer,
             proj_feats=proj_feats,
+            head_type=head_type,
+            bias_weight_decay=bias_weight_decay,
         )
 
     def get_config(self):
-
         base_config = super().get_config()
         del base_config["encoder_cfg"]
         del base_config["in_feats"]
@@ -91,6 +97,7 @@ def get_config(self):
     def change_config(
         self,
         resnet_enc,
+        override_output=False,
         override_dropouts=False,
         dropout_rate=0,
         num_classes=None,
@@ -103,6 +110,7 @@ def change_config(
         num_subcenters=2,
     ):
         super().change_config(
+            override_output,
             False,
             dropout_rate,
             num_classes,
@@ -122,7 +130,6 @@ def change_config(
 
     @classmethod
     def load(cls, file_path=None, cfg=None, state_dict=None):
-
         cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict)
         try:
             del cfg["in_feats"]
@@ -137,7 +144,6 @@ def load(cls, file_path=None, cfg=None, state_dict=None):
 
     @staticmethod
     def filter_args(**kwargs):
-
         base_args = XVector.filter_args(**kwargs)
         child_args = Encoder.filter_args(**kwargs["resnet_enc"])
 
@@ -151,12 +157,9 @@ def add_class_args(parser, prefix=None):
             parser = ArgumentParser(prog="")
 
         XVector.add_class_args(parser, skip=set(["in_feats"]))
-        Encoder.add_class_args(parser,
-                               prefix="resnet_enc",
-                               skip=set(["head_channels"]))
+        Encoder.add_class_args(parser, prefix="resnet_enc", skip=set(["head_channels"]))
 
         if prefix is not None:
-            outer_parser.add_argument("--" + prefix,
-                                      action=ActionParser(parser=parser))
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
 
     add_argparse_args = add_class_args
 
@@ -174,10 +177,30 @@ def add_finetune_args(parser, prefix=None):
             parser = ArgumentParser(prog="")
 
         XVector.add_finetune_args(parser)
-        Encoder.add_finetune_args(parser,
-                                  prefix="resnet_enc",
-                                  skip=set(["head_channels"]))
+        Encoder.add_finetune_args(
+            parser, prefix="resnet_enc", skip=set(["head_channels"])
+        )
+
+        if prefix is not None:
+            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
+
+    @staticmethod
+    def filter_dino_teacher_args(**kwargs):
+        base_args = XVector.filter_dino_teacher_args(**kwargs)
+        child_args = Encoder.filter_finetune_args(**kwargs["resnet_enc"])
+        base_args["resnet_enc"] = child_args
+        return base_args
+
+    @staticmethod
+    def add_dino_teacher_args(parser, prefix=None):
+        if prefix is not None:
+            outer_parser = parser
+            parser = ArgumentParser(prog="")
+
+
XVector.add_dino_teacher_args(parser) + Encoder.add_finetune_args( + parser, prefix="resnet_enc", skip=set(["head_channels"]) + ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/resnet_xvector.py b/hyperion/torch/models/xvectors/resnet_xvector.py index 0e9eba22..efc24f27 100644 --- a/hyperion/torch/models/xvectors/resnet_xvector.py +++ b/hyperion/torch/models/xvectors/resnet_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import ResNetFactory as RNF from .xvector import XVector @@ -46,14 +45,20 @@ def __init__( use_norm=True, norm_before=True, in_norm=False, + head_use_norm=True, head_use_in_norm=False, + head_hid_dim=2048, + head_bottleneck_dim=256, + proj_head_use_norm=True, + proj_head_norm_before=True, embed_layer=0, proj_feats=None, + head_type="x-vector", se_r=16, res2net_scale=4, res2net_width_factor=1, + bias_weight_decay=None, ): - logging.info("making %s encoder network", resnet_type) encoder_net = RNF.create( resnet_type, @@ -95,11 +100,18 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_norm=head_use_norm, head_use_in_norm=head_use_in_norm, + head_hid_dim=head_hid_dim, + head_bottleneck_dim=head_bottleneck_dim, + proj_head_use_norm=proj_head_use_norm, + proj_head_norm_before=proj_head_norm_before, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=in_feats, proj_feats=proj_feats, + head_type=head_type, + bias_weight_decay=bias_weight_decay, ) self.resnet_type = resnet_type @@ -157,12 +169,8 @@ def res2net_width_factor(self): return self.encoder_net.res2net_width_factor def get_config(self): - base_config = super().get_config() del base_config["encoder_cfg"] - - pool_cfg = self.pool_net.get_config() - config = { "resnet_type": self.resnet_type, "in_channels": self.in_channels, @@ -185,7 +193,6 @@ def get_config(self): @classmethod def load(cls, file_path=None, cfg=None, state_dict=None): - cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict) model = cls(**cfg) @@ -196,7 +203,6 @@ def load(cls, file_path=None, cfg=None, state_dict=None): @staticmethod def filter_args(**kwargs): - base_args = XVector.filter_args(**kwargs) child_args = RNF.filter_args(**kwargs) @@ -219,7 +225,6 @@ def add_class_args(parser, prefix=None): @staticmethod def filter_finetune_args(**kwargs): - base_args = XVector.filter_finetune_args(**kwargs) child_args = RNF.filter_finetune_args(**kwargs) @@ -237,3 +242,23 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_dino_teacher_args(**kwargs): + base_args = XVector.filter_dino_teacher_args(**kwargs) + child_args = RNF.filter_finetune_args(**kwargs) + + base_args.update(child_args) + return base_args + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_dino_teacher_args(parser) + RNF.add_finetune_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/spinenet_xvector.py 
b/hyperion/torch/models/xvectors/spinenet_xvector.py index 0b27a840..bf829b64 100644 --- a/hyperion/torch/models/xvectors/spinenet_xvector.py +++ b/hyperion/torch/models/xvectors/spinenet_xvector.py @@ -5,10 +5,9 @@ """ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import SpineNetFactory as SNF from .xvector import XVector @@ -50,14 +49,20 @@ def __init__( use_norm=True, norm_before=True, in_norm=False, + head_use_norm=True, head_use_in_norm=False, + head_hid_dim=2048, + head_bottleneck_dim=256, + proj_head_use_norm=True, + proj_head_norm_before=True, embed_layer=0, proj_feats=None, + head_type="x-vector", se_r=16, res2net_scale=4, res2net_width_factor=1, + bias_weight_decay=None, ): - logging.info("making %s encoder network", spinenet_type) encoder_net = SNF.create( spinenet_type, @@ -103,11 +108,18 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_norm=head_use_norm, head_use_in_norm=head_use_in_norm, + head_hid_dim=head_hid_dim, + head_bottleneck_dim=head_bottleneck_dim, + proj_head_use_norm=proj_head_use_norm, + proj_head_norm_before=proj_head_norm_before, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=in_feats, proj_feats=proj_feats, + head_type=head_type, + bias_weight_decay=bias_weight_decay, ) self.spinenet_type = spinenet_type @@ -181,7 +193,6 @@ def res2net_width_factor(self): return self.encoder_net.res2net_width_factor def get_config(self): - base_config = super().get_config() del base_config["encoder_cfg"] @@ -213,7 +224,6 @@ def get_config(self): @classmethod def load(cls, file_path=None, cfg=None, state_dict=None): - cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict) model = cls(**cfg) @@ -263,3 +273,23 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_dino_teacher_args(**kwargs): + base_args = XVector.filter_dino_teacher_args(**kwargs) + child_args = SNF.filter_finetune_args(**kwargs) + + base_args.update(child_args) + return base_args + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_dino_teacher_args(parser) + SNF.add_finetune_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/tdnn_xvector.py b/hyperion/torch/models/xvectors/tdnn_xvector.py index 38262cc3..19c075b6 100644 --- a/hyperion/torch/models/xvectors/tdnn_xvector.py +++ b/hyperion/torch/models/xvectors/tdnn_xvector.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import TDNNFactory as TF from .xvector import XVector @@ -43,11 +42,17 @@ def __init__( use_norm=True, norm_before=False, in_norm=False, + head_use_norm=True, head_use_in_norm=False, + head_hid_dim=2048, + head_bottleneck_dim=256, + proj_head_use_norm=True, + proj_head_norm_before=True, embed_layer=0, proj_feats=None, + head_type="x-vector", + bias_weight_decay=None, ): - logging.info("making %s encoder network", tdnn_type) encoder_net = TF.create( tdnn_type, @@ -84,11 +89,18 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, 
norm_before=norm_before, + head_use_norm=head_use_norm, head_use_in_norm=head_use_in_norm, + head_hid_dim=head_hid_dim, + head_bottleneck_dim=head_bottleneck_dim, + proj_head_use_norm=proj_head_use_norm, + proj_head_norm_before=proj_head_norm_before, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=None, proj_feats=proj_feats, + head_type=head_type, + bias_weight_decay=bias_weight_decay, ) self.tdnn_type = tdnn_type @@ -125,7 +137,6 @@ def in_norm(self): return self.encoder_net.in_norm def get_config(self): - base_config = super().get_config() del base_config["encoder_cfg"] @@ -197,3 +208,23 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_dino_teacher_args(**kwargs): + base_args = XVector.filter_dino_teacher_args(**kwargs) + child_args = TF.filter_finetune_args(**kwargs) + + base_args.update(child_args) + return base_args + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_dino_teacher_args(parser) + TF.add_finetune_args(parser) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/transformer_xvector_v1.py b/hyperion/torch/models/xvectors/transformer_xvector_v1.py index 25e9c894..00f54af7 100644 --- a/hyperion/torch/models/xvectors/transformer_xvector_v1.py +++ b/hyperion/torch/models/xvectors/transformer_xvector_v1.py @@ -5,10 +5,9 @@ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import TransformerEncoderV1 as TE from .xvector import XVector @@ -83,11 +82,17 @@ def __init__( head_norm_layer=None, use_norm=True, norm_before=False, + head_use_norm=True, head_use_in_norm=False, + head_hid_dim=2048, + head_bottleneck_dim=256, + proj_head_use_norm=True, + proj_head_norm_before=True, embed_layer=0, proj_feats=None, + head_type="x-vector", + bias_weight_decay=None, ): - logging.info("making transformer-v1 encoder network") encoder_net = TE( in_feats, @@ -127,11 +132,18 @@ def __init__( head_norm_layer=head_norm_layer, use_norm=use_norm, norm_before=norm_before, + head_use_norm=head_use_norm, head_use_in_norm=head_use_in_norm, + head_hid_dim=head_hid_dim, + head_bottleneck_dim=head_bottleneck_dim, + proj_head_use_norm=proj_head_use_norm, + proj_head_norm_before=proj_head_norm_before, dropout_rate=dropout_rate, embed_layer=embed_layer, in_feats=None, proj_feats=proj_feats, + head_type=head_type, + bias_weight_decay=bias_weight_decay, ) @property @@ -409,3 +421,51 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_dino_teacher_args(**kwargs): + """Filters arguments corresponding to TransformerXVector + from args dictionary + + Args: + kwargs: args dictionary + + Returns: + args dictionary + """ + base_args = XVector.filter_dino_teacher_args(**kwargs) + + valid_args = ( + "pos_dropout_rate", + "att_dropout_rate", + ) + + child_args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + base_args.update(child_args) + return base_args + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + """Adds TransformerXVector config parameters for the DINO teacher to argparser + + Args: + parser:
argparse object + prefix: prefix string to add to the argument names + """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + XVector.add_dino_teacher_args(parser) + parser.add_argument( + "--pos-dropout-rate", + default=0.1, + type=float, + help="positional encoder dropout", + ) + parser.add_argument( + "--att-dropout-rate", default=0, type=float, help="self-att dropout" + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index de28ccae..b4926533 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -3,24 +3,32 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from enum import Enum -from typing import Optional + +# from enum import Enum +from dataclasses import dataclass +from typing import List, Optional import torch import torch.nn as nn from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from ....utils import HypDataClass from ....utils.misc import filter_func_args from ...layer_blocks import TDNNBlock from ...layers import GlobalPool1dFactory as PF -from ...narchs import ClassifHead, TorchNALoader +from ...narchs import ClassifHead, DINOHead, ProjHead, TorchNALoader from ...torch_model import TorchModel from ...utils import eval_nnet_by_chunks, scale_seq_lengths -# class XVectorTrainMode(Enum): -# full = 0 -# frozen = 1 -# ft_embed_affine = 2 + +@dataclass +class XVectorOutput(HypDataClass): + loss: torch.Tensor + logits: torch.Tensor + xvector: torch.Tensor + h_enc: Optional[List[torch.Tensor]] = None + h_classif: Optional[List[torch.Tensor]] = None + h_feats: Optional[List[torch.Tensor]] = None class XVector(TorchModel): @@ -45,13 +53,20 @@ def __init__( head_norm_layer=None, use_norm=True, norm_before=True, + head_use_norm=True, head_use_in_norm=False, + head_hid_dim=2048, + head_bottleneck_dim=256, + proj_head_use_norm=True, + proj_head_norm_before=True, dropout_rate=0, embed_layer=0, in_feats=None, proj_feats=None, + head_type="x-vector", + bias_weight_decay=None, ): - super().__init__() + super().__init__(bias_weight_decay=bias_weight_decay) # encoder network self.encoder_net = encoder_net @@ -112,78 +127,147 @@ def __init__( # create classification head logging.info("making classification head net") - self.classif_net = ClassifHead( - pool_feats, - num_classes, - embed_dim=embed_dim, - num_embed_layers=num_embed_layers, - hid_act=hid_act, - loss_type=loss_type, - cos_scale=cos_scale, - margin=margin, - margin_warmup_epochs=margin_warmup_epochs, - intertop_k=intertop_k, - intertop_margin=intertop_margin, - num_subcenters=num_subcenters, - norm_layer=head_norm_layer, - use_norm=use_norm, - norm_before=norm_before, - dropout_rate=dropout_rate, - use_in_norm=head_use_in_norm, - ) - + self.embed_dim = embed_dim + self.num_embed_layers = num_embed_layers + self.head_type = head_type self.hid_act = hid_act self.norm_layer = norm_layer - self.head_norm_layer = head_norm_layer self.use_norm = use_norm self.norm_before = norm_before self.head_use_in_norm = head_use_in_norm + self.head_use_norm = head_use_norm + self.head_norm_layer = head_norm_layer + self.head_hid_dim = head_hid_dim + self.head_bottleneck_dim = head_bottleneck_dim + self.proj_head_use_norm = proj_head_use_norm + self.proj_head_norm_before = proj_head_norm_before self.dropout_rate = dropout_rate self.embed_layer = embed_layer + if 
self.head_type == "x-vector": + self.proj_head_net = None + self.classif_net = ClassifHead( + pool_feats, + num_classes, + embed_dim=embed_dim, + num_embed_layers=num_embed_layers, + hid_act=hid_act, + loss_type=loss_type, + cos_scale=cos_scale, + margin=margin, + margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, + norm_layer=head_norm_layer, + use_norm=head_use_norm, + norm_before=norm_before, + dropout_rate=dropout_rate, + use_in_norm=head_use_in_norm, + ) + elif self.head_type == "dino": + self.proj_head_net = ProjHead( + pool_feats, + embed_dim, + use_norm=proj_head_use_norm, + norm_before=proj_head_norm_before, + ) + self.classif_net = DINOHead( + embed_dim, + num_classes, + hid_feats=head_hid_dim, + bottleneck_feats=head_bottleneck_dim, + num_hid_layers=num_embed_layers, + hid_act=hid_act, + output_type=loss_type, + norm_layer=head_norm_layer, + use_norm=head_use_norm, + norm_before=norm_before, + dropout_rate=dropout_rate, + use_in_norm=head_use_in_norm, + ) @property def pool_feats(self): - return self.classif_net.in_feats + if self.proj_head_net is None: + return self.classif_net.in_feats + else: + return self.proj_head_net.in_feats @property def num_classes(self): return self.classif_net.num_classes - @property - def embed_dim(self): - return self.classif_net.embed_dim - - @property - def num_embed_layers(self): - return self.classif_net.num_embed_layers - @property def cos_scale(self): - return self.classif_net.cos_scale + if self.head_type == "x-vector": + return self.classif_net.cos_scale + elif self.head_type == "dino": + return 1 + else: + raise ValueError @property def margin(self): - return self.classif_net.margin + if self.head_type == "x-vector": + return self.classif_net.margin + else: + return 0.0 @property def margin_warmup_epochs(self): - return self.classif_net.margin_warmup_epochs + if self.head_type == "x-vector": + return self.classif_net.margin_warmup_epochs + else: + return 0 @property def intertop_k(self): - return self.classif_net.intertop_k + if self.head_type == "x-vector": + return self.classif_net.intertop_k + else: + return 0 @property def intertop_margin(self): - return self.classif_net.intertop_margin + if self.head_type == "x-vector": + return self.classif_net.intertop_margin + else: + return 0.0 @property def num_subcenters(self): - return self.classif_net.num_subcenters + if self.head_type == "x-vector": + return self.classif_net.num_subcenters + else: + return 0 @property def loss_type(self): - return self.classif_net.loss_type + if self.head_type == "x-vector": + return self.classif_net.loss_type + elif self.head_type == "dino": + return self.classif_net.output_type + else: + raise ValueError() + + # def clone(self): + # # weight normalized layers cannot be copied with deepcopy, + # # we remove them to clone and put them back later + # modules, cloned_modules = self.before_cloning() + # new_self = super().clone() + # self.after_cloning(*modules) + # new_self.after_cloning(*cloned_modules) + # return new_self + + # def before_cloning(self): + # if self.head_type == "dino": + # return self.classif_net.before_cloning() + # else: + # return None, None + + # def after_cloning(self, output): + # if self.head_type == "dino": + # self.classif_net.after_cloning(output) def _make_pool_net(self, pool_net, enc_feats=None): """Makes the pooling block @@ -290,6 +374,8 @@ class logits tensor with shape=(batch, num_classes). 
x = x[0] x, x_lengths = self._post_enc(x, x_lengths, max_in_length) p = self.pool_net(x, x_lengths=x_lengths) + if self.proj_head_net is not None: + p = self.proj_head_net(p) y = self.classif_net(p, y) return y @@ -329,6 +415,8 @@ def forward_hid_feats( x, x_lengths = self._post_enc(x, x_lengths, max_in_length) p = self.pool_net(x, x_lengths=x_lengths) + if self.proj_head_net is not None: + p = self.proj_head_net(p) h_classif = self.classif_net.forward_hid_feats( p, y, return_classif_layers, return_logits=return_logits ) @@ -358,6 +446,9 @@ def extract_embed( x, x_lengths = self._post_enc(x, x_lengths, max_in_length) p = self.pool_net(x, x_lengths=x_lengths) + if self.proj_head_net is not None: + return self.proj_head_net(p) + y = self.classif_net.extract_embed(p, embed_layer) return y @@ -491,7 +582,6 @@ def compute_slidwin_left_padding( def get_config(self): enc_cfg = self.encoder_net.get_config() pool_cfg = PF.get_config(self.pool_net) - config = { "encoder_cfg": enc_cfg, "pool_net": pool_cfg, @@ -507,14 +597,21 @@ def get_config(self): "intertop_margin": self.intertop_margin, "num_subcenters": self.num_subcenters, "norm_layer": self.norm_layer, - "head_norm_layer": self.head_norm_layer, "use_norm": self.use_norm, "norm_before": self.norm_before, + "head_norm_layer": self.head_norm_layer, + "head_use_norm": self.head_use_norm, "head_use_in_norm": self.head_use_in_norm, + "head_hid_dim": self.head_hid_dim, + "head_bottleneck_dim": self.head_bottleneck_dim, + "proj_head_use_norm": self.proj_head_use_norm, + "proj_head_norm_before": self.proj_head_norm_before, "dropout_rate": self.dropout_rate, "embed_layer": self.embed_layer, "in_feats": self.in_feats, "proj_feats": self.proj_feats, + "head_type": self.head_type, + "bias_weight_decay": self.bias_weight_decay, } base_config = super().get_config() @@ -535,6 +632,7 @@ def load(cls, file_path=None, cfg=None, state_dict=None): def change_config( self, + override_output=False, override_dropouts=False, dropout_rate=0, num_classes=None, @@ -547,16 +645,17 @@ def change_config( num_subcenters=2, ): logging.info("changing x-vector config") - self.rebuild_output_layer( - num_classes=num_classes, - loss_type=loss_type, - cos_scale=cos_scale, - margin=margin, - margin_warmup_epochs=margin_warmup_epochs, - intertop_k=intertop_k, - intertop_margin=intertop_margin, - num_subcenters=num_subcenters, - ) + if override_output: + self.rebuild_output_layer( + num_classes=num_classes, + loss_type=loss_type, + cos_scale=cos_scale, + margin=margin, + margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, + ) if override_dropouts: logging.info("overriding x-vector dropouts") @@ -605,6 +704,10 @@ def rebuild_output_layer( self.classif_net.set_intertop_margin(intertop_margin) self.classif_net.set_num_subcenters(num_subcenters) + def cancel_output_layer_grads(self): + for p in self.classif_net.output.parameters(): + p.grad = None + def freeze_preembed_layers(self): self.encoder_net.freeze() if self.proj is not None: @@ -630,6 +733,9 @@ def set_train_mode(self, mode): else: raise ValueError(f"invalid train_mode={mode}") + if self.head_type == "dino": + self.classif_net.freeze_output_g() + self._train_mode = mode def _train(self, train_mode: str): @@ -658,7 +764,7 @@ def valid_train_modes(): def filter_args(**kwargs): # get arguments for pooling pool_args = PF.filter_args(**kwargs["pool_net"]) - args = filter_func_args(ClassifHead.__init__, kwargs) + args = 
filter_func_args(XVector.__init__, kwargs) args["pool_net"] = pool_args return args @@ -672,6 +778,13 @@ def add_class_args(parser, prefix=None, skip=set()): parser, prefix="pool_net", skip=["dim", "in_feats", "keepdim"] ) + parser.add_argument( + "--head-type", + default="x-vector", + choices=["x-vector", "dino"], + help="type of classification head in [x-vector, dino]", + ) + parser.add_argument( "--embed-dim", default=256, type=int, help=("x-vector dimension") ) @@ -776,6 +889,12 @@ def add_class_args(parser, prefix=None, skip=set()): help="batch normalizaton before activation", ) + parser.add_argument( + "--head-use-norm", + default=True, + action=ActionYesNo, + help="batch normalization at the head", + ) parser.add_argument( "--head-use-in-norm", default=False, @@ -783,6 +902,33 @@ def add_class_args(parser, prefix=None, skip=set()): help="batch normalizaton at the head input", ) + parser.add_argument( + "--head-hid-dim", + default=2048, + type=int, + help="hidden dim of DINO head", + ) + + parser.add_argument( + "--head-bottleneck-dim", + default=256, + type=int, + help="bottleneck dim of DINO head", + ) + + parser.add_argument( + "--proj-head-use-norm", + default=True, + action=ActionYesNo, + help="batch normalization at projection head", + ) + parser.add_argument( + "--proj-head-norm-before", + default=False, + action=ActionYesNo, + help="batch normalization at the beginning of projection head", + ) + try: parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") except: @@ -808,6 +954,14 @@ def add_class_args(parser, prefix=None, skip=set()): "if None, there is not projection" ), ) + + parser.add_argument( + "--bias-weight-decay", + default=None, + type=float, + help="weight decay for bias parameters, if None the default weight decay is used", + ) + if prefix is not None: outer_parser.add_argument( "--" + prefix, @@ -817,15 +971,7 @@ def add_class_args(parser, prefix=None, skip=set()): @staticmethod def filter_finetune_args(**kwargs): - valid_args = ( - "loss_type", - "cos_scale", - "margin", - "margin_warmup_epochs", - "intertop_k", - "intertop_margin", - ) - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + args = filter_func_args(XVector.change_config, kwargs) return args @staticmethod @@ -834,6 +980,13 @@ def add_finetune_args(parser, prefix=None): outer_parser = parser parser = ArgumentParser(prog="") + parser.add_argument( + "--override-output", + default=False, + action=ActionYesNo, + help="changes the config of the output layer", + ) + parser.add_argument( "--loss-type", default="arc-softmax", @@ -894,5 +1047,36 @@ def add_finetune_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + @staticmethod + def filter_dino_teacher_args(**kwargs): + return XVector.filter_finetune_args(**kwargs) + + @staticmethod + def add_dino_teacher_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + try: + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model."
+ ), + ) + except: + pass + + try: + parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") + except: + pass + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + add_argparse_args = add_class_args add_argparse_finetune_args = add_finetune_args diff --git a/hyperion/torch/narchs/__init__.py b/hyperion/torch/narchs/__init__.py index c46c87fa..0bf7ecf4 100644 --- a/hyperion/torch/narchs/__init__.py +++ b/hyperion/torch/narchs/__init__.py @@ -10,10 +10,12 @@ from .dc1d_encoder import DC1dEncoder from .dc2d_decoder import DC2dDecoder from .dc2d_encoder import DC2dEncoder +from .dino_head import DINOHead from .efficient_net import EfficientNet from .etdnn import ETDNNV1 from .fcnet import FCNetV1, FCNetV2 from .feat_fuser_mvn import FeatFuserMVN +from .proj_head import ProjHead from .resetdnn import ResETDNNV1 from .resnet import * from .resnet1d_decoder import ResNet1dDecoder diff --git a/hyperion/torch/narchs/audio_feats_mvn.py b/hyperion/torch/narchs/audio_feats_mvn.py index b42f48f1..dabf308f 100644 --- a/hyperion/torch/narchs/audio_feats_mvn.py +++ b/hyperion/torch/narchs/audio_feats_mvn.py @@ -4,7 +4,7 @@ """ import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layers import AudioFeatsFactory as AFF from ..layers import MeanVarianceNorm as MVN @@ -116,7 +116,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--aug-after-mvn", default=False, - action="store_true", + action=ActionYesNo, help=("do spec augment after st-mvn," "instead of before"), ) diff --git a/hyperion/torch/narchs/classif_head.py b/hyperion/torch/narchs/classif_head.py index e5d90f4f..a4a7e9a1 100644 --- a/hyperion/torch/narchs/classif_head.py +++ b/hyperion/torch/narchs/classif_head.py @@ -4,10 +4,9 @@ """ -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.nn import Linear from ...utils.misc import filter_func_args @@ -62,7 +61,6 @@ def __init__( dropout_rate=0, use_in_norm=False, ): - super().__init__() assert num_embed_layers >= 1, "num_embed_layers (%d < 1)" % num_embed_layers @@ -182,7 +180,6 @@ def rebuild_output_layer( intertop_margin=0.0, num_subcenters=2, ): - embed_dim = self.embed_dim self.num_classes = num_classes self.loss_type = loss_type @@ -283,7 +280,6 @@ def put_layers_in_eval_mode(self, layer_list): self.fc_blocks[l].eval() def forward(self, x, y=None): - if self.use_in_norm: x = self.in_norm(x) @@ -298,7 +294,6 @@ def forward(self, x, y=None): return y def forward_hid_feats(self, x, y=None, return_layers=None, return_logits=False): - assert return_layers is not None or return_logits if return_layers is None: return_layers = [] @@ -322,7 +317,6 @@ def forward_hid_feats(self, x, y=None, return_layers=None, return_logits=False): return h, None def extract_embed(self, x, embed_layer=0): - if self.use_in_norm: x = self.in_norm(x) @@ -344,7 +338,6 @@ def compute_prototype_affinity(self): return torch.mm(kernel, kernel.transpose(0, 1)) def get_config(self): - hid_act = AF.get_config(self.fc_blocks[0].activation) config = { @@ -372,7 +365,6 @@ def get_config(self): @staticmethod def filter_args(**kwargs): - if "wo_norm" in kwargs: kwargs["use_norm"] = not kwargs["wo_norm"] del kwargs["wo_norm"] @@ -413,7 +405,9 @@ def add_class_args(parser, prefix=None): help="loss type: softmax, 
arc-softmax, cos-softmax, subcenter-arc-softmax", ) - parser.add_argument("--s", default=64, type=float, help="scale for arcface") + parser.add_argument( + "--cos-scale", default=64, type=float, help="scale for arcface" + ) parser.add_argument( "--margin", default=0.3, type=float, help="margin for arcface, cosface,..." @@ -460,17 +454,17 @@ def add_class_args(parser, prefix=None): pass parser.add_argument( - "--wo-norm", - default=False, + "--use-norm", + default=True, action=ActionYesNo, - help="without batch normalization", + help="use batch normalization", ) parser.add_argument( - "--norm-after", - default=False, + "--norm-before", + default=True, action=ActionYesNo, - help="batch normalizaton after activation", + help="batch normalization before activation", ) parser.add_argument( @@ -487,6 +481,5 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='classification head options') add_argparse_args = add_class_args diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index 54c2f400..f232c986 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -3,6 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging + import torch import torch.nn as nn from jsonargparse import ActionParser, ActionYesNo, ArgumentParser @@ -217,7 +219,7 @@ def _make_in_layer(self): d_model, self.pos_kernel_size, self.pos_num_groups, self.hid_act ) else: - raise Exception("wrong pos-enc-type={}".format(self.pos_enc_type)) + raise Exception(f"wrong pos-enc-type={self.pos_enc_type}") hid_act = AF.create(self.hid_act) @@ -274,6 +276,29 @@ def _forward_input(self, x, x_mask): return x, x_mask + def change_config( + self, override_dropouts, dropout_rate, pos_dropout_rate, att_dropout_rate + ): + if override_dropouts: + logging.info("changing conformer dropouts") + self.change_dropouts(dropout_rate, pos_dropout_rate, att_dropout_rate) + + def change_dropouts(self, dropout_rate, pos_dropout_rate, att_dropout_rate): + super().change_dropouts(dropout_rate) + from ..layers import PosEncoderBase + + for m in self.modules(): + if isinstance(m, PosEncoderBase): + if hasattr(m, "dropout_rate"): + m.dropout_rate = pos_dropout_rate + m.dropout.p = pos_dropout_rate + elif isinstance(m, EBlock): + m.change_attn_dropout(att_dropout_rate) + + self.dropout_rate = dropout_rate + self.pos_dropout_rate = pos_dropout_rate + self.att_dropout_rate = att_dropout_rate + def forward( self, x, x_lengths=None, x_mask=None, return_mask=False, target_shape=None ): @@ -611,3 +636,63 @@ def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + valid_args = ( + "override_dropouts", + "dropout_rate", + "pos_dropout_rate", + "att_dropout_rate", + ) + args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return args + + @staticmethod + def add_finetune_args(parser, prefix=None, skip=set([])): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + try: + parser.add_argument( + "--override-dropouts", + default=False, + action=ActionYesNo, + help=( + "whether to use the dropout probabilities passed in the " + "arguments instead of the defaults in the pretrained model."
+ ), + except: + pass + + try: + parser.add_argument( + "--dropout-rate", default=0, type=float, help="dropout probability" + ) + except: + pass + + try: + parser.add_argument( + "--pos-dropout-rate", + default=0, + type=float, + help="positional encoder dropout probability", + ) + except: + pass + + try: + parser.add_argument( + "--att-dropout-rate", + default=0, + type=float, + help="attention dropout probability", + ) + except: + pass + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/dino_head.py b/hyperion/torch/narchs/dino_head.py new file mode 100644 index 00000000..a59434bf --- /dev/null +++ b/hyperion/torch/narchs/dino_head.py @@ -0,0 +1,337 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + + +from typing import Optional + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from ...utils.misc import filter_func_args +from ..layer_blocks import FCBlock +from ..layers import ActivationFactory as AF +from ..layers import CosLossOutput +from ..layers import NormLayer1dFactory as NLF +from .net_arch import NetArch + +# class DINOHead1(nn.Module): +# def __init__( +# self, +# in_dim, +# out_dim, +# use_bn=False, +# norm_last_layer=True, +# nlayers=3, +# hidden_dim=2048, +# bottleneck_dim=256, +# ): +# super().__init__() +# nlayers = max(nlayers, 1) +# if nlayers == 1: +# self.mlp = nn.Linear(in_dim, bottleneck_dim) +# else: +# layers = [nn.Linear(in_dim, hidden_dim)] +# if use_bn: +# layers.append(nn.BatchNorm1d(hidden_dim)) +# layers.append(nn.GELU()) +# for _ in range(nlayers - 2): +# layers.append(nn.Linear(hidden_dim, hidden_dim)) +# if use_bn: +# layers.append(nn.BatchNorm1d(hidden_dim)) +# layers.append(nn.GELU()) +# layers.append(nn.Linear(hidden_dim, bottleneck_dim)) +# self.mlp = nn.Sequential(*layers) +# self.apply(self._init_weights) +# self.last_layer = nn.utils.weight_norm( +# nn.Linear(bottleneck_dim, out_dim, bias=False) +# ) +# self.last_layer.weight_g.data.fill_(1) +# if norm_last_layer: +# self.last_layer.weight_g.requires_grad = False + +# def _init_weights(self, m): +# if isinstance(m, nn.Linear): +# nn.init.trunc_normal_(m.weight, std=0.02) +# if isinstance(m, nn.Linear) and m.bias is not None: +# nn.init.constant_(m.bias, 0) + +# def forward(self, x): +# x = self.mlp(x) +# x = nn.functional.normalize(x, dim=-1, p=2) +# x = self.last_layer(x) +# return x + + +class DINOHead(NetArch): + """Classification Head for DINO x-vector style networks + + Attributes: + in_feats: input features + num_classes: number of output classes + hid_feats: dimension of hidden layers + bottleneck_feats: dimension of bottleneck layer before output + num_hid_layers: number of hidden layers + hid_act: str or dict hidden activation type in ['relu', 'relu6', 'swish', ... ] + output_type: type of output layer that will be used with the x-vector in ['softmax', 'cos-softmax'], + corresponding to standard cross-entropy or cosine scoring + norm_layer: norm_layer object or str indicating type norm layer, if None it uses BatchNorm1d + use_norm: if True it uses layer/batch-normalization + norm_before: if True, layer-norm is before the activation function + use_in_norm: put batchnorm at the input + """ + + def __init__( + self, + in_feats, + num_classes, + hid_feats=2048, + bottleneck_feats=256, + num_hid_layers=3, + hid_act="gelu", + output_type="softmax", + norm_layer=None, + use_norm=False, + norm_before=True, + dropout_rate=0, + use_in_norm=False, + ): + super().__init__() + assert num_hid_layers >= 1, "num_hid_layers (%d < 1)" % num_hid_layers + + self.num_hid_layers = num_hid_layers + self.in_feats = in_feats + self.hid_feats = hid_feats + self.bottleneck_feats = bottleneck_feats + self.num_classes = num_classes + self.hid_act = hid_act + self.norm_layer = norm_layer + self.use_in_norm = use_in_norm + + if use_norm: + norm_groups = None + if norm_layer == "group-norm": + norm_groups = min(hid_feats // 8, 32) + self._norm_layer = NLF.create(norm_layer, norm_groups) + else: + self._norm_layer = None + + self.use_norm = use_norm + self.norm_before = norm_before + + self.dropout_rate = dropout_rate + self.output_type = output_type + if use_in_norm: + assert not self.norm_before + self.in_norm = self._norm_layer(in_feats) + + if num_hid_layers == 1: + self.hid_layers = nn.Linear(in_feats, bottleneck_feats) + else: + layers = [nn.Linear(in_feats, hid_feats)] + if use_norm and norm_before: + layers.append(self._norm_layer(hid_feats)) + layers.append(AF.create(hid_act)) + if use_norm and not norm_before: + layers.append(self._norm_layer(hid_feats)) + if self.dropout_rate > 0: + layers.append(nn.Dropout(self.dropout_rate)) + + for _ in range(num_hid_layers - 2): + layers.append(nn.Linear(hid_feats, hid_feats)) + if use_norm and norm_before: + layers.append(self._norm_layer(hid_feats)) + layers.append(AF.create(hid_act)) + if use_norm and not norm_before: + layers.append(self._norm_layer(hid_feats)) + if self.dropout_rate > 0: + layers.append(nn.Dropout(self.dropout_rate)) + + layers.append(nn.Linear(hid_feats, bottleneck_feats)) + self.hid_layers = nn.Sequential(*layers) + + self.apply(self._init_weights) + if output_type == "softmax": + output = nn.Linear(bottleneck_feats, num_classes, bias=False) + with torch.no_grad(): + self.output = nn.utils.weight_norm(output) + self.output.weight_g.data.fill_(1) + self.output.weight_g.requires_grad = False + elif output_type == "cos-softmax": + # the cosine output operates on the L2-normalized bottleneck features + self.output = CosLossOutput( + bottleneck_feats, + num_classes, + cos_scale=1, + margin=0, + margin_warmup_epochs=0, + intertop_k=0, + intertop_margin=0, + ) + else: + raise ValueError(f"wrong loss_type={output_type}") + + # def before_cloning(self): + # if self.output_type == "cos-softmax": + # return None, None + + # torch.nn.utils.remove_weight_norm(self.output) + # return None, None + # cloned_output = self._clone_output() + # output = self.output + # self.output = None + # return output, cloned_output + + # def after_cloning(self, output: nn.Module): + # if self.output_type == "cos-softmax": + # return + + # self.output = nn.utils.weight_norm(self.output) + # self.output.weight_g.data.fill_(1) + # self.output.weight_g.requires_grad = False + + # def _clone_output(self): + # output = nn.utils.weight_norm( + # nn.Linear(self.bottleneck_feats, self.num_classes, bias=False) + # ) + # output.weight_g.data.fill_(1) + # output.weight_v.data = self.output_v.data.detach() + # output.weight_g.requires_grad = False + # return output + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + nn.init.trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x: torch.Tensor, y: Optional[torch.Tensor] = None): + if self.use_in_norm: + x = self.in_norm(x) + assert not torch.any( + torch.isnan(x) + ), f"x is nan {x.size()} {torch.sum(torch.isnan(x))}" + x = self.hid_layers(x) + assert not torch.any( + torch.isnan(x) + ), f"x_hid is nan {x.size()} {torch.sum(torch.isnan(x))}" + x = nn.functional.normalize(x, dim=-1, p=2) + assert not torch.any( + torch.isnan(x) + ), f"x_l2 is nan {x.size()} {torch.sum(torch.isnan(x))}" + x = self.output(x) + assert not torch.any( + torch.isnan(x) + ), f"out is nan {x.size()} {torch.sum(torch.isnan(x))}" + return x + + def get_config(self): + config = { + "in_feats": self.in_feats, + "num_classes": self.num_classes, + "hid_feats": self.hid_feats, + "bottleneck_feats": self.bottleneck_feats, + "num_hid_layers": self.num_hid_layers, + "hid_act": self.hid_act, + "output_type": self.output_type, + "norm_layer": self.norm_layer, + "use_norm": self.use_norm, + "norm_before": self.norm_before, + "dropout_rate": self.dropout_rate, + "use_in_norm": self.use_in_norm, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @staticmethod + def filter_args(**kwargs): + # if "wo_norm" in kwargs: + # kwargs["use_norm"] = not kwargs["wo_norm"] + # del kwargs["wo_norm"] + + # if "norm_after" in kwargs: + # kwargs["norm_before"] = not kwargs["norm_after"] + # del kwargs["norm_after"] + + args = filter_func_args(DINOHead.__init__, kwargs) + return args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--bottleneck-feats", + default=256, + type=int, + help=("bottleneck dimension before output layer"), + ) + + parser.add_argument( + "--num-hid-layers", + default=3, + type=int, + help=("number of hidden layers in the classif head"), + ) + + try: + parser.add_argument("--hid-act", default="gelu", help="hidden activation") + except: + pass + + parser.add_argument( + "--output-type", + default="softmax", + choices=["softmax", "cos-softmax"], + help="loss type: softmax, cos-softmax", + ) + + try: + parser.add_argument( + "--norm-layer", + default=None, + choices=[ + "batch-norm", + "group-norm", + "instance-norm", + "instance-norm-affine", + "layer-norm", + ], + help="type of normalization layer for all components of x-vector network", + ) + except: + pass + + parser.add_argument( + "--use-norm", + default=True, + action=ActionYesNo, + help="use batch normalization", + ) + + parser.add_argument( + "--norm-before", + default=True, + action=ActionYesNo, + help="batch normalization before activation", + ) + + parser.add_argument( + "--use-in-norm", + default=False, + action=ActionYesNo, + help="batch normalization in the classif head input", + ) + + try: + parser.add_argument("--dropout-rate", default=0, type=float, help="dropout") + except: + pass + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/feat_fuser_mvn.py b/hyperion/torch/narchs/feat_fuser_mvn.py index 6fa4c6c0..0656e279 100644 ---
a/hyperion/torch/narchs/feat_fuser_mvn.py +++ b/hyperion/torch/narchs/feat_fuser_mvn.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from jsonargparse import ActionParser, ArgumentParser +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..layers import FeatFuserFactory as FFF from ..layers import MeanVarianceNorm as MVN @@ -103,7 +103,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--aug-after-mvn", default=False, - action="store_true", + action=ActionYesNo, help=("do spec augment after st-mvn," "instead of before"), ) diff --git a/hyperion/torch/narchs/proj_head.py b/hyperion/torch/narchs/proj_head.py new file mode 100644 index 00000000..549f9e6a --- /dev/null +++ b/hyperion/torch/narchs/proj_head.py @@ -0,0 +1,149 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from torch.nn import Linear + +from ...utils.misc import filter_func_args +from ..layer_blocks import FCBlock +from ..layers import ActivationFactory as AF +from ..layers import NormLayer1dFactory as NLF +from .net_arch import NetArch + + +class ProjHead(NetArch): + """Projection head for x-vector style networks. It projects the pooled + features into the x-vector embedding space. + + Attributes: + in_feats: input features + out_feats: dimension of the output projection (embedding) + norm_layer: norm_layer object or str indicating type norm layer, if None it uses BatchNorm1d + use_norm: if True it uses layer/batch-normalization + norm_before: if True, the norm layer is applied to the input features, otherwise to the projected features + """ + + def __init__( + self, + in_feats, + out_feats=256, + norm_layer=None, + use_norm=True, + norm_before=True, + ): + super().__init__() + + self.in_feats = in_feats + self.out_feats = out_feats + self.norm_layer = norm_layer + self.use_norm = use_norm + self.norm_before = norm_before + + if use_norm: + norm_groups = None + if norm_layer == "group-norm": + norm_groups = min(out_feats // 8, 32) + _norm_layer = NLF.create(norm_layer, norm_groups) + if norm_before: + self._norm_layer = _norm_layer(in_feats) + else: + self._norm_layer = _norm_layer(out_feats) + else: + self._norm_layer = None + + self.proj = nn.Linear(in_feats, out_feats) + + def forward(self, x, y=None): + if self.use_norm and self.norm_before: + x = self._norm_layer(x) + assert not torch.any( + torch.isnan(x) + ), f"x before proj is nan {x.size()} {torch.sum(torch.isnan(x))}" + x = self.proj(x) + assert not torch.any( + torch.isnan(x) + ), f"x after proj is nan {x.size()} {torch.sum(torch.isnan(x))}" + if self.use_norm and not self.norm_before: + x = self._norm_layer(x) + assert not torch.any( + torch.isnan(x) + ), f"x after bn is nan {x.size()} {torch.sum(torch.isnan(x))}" + return x + + def get_config(self): + config = { + "in_feats": self.in_feats, + "out_feats": self.out_feats, + "norm_layer": self.norm_layer, + "use_norm": self.use_norm, + "norm_before": self.norm_before, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @staticmethod + def filter_args(**kwargs): + args = filter_func_args(ProjHead.__init__, kwargs) + return args + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--out-feats", default=256, type=int, help=("projection dimension") + ) + + try: + parser.add_argument( + "--norm-layer", + default=None, + choices=[ + "batch-norm", + "group-norm", + "instance-norm", + "instance-norm-affine", + "layer-norm", + ], + help="type of normalization layer for all components of x-vector network", + ) + except: + pass + + parser.add_argument( + "--use-norm", + default=True, + action=ActionYesNo, + help="use batch normalization", + ) + + parser.add_argument( + "--norm-before", + default=True, + action=ActionYesNo, + help="batch normalization before the projection", + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/narchs/rnn_transducer_decoder.py b/hyperion/torch/narchs/rnn_transducer_decoder.py index 763ec67c..77c1234a 100644 --- a/hyperion/torch/narchs/rnn_transducer_decoder.py +++ b/hyperion/torch/narchs/rnn_transducer_decoder.py @@ -2,16 +2,15 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - +import logging from dataclasses import dataclass from typing import Dict, List, Optional, Tuple -import torchaudio -import torchaudio.functional -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +import torchaudio +import torchaudio.functional +from jsonargparse import
ActionParser, ActionYesNo, ArgumentParser try: import k2 @@ -36,8 +35,8 @@ class Hypothesis: class RNNTransducerDecoder(NetArch): - """ RNN-T Decoder composed of Predictor and Joiner networks - Implementation based on + """RNN-T Decoder composed of Predictor and Joiner networks + Implementation based on https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/transducer/transducer.py Attributes: @@ -48,15 +47,15 @@ class RNNTransducerDecoder(NetArch): blank_id: id of the null symbol. rnnt_loss: type of rnn-t loss between torchaudio, k2 or k2_pruned. rnnt_type: rnn-t variation between regular, modified or constrained. - delay_penalty: penalize symbol delay, which is used to make symbol + delay_penalty: penalize symbol delay, which is used to make symbol emit earlier. reduction: type of reduction for rnn-t loss between sum or mean - prune_range: how many symbols to keep for each frame in k2 rnn-t + prune_range: how many symbols to keep for each frame in k2 rnn-t pruned loss. lm_scale: language model scale in rnn-t smoothed loss. am_scale: acoustic model scale in rnn-t smoothed loss. simple_loss_scale: weight of rnn-t simple loss when using k2 pruned loss. - pruned_warmup_steps: number of steps to warm up the k2 rnn-t pruned loss + pruned_warmup_steps: number of steps to warm up the k2 rnn-t pruned loss from 0.1 to 1. """ @@ -77,7 +76,6 @@ def __init__( simple_loss_scale: float = 0.5, pruned_warmup_steps: int = 2000, ): - super().__init__() self.in_feats = in_feats self.vocab_size = vocab_size @@ -206,7 +204,6 @@ def _rnnt_loss_k2_pruned( y_lengths: torch.Tensor, pred_out: torch.Tensor, ): - y_padded = y.pad(mode="constant", padding_value=0) y_padded = y_padded.to(torch.int64) boundary = torch.zeros((x.size(0), 4), dtype=torch.int64, device=x.device) @@ -281,7 +278,6 @@ def _rnnt_loss_k2_pruned( def forward( self, x: torch.Tensor, x_lengths: torch.Tensor, y: k2.RaggedTensor ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - # get y_lengths row_splits = y.shape.row_splits(1) y_lengths = row_splits[1:] - row_splits[:-1] @@ -415,7 +411,10 @@ def decode_time_sync_beam_search( if cached_key not in cache: pred_in = torch.tensor([y_star.ys[-1]], device=device).reshape(1, 1) - pred_out, pred_state = self.predictor(pred_in, y_star.pred_state,) + pred_out, pred_state = self.predictor( + pred_in, + y_star.pred_state, + ) cache[cached_key] = (pred_out, pred_state) else: pred_out, pred_state = cache[cached_key] @@ -455,7 +454,9 @@ def decode_time_sync_beam_search( new_ys = y_star.ys + [i] new_log_prob = y_star.log_prob + v new_hyp = Hypothesis( - ys=new_ys, log_prob=new_log_prob, pred_state=pred_state, + ys=new_ys, + log_prob=new_log_prob, + pred_state=pred_state, ) A.append(new_hyp) @@ -528,7 +529,10 @@ def decode_align_length_sync_beam_search( if cached_key not in cache: pred_in = torch.tensor([y_star.ys[-1]], device=device).reshape(1, 1) - pred_out, pred_state = self.predictor(pred_in, y_star.pred_state,) + pred_out, pred_state = self.predictor( + pred_in, + y_star.pred_state, + ) cache[cached_key] = (pred_out, pred_state) else: pred_out, pred_state = cache[cached_key] @@ -565,7 +569,9 @@ def decode_align_length_sync_beam_search( new_ys = y_star.ys + [i] new_log_prob = y_star.log_prob + v new_hyp = Hypothesis( - ys=new_ys, log_prob=new_log_prob, pred_state=pred_state, + ys=new_ys, + log_prob=new_log_prob, + pred_state=pred_state, ) A.append(new_hyp) @@ -574,7 +580,9 @@ def decode_align_length_sync_beam_search( # A_most_probable = max(A, key=lambda hyp: hyp.log_prob) # 
print("tuAB1", t, u, len(A), A_most_probable.log_prob, len(B)) B0 = sorted( - [hyp for hyp in A], key=lambda hyp: hyp.log_prob, reverse=True, + [hyp for hyp in A], + key=lambda hyp: hyp.log_prob, + reverse=True, ) B = [] B_ys = set() @@ -621,7 +629,6 @@ def filter_finetune_args(**kwargs): @staticmethod def add_pred_args(parser): - pred_parser = ArgumentParser(prog="") pred_parser.add_argument( "--pred-type", @@ -682,7 +689,6 @@ def add_pred_args(parser): @staticmethod def add_joiner_args(parser): - pred_parser = ArgumentParser(prog="") pred_parser.add_argument( "--joiner-type", @@ -702,7 +708,6 @@ def add_joiner_args(parser): def add_class_args( parser, prefix=None, skip=set(["in_feats", "blank_id", "vocab_size"]) ): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") @@ -791,7 +796,6 @@ def add_class_args( @staticmethod def add_finetune_args(parser, prefix=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/optim/__init__.py b/hyperion/torch/optim/__init__.py index fd05c755..33364d63 100644 --- a/hyperion/torch/optim/__init__.py +++ b/hyperion/torch/optim/__init__.py @@ -3,6 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .ema import ExpMovingAvg from .factory import OptimizerFactory from .fgsm import FGSM from .radam import RAdam diff --git a/hyperion/torch/optim/factory.py b/hyperion/torch/optim/factory.py index 95117b05..b01d3b62 100644 --- a/hyperion/torch/optim/factory.py +++ b/hyperion/torch/optim/factory.py @@ -4,12 +4,11 @@ """ import logging -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.optim as optim +from jsonargparse import ActionParser, ArgumentParser -from ...utils.misc import filter_args +from ...utils.misc import filter_args, filter_func_args from .radam import RAdam @@ -39,7 +38,6 @@ def create( max_iter=20, oss=False, ): - kwargs = locals() base_opt = None if opt_type == "sgd": @@ -152,29 +150,30 @@ def create( @staticmethod def filter_args(**kwargs): - valid_args = ( - "opt_type", - "lr", - "momentum", - "beta1", - "beta2", - "rho", - "eps", - "weight_decay", - "amsgrad", - "nesterov", - "lambd", - "asgd_alpha", - "t0", - "rmsprop_alpha", - "centered", - "lr_decay", - "init_acc_val", - "max_iter", - "oss", - ) - - return filter_args(valid_args, kwargs) + return filter_func_args(OptimizerFactory.create, kwargs) + # valid_args = ( + # "opt_type", + # "lr", + # "momentum", + # "beta1", + # "beta2", + # "rho", + # "eps", + # "weight_decay", + # "amsgrad", + # "nesterov", + # "lambd", + # "asgd_alpha", + # "t0", + # "rmsprop_alpha", + # "centered", + # "lr_decay", + # "init_acc_val", + # "max_iter", + # "oss", + # ) + + # return filter_args(valid_args, kwargs) @staticmethod def add_class_args(parser, prefix=None): @@ -323,6 +322,5 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='optimizer options') add_argparse_args = add_class_args diff --git a/hyperion/torch/optim/radam.py b/hyperion/torch/optim/radam.py index 1b7a588f..1aa98517 100644 --- a/hyperion/torch/optim/radam.py +++ b/hyperion/torch/optim/radam.py @@ -1,7 +1,6 @@ """ Code taken from https://github.com/LiyuanLucasLiu/RAdam/blob/master/radam/radam.py """ -# import math @@ -62,13 +61,11 @@ def __setstate__(self, state): super().__setstate__(state) def step(self, closure=None): - loss = None if closure is not None: loss = closure() for group in 
self.param_groups: - for p in group["params"]: if p.grad is None: continue diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index 97be320c..3d5c8c9e 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -23,9 +23,10 @@ def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) TorchModel.registry[cls.__name__] = cls - def __init__(self): + def __init__(self, bias_weight_decay=None): super().__init__() self._train_mode = "full" + self.bias_weight_decay = bias_weight_decay def get_config(self): config = {"class_name": self.__class__.__name__} @@ -91,10 +92,26 @@ def print_parameter_list(self): logging.info("buffers: %s", n) def has_param_groups(self): - return False + return self.bias_weight_decay is not None def trainable_param_groups(self): - return self.trainable_parameters() + if self.bias_weight_decay is None: + return [{"params": self.trainable_parameters()}] + + regularized = [] + not_regularized = [] + for name, param in self.trainable_named_parameters(): + # biases and 1-d (norm) parameters go in their own group + # with a separate weight decay + if name.endswith(".bias") or len(param.shape) == 1: + not_regularized.append(param) + else: + regularized.append(param) + + return [ + {"params": regularized}, + {"params": not_regularized, "weight_decay": self.bias_weight_decay}, + ] def freeze(self): for param in self.parameters(): @@ -315,6 +332,7 @@ def _try_to_get_from_hf( @staticmethod def auto_load( file_path: PathLike, + model_name: Optional[str] = None, extra_objs: dict = {}, map_location: Optional[ Union[ @@ -348,7 +366,9 @@ def auto_load( else: raise Exception("unknown object with class_name=%s" % (class_name)) - state_dict = model_data["model_state_dict"] + if model_name is None: + model_name = "model" + state_dict = model_data[f"{model_name}_state_dict"] if "n_averaged" in state_dict: del state_dict["n_averaged"] diff --git a/hyperion/torch/trainers/__init__.py b/hyperion/torch/trainers/__init__.py index c1530608..94326857 100644 --- a/hyperion/torch/trainers/__init__.py +++ b/hyperion/torch/trainers/__init__.py @@ -3,6 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from .dino_xvector_trainer import DINOXVectorTrainer from .dvae_trainer import DVAETrainer from .torch_trainer import TorchTrainer from .transducer_trainer import TransducerTrainer @@ -13,6 +14,5 @@ from .xvector_adv_trainer_from_wav import XVectorAdvTrainerFromWav from .xvector_trainer import XVectorTrainer from .xvector_trainer_deep_feat_reg import XVectorTrainerDeepFeatReg -from .xvector_trainer_deep_feat_reg_from_wav import \ XVectorTrainerDeepFeatRegFromWav +from .xvector_trainer_deep_feat_reg_from_wav import XVectorTrainerDeepFeatRegFromWav from .xvector_trainer_from_wav import XVectorTrainerFromWav diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 9f5fafe6..a0f5f1d4 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset @@ -46,6 +45,7 @@ class AETrainer(TorchTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs +
save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. @@ -69,7 +69,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -78,46 +78,17 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="x", ): - if loss is None: loss = nn.MSELoss() super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # loss, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - def train_epoch(self, data_loader): """Training epoch loop @@ -146,9 +117,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): @@ -156,17 +127,17 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) - # total_batches += 1 logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): - batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() @@ -197,7 +168,6 @@ def validation_epoch(self, data_loader, swa_update_bn=False): @staticmethod def add_class_args(parser, prefix=None, train_modes=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/trainers/dino_xvector_trainer.py b/hyperion/torch/trainers/dino_xvector_trainer.py new file mode 100644 index 00000000..bb7b427d --- /dev/null +++ b/hyperion/torch/trainers/dino_xvector_trainer.py @@ -0,0 +1,385 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import os +from collections import OrderedDict as ODict + +import torch +import torch.cuda.amp as amp +import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser +from torch.distributed.elastic.multiprocessing.errors import record + +from ...utils.misc import filter_func_args +from ..optim import ExpMovingAvg as EMA +from ..utils import MetricAcc, TorchDDP, tensors_subset +from .torch_trainer import TorchTrainer + + +class DINOXVectorTrainer(TorchTrainer): + """Trainer 
to train x-vector style models. + + Attributes: + model: x-Vector model object. + optim: pytorch optimizer object or options dict + epochs: max. number of epochs + exp_path: experiment output path + cur_epoch: current epoch + grad_acc_steps: gradient accumulation steps to simulate larger batch size. + device: cpu/gpu device + metrics: extra metrics to compute besides cxe. + lrsched: learning rate scheduler object or options dict + teacher_optim: options dict for the teacher EMA update + loggers: LoggerList object, loggers write training progress to std. output and file. + If None, it uses default loggers. + ddp: if True use distributed data parallel training + ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) + loss: DINO loss object + train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] + use_amp: uses mixed precision training. + log_interval: number of optim. steps between log outputs + use_tensorboard: use tensorboard logger + use_wandb: use wandb logger + wandb: wandb dictionary of options + grad_clip: norm to clip gradients, if 0 there is no clipping + grad_clip_norm: norm type to clip gradients + swa_start: epoch to start doing swa + swa_lr: SWA learning rate + swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch + cpu_offload: CPU offload of gradients when using fully sharded ddp + input_key: dict. key for nnet input. + target_key: dict. key for nnet targets. + """ + + def __init__( + self, + student_model, + teacher_model, + loss, + optim, + teacher_optim, + epochs=100, + exp_path="./train", + cur_epoch=0, + grad_acc_steps=1, + eff_batch_size=None, + device=None, + metrics=None, + lrsched=None, + loggers=None, + ddp=False, + ddp_type="ddp", + train_mode="full", + freeze_output_layer_steps=3000, + use_amp=False, + log_interval=1000, + use_tensorboard=False, + use_wandb=False, + wandb={}, + grad_clip=0, + grad_clip_norm=2, + swa_start=0, + swa_lr=1e-3, + swa_anneal_epochs=10, + save_interval_steps=None, + cpu_offload=False, + input_key="x", + ): + super_args = filter_func_args(super().__init__, locals()) + self.teacher_model = teacher_model + self.teacher_optim = teacher_optim + self.freeze_output_layer_steps = freeze_output_layer_steps + super().__init__(student_model, **super_args) + + def prepare_models_for_training(self): + super().prepare_models_for_training() + self.teacher_model, self.teacher_optimizer = self._prepare_model_for_ema( + self.teacher_model, + self.teacher_optim, + self.device, + self.ddp, + ) + + def _prepare_model_for_ema(self, model, optim, device, ddp): + if device is not None: + model.to(device) + + optimizer = EMA(model.parameters(), **optim) + + if ddp: + model = nn.SyncBatchNorm.convert_sync_batchnorm(model) + + return model, optimizer + + def set_train_mode(self): + super().set_train_mode() + self.teacher_model.freeze() + + @torch.no_grad() + def update_teacher_model(self): + self.teacher_optimizer.step(self.model.parameters()) + # print( + # "pmw", + # self.model.xvector.proj_head_net.proj.weight[:5, :5], + # self.teacher_model.xvector.proj_head_net.proj.weight[:5, :5], + # ) + # print( + # "mw", + # self.model.xvector.classif_net.output.weight[:5, :5], + # self.teacher_model.xvector.classif_net.output.weight[:5, :5], + # ) + # print( + # "mwg", + # self.model.xvector.classif_net.output.weight_g[:5, :5], + # self.teacher_model.xvector.classif_net.output.weight_g[:5, :5], + # ) + # print( + # "mwv", + #
self.model.xvector.classif_net.output.weight_v[:5, :5], + # self.teacher_model.xvector.classif_net.output.weight_v[:5, :5], + # flush=True, + # ) + # print("------------------------------", flush=True) + + @staticmethod + def get_augs_keys(batch, base_key, subset, skip=set()): + base_key = f"{base_key}_{subset}" + keys = [] + + chunk_idx = 0 + while True: + found_chunk = 0 + chunk_key = f"{base_key}_{chunk_idx}" + if chunk_key in batch: + if chunk_key not in skip: + keys.append(chunk_key) + found_chunk = True + aug_idx = 0 + while True: + aug_key = f"{chunk_key}_aug_{aug_idx}" + if aug_key in batch: + if aug_key not in skip: + keys.append(aug_key) + + aug_idx += 1 + found_chunk = True + else: + break + + if not found_chunk: + break + + chunk_idx += 1 + + return keys + + @record + def train_epoch(self, data_loader): + """Training epoch loop + + Args: + data_loader: pytorch data loader returning features and class labels. + """ + metric_acc = MetricAcc(device=self.device) + batch_metrics = ODict() + self.model.train() + self.teacher_model.train() + self.loss.update_temp(self.cur_epoch) + self.loss.train() + + for batch, data in enumerate(data_loader): + self.loggers.on_batch_begin(batch) + + if batch % self.grad_acc_steps == 0: + self.optimizer.zero_grad() + + teacher_keys = self.get_augs_keys(data, self.input_key, "teacher") + student_keys = self.get_augs_keys(data, self.input_key, "student") + with amp.autocast(enabled=self.use_amp): + with torch.no_grad(): + teacher_data = tensors_subset(data, teacher_keys, self.device) + batch_size = teacher_data[0].size(0) + num_teacher_crops = len(teacher_data) + teacher_data = torch.cat(teacher_data, dim=0) + teacher_out = self.teacher_model(teacher_data) + + if num_teacher_crops > 1: + student_out1 = self.model(teacher_data) + + student_data = tensors_subset(data, student_keys, self.device) + num_student_crops = len(student_data) + student_data = torch.cat(student_data, dim=0) + student_out2 = self.model(student_data) + assert not torch.any(torch.isnan(teacher_out)), "teacher is nan" + assert not torch.any(torch.isinf(teacher_out)), "teacher is inf" + assert not torch.any(torch.isnan(student_out1)), "s1 is nan" + assert not torch.any(torch.isinf(student_out1)), "s1 is inf" + assert not torch.any(torch.isnan(student_out2)), "s2 is nan" + assert not torch.any(torch.isinf(student_out2)), "s2 is inf" + if num_teacher_crops > 1: + student_out = torch.cat((student_out1, student_out2), dim=0) + num_student_crops += num_teacher_crops + else: + student_out = student_out2 + + loss = ( + self.loss( + student_out, teacher_out, num_student_crops, num_teacher_crops + ) + / self.grad_acc_steps + ) + assert not torch.isnan( + loss + ), f"loss is nan {batch} {torch.mean(teacher_out)} {torch.mean(student_out1)} {torch.mean(student_out2)}" + + if self.use_amp: + self.grad_scaler.scale(loss).backward() + else: + loss.backward() + + if (batch + 1) % self.grad_acc_steps == 0: + self.cur_batch = batch + 1 + if self.freeze_output_layer_steps > self.global_step: + self.model.cancel_output_layer_grads() + + self.update_model() + self.update_teacher_model() + self.save_checkpoint(partial=True) + + batch_metrics["loss"] = loss.item() * self.grad_acc_steps + # for k, metric in self.metrics.items(): + # batch_metrics[k] = metric(output, target) + + metric_acc.update(batch_metrics, batch_size) + logs = metric_acc.metrics + lrs = self._get_lrs() + logs.update(lrs) + logs["ema_momentum"] = self.teacher_optimizer.momentum + self.loggers.on_batch_end(logs=logs, batch_size=batch_size) 
+ + logs = metric_acc.metrics + logs = ODict(("train_" + k, v) for k, v in logs.items()) + lrs = self._get_lrs() + logs.update(lrs) + logs["ema_momentum"] = self.teacher_optimizer.momentum + return logs + + @torch.no_grad() + def validation_epoch(self, data_loader, swa_update_bn=False): + """Validation epoch loop + + Args: + data_loader: PyTorch data loader returning input/output pairs. + swa_update_bn: whether or not to update batch-norm layers in SWA. + """ + metric_acc = MetricAcc(self.device) + batch_metrics = ODict() + self.teacher_model.eval() + self.loss.eval() + + if swa_update_bn: + log_tag = "train_" + self.model.train() + else: + log_tag = "val_" + self.model.eval() + + for batch, data in enumerate(data_loader): + teacher_keys = self.get_augs_keys(data, self.input_key, "teacher") + student_keys = self.get_augs_keys(data, self.input_key, "student") + with amp.autocast(enabled=self.use_amp): + teacher_data = tensors_subset(data, teacher_keys, self.device) + batch_size = teacher_data[0].size(0) + num_teacher_crops = len(teacher_data) + teacher_data = torch.cat(teacher_data, dim=0) + teacher_out = self.teacher_model(teacher_data) + + if num_teacher_crops > 1: + student_out1 = self.model(teacher_data) + + student_data = tensors_subset(data, student_keys, self.device) + num_student_crops = len(student_data) + student_data = torch.cat(student_data, dim=0) + student_out2 = self.model(student_data) + if num_teacher_crops > 1: + student_out = torch.cat((student_out1, student_out2), dim=0) + num_student_crops += num_teacher_crops + else: + student_out = student_out2 + + loss = self.loss( + student_out, teacher_out, num_student_crops, num_teacher_crops + ) + + batch_metrics["loss"] = loss.item() + # for k, metric in self.metrics.items(): + # batch_metrics[k] = metric(output, target) + + metric_acc.update(batch_metrics, batch_size) + + logs = metric_acc.metrics + logs = ODict((log_tag + k, v) for k, v in logs.items()) + return logs + + def _load_checkpoint(self, checkpoint): + self.teacher_model.load_state_dict(checkpoint["teacher_model_state_dict"]) + # self.teacher_model.load_state_dict(checkpoint["teacher_state_dict"]) + self.teacher_optimizer.load_state_dict( + checkpoint["teacher_optimizer_state_dict"] + ) + return super()._load_checkpoint(checkpoint) + + def checkpoint(self, logs=None): + checkpoint = super().checkpoint(logs) + self.teacher_model.train() + checkpoint["teacher_model_state_dict"] = self.teacher_model.state_dict() + checkpoint["teacher_optimizer_state_dict"] = self.teacher_optimizer.state_dict() + return checkpoint + + def teacher_checkpoint(self, logs=None): + """Creates a checkpoint of the teacher model, to be saved for later recovery + + Args: + logs: logs containing the current value of the metrics.
+ """ + self.teacher_model.train() + checkpoint = { + "epoch": self.cur_epoch, + "batch": self.cur_batch, + "global_step": self.global_step, + "model_cfg": self.teacher_model.get_config(), + "model_state_dict": self.teacher_model.state_dict(), + "optimizer_state_dict": self.teacher_optimizer.state_dict(), + } + + if logs is not None: + checkpoint["logs"] = logs + + return checkpoint + + @staticmethod + def filter_args(**kwargs): + args = filter_func_args(DINOXVectorTrainer.__init__, kwargs) + return args + + @staticmethod + def add_class_args(parser, prefix=None, train_modes=None, skip=set()): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + skip.add("teacher_key") + TorchTrainer.add_class_args(parser, train_modes=train_modes) + EMA.add_class_args(parser, prefix="teacher_optim") + parser.add_argument( + "--freeze-output-layer-steps", + default=1500, + type=int, + help="freeze the output layer during the first updates of the model", + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index e2d2d1f6..718630d6 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset @@ -45,6 +44,7 @@ class DVAETrainer(TorchTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
@@ -67,7 +67,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -76,6 +76,7 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x_aug", target_key="x", @@ -144,9 +145,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["elbo"] = elbo.item() for metric in ["log_px", "kldiv_z"]: @@ -156,12 +157,14 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): @@ -201,7 +204,6 @@ def validation_epoch(self, data_loader, swa_update_bn=False): @staticmethod def add_class_args(parser, prefix=None, train_modes=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/trainers/plda_trainer.py b/hyperion/torch/trainers/plda_trainer.py index d6761e87..a0099c02 100644 --- a/hyperion/torch/trainers/plda_trainer.py +++ b/hyperion/torch/trainers/plda_trainer.py @@ -48,6 +48,7 @@ class PLDATrainer(TorchTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
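The save_checkpoint(partial=True) calls that this patch inserts after every update_model() are cheap no-ops unless a save interval is configured. The gating logic, restated for reference (a sketch of the save_partial_checkpoint helper shown later in torch_trainer.py):

    def should_save_partial(global_step: int, save_interval_steps) -> bool:
        # partial (mid-epoch) saves happen only when an interval is set
        # and the current optimizer step lands on it
        return (
            save_interval_steps is not None
            and global_step % save_interval_steps == 0
        )

With save_interval_steps=5000, for example, a partial checkpoint such as model_ep0003_step00045000.pth is written every 5000 global steps, in addition to the end-of-epoch model_ep0003.pth.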
@@ -73,7 +74,7 @@ def __init__( p_tar=0.5, train_mode="train", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -82,46 +83,17 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="class_id", ): - if loss is None: loss = nn.CrossEntropyLoss() super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # loss, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - self.loss_bce = BCEWithLLR(p_tar) self.loss_weights = loss_weights @@ -179,9 +151,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps if return_bin: @@ -193,11 +165,13 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 7260595c..4fa5bdab 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -4,19 +4,21 @@ """ import contextlib +import glob import logging import math import os from collections import OrderedDict as ODict from enum import Enum from pathlib import Path +from typing import Any, Dict, Optional import torch import torch.cuda.amp as amp import torch.distributed as dist import torch.nn as nn from fairscale.optim.grad_scaler import ShardedGradScaler -from jsonargparse import ActionParser, ArgumentParser +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from torch.optim.swa_utils import SWALR, AveragedModel from ...utils.misc import filter_func_args @@ -71,6 +73,7 @@ class TorchTrainer(object): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
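The bias_weight_decay plumbing added to TorchModel at the top of this patch is consumed here: _make_optimizer (below) now builds the optimizer from model.trainable_param_groups() instead of a flat parameter list, so biases and 1-dim Norm parameters can carry their own weight decay. A standalone sketch of the equivalent construction with plain torch.optim (illustrative model and values):

    import torch
    import torch.nn as nn

    net = nn.Sequential(nn.Linear(10, 10), nn.LayerNorm(10))

    decay, no_decay = [], []
    for name, p in net.named_parameters():
        # same split as TorchModel.trainable_param_groups: biases and
        # 1-dim (Norm) parameters go to the separately-decayed group
        if name.endswith(".bias") or len(p.shape) == 1:
            no_decay.append(p)
        else:
            decay.append(p)

    optimizer = torch.optim.AdamW(
        [
            {"params": decay},  # uses the optimizer's default weight_decay
            {"params": no_decay, "weight_decay": 0.0},  # bias_weight_decay, often 0
        ],
        lr=1e-3,
        weight_decay=1e-2,
    )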
@@ -94,7 +97,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -103,6 +106,7 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="class_id", @@ -111,9 +115,12 @@ def __init__( self.loss = loss self.epochs = epochs self.cur_epoch = cur_epoch + self.cur_batch = 0 self.grad_acc_steps = grad_acc_steps self.eff_batch_size = eff_batch_size self.exp_path = Path(exp_path) + self.optim = optim + self.lrsched = lrsched if loggers is None: self.loggers = self._default_loggers( @@ -139,94 +146,212 @@ def __init__( self.target_key = target_key self.ddp = ddp self.ddp_type = ddp_type + self.cpu_offload = cpu_offload self.rank = 0 self.world_size = 1 + self.in_swa = False + self.global_step = 0 + self.save_interval_steps = save_interval_steps if ddp: self.rank = dist.get_rank() self.world_size = dist.get_world_size() self.set_train_mode() + self.prepare_models_for_training() + + # if device is not None: + # self.model.to(device) + # if loss is not None: + # self.loss.to(device) + + # if ddp: + # if ddp_type == DDPType.DDP or ddp_type == DDPType.OSS_DDP: + # self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + # if self.rank == 0: + # logging.info( + # "training in multiple gpus with distributed-data-parallel" + # ) + # oss = False if ddp_type == DDPType.DDP else True + # self.optimizer = self._make_optimizer(optim, self.model, oss=oss) + # self.model = TorchDDP( + # self.model, + # device_ids=[device], + # output_device=device, + # ) + # elif ddp_type == DDPType.OSS_SHARDED_DDP: + # self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + # if self.rank == 0: + # logging.info( + # "training in multiple gpus with fair sharded-distributed-data-parallel" + # ) + # self.optimizer = self._make_optimizer(optim, self.model, oss=True) + # self.model = FairShardedDDP(self.model, self.optimizer) + # else: + # if self.rank == 0: + # logging.info( + # "training in multiple gpus with fair fully-sharded-distributed-data-parallel" + # ) + # # syncbathcnorm is not supported here, it raises exception + # self.model = FairFullyShardedDDP( + # self.model, + # mixed_precision=self.use_amp, + # move_params_to_cpu=cpu_offload, + # ) + # self.optimizer = self._make_optimizer(optim, self.model, oss=False) + + # else: + # self.optimizer = self._make_optimizer(optim, self.model) + + # # make the learning rate scheduler + # self.lr_scheduler = self._make_lr_sched(lrsched, self.optimizer) + + # if self.use_amp: + # if ddp and ddp_type != DDPType.DDP: + # if self.rank == 0: + # logging.info( + # "using automatic mixed precision training with sharded-grad-scaler" + # ) + # self.grad_scaler = ShardedGradScaler() + # else: + # if self.rank == 0: + # logging.info( + # "using automatic mixed precision training with grad-scaler" + # ) + # self.grad_scaler = amp.GradScaler() + # self.amp_autocast = amp.autocast + # else: + # self.amp_autocast = contextlib.nullcontext + + # self.in_swa = False + # if self.do_swa: + # if self.rank == 0: + # logging.info("init SWA model") + # self.swa_model = AveragedModel(self.model) + # self.swa_scheduler = SWALR( + # self.optimizer, swa_lr=self.swa_lr, anneal_epochs=self.swa_anneal_epochs + # ) + + def prepare_models_for_training(self): + self.loss = self._prepare_loss_for_training(self.loss, self.device) + ( + self.model, + self.optimizer, + self.lr_scheduler, + self.grad_scaler, + 
self.swa_model, + self.swa_scheduler, + ) = self._prepare_model_for_training( + self.model, + self.optim, + self.lrsched, + self.device, + self.use_amp, + self.ddp, + self.ddp_type, + self.cpu_offload, + self.do_swa, + self.swa_lr, + self.swa_anneal_epochs, + ) + + def _prepare_loss_for_training(self, loss, device): + if loss is not None: + loss.to(device) + + return loss + def _prepare_model_for_training( + self, + model, + optim, + lrsched, + device, + use_amp, + ddp, + ddp_type, + cpu_offload, + do_swa, + swa_lr, + swa_anneal_epochs, + ): if device is not None: - self.model.to(device) - if loss is not None: - self.loss.to(device) + model.to(device) if ddp: if ddp_type == DDPType.DDP or ddp_type == DDPType.OSS_DDP: - self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if self.rank == 0: logging.info( "training in multiple gpus with distributed-data-parallel" ) oss = False if ddp_type == DDPType.DDP else True - self.optimizer = self._make_optimizer(optim, self.model, oss=oss) - self.model = TorchDDP( - self.model, + optimizer = self._make_optimizer(optim, model, oss=oss) + model = TorchDDP( + model, device_ids=[device], output_device=device, ) elif ddp_type == DDPType.OSS_SHARDED_DDP: - self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if self.rank == 0: logging.info( "training in multiple gpus with fair sharded-distributed-data-parallel" ) - self.optimizer = self._make_optimizer(optim, self.model, oss=True) - self.model = FairShardedDDP(self.model, self.optimizer) + optimizer = self._make_optimizer(optim, model, oss=True) + model = FairShardedDDP(model, optimizer) else: if self.rank == 0: logging.info( "training in multiple gpus with fair fully-sharded-distributed-data-parallel" ) # syncbathcnorm is not supported here, it raises exception - self.model = FairFullyShardedDDP( - self.model, - mixed_precision=self.use_amp, + model = FairFullyShardedDDP( + model, + mixed_precision=use_amp, move_params_to_cpu=cpu_offload, ) - self.optimizer = self._make_optimizer(optim, self.model, oss=False) + optimizer = self._make_optimizer(optim, model, oss=False) else: - self.optimizer = self._make_optimizer(optim, self.model) + optimizer = self._make_optimizer(optim, model) # make the learning rate scheduler - self.lr_scheduler = self._make_lr_sched(lrsched, self.optimizer) + lr_scheduler = self._make_lr_sched(lrsched, optimizer) - if self.use_amp: + if use_amp: if ddp and ddp_type != DDPType.DDP: if self.rank == 0: logging.info( "using automatic mixed precision training with sharded-grad-scaler" ) - self.grad_scaler = ShardedGradScaler() + grad_scaler = ShardedGradScaler() else: if self.rank == 0: logging.info( "using automatic mixed precision training with grad-scaler" ) - self.grad_scaler = amp.GradScaler() - self.amp_autocast = amp.autocast - else: - self.amp_autocast = contextlib.nullcontext + grad_scaler = amp.GradScaler() - self.in_swa = False - if self.do_swa: + swa_model = None + swa_scheduler = None + if do_swa: if self.rank == 0: logging.info("init SWA model") - self.swa_model = AveragedModel(self.model) - self.swa_scheduler = SWALR( - self.optimizer, swa_lr=self.swa_lr, anneal_epochs=self.swa_anneal_epochs + swa_model = AveragedModel(model) + swa_scheduler = SWALR( + optimizer, swa_lr=swa_lr, anneal_epochs=swa_anneal_epochs ) - def set_epoch(self, data_loader): + return model, optimizer, lr_scheduler, grad_scaler, swa_model, swa_scheduler + + def 
set_epoch(self, data_loader, cur_batch: int = 0): try: data_loader.dataset.set_epoch(self.cur_epoch) except AttributeError: logging.warning("dataset doesn't have set_epoch member function") try: - data_loader.batch_sampler.set_epoch(self.cur_epoch) + data_loader.batch_sampler.set_epoch(self.cur_epoch, cur_batch) except AttributeError: logging.warning("sampler doesn't have set_epoch member function") @@ -246,7 +371,7 @@ def fit(self, train_data, val_data=None): val_logs = {} self.loggers.on_train_begin(epochs=self.epochs) for epoch in range(self.cur_epoch, self.epochs): - self.set_epoch(train_data) + self.set_epoch(train_data, self.cur_batch) self.loggers.on_epoch_begin(epoch, batches=len(train_data)) if self.lr_scheduler is not None: # this is needed by cosine scheduler @@ -254,6 +379,7 @@ self.lr_scheduler.on_epoch_begin(epoch, epoch_updates=epoch_updates) logs = self.train_epoch(train_data) + self.cur_batch = 0 if val_data is not None: self.set_epoch(val_data) val_logs = self.validation_epoch(val_data) @@ -311,7 +437,7 @@ def train_epoch(self, data_loader): with amp.autocast(enabled=self.use_amp): output = self.model(input_data) - loss = self.loss(output, target).mean() / self.grad_acc_steps + loss = self.loss(output, target) / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -319,9 +445,9 @@ loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): @@ -329,13 +455,14 @@ metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) - # total_batches += 1 logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): @@ -399,24 +526,54 @@ def _clip_grad_norm(self, model, optim, grad_clip, grad_clip_norm): model.parameters(), grad_clip, norm_type=grad_clip_norm ) - def update_model(self): + def _update_model_by_optim( + self, model, optimizer, grad_clip, grad_clip_norm, use_amp, grad_scaler + ): """Updates the model and does gradding clipping.""" - if self.use_amp: - if self.grad_clip > 0: - self.grad_scaler.unscale_(self.optimizer) - self._clip_grad_norm( - self.model, self.optimizer, self.grad_clip, self.grad_clip_norm - ) + if use_amp: + if grad_clip > 0: + grad_scaler.unscale_(optimizer) + self._clip_grad_norm(model, optimizer, grad_clip, grad_clip_norm) - self.grad_scaler.step(self.optimizer) - self.grad_scaler.update() + grad_scaler.step(optimizer) + grad_scaler.update() else: - if self.grad_clip > 0: - self._clip_grad_norm( - self.model, self.optimizer, self.grad_clip, self.grad_clip_norm - ) + if grad_clip > 0: + self._clip_grad_norm(model, optimizer, grad_clip, grad_clip_norm) + + optimizer.step() + + def update_model(self): + """Updates the model and does gradient clipping.""" + if self.lr_scheduler is not None and not self.in_swa: + self.lr_scheduler.on_opt_step() + + self._update_model_by_optim( + self.model, + self.optimizer, + self.grad_clip,
self.grad_clip_norm, + self.use_amp, + self.grad_scaler, + ) + self.global_step += 1 - self.optimizer.step() + # if self.use_amp: + # if self.grad_clip > 0: + # self.grad_scaler.unscale_(self.optimizer) + # self._clip_grad_norm( + # self.model, self.optimizer, self.grad_clip, self.grad_clip_norm + # ) + + # self.grad_scaler.step(self.optimizer) + # self.grad_scaler.update() + # else: + # if self.grad_clip > 0: + # self._clip_grad_norm( + # self.model, self.optimizer, self.grad_clip, self.grad_clip_norm + # ) + + # self.optimizer.step() def _make_optimizer(self, optim, model, oss=False): """Makes an optimizer object.""" @@ -429,7 +586,6 @@ def _make_optimizer(self, optim, model, oss=False): if self.rank == 0: logging.info("optimizer args={}".format(opt_args)) - # optimizer = OF.create(model.parameters(), **opt_args) optimizer = OF.create(model.trainable_param_groups(), **opt_args) return optimizer @@ -469,17 +625,14 @@ def _get_lr(self): def _get_lrs(self): """Returns the current learning rates of all param groups to show in the loggers""" - lrs = [param_group["lr"] for param_group in self.optimizer.param_groups] - all_eq = True - for lr in lrs: - if lr != lrs[0]: - all_eq = False - break - - if all_eq: - return {"lr": lrs[0]} + lrs = { + f"lr_{i}": param_group["lr"] + for i, param_group in enumerate(self.optimizer.param_groups) + } + if len(lrs) == 1: + lrs["lr"] = lrs.pop("lr_0") - return {f"lr_{i}": lr for i, lr in enumerate(lrs)} + return lrs def _compute_grad_acc_steps(self, data_loader): if self.eff_batch_size is None: @@ -524,6 +677,8 @@ def checkpoint(self, logs=None): self.model.train() checkpoint = { "epoch": self.cur_epoch, + "batch": self.cur_batch, + "global_step": self.global_step, "rng_state": torch.get_rng_state(), "model_cfg": self.model.get_config(), "model_state_dict": self.model.state_dict(), @@ -544,12 +699,22 @@ def checkpoint(self, logs=None): return checkpoint - def save_checkpoint(self, logs=None): + def save_partial_checkpoint(self): + return ( + self.save_interval_steps is not None + and self.global_step % self.save_interval_steps == 0 + ) + + def new_save_checkpoint(self, logs=None, partial: bool = False): """Saves a checkpoint of the training status Args: logs: logs containing the current value of the metrics. + partial: if True, it is saving in the middle of the epoch """ + if partial and not self.save_partial_checkpoint(): + return + if self.ddp and ( self.ddp_type == DDPType.OSS_DDP or self.ddp_type == DDPType.OSS_SHARDED_DDP ): @@ -564,7 +729,58 @@ def save_checkpoint(self, logs=None): return checkpoint = self.checkpoint(logs) - file_path = "%s/model_ep%04d.pth" % (self.exp_path, self.cur_epoch) + self.save_model_checkpoint("model", checkpoint, partial=partial) + + def save_model_checkpoint( + self, model_name: str, checkpoint: Dict[str, Any], partial: bool = False + ): + if partial: + file_path = "%s/%s_ep%04d_step%08d.pth" % ( + self.exp_path, + model_name, + self.cur_epoch, + self.global_step, + ) + else: + file_path = "%s/%s_ep%04d.pth" % (self.exp_path, model_name, self.cur_epoch) + + torch.save(checkpoint, file_path) + + def save_checkpoint(self, logs=None, partial: bool = False): + """Saves a checkpoint of the training status + + Args: + logs: logs containing the current value of the metrics.
+ partial: if True, it is saving in the middle of the epoch + """ + if partial and ( + self.save_interval_steps is None + or self.global_step % self.save_interval_steps != 0 + ): + return + + if self.ddp and ( + self.ddp_type == DDPType.OSS_DDP or self.ddp_type == DDPType.OSS_SHARDED_DDP + ): + # Not sure what this does, just copying from the example in + # https://github.com/facebookresearch/fairscale/blob/master/benchmarks/oss.py + # Check the checkpointing in the case of the OSS optimizer + # Memory usage could spill over from there + # optimizer = cast(OSS, optimizer) + self.optimizer.consolidate_state_dict() + + if self.rank != 0: + return + + checkpoint = self.checkpoint(logs) + if partial: + file_path = "%s/model_ep%04d_step%08d.pth" % ( + self.exp_path, + self.cur_epoch, + self.global_step, + ) + else: + file_path = "%s/model_ep%04d.pth" % (self.exp_path, self.cur_epoch) torch.save(checkpoint, file_path) @@ -584,13 +800,7 @@ def save_swa_model(self, logs=None): torch.save(checkpoint, file_path) - def load_checkpoint(self, file_path): - """Loads a training checkpoint from file. - - Args: - file_path: checkpoint file path - """ - checkpoint = torch.load(file_path, map_location=torch.device("cpu")) + def _load_checkpoint(self, checkpoint): rng_state = checkpoint["rng_state"] torch.set_rng_state(rng_state) if self.rank > 0: @@ -600,6 +810,11 @@ def load_checkpoint(self, file_path): del dummy self.cur_epoch = checkpoint["epoch"] + if "batch" in checkpoint: + self.cur_batch = checkpoint["batch"] + else: + self.cur_batch = 0 + try: self.model.load_state_dict(checkpoint["model_state_dict"]) except: @@ -610,6 +825,12 @@ def load_checkpoint(self, file_path): if self.lr_scheduler is not None: self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"]) + if "global_step" in checkpoint: + self.global_step = checkpoint["global_step"] + elif self.lr_scheduler is not None: + # this for older models that didn't save the global step + self.global_step = self.lr_scheduler.step + # if self.use_amp: # amp.load_state_dict(checkpoint['amp']) if self.do_swa: @@ -638,17 +859,31 @@ def load_checkpoint(self, file_path): return logs + def load_checkpoint(self, file_path): + """Loads a training checkpoint from file. 
+ + Args: + file_path: checkpoint file path + """ + checkpoint = torch.load(file_path, map_location=torch.device("cpu")) + return self._load_checkpoint(checkpoint) + def load_last_checkpoint(self): """Loads the last training checkpoint in the experiment dir.""" for epoch in range(self.epochs, 0, -1): - file_path = "%s/model_ep%04d.pth" % (self.exp_path, epoch) - if os.path.isfile(file_path): + file_path = Path("%s/model_ep%04d.pth" % (self.exp_path, epoch)) + if file_path.is_file(): + steps_pattern = "%s/model_ep%04d_step*.pth" % (self.exp_path, epoch) + steps_file_paths = sorted(glob.glob(steps_pattern)) + if len(steps_file_paths) > 0: + file_path = steps_file_paths[-1] + return self.load_checkpoint(file_path) return None @staticmethod - def get_augs_keys(batch, base_key, skip={}): + def get_augs_keys(batch, base_key, skip=set()): keys = [] if base_key in batch and base_key not in skip: keys.append(base_key) @@ -712,12 +947,18 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): parser.add_argument( "--log-interval", type=int, - default=10, + default=1000, help="how many batches to wait before logging training status", ) + parser.add_argument( + "--save-interval-steps", + default=None, + type=int, + help="number of steps between model saves, if None only saves at the end of the epoch", + ) parser.add_argument( "--use-tensorboard", - action="store_true", + action=ActionYesNo, default=False, help="use tensorboard logger", ) @@ -745,13 +986,13 @@ ) parser.add_argument( "--use-amp", - action="store_true", + action=ActionYesNo, default=False, help="use mixed precision training", ) parser.add_argument( "--cpu-offload", - action="store_true", + action=ActionYesNo, default=False, help="CPU offload of gradients when using fully_sharded_ddp", ) diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 3a9cc288..541dc126 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -6,11 +6,10 @@ import os from collections import OrderedDict as ODict -import torchaudio -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +import torchaudio +from jsonargparse import ActionParser, ArgumentParser from torch.distributed.elastic.multiprocessing.errors import record from ...utils.misc import filter_func_args @@ -47,6 +46,7 @@ class TransducerTrainer(TorchTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp """ @@ -68,7 +68,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -77,11 +77,11 @@ swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="text", ): - loss = None super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) @@ -93,9 +93,7 @@ def train_epoch(self, data_loader): Args: data_loader: pytorch data loader returning features and class labels.
""" - batch_keys = [ - self.input_key, f"{self.input_key}_lengths", self.target_key - ] + batch_keys = [self.input_key, f"{self.input_key}_lengths", self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() @@ -110,15 +108,14 @@ def train_epoch(self, data_loader): # # TODO: Check and Modify data, target # data, audio_length, target = data.to(self.device), audio_length.to( # self.device), target.to(self.device) - #print(data.keys(), batch_keys, flush=True) + # print(data.keys(), batch_keys, flush=True) input_data, input_lengths, target = tensors_subset( - data, batch_keys, self.device) + data, batch_keys, self.device + ) batch_size = input_data.shape[0] with self.amp_autocast(): - output = self.model(input_data, - x_lengths=input_lengths, - y=target) + output = self.model(input_data, x_lengths=input_lengths, y=target) loss = output.loss loss = loss.mean() / self.grad_acc_steps @@ -128,9 +125,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) for k, v in output.items(): if "loss" in k and v is not None: @@ -141,12 +138,14 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): @@ -156,9 +155,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): data_loader: PyTorch data loader return input/output pairs. sw_update_bn: wheter or not, update batch-norm layers in SWA. """ - batch_keys = [ - self.input_key, f"{self.input_key}_lengths", self.target_key - ] + batch_keys = [self.input_key, f"{self.input_key}_lengths", self.target_key] metric_acc = MetricAcc(self.device) batch_metrics = ODict() with torch.no_grad(): @@ -170,9 +167,9 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.eval() for batch, data in enumerate(data_loader): - input_data, input_lengths, target = tensors_subset( - data, batch_keys, self.device) + data, batch_keys, self.device + ) batch_size = input_data.shape[0] # data, audio_length, target = data.to( @@ -183,9 +180,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): # batch_size = data.shape[0] with self.amp_autocast(): - output = self.model(input_data, - x_lengths=input_lengths, - y=target) + output = self.model(input_data, x_lengths=input_lengths, y=target) for k, v in output.items(): if "loss" in k and v is not None: @@ -208,14 +203,11 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): super_skip = skip.copy() super_skip.add("target_key") - TorchTrainer.add_class_args(parser, - train_modes=train_modes, - skip=super_skip) + TorchTrainer.add_class_args(parser, train_modes=train_modes, skip=super_skip) if "target_key" not in skip: - parser.add_argument("--target-key", - default="text", - help="dict. key for nnet targets") + parser.add_argument( + "--target-key", default="text", help="dict. 
key for nnet targets" + ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index f4877dc6..72942506 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset @@ -45,6 +44,7 @@ class VAETrainer(TorchTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. @@ -67,7 +67,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -76,11 +76,11 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="x", ): - super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) @@ -145,9 +145,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["elbo"] = elbo.item() for metric in ["log_px", "kldiv_z"]: @@ -157,12 +157,14 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): @@ -204,7 +206,6 @@ def validation_epoch(self, data_loader, swa_update_bn=False): @staticmethod def add_class_args(parser, prefix=None, train_modes=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index fc9d98f1..c89cfd9a 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset @@ -45,6 +44,7 @@ class VQDVAETrainer(DVAETrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU 
offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. @@ -67,7 +67,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -76,44 +76,15 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x_aug", target_key="x", ): - super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - def train_epoch(self, data_loader): - batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() @@ -128,9 +99,8 @@ def train_epoch(self, data_loader): batch_size = input_data.size(0) with amp.autocast(enabled=self.use_amp): output = self.model(input_data, x_target=target, return_x_mean=True) - loss = output["loss"] + loss = output["loss"] / self.grad_acc_steps x_hat = output["x_mean"] - loss = loss.mean() / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -138,9 +108,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for metric in ["elbo", "log_px", "kldiv_z", "vq_loss"]: @@ -153,12 +123,14 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): @@ -197,7 +169,6 @@ def validation_epoch(self, data_loader, swa_update_bn=False): @staticmethod def add_class_args(parser, prefix=None, train_modes=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index 35946e96..7d82dde2 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -7,11 +7,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset @@ -45,6 +44,7 @@ class VQVAETrainer(VAETrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate 
swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. @@ -67,7 +67,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -76,6 +76,7 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="x", @@ -128,7 +129,7 @@ def train_epoch(self, data_loader): output = self.model(input_data, x_target=target, return_x_mean=True) loss = output["loss"] x_hat = output["x_mean"] - loss = loss.mean() / self.grad_acc_steps + loss = loss / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -136,9 +137,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for metric in ["elbo", "log_px", "kldiv_z", "vq_loss"]: @@ -151,12 +152,14 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): @@ -195,7 +198,6 @@ def validation_epoch(self, data_loader, swa_update_bn=False): @staticmethod def add_class_args(parser, prefix=None, train_modes=None, skip=set()): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 303427de..3943a681 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -7,11 +7,10 @@ import time from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset @@ -49,6 +48,7 @@ class XVectorAdvTrainer(XVectorTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
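Every trainer in this patch replaces logs["lr"] = self._get_lr() with logs.update(self._get_lrs()): with param groups there may be several learning rates to report. The behavior of _get_lrs (defined earlier in torch_trainer.py), restated as a standalone sketch:

    def get_lrs(optimizer):
        # one log entry per param group, collapsed to a single "lr" key
        # when the optimizer has only one group
        lrs = {f"lr_{i}": g["lr"] for i, g in enumerate(optimizer.param_groups)}
        if len(lrs) == 1:
            lrs["lr"] = lrs.pop("lr_0")
        return lrs

    # one group  -> {"lr": 1e-3}
    # two groups -> {"lr_0": 1e-3, "lr_1": 1e-4}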
@@ -75,7 +75,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -84,43 +84,13 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="class_id", ): - super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - - # super().__init__( - # model, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # loss=loss, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - self.attack = attack self.attack.to(device) self.p_attack = p_attack * self.grad_acc_steps @@ -155,7 +125,7 @@ def train_epoch(self, data_loader): # generate adversarial attacks logging.info("generating adv attack for batch=%d", batch) self.model.eval() - data_adv = self.attack.generate(inptu_data, target) + data_adv = self.attack.generate(input_data, target) max_delta = torch.max(torch.abs(data_adv - data)).item() logging.info("adv attack max perturbation=%f", max_delta) input_data = data_adv @@ -173,9 +143,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): @@ -183,12 +153,14 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 2a012dde..522d7e0b 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -7,11 +7,10 @@ import time from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset @@ -50,6 +49,7 @@ class XVectorAdvTrainerFromWav(XVectorTrainerFromWav): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
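For context, XVectorAdvTrainer interleaves adversarial and clean batches: with probability p_attack it switches the model to eval mode, generates an adversarial version of the batch, and trains on that instead. A condensed sketch (the exact gating condition is not visible in the hunk above and is an assumption):

    import logging
    import torch

    def maybe_adversarial(model, attack, input_data, target, p_attack: float):
        # with probability p_attack, replace the clean batch by an
        # adversarial one, as in XVectorAdvTrainer.train_epoch
        if torch.rand(1).item() < p_attack:
            model.eval()
            data_adv = attack.generate(input_data, target)
            max_delta = torch.max(torch.abs(data_adv - input_data)).item()
            logging.info("adv attack max perturbation=%f", max_delta)
            input_data = data_adv
            model.train()
        return input_data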
@@ -77,7 +77,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -86,44 +86,13 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="class_id", ): - super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - - # super().__init__( - # model, - # feat_extractor, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # loss=loss, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - self.attack = attack self.attack.to(device) self.p_attack = p_attack * self.grad_acc_steps @@ -182,9 +151,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): @@ -192,12 +161,14 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index aedd5be0..02c48577 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -45,6 +45,7 @@ class XVectorTrainer(TorchTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
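The partial checkpoints pay off at resume time: load_last_checkpoint (amended earlier in torch_trainer.py) walks the epochs backwards and, when it finds an epoch checkpoint, prefers the newest step checkpoint with the same prefix. A standalone sketch of that lookup:

    import glob
    from pathlib import Path

    def find_last_checkpoint(exp_path: str, epochs: int):
        for epoch in range(epochs, 0, -1):
            file_path = Path("%s/model_ep%04d.pth" % (exp_path, epoch))
            if file_path.is_file():
                # zero-padded step numbers make the lexicographic sort chronological
                step_files = sorted(
                    glob.glob("%s/model_ep%04d_step*.pth" % (exp_path, epoch))
                )
                return step_files[-1] if step_files else file_path
        return None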
@@ -68,7 +69,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -77,6 +78,7 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="class_id", @@ -104,14 +106,6 @@ def train_epoch(self, data_loader): for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) - # try: - # l1 = self.model.hf_feats.hf_model.encoder.layers[0].attention.v_proj - # # print(f"lora train {l1.training}") - # print(f"loraA {l1.lora_A}") - # print(f"loraB {l1.lora_B}", flush=True) - # except: - # pass - if batch % self.grad_acc_steps == 0: self.optimizer.zero_grad() @@ -131,9 +125,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * loss_scale for k, metric in self.metrics.items(): diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 9d04af42..d4a34abc 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -6,11 +6,10 @@ import os from collections import OrderedDict as ODict -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.cuda.amp as amp import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset @@ -51,6 +50,7 @@ class XVectorTrainerDeepFeatReg(XVectorTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
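All of these train_epoch loops share the same gradient-accumulation skeleton, which the cur_batch and partial-save changes hook into. Condensed to its core (schematic; dividing the loss by grad_acc_steps keeps the accumulated gradient an average over the micro-batches):

    def train_epoch(model, loss_fn, optimizer, loader, grad_acc_steps, global_step=0):
        for batch, (x, y) in enumerate(loader):
            if batch % grad_acc_steps == 0:
                optimizer.zero_grad()
            loss = loss_fn(model(x), y) / grad_acc_steps
            loss.backward()
            if (batch + 1) % grad_acc_steps == 0:
                # in the trainers, update_model() also steps the LR scheduler,
                # cur_batch is recorded, and save_checkpoint(partial=True) may fire
                optimizer.step()
                global_step += 1
        return global_step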
@@ -80,7 +80,7 @@ def __init__( reg_loss=None, train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -89,43 +89,14 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="class_id", ): - super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # loss=loss, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - self.prior_model = prior_model if reg_loss is None or reg_loss == "l1": reg_loss = nn.L1Loss() @@ -219,9 +190,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) @@ -229,11 +200,13 @@ def train_epoch(self, data_loader): metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs @staticmethod diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 6d06eac8..041a1ea7 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -50,6 +50,7 @@ class XVectorTrainerDeepFeatRegFromWav(XVectorTrainerDeepFeatReg): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
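Another repeated edit: logs["lr"] = self._get_lr() becomes logs.update(self._get_lrs()), so each optimizer param group reports its own learning rate. A plausible shape for that helper, mirroring the _get_wds weight-decay version added to torch_trainer.py later in this patch (a sketch, not the exact implementation):

    import torch

    def get_lrs(optimizer):
        # one log entry per param group: lr_0, lr_1, ...; a single group keeps the old "lr" key
        lrs = {f"lr_{i}": g["lr"] for i, g in enumerate(optimizer.param_groups)}
        if len(lrs) == 1:
            lrs["lr"] = lrs.pop("lr_0")
        return lrs

    w = [torch.nn.Parameter(torch.zeros(2)) for _ in range(2)]
    opt = torch.optim.SGD([{"params": w[:1], "lr": 1e-2}, {"params": w[1:], "lr": 1e-3}])
    print(get_lrs(opt))  # {'lr_0': 0.01, 'lr_1': 0.001}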
@@ -89,49 +90,14 @@ def __init__( swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="class_id", ): - super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # prior_model, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # reg_layers_enc=reg_layers_enc, - # reg_layers_classif=reg_layers_classif, - # reg_weight_enc=reg_weight_enc, - # reg_weight_classif=reg_weight_classif, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # loss=loss, - # reg_loss=reg_loss, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - self.feat_extractor = feat_extractor if device is not None: self.feat_extractor.to(device) @@ -218,21 +184,23 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) self.loggers.on_batch_end(logs=logs, batch_size=batch_size) logs = metric_acc.metrics logs = ODict(("train_" + k, v) for k, v in logs.items()) - logs["lr"] = self._get_lr() + lrs = self._get_lrs() + logs.update(lrs) return logs def validation_epoch(self, data_loader, swa_update_bn=False): diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 6d00806a..2d6b5514 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -44,6 +44,7 @@ class XVectorTrainerFromWav(XVectorTrainer): swa_start: epoch to start doing swa swa_lr: SWA learning rate swa_anneal_epochs: SWA learning rate anneal epochs + save_interval_steps: number of steps between model saves, if None only saves at the end of the epoch cpu_offload: CPU offload of gradients when using fully sharded ddp input_key: dict. key for nnet input. target_key: dict. key for nnet targets. 
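The from-wav variants keep an acoustic feature extractor inside the trainer, move it to the training device next to the model, and run it on the waveform before the x-vector forward pass. A toy sketch of that order of operations; the modules, shapes, and the no-grad feature extraction are illustrative assumptions, not hyperion's exact trainer code:

    import torch
    import torch.nn as nn

    feat_extractor = nn.Conv1d(1, 80, kernel_size=400, stride=160)  # wav -> fbank-like feats
    model = nn.Sequential(nn.AdaptiveAvgPool1d(1), nn.Flatten(), nn.Linear(80, 10))
    loss_fn = nn.CrossEntropyLoss()
    grad_acc_steps = 2

    audio = torch.randn(4, 1, 16000)   # batch of 1 s waveforms at 16 kHz
    target = torch.randint(0, 10, (4,))
    with torch.no_grad():              # the extractor is frozen in this sketch
        feats = feat_extractor(audio)
    output = model(feats)
    loss = loss_fn(output, target) / grad_acc_steps  # scale for gradient accumulation
    loss.backward()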
@@ -68,7 +69,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, - log_interval=10, + log_interval=1000, use_tensorboard=False, use_wandb=False, wandb={}, @@ -77,6 +78,7 @@ swa_start=0, swa_lr=1e-3, swa_anneal_epochs=10, + save_interval_steps=None, cpu_offload=False, input_key="x", target_key="class_id", @@ -120,9 +122,9 @@ def train_epoch(self, data_loader): loss.backward() if (batch + 1) % self.grad_acc_steps == 0: - if self.lr_scheduler is not None and not self.in_swa: - self.lr_scheduler.on_opt_step() + self.cur_batch = batch + 1 self.update_model() + self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): diff --git a/hyperion/torch/utils/ddp.py b/hyperion/torch/utils/ddp.py index 4f006c0a..aa5efe37 100644 --- a/hyperion/torch/utils/ddp.py +++ b/hyperion/torch/utils/ddp.py @@ -54,8 +54,8 @@ def ddp_init( device = open_device(num_gpus) return device, 0, 1 - os.environ["MASTER_ADDR"] = master_addr - os.environ["MASTER_PORT"] = master_port + os.environ["MASTER_ADDR"] = str(master_addr) + os.environ["MASTER_PORT"] = str(master_port) logging.info( f"init ddp rank={rank} world_size={world_size} master={master_addr}:{master_port} gpu_id={gpu_id}" diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py index 51f0f37a..1b35364d 100644 --- a/hyperion/utils/dataset.py +++ b/hyperion/utils/dataset.py @@ -3,24 +3,26 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ import logging -from pathlib import Path -from typing import List, Dict, Optional, Union -from copy import deepcopy import math +from copy import deepcopy +from pathlib import Path +from typing import Dict, List, Optional, Union + +import lhotse import numpy as np import pandas as pd import yaml -from .info_table import InfoTable from .class_info import ClassInfo +from .enrollment_map import EnrollmentMap from .feature_set import FeatureSet +from .info_table import InfoTable from .misc import PathLike from .recording_set import RecordingSet from .segment_set import SegmentSet -from .enrollment_map import EnrollmentMap +from .sparse_trial_key import SparseTrialKey from .trial_key import TrialKey from .trial_ndx import TrialNdx -from .sparse_trial_key import SparseTrialKey class Dataset: @@ -822,7 +824,7 @@ def remove_recordings( def remove_classes(self, classes_name: str): if self._classes_paths[classes_name] is not None: - self._files_to_delete.append(self._class_paths[class_name]) + self._files_to_delete.append(self._classes_paths[classes_name]) del self._classes[classes_name] del self._classes_paths[classes_name] @@ -1219,3 +1221,12 @@ def split_train_val( val_ds.clean() return train_ds, val_ds + + @classmethod + def from_lhotse( + cls, + cuts: Optional[Union[lhotse.CutSet, PathLike]] = None, + recordings: Optional[Union[lhotse.RecordingSet, PathLike]] = None, + supervisions: Optional[Union[lhotse.SupervisionSet, PathLike]] = None, + ): + return None From 4593c4b70cdd3a11a9a82eb4ca08596162601def Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 14 Dec 2023 11:11:26 -0500 Subject: [PATCH 125/154] dino seems to be working --- egs/librispeech/v1/datapath.sh | 18 +++ hyperion/torch/losses/dino_loss.py | 2 +- hyperion/torch/optim/ema.py | 74 ++++++++++ hyperion/torch/torch_model.py | 1 - hyperion/torch/trainers/ae_trainer.py | 1 + .../torch/trainers/dino_xvector_trainer.py | 67 ++++++--- hyperion/torch/trainers/dvae_trainer.py | 1 + hyperion/torch/trainers/plda_trainer.py | 1 + hyperion/torch/trainers/torch_trainer.py | 132
++++++++++++++++-- hyperion/torch/trainers/transducer_trainer.py | 1 + hyperion/torch/trainers/vae_trainer.py | 1 + hyperion/torch/trainers/vq_dvae_trainer.py | 1 + hyperion/torch/trainers/vq_vae_trainer.py | 1 + .../torch/trainers/xvector_adv_trainer.py | 1 + .../trainers/xvector_adv_trainer_from_wav.py | 1 + hyperion/torch/trainers/xvector_trainer.py | 2 + .../trainers/xvector_trainer_deep_feat_reg.py | 1 + .../xvector_trainer_deep_feat_reg_from_wav.py | 1 + .../trainers/xvector_trainer_from_wav.py | 1 + hyperion/torch/wd_schedulers/__init__.py | 9 ++ hyperion/torch/wd_schedulers/cos_wd.py | 50 +++++++ hyperion/torch/wd_schedulers/factory.py | 89 ++++++++++++ hyperion/torch/wd_schedulers/wd_scheduler.py | 120 ++++++++++++++++ 23 files changed, 541 insertions(+), 35 deletions(-) create mode 100644 egs/librispeech/v1/datapath.sh create mode 100644 hyperion/torch/optim/ema.py create mode 100644 hyperion/torch/wd_schedulers/__init__.py create mode 100644 hyperion/torch/wd_schedulers/cos_wd.py create mode 100644 hyperion/torch/wd_schedulers/factory.py create mode 100644 hyperion/torch/wd_schedulers/wd_scheduler.py diff --git a/egs/librispeech/v1/datapath.sh b/egs/librispeech/v1/datapath.sh new file mode 100644 index 00000000..3e8de307 --- /dev/null +++ b/egs/librispeech/v1/datapath.sh @@ -0,0 +1,18 @@ +# Copyright +# 2018 Johns Hopkins University (Author: Jesus Villalba) +# +# Paths to the databases used in the experiment + + +if [ "$(hostname --domain)" == "clsp.jhu.edu" ];then + librispeech_root=/export/corpora5/LibriSpeech + musan_root=/export/corpora5/JHU/musan +elif [ "$(hostname --domain)" == "cm.gemini" ];then + librispeech_root=/export/common/data/corpora/ASR/openslr/SLR12/LibriSpeech + musan_root=/export/common/data/corpora/MUSAN/musan +else + echo "Put your database paths here" + exit 1 +fi + + diff --git a/hyperion/torch/losses/dino_loss.py b/hyperion/torch/losses/dino_loss.py index b22489a3..55f8e846 100644 --- a/hyperion/torch/losses/dino_loss.py +++ b/hyperion/torch/losses/dino_loss.py @@ -51,7 +51,7 @@ def update_temp(self, epoch: int): * epoch / self.temp_warmup_epochs ) - logging.info("updating dino-loss teacher temp=%.2f", self.cur_teacher_temp) + logging.info("updating dino-loss teacher temp=%.3f", self.cur_teacher_temp) else: self.cur_teacher_temp = self.teacher_temp diff --git a/hyperion/torch/optim/ema.py b/hyperion/torch/optim/ema.py new file mode 100644 index 00000000..f120bf21 --- /dev/null +++ b/hyperion/torch/optim/ema.py @@ -0,0 +1,74 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import math + +import torch +from jsonargparse import ActionParser, ArgumentParser + + +class ExpMovingAvg: + def __init__( + self, params, init_momentum=0.996, momentum=0.996, warmup_steps=0, global_step=0 + ): + if not isinstance(params, list): + params = [params] + self.params = [list(p) for p in params] + self.init_momentum = init_momentum + self._momentum = momentum + self.warmup_steps = warmup_steps + self.global_step = global_step + + def state_dict(self): + """Returns the state of the optimizer as a :class:`dict` needed to restart the training.""" + return {"global_step": self.global_step} + + def load_state_dict(self, state_dict): + """Loads the optimizer state. + + Arguments: + state_dict (dict): scheduler state. Should be an object returned + from a call to :meth:`state_dict`. 
+ """ + self.__dict__.update(state_dict) + + @property + def momentum(self): + if self.global_step >= self.warmup_steps: + return self._momentum + else: + alpha = (1 + math.cos(self.global_step / self.warmup_steps * math.pi)) / 2 + return self.init_momentum * alpha + self._momentum * (1 - alpha) + + @torch.no_grad() + def step(self, new_params): + if not isinstance(new_params, list): + new_params = [new_params] + + assert len(self.params) == len(new_params) + momentum = self.momentum + for param_group, new_param_group in zip(self.params, new_params): + for p, p_new in zip(param_group, new_param_group): + p.data.mul_(momentum).add_((1 - momentum) * p_new.data) + + self.global_step += 1 + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--init-momentum", default=0.996, type=float, help="initial momentum" + ) + parser.add_argument( + "--momentum", default=0.996, type=float, help="final momentum" + ) + parser.add_argument( + "--warmup-steps", default=0, type=int, help="momentum warmup steps" + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/torch_model.py b/hyperion/torch/torch_model.py index 3d5c8c9e..242402bc 100644 --- a/hyperion/torch/torch_model.py +++ b/hyperion/torch/torch_model.py @@ -95,7 +95,6 @@ def has_param_groups(self): return self.bias_weight_decay is not None def trainable_param_groups(self): - assert self.bias_weight_decay is not None if self.bias_weight_decay is None: return [{"params": self.trainable_parameters()}] diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index a0f5f1d4..9939797e 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -64,6 +64,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/dino_xvector_trainer.py b/hyperion/torch/trainers/dino_xvector_trainer.py index bb7b427d..e4051058 100644 --- a/hyperion/torch/trainers/dino_xvector_trainer.py +++ b/hyperion/torch/trainers/dino_xvector_trainer.py @@ -15,7 +15,7 @@ from ...utils.misc import filter_func_args from ..optim import ExpMovingAvg as EMA from ..utils import MetricAcc, TorchDDP, tensors_subset -from .torch_trainer import TorchTrainer +from .torch_trainer import DDPType, TorchTrainer class DINOXVectorTrainer(TorchTrainer): @@ -69,6 +69,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", @@ -121,28 +122,6 @@ def set_train_mode(self): @torch.no_grad() def update_teacher_model(self): self.teacher_optimizer.step(self.model.parameters()) - # print( - # "pmw", - # self.model.xvector.proj_head_net.proj.weight[:5, :5], - # self.teacher_model.xvector.proj_head_net.proj.weight[:5, :5], - # ) - # print( - # "mw", - # self.model.xvector.classif_net.output.weight[:5, :5], - # self.teacher_model.xvector.classif_net.output.weight[:5, :5], - # ) - # print( - # "mwg", - # self.model.xvector.classif_net.output.weight_g[:5, :5], - # self.teacher_model.xvector.classif_net.output.weight_g[:5, :5], - # ) - # print( - # "mwv", - # self.model.xvector.classif_net.output.weight_v[:5, :5], - # self.teacher_model.xvector.classif_net.output.weight_v[:5, :5], - # flush=True, - # ) - # print("------------------------------", flush=True) @staticmethod def get_augs_keys(batch, 
base_key, subset, skip=set()): @@ -264,6 +243,7 @@ def train_epoch(self, data_loader): logs = ODict(("train_" + k, v) for k, v in logs.items()) lrs = self._get_lrs() logs.update(lrs) + logs.update(self._get_wds()) logs["ema_momentum"] = self.teacher_optimizer.momentum return logs @@ -332,6 +312,18 @@ def _load_checkpoint(self, checkpoint): ) return super()._load_checkpoint(checkpoint) + def _new_load_checkpoint(self, checkpoint, teacher_checkpoint): + self.teacher_model.load_state_dict(teacher_checkpoint["model_state_dict"]) + self.teacher_optimizer.load_state_dict( + teacher_checkpoint["optimizer_state_dict"] + ) + return super()._load_checkpoint(checkpoint) + + def load_checkpoint(self, epoch, step): + checkpoint = self.load_model_checkpoint("model", epoch, step) + teacher_checkpoint = self.load_model_checkpoint("teacher_model", epoch, step) + return self._new_load_checkpoint(checkpoint, teacher_checkpoint) + def checkpoint(self, logs=None): checkpoint = super().checkpoint(logs) self.teacher_model.train() @@ -360,6 +352,35 @@ def teacher_checkpoint(self, logs=None): return checkpoint + def save_checkpoint(self, logs=None, partial: bool = False): + """Saves a checkpoint of the training status + + Args: + logs: logs containing the current value of the metrics. + partial: if True, it is saving in the middle of the epoch + """ + if partial and not self.save_partial_checkpoint(): + return + + if self.ddp and ( + self.ddp_type == DDPType.OSS_DDP or self.ddp_type == DDPType.OSS_SHARDED_DDP + ): + # Not sure what this does, just copying from the example in + # https://github.com/facebookresearch/fairscale/blob/master/benchmarks/oss.py + # Check the checkpointing in the case of the OSS optimizer + # Memory usage could spill over from there + # optimizer = cast(OSS, optimizer) + self.optimizer.consolidate_state_dict() + + if self.rank != 0: + return + + checkpoint = self.checkpoint(logs) + self.save_model_checkpoint("model", checkpoint, partial=partial) + + teacher_checkpoint = self.teacher_checkpoint(logs) + self.save_model_checkpoint("teacher_model", teacher_checkpoint, partial=partial) + @staticmethod def filter_args(**kwargs): args = filter_func_args(DINOXVectorTrainer.__init__, kwargs) diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index 718630d6..f128db44 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -62,6 +62,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/plda_trainer.py b/hyperion/torch/trainers/plda_trainer.py index a0099c02..71845a4b 100644 --- a/hyperion/torch/trainers/plda_trainer.py +++ b/hyperion/torch/trainers/plda_trainer.py @@ -66,6 +66,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 4fa5bdab..b3d6cb9f 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -8,6 +8,7 @@ import logging import math import os +import re from collections import OrderedDict as ODict from enum import Enum from pathlib import Path @@ -33,6 +34,8 @@ TorchDDP, tensors_subset, ) +from ..wd_schedulers import WDScheduler as WDS +from ..wd_schedulers import WDSchedulerFactory as WDSF class DDPType(str, Enum): @@ -92,6 +95,7 @@ def __init__( device=None, metrics=None, lrsched=None, + 
wdsched=None, loggers=None, ddp=False, ddp_type="ddp", @@ -121,6 +125,7 @@ self.exp_path = Path(exp_path) self.optim = optim self.lrsched = lrsched + self.wdsched = wdsched if loggers is None: self.loggers = self._default_loggers( @@ -237,6 +242,7 @@ def prepare_models_for_training(self): self.model, self.optimizer, self.lr_scheduler, + self.wd_scheduler, self.grad_scaler, self.swa_model, self.swa_scheduler, @@ -244,6 +250,7 @@ self.model, self.optim, self.lrsched, + self.wdsched, self.device, self.use_amp, self.ddp, @@ -265,6 +272,7 @@ def _prepare_model_for_training( model, optim, lrsched, + wdsched, device, use_amp, ddp, @@ -318,6 +326,9 @@ # make the learning rate scheduler lr_scheduler = self._make_lr_sched(lrsched, optimizer) + # make weight decay scheduler if needed + wd_scheduler = self._make_wd_sched(wdsched, optimizer) + if use_amp: if ddp and ddp_type != DDPType.DDP: if self.rank == 0: @@ -342,7 +353,15 @@ optimizer, swa_lr=swa_lr, anneal_epochs=swa_anneal_epochs ) - return model, optimizer, lr_scheduler, grad_scaler, swa_model, swa_scheduler + return ( + model, + optimizer, + lr_scheduler, + wd_scheduler, + grad_scaler, + swa_model, + swa_scheduler, + ) def set_epoch(self, data_loader, cur_batch: int = 0): try: @@ -378,6 +397,9 @@ def fit(self, train_data, val_data=None): epoch_updates = int(len(train_data) / self.grad_acc_steps) self.lr_scheduler.on_epoch_begin(epoch, epoch_updates=epoch_updates) + if self.wd_scheduler is not None: + self.wd_scheduler.on_epoch_begin(epoch) + logs = self.train_epoch(train_data) self.cur_batch = 0 if val_data is not None: @@ -395,6 +417,8 @@ else: if self.lr_scheduler is not None: self.lr_scheduler.on_epoch_end(logs) + if self.wd_scheduler is not None: + self.wd_scheduler.on_epoch_end() self.save_checkpoint(logs) @@ -463,6 +487,7 @@ def train_epoch(self, data_loader): logs = ODict(("train_" + k, v) for k, v in logs.items()) lrs = self._get_lrs() logs.update(lrs) + logs.update(self._get_wds()) return logs def validation_epoch(self, data_loader, swa_update_bn=False): @@ -502,7 +527,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): def bn_update_epoch(self, data_loader): logs = self.validation_epoch(data_loader, swa_update_bn=True) - logs["lr"] = self._get_lr() + logs.update(self._get_lrs()) return logs def _clip_grad_norm(self, model, optim, grad_clip, grad_clip_norm): @@ -597,10 +622,22 @@ def _make_lr_sched(self, lr_sched, optim): assert isinstance(lr_sched, dict) args = LRSF.filter_args(**lr_sched) if self.rank == 0: - logging.info("lr scheduler args={}".format(args)) + logging.info(f"lr scheduler args={args}") lr_sched = LRSF.create(optim, **args) return lr_sched + def _make_wd_sched(self, wd_sched, optim): + """Makes a weight decay scheduler object.""" + if wd_sched is None or isinstance(wd_sched, WDS): + return wd_sched + + assert isinstance(wd_sched, dict) + args = WDSF.filter_args(**wd_sched) + if self.rank == 0: + logging.info(f"wd scheduler args={args}") + wd_sched = WDSF.create(optim, **args) + return wd_sched + def _default_loggers(self, log_interval, use_tensorboard, use_wandb, wandb): """Creates the default data loaders""" prog_log = ProgLogger(interval=log_interval) @@ -634,6 +671,27 @@ def _get_lrs(self): return lrs + def _get_wd(self): + """Returns the current weight decay to show in the loggers""" + wds = [ + param_group["weight_decay"] for param_group
in self.optimizer.param_groups + ] + return max(wds) + + def _get_wds(self, if_scheduler=True): + """Returns the current weight decays of all param groups to show in the loggers""" + if if_scheduler and self.wd_scheduler is None: + return {} + + wds = { + f"wd_{i}": param_group["weight_decay"] + for i, param_group in enumerate(self.optimizer.param_groups) + } + if len(wds) == 1: + wds["wd"] = wds.pop("wd_0") + + return wds + def _compute_grad_acc_steps(self, data_loader): if self.eff_batch_size is None: return @@ -690,6 +748,9 @@ def checkpoint(self, logs=None): if self.lr_scheduler is not None: checkpoint["lr_scheduler_state_dict"] = self.lr_scheduler.state_dict() + if self.wd_scheduler is not None: + checkpoint["wd_scheduler_state_dict"] = self.wd_scheduler.state_dict() + if logs is not None: checkpoint["logs"] = logs @@ -705,7 +766,7 @@ def save_partial_checkpoint(self): and self.global_step % self.save_interval_steps == 0 ) - def new_save_checkpoint(self, logs=None, partial: bool = False): + def save_checkpoint(self, logs=None, partial: bool = False): """Saves a checkpoint of the training status Args: @@ -735,18 +796,19 @@ def save_model_checkpoint( self, model_name: str, checkpoint: Dict[str, Any], partial: bool = False ): if partial: - file_path = "%s/%s_ep%04d_step%08d.pth" % ( - model_name, + file_path = "%s/%s_ep%04d_step%010d.pth" % ( self.exp_path, + model_name, self.cur_epoch, self.global_step, ) else: - file_path = "%s/%s_ep%04d.pth" % (model_name, self.exp_path, self.cur_epoch) + file_path = "%s/%s_ep%04d.pth" % (self.exp_path, model_name, self.cur_epoch) + logging.info("saving %s to %s", model_name, file_path) torch.save(checkpoint, file_path) - def save_checkpoint(self, logs=None, partial: bool = False): + def old_save_checkpoint(self, logs=None, partial: bool = False): """Saves a checkpoint of the training status Args: @@ -824,6 +886,8 @@ def _load_checkpoint(self, checkpoint): self.loss.load_state_dict(checkpoint["loss_state_dict"]) if self.lr_scheduler is not None: self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"]) + if self.wd_scheduler is not None: + self.wd_scheduler.load_state_dict(checkpoint["wd_scheduler_state_dict"]) if "global_step" in checkpoint: self.global_step = checkpoint["global_step"] @@ -859,6 +923,51 @@ def _load_checkpoint(self, checkpoint): return logs + def find_last_checkpoint(self, model_name="model"): + """finds the last checkpoint epoch and step in the experiment dir""" + last_epoch = 0 + last_step = 0 + file_pattern = "%s/%s_ep[0-9]*.pth" % (self.exp_path, model_name) + file_paths = sorted(glob.glob(file_pattern)) + if len(file_paths) > 0: + last_epoch = int(re.search(r"ep[0-9]*", file_paths[-1])[0][2:]) + + file_pattern = "%s/%s_ep%04d_step[0-9]*.pth" % ( + self.exp_path, + model_name, + last_epoch, + ) + file_paths = sorted(glob.glob(file_pattern)) + if len(file_paths) > 0: + last_step = int(re.search(r"step[0-9]*", file_paths[-1])[0][4:]) + + return last_epoch, last_step + + def load_last_checkpoint(self): + """Loads the last training checkpoint in the experiment dir.""" + last_epoch, last_step = self.find_last_checkpoint() + if last_epoch > 0 or last_step > 0: + return self.new_load_checkpoint(last_epoch, last_step) + + return None + + def load_model_checkpoint(self, model_name="model", epoch=0, step=0): + if step == 0: + file_path = "%s/%s_ep%04d.pth" % (self.exp_path, model_name, epoch) + else: + file_path = "%s/%s_ep%04d_step%010d.pth" % ( + self.exp_path, + model_name, + epoch, + step, + ) + logging.info("loading %s from
%s", model_name, file_path) + return torch.load(file_path, map_location=torch.device("cpu")) + + def new_load_checkpoint(self, epoch, step): + checkpoint = self.load_model_checkpoint("model", epoch, step) + return self._load_checkpoint(checkpoint) + def load_checkpoint(self, file_path): """Loads a training checkpoint from file. @@ -868,7 +977,7 @@ def load_checkpoint(self, file_path): checkpoint = torch.load(file_path, map_location=torch.device("cpu")) return self._load_checkpoint(checkpoint) - def load_last_checkpoint(self): + def old_load_last_checkpoint(self): """Loads the last training checkpoint in the experiment dir.""" for epoch in range(self.epochs, 0, -1): file_path = Path("%s/model_ep%04d.pth" % (self.exp_path, epoch)) @@ -924,11 +1033,14 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): if "lrsched" not in skip: LRSF.add_class_args(parser, prefix="lrsched") + if "wdsched" not in skip: + WDSF.add_class_args(parser, prefix="wdsched") + parser.add_argument( "--grad-acc-steps", type=int, default=1, - help="gradient accumulation batches before weigth update", + help="gradient accumulation batches before weight update", ) parser.add_argument( "--eff-batch-size", diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 541dc126..1d4665cf 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -62,6 +62,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index 72942506..79526122 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -62,6 +62,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index c89cfd9a..ff3f85cc 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -62,6 +62,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index 7d82dde2..4ec04fde 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -62,6 +62,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 3943a681..e19945d1 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -69,6 +69,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 522d7e0b..ad6a3262 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -71,6 +71,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 
02c48577..a9000f38 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -63,6 +63,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", @@ -143,4 +144,5 @@ def train_epoch(self, data_loader): logs = ODict(("train_" + k, v) for k, v in logs.items()) lrs = self._get_lrs() logs.update(lrs) + logs.update(self._get_wds()) return logs diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index d4a34abc..d80f03f1 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -73,6 +73,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 041a1ea7..cf956dc7 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -74,6 +74,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 2d6b5514..89c9b9a7 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -63,6 +63,7 @@ def __init__( device=None, metrics=None, lrsched=None, + wdsched=None, loggers=None, ddp=False, ddp_type="ddp", diff --git a/hyperion/torch/wd_schedulers/__init__.py b/hyperion/torch/wd_schedulers/__init__.py new file mode 100644 index 00000000..d8440b12 --- /dev/null +++ b/hyperion/torch/wd_schedulers/__init__.py @@ -0,0 +1,9 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + + +from .cos_wd import CosineWD +from .factory import WDSchedulerFactory +from .wd_scheduler import WDScheduler diff --git a/hyperion/torch/wd_schedulers/cos_wd.py b/hyperion/torch/wd_schedulers/cos_wd.py new file mode 100644 index 00000000..563e4353 --- /dev/null +++ b/hyperion/torch/wd_schedulers/cos_wd.py @@ -0,0 +1,50 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + + +import logging +import math + +import torch + +from .wd_scheduler import WDScheduler + + +class CosineWD(WDScheduler): + r"""Set the weight decay of each parameter group using a cosine schedule. + + Attributes: + optimizer: Pytorch optimizer object. + initial_wd: initial value of the weight decay. + warmup_steps: number of warm up steps to get the weight decay to its final value. + epoch: initial training epoch, this is needed to restart the model + training. + step: initial training step, this is needed to restart the model training. + update_wd_on_opt_step: if True, updates the weight decay each time we update the model, + otherwise after each epoch.
+ """ + + def __init__( + self, + optimizer, + initial_wd=0, + warmup_steps=0, + epoch=0, + step=0, + update_wd_on_opt_step=False, + ): + super().__init__( + optimizer, initial_wd, warmup_steps, epoch, step, update_wd_on_opt_step + ) + + def get_wd(self, step): + if step >= self.warmup_steps: + return self.final_wds + + r = math.pi / self.warmup_steps + return [ + final_wd + (init_wd - final_wd) * (1 + math.cos(r * step)) / 2 + for init_wd, final_wd in zip(self.initial_wds, self.final_wds) + ] diff --git a/hyperion/torch/wd_schedulers/factory.py b/hyperion/torch/wd_schedulers/factory.py new file mode 100644 index 00000000..3820daa2 --- /dev/null +++ b/hyperion/torch/wd_schedulers/factory.py @@ -0,0 +1,89 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from ...utils.misc import filter_func_args +from .cos_wd import CosineWD + + +class WDSchedulerFactory: + def create( + optimizer, + wdsch_type, + initial_wd=None, + warmup_steps=0, + update_wd_on_opt_step=False, + ): + """Creates a weight decay scheduler object. + + Args: + optimizer: Pytorch optimizer object. + wdsched_type: type of scheduler in ["none", "cos_wd"]. + initial_wd: inital value of weight decay + warmup_steps: steps until reaching final weight decay + update_wd_on_opt_step: if True, updates the wd each time we update the model, + otherwise after each epoch. + """ + + if wdsch_type == "none": + return None + + if wdsch_type == "cos_lr": + return CosineWD( + optimizer, + initial_wd=initial_wd, + warmup_steps=warmup_steps, + update_wd_on_opt_step=update_wd_on_opt_step, + ) + + raise ValueError(f"invalid wdsch_type={wdsch_type}") + + @staticmethod + def filter_args(**kwargs): + return filter_func_args(WDSchedulerFactory.create, kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--wdsch-type", + type=str.lower, + default="none", + choices=[ + "none", + "cos_wd", + ], + help=("weight decay schedulers: None," "Cosine Annealing."), + ) + + parser.add_argument( + "--initial-wd", + default=None, + type=float, + help=( + "Initial value of weight decay, it is expected to be lower than final value." + ), + ) + + parser.add_argument( + "--warmup-steps", + default=0, + type=int, + help=("Number of steps to reach the final value of weight decay"), + ) + + parser.add_argument( + "--update-wd-on-opt-step", + default=False, + action=ActionYesNo, + help=("Update weight decay based on batch number instead of epoch number"), + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/wd_schedulers/wd_scheduler.py b/hyperion/torch/wd_schedulers/wd_scheduler.py new file mode 100644 index 00000000..a3059edc --- /dev/null +++ b/hyperion/torch/wd_schedulers/wd_scheduler.py @@ -0,0 +1,120 @@ +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import torch +import torch.optim as optim + + +class WDScheduler: + """Base class for weight decay schedulers. + + Attributes: + optimizer: Pytorch optimizer object. + initial_wd: initial value of the weight decay. + warmup_steps: number of warm up steps to get the the weight decay to its final value. 
+ epoch: initial training epoch, this is needed to restart the model + training. + step: initial training step, this is needed to restart the model training. + update_wd_on_opt_step: if True, updates the weight decay each time we update the model, + otherwise after each epoch. + """ + + def __init__( + self, + optimizer, + initial_wd=0, + warmup_steps=0, + epoch=0, + step=0, + update_wd_on_opt_step=False, + ): + if not isinstance(optimizer, optim.Optimizer): + raise TypeError("%s is not an Optimizer" % (type(optimizer).__name__)) + self.optimizer = optimizer + + if epoch == 0: + for group in optimizer.param_groups: + group.setdefault("final_wd", group["weight_decay"]) + else: + for i, group in enumerate(optimizer.param_groups): + if "final_wd" not in group: + raise KeyError( + "param 'final_wd' is not specified " + "in param_groups[{}] when resuming an optimizer".format(i) + ) + + self.final_wds = list( + map(lambda group: group["final_wd"], optimizer.param_groups) + ) + + if isinstance(initial_wd, list) or isinstance(initial_wd, tuple): + if len(initial_wd) != len(optimizer.param_groups): + raise ValueError( + "expected {} initial_wds, got {}".format( + len(optimizer.param_groups), len(initial_wd) + ) + ) + self.initial_wds = list(initial_wd) + else: + max_wd = max([group["final_wd"] for group in optimizer.param_groups]) + self.initial_wds = [ + initial_wd * group["final_wd"] / max_wd + for group in optimizer.param_groups + ] + + self.warmup_steps = warmup_steps + self.epoch = epoch + self.step = step + self.update_wd_on_opt_step = update_wd_on_opt_step + + @property + def in_warmup(self): + return self.step < self.warmup_steps + + def state_dict(self): + """Returns the state of the scheduler as a :class:`dict`. + + It contains an entry for every variable in self.__dict__ which + is not the optimizer. + """ + return { + key: value for key, value in self.__dict__.items() if key != "optimizer" + } + + def load_state_dict(self, state_dict): + """Loads the scheduler state. + + Arguments: + state_dict (dict): scheduler state. Should be an object returned + from a call to :meth:`state_dict`.
+ """ + self.__dict__.update(state_dict) + + def get_wd(self): + raise NotImplementedError + + def on_epoch_begin(self, epoch=None, **kwargs): + if epoch is not None: + self.epoch = epoch + + if self.update_wd_on_opt_step: + return + + for param_group, wd in zip( + self.optimizer.param_groups, self.get_wd(self.epoch) + ): + param_group["weight_decay"] = wd + + def on_epoch_end(self, metrics=None): + self.epoch += 1 + + def on_opt_step(self): + if self.update_wd_on_opt_step: + for param_group, wd in zip( + self.optimizer.param_groups, self.get_wd(self.step) + ): + param_group["weight_decay"] = wd + + self.step += 1 From 249281957a5c6231b4c04d8c62c9cd1189ef617d Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 10 Jan 2024 07:08:10 -0500 Subject: [PATCH 126/154] added clustering to dino --- .../ssl.v1/conf/teacher_reverb_noise_aug.yaml | 26 ++ ...n_006_extract_dino_embeds_cluster_eval.sh} | 82 +++- egs/voxceleb/ssl.v1/run_007_train_xvector.sh | 90 +++++ ...train_ecapatdnn512x3_xvec_stage1_v3.1.yaml | 99 +++++ ...train_ecapatdnn512x3_xvec_stage2_v3.1.yaml | 74 ++++ .../train_lresnet34_xvec_stage1_v3.1.yaml | 75 ++++ hyperion/bin/cluster_embeddings.py | 362 ++++++++++++++++++ hyperion/bin/eval_plda_backend.py | 232 +++++++++++ hyperion/bin/train_plda.py | 161 ++++++++ hyperion/helpers/plda_factory.py | 83 ++-- hyperion/np/clustering/__init__.py | 3 +- hyperion/np/clustering/kmeans.py | 184 ++++++++- hyperion/np/clustering/spectral_clustering.py | 312 +++++++++++++++ hyperion/np/np_model.py | 19 + hyperion/np/pdfs/mixtures/gmm_diag_cov.py | 8 +- hyperion/np/pdfs/plda/__init__.py | 3 +- hyperion/np/pdfs/plda/factory.py | 204 ++++++++++ hyperion/np/pdfs/plda/frplda.py | 5 +- hyperion/np/pdfs/plda/plda.py | 15 +- hyperion/np/pdfs/plda/plda_base.py | 61 ++- hyperion/np/pdfs/plda/splda.py | 13 +- hyperion/np/preprocessing/__init__.py | 6 + hyperion/np/preprocessing/resampler.py | 46 +++ hyperion/np/transforms/lda.py | 36 ++ hyperion/np/transforms/pca.py | 26 +- hyperion/torch/data/audio_dataset.py | 50 +-- hyperion/torch/layers/global_pool.py | 26 +- hyperion/torch/narchs/dino_head.py | 73 +--- hyperion/torch/narchs/proj_head.py | 28 +- .../torch/trainers/dino_xvector_trainer.py | 6 +- hyperion/torch/trainers/torch_trainer.py | 12 +- hyperion/torch/wd_schedulers/factory.py | 2 +- hyperion/torch/wd_schedulers/wd_scheduler.py | 4 + 33 files changed, 2203 insertions(+), 223 deletions(-) create mode 100644 egs/voxceleb/ssl.v1/conf/teacher_reverb_noise_aug.yaml rename egs/voxceleb/ssl.v1/{run_006_extract_dino_embeds.sh => run_006_extract_dino_embeds_cluster_eval.sh} (60%) create mode 100755 egs/voxceleb/ssl.v1/run_007_train_xvector.sh create mode 100644 egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_lresnet34_xvec_stage1_v3.1.yaml create mode 100644 hyperion/bin/cluster_embeddings.py create mode 100755 hyperion/bin/eval_plda_backend.py create mode 100644 hyperion/bin/train_plda.py create mode 100644 hyperion/np/clustering/spectral_clustering.py create mode 100644 hyperion/np/pdfs/plda/factory.py create mode 100644 hyperion/np/preprocessing/__init__.py create mode 100644 hyperion/np/preprocessing/resampler.py diff --git a/egs/voxceleb/ssl.v1/conf/teacher_reverb_noise_aug.yaml b/egs/voxceleb/ssl.v1/conf/teacher_reverb_noise_aug.yaml new file mode 100644 index 00000000..6c2fecc0 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/teacher_reverb_noise_aug.yaml 
@@ -0,0 +1,26 @@ +reverb_aug: + reverb_prob: 0.3 + max_reverb_context: 0.5 + rir_types: + smallroom: + weight: 1 + rir_path: csv:data/rirs_smallroom/rirs.csv + rir_norm: max +noise_aug: + noise_prob: 0.7 + noise_types: + noise: + weight: 1 + noise_path: data/musan_noise_proc_audio/recordings.csv + min_snr: 10 + max_snr: 28 + music: + weight: 1 + noise_path: data/musan_music_proc_audio/recordings.csv + min_snr: 13 + max_snr: 28 + babble: + weight: 1 + noise_path: data/musan_speech_babble/recordings.csv + min_snr: 13 + max_snr: 28 diff --git a/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds.sh b/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh similarity index 60% rename from egs/voxceleb/ssl.v1/run_006_extract_dino_embeds.sh rename to egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh index 36ccd294..8973483c 100755 --- a/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds.sh +++ b/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh @@ -30,25 +30,14 @@ if [ $nnet_stage -eq 1 ];then elif [ $nnet_stage -eq 2 ];then nnet=$nnet_s2 nnet_name=$nnet_s2_name -elif [ $nnet_stage -eq 3 ];then - nnet=$nnet_s3 - nnet_name=$nnet_s3_name -elif [ $nnet_stage -eq 4 ];then - nnet=$nnet_s4 - nnet_name=$nnet_s4_name -elif [ $nnet_stage -eq 5 ];then - nnet=$nnet_s5 - nnet_name=$nnet_s5_name -elif [ $nnet_stage -eq 6 ];then - nnet=$nnet_s6 - nnet_name=$nnet_s6_name fi xvector_dir=exp/xvectors/$nnet_name score_dir=exp/scores/$nnet_name score_cosine_dir=$score_dir/cosine +score_plda_dir=$score_dir/${cluster_name}_plda -if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qmf" == "true" || "$do_pca" == "true") ]]; then +if [ $stage -le 1 ]; then # Extract xvectors for training LDA/PLDA nj=100 for name in voxceleb2cat_train @@ -63,7 +52,7 @@ if [[ $stage -le 1 && ( "$do_plda" == "true" || "$do_snorm" == "true" || "$do_qm hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ --part-idx JOB --num-parts $nj \ --recordings-file data/$name/recordings.csv \ - --random-utt-length --min-utt-length 2 --max-utt-length 30 \ + --random-utt-length --min-utt-length 30 --max-utt-length 30 \ --model-path $nnet \ --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv hyperion-tables cat \ @@ -135,5 +124,70 @@ if [ $stage -le 3 ];then --output-file $score_cosine_dir/voxceleb1_results.csv cat $score_cosine_dir/voxceleb1_results.csv + exit fi + +cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name +if [ $stage -le 4 ];then + echo "Cluster Vox2" + mkdir -p $cluster_dir + $train_cmd --mem 50G --num-threads 32 $cluster_dir/clustering.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV \ + hyperion-cluster-embeddings $cluster_method --cfg $cluster_cfg \ + --segments-file data/voxceleb2cat_train_xvector_train/segments.csv \ + --feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --output-file $cluster_dir/voxceleb2cat_train_xvector_train/segments.csv +fi + +if [ $stage -le 5 ];then + echo "Train PLDA" + $train_cmd $cluster_dir/plda.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV \ + hyperion-train-plda --cfg $plda_cfg \ + --segments-file $cluster_dir/voxceleb2cat_train_xvector_train/segments.csv \ + --feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --preproc-file $cluster_dir/plda/preproc.h5 \ + --plda-file $cluster_dir/plda/plda.h5 + + +fi + +if [ $stage -le 6 ];then + + echo "Eval Voxceleb 1 with PLDA" + num_parts=8 + for((i=1;i<=$num_parts;i++)); + do + for((j=1;j<=$num_parts;j++)); + do + $train_cmd 
$score_plda_dir/log/voxceleb1_${i}_${j}.log \ + hyp_utils/conda_env.sh \ + hyperion-eval-plda-backend \ + --feats-file csv:$xvector_dir/voxceleb1_test/xvector.csv \ + --ndx-file data/voxceleb1_test/trials.csv \ + --enroll-map-file data/voxceleb1_test/enrollment.csv \ + --score-file $score_plda_dir/voxceleb1_scores.csv \ + --preproc-file $cluster_dir/plda/preproc.h5 \ + --plda-file $cluster_dir/plda/plda.h5 \ + --enroll-part-idx $i --num-enroll-parts $num_parts \ + --test-part-idx $j --num-test-parts $num_parts & + done + done + wait + hyperion-merge-scores --output-file $score_plda_dir/voxceleb1_scores.csv \ + --num-enroll-parts $num_parts --num-test-parts $num_parts + + $train_cmd --mem 12G --num-threads 6 $score_plda_dir/log/score_voxceleb1.log \ + hyperion-eval-verification-metrics \ + --score-files $score_plda_dir/voxceleb1_scores.csv \ + --key-files data/voxceleb1_test/trials_{o,e,h}.csv \ + --score-names voxceleb1 \ + --key-names O E H \ + --sparse \ + --output-file $score_plda_dir/voxceleb1_results.csv + + cat $score_plda_dir/voxceleb1_results.csv + exit +fi +exit diff --git a/egs/voxceleb/ssl.v1/run_007_train_xvector.sh b/egs/voxceleb/ssl.v1/run_007_train_xvector.sh new file mode 100755 index 00000000..40aceb07 --- /dev/null +++ b/egs/voxceleb/ssl.v1/run_007_train_xvector.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +train_data_dir=data/${nnet_data}_xvector_train +val_data_dir=data/${nnet_data}_xvector_val + +#add extra args from the command line arguments +if [ -n "$num_workers" ];then + extra_args="--data.train.data_loader.num-workers $num_workers" +fi +if [ "$use_tb" == "true" ];then + extra_args="$extra_args --trainer.use-tensorboard" +fi +if [ "$use_wandb" == "true" ];then + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)" +fi + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +xvector_dir=exp/xvectors/$nnet_s1_name/voxceleb2cat_train +output_dir=exp/clustering/$nnet_s1_name/$cluster_method/voxceleb2cat_train_xvector_train +if [ $stage -le 1 ];then + mkdir -p $output_dir + $train_cmd --mem 50G --num-threads 32 $output_dir/clustering.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV \ + hyperion-cluster-embeddings $cluster_method --cfg $cluster_cfg \ + --segments-file data/voxceleb2cat_train_xvector_train/segments.csv \ + --feats-file csv:$xvector_dir/xvector.csv \ + --output-file $output_dir/segments.csv +fi +exit +# Network Training +if [ $stage -le 2 ]; then + + mkdir -p $nnet_s1_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + hyperion-train-wav2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ + --data.train.dataset.segments-file $train_data_dir/segments.csv \ + --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ + --data.val.dataset.segments-file $val_data_dir/segments.csv \ + --trainer.exp-path $nnet_s1_dir \ + --num-gpus $ngpu \ + +fi + + +# Large Margin Fine-tuning +if [ $stage -le 2 ]; then + if [ "$use_wandb" == "true" ];then 
+ extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" + fi + mkdir -p $nnet_s2_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s2_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + hyperion-finetune-wav2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ + --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ + --data.train.dataset.segments-file $train_data_dir/segments.csv \ + --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ + --data.val.dataset.segments-file $val_data_dir/segments.csv \ + --in-model-file $nnet_s1 \ + --trainer.exp-path $nnet_s2_dir \ + --num-gpus $ngpu \ + +fi diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.1.yaml new file mode 100644 index 00000000..958c6237 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage1_v3.1.yaml @@ -0,0 +1,99 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + num_augs: 4 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + dropout_rate: 0.002 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.0 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 40 + eff_batch_size: 256 + target_key: speaker + train_mode: full diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.1.yaml new file mode 100644 index 00000000..c19546e8 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn512x3_xvec_stage2_v3.1.yaml @@ -0,0 +1,74 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - 
conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 6.0 + min_chunk_length: 6.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + resnet_enc: + override_dropouts: true + dropout_rate: 0. +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + swa_start: 31 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + target_key: speaker + train_mode: full diff --git a/egs/voxceleb/v1.2/conf/train_lresnet34_xvec_stage1_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_lresnet34_xvec_stage1_v3.1.yaml new file mode 100644 index 00000000..2244fd38 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_lresnet34_xvec_stage1_v3.1.yaml @@ -0,0 +1,75 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + num_augs: 4 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: lresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/hyperion/bin/cluster_embeddings.py b/hyperion/bin/cluster_embeddings.py new file mode 100644 index 00000000..998b1f17 --- /dev/null +++ b/hyperion/bin/cluster_embeddings.py @@ -0,0 +1,362 @@ +#!/usr/bin/env python +""" + Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import os +import sys +import time +from pathlib import Path + +import numpy as np +import pandas as pd +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ActionYesNo, + ArgumentParser, + namespace_to_dict, +) +from scipy import sparse + +from hyperion.hyp_defs import config_logger +from hyperion.io import RandomAccessDataReaderFactory as DRF +from hyperion.np.clustering import AHC, KMeans, KMeansInitMethod, SpectralClustering +from hyperion.np.pdfs import DiagGMM +from 
hyperion.np.transforms import PCA, LNorm +from hyperion.utils import SegmentSet +from hyperion.utils.math_funcs import cosine_scoring + +subcommand_list = [ + "cos_ahc", + "spectral_clustering", +] + + +def add_common_args(parser): + parser.add_argument("--feats-file", required=True) + parser.add_argument("--segments-file", required=True) + parser.add_argument("--output-file", required=True) + parser.add_argument( + "--filter-by-gmm-post", + default=0, + type=float, + help="remove segments with gmm posterior lower than threshold", + ) + + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int, + ) + + +def load_data(segments_file, feats_file): + logging.info("loading data") + segments = SegmentSet.load(segments_file) + reader = DRF.create(feats_file) + x = reader.read(segments["id"], squeeze=True) + return segments, x + + +def do_pca(x, pca_args): + pca_var_r = pca_args["pca_var_r"] + logging.info("computing pca pca_var_r=%f", pca_var_r) + if pca_var_r < 1: + pca = PCA(**pca_args) + pca.fit(x) + x = pca(x) + logging.info("pca-dim=%d", x.shape[1]) + + return x + + +def do_kmeans(x, samples_per_cluster, epochs, rtol, init_method, num_workers): + if samples_per_cluster > 1: + km_clusters = x.shape[0] // samples_per_cluster + logging.info("kmeans with num_clusters=%d", km_clusters) + kmeans = KMeans( + num_clusters=km_clusters, + rtol=rtol, + epochs=epochs, + init_method=init_method, + num_workers=num_workers, + ) + kmeans.fit(x) + idx_km, _ = kmeans(x) + x_km = kmeans.mu + del kmeans + else: + idx_km = None + x_km = x + + return x_km, idx_km + + +def get_gmm_post(x, y): + logging.info("computing cluster posteriors with gmm") + num_comp = np.max(y) + 1 + gmm = DiagGMM(num_comp=num_comp, x_dim=x.shape[1], min_N=1) + u_dim = gmm.compute_suff_stats(x[:1]).shape[1] + N = np.zeros((num_comp,), dtype=float) + 1e-5 + u_x = np.zeros((num_comp, u_dim), dtype=float) + + for c in range(num_comp): + mask = y == c + N_c = np.sum(mask) + if N_c == 0: + continue + + N[c] = N_c + u_x_c = gmm.compute_suff_stats(x[mask]) + u_x[c] = np.sum(u_x_c, axis=0) + + gmm.Mstep(N, u_x) + p = gmm.compute_pz(x, mode="std") + p_max = p[np.arange(x.shape[0]), y] + zz = p_max < 0.5 + print(np.mean(p[zz]), np.max(p[zz]), p_max[zz]) + p_2nd = np.sort(p, axis=1, kind="heapsort")[:, -2] + return p_max, p_2nd + + +def cos_ahc( + segments_file, + feats_file, + output_file, + lnorm, + pca, + linkage_method, + stop_criterion, + num_clusters, + threshold, + ahc_precision, + pre_kmeans, + num_workers, + filter_by_gmm_post, +): + segments, x = load_data(segments_file, feats_file) + if lnorm: + x = LNorm()(x) + + x = do_pca(x, pca) + x_km, idx_km = do_kmeans(x, num_workers=num_workers, **pre_kmeans) + + logging.info("compute affinity matrix") + if ahc_precision == "single": + x_lowprec = x_km.astype(np.float32) + elif ahc_precision == "half": + x_lowprec = x_km.astype(np.float16) + else: + x_lowprec = x_km + + scores = cosine_scoring(x_lowprec, x_lowprec) + + logging.info("running AHC") + ahc = AHC(method=linkage_method) + ahc.fit(scores) + if stop_criterion == "threshold": + y = ahc.get_flat_clusters_from_thr(threshold) + else: + y = ahc.get_flat_clusters_from_num_clusters(num_clusters) + + del ahc + if idx_km is not None: + y = y[idx_km] + del x_km + + p_max, p_2nd = get_gmm_post(x, y) + segments["cluster"] = y + segments["post_cluster"] = p_max + segments["post_cluster_2nd"] = p_2nd + if filter_by_gmm_post > 0: + idx = segments["post_cluster"] > filter_by_gmm_post + segments = 
+def make_cos_ahc_parser():
+    parser = ArgumentParser()
+    parser.add_argument("--cfg", action=ActionConfigFile)
+    add_common_args(parser)
+    parser.add_argument("--lnorm", default=False, action=ActionYesNo)
+    PCA.add_class_args(parser, prefix="pca")
+    parser.add_argument(
+        "--linkage-method",
+        default="average",
+        choices=["single", "complete", "average", "weighted", "ward"],
+        help="linkage method",
+    )
+    parser.add_argument(
+        "--stop-criterion",
+        default="threshold",
+        choices=["threshold", "num_clusters"],
+        help="stopping criterion",
+    )
+    parser.add_argument(
+        "--num-clusters", default=None, type=int, help="number of AHC clusters"
+    )
+    parser.add_argument("--threshold", default=0, type=float, help="stopping threshold")
+    parser.add_argument(
+        "--ahc-precision", default="single", choices=["half", "single", "double"]
+    )
+    parser.add_argument(
+        "--pre_kmeans.samples-per-cluster",
+        default=1,
+        type=int,
+        help="first k-means is done to reduce the computing cost of AHC",
+    )
+    parser.add_argument(
+        "--pre_kmeans.init_method",
+        default=KMeansInitMethod.max_dist,
+        choices=KMeansInitMethod.choices(),
+    )
+    parser.add_argument("--pre_kmeans.epochs", default=100, type=int)
+    parser.add_argument("--pre_kmeans.rtol", default=0.001, type=float)
+    parser.add_argument("--num_workers", default=1, type=int)
+    return parser
+
+
+def compute_sc_affinity(x, aff_func, gauss_sigma, aff_thr, precision):
+    if precision == "single":
+        x = x.astype(np.float32)
+    elif precision == "half":
+        x = x.astype(np.float16)
+
+    scores = cosine_scoring(x, x)
+    if aff_func == "gauss_cos":
+        assert gauss_sigma > 0
+        d2 = 1 - scores
+        scores = np.exp(-d2 / gauss_sigma)
+
+    assert aff_thr < 1
+    scores[scores < aff_thr] = 0
+    num_nodes = scores.shape[0]
+    scores.flat[:: num_nodes + 1] = 0
+    aff_size = num_nodes**2
+    num_edges = np.sum(scores > 0)
+    r = aff_size / num_edges
+    logging.info("num_nodes^2=%d, num_edges=%d r=%f", aff_size, num_edges, r)
+    if r > 4:
+        scores = sparse.csr_matrix(scores)
+    return scores
+
+
+def spectral_clustering(
+    segments_file,
+    feats_file,
+    output_file,
+    lnorm,
+    pca,
+    pre_kmeans,
+    affinity,
+    spectral_clustering,
+    filter_by_gmm_post,
+):
+    segments, x = load_data(segments_file, feats_file)
+    if lnorm:
+        x = LNorm()(x)
+
+    x = do_pca(x, pca)
+    x_km, idx_km = do_kmeans(x, **pre_kmeans)
+    A = compute_sc_affinity(x_km, **affinity)
+    sc = SpectralClustering(**spectral_clustering)
+    y, num_clusters, eigengap_stats = sc.fit(A)
+    if idx_km is not None:
+        y = y[idx_km]
+        del x_km
+
+    segments["cluster"] = y
+    if num_clusters > 1:
+        p_max, p_2nd = get_gmm_post(x, y)
+        segments["post_cluster"] = p_max
+        segments["post_cluster_2nd"] = p_2nd
+
+        if filter_by_gmm_post > 0:
+            idx = segments["post_cluster"] > filter_by_gmm_post
+            segments = SegmentSet(segments.loc[idx])
+
+    segments.save(output_file)
+    output_file = Path(output_file)
+    fig_file = output_file.with_stem(output_file.stem + "_eigengap").with_suffix(".png")
+    sc.plot_eigengap_stats(eigengap_stats, num_clusters, fig_file)
+
+    df_eig = pd.DataFrame(
+        {k: eigengap_stats[k] for k in ["eig_vals", "eigengap", "d_eig_vals"]}
+    )
+    df_eig["num_clusters"] = np.arange(1, len(df_eig) + 1)
+    eig_file = fig_file.with_suffix(".csv")
+    df_eig.to_csv(eig_file, index=False)
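compute_sc_affinity switches the thresholded affinity to a CSR matrix when the graph is sparse enough (more than three quarters of the entries zero, i.e. r > 4), which cuts memory and lets a sparse eigensolver handle the Laplacian. A toy illustration of the density test, with a synthetic matrix and an illustrative threshold:

import numpy as np
from scipy import sparse

rng = np.random.default_rng(0)
A = rng.random((1000, 1000))
A[A < 0.9] = 0.0                   # keep roughly 10% of the edges

density = np.sum(A > 0) / A.size   # ~0.1, so r = 1/density ~ 10 > 4
A_sp = sparse.csr_matrix(A)        # stores only the non-zeros
print(density, A_sp.nnz)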
+def make_spectral_clustering_parser():
+    parser = ArgumentParser()
+    parser.add_argument("--cfg", action=ActionConfigFile)
+    add_common_args(parser)
+    parser.add_argument("--lnorm", default=False, action=ActionYesNo)
+    PCA.add_class_args(parser, prefix="pca")
+    parser.add_argument(
+        "--pre_kmeans.samples-per-cluster",
+        default=1,
+        type=int,
+        help="first k-means is done to reduce the computing cost of AHC",
+    )
+    parser.add_argument(
+        "--pre_kmeans.init_method",
+        default=KMeansInitMethod.max_dist,
+        choices=KMeansInitMethod.choices(),
+    )
+    parser.add_argument("--pre_kmeans.epochs", default=100, type=int)
+    parser.add_argument("--pre_kmeans.rtol", default=0.001, type=float)
+    parser.add_argument("--pre_kmeans.num_workers", default=1, type=int)
+    parser.add_argument(
+        "--affinity.aff_func", default="cos", choices=["cos", "gauss_cos"]
+    )
+    parser.add_argument(
+        "--affinity.gauss-sigma",
+        default=1,
+        type=float,
+        help="std. dev. of gauss function",
+    )
+    parser.add_argument(
+        "--affinity.aff-thr",
+        default=0,
+        type=float,
+        help="values under this are set to 0",
+    )
+    parser.add_argument(
+        "--affinity.precision", default="single", choices=["half", "single", "double"]
+    )
+    SpectralClustering.add_class_args(parser, prefix="spectral_clustering")
+
+    return parser
+
+
+def main():
+    parser = ArgumentParser(
+        description="Cluster embeddings into classes, usually speakers"
+    )
+    parser.add_argument("--cfg", action=ActionConfigFile)
+
+    subcommands = parser.add_subcommands()
+    for subcommand in subcommand_list:
+        parser_func = f"make_{subcommand}_parser"
+        subparser = globals()[parser_func]()
+        subcommands.add_subcommand(subcommand, subparser)
+
+    args = parser.parse_args()
+    subcommand = args.subcommand
+    kwargs = namespace_to_dict(args)[args.subcommand]
+    config_logger(kwargs["verbose"])
+    del kwargs["verbose"]
+    del kwargs["cfg"]
+    globals()[subcommand](**kwargs)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hyperion/bin/eval_plda_backend.py b/hyperion/bin/eval_plda_backend.py
new file mode 100755
index 00000000..2058b2cb
--- /dev/null
+++ b/hyperion/bin/eval_plda_backend.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python
+"""
+ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+
+"""
+import logging
+import time
+from pathlib import Path
+
+import numpy as np
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ArgumentParser,
+    namespace_to_dict,
+)
+
+from hyperion.hyp_defs import config_logger
+from hyperion.io import RandomAccessDataReaderFactory as DRF
+from hyperion.np import NPModel
+from hyperion.np.pdfs import PLDAFactory, PLDALLRNvsMMethod
+from hyperion.np.score_norm import AdaptSNorm
+from hyperion.np.transforms import LNorm, TransformList
+from hyperion.utils import EnrollmentMap, SegmentSet, TrialKey, TrialNdx, TrialScores
+from hyperion.utils.math_funcs import cosine_scoring
+
+
+def load_trial_data(
+    enroll_map_file,
+    ndx_file,
+    enroll_feats_file,
+    feats_file,
+    enroll_part_idx,
+    num_enroll_parts,
+    test_part_idx,
+    num_test_parts,
+):
+    test_feats_reader = DRF.create(feats_file)
+    if enroll_feats_file is not None and enroll_feats_file != feats_file:
+        enroll_feats_reader = DRF.create(enroll_feats_file)
+    else:
+        enroll_feats_reader = test_feats_reader
+
+    enroll_map = EnrollmentMap.load(enroll_map_file)
+    try:
+        ndx = TrialNdx.load(ndx_file)
+    except:
+        ndx = TrialKey.load(ndx_file).to_ndx()
+
+    if num_enroll_parts > 1 or num_test_parts > 1:
+        ndx = ndx.split(
+            enroll_part_idx, num_enroll_parts, test_part_idx, num_test_parts
+        )
+
+    enroll_map = enroll_map.filter(items=ndx.model_set)
+    x_e = enroll_feats_reader.read(enroll_map["segmentid"], squeeze=True)
+    x_t = test_feats_reader.read(ndx.seg_set, squeeze=True)
+    return enroll_map, ndx, x_e, x_t
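main() above resolves both the subparser factory and the handler function by name through globals(), so adding a subcommand only requires adding its name to subcommand_list. A self-contained toy of the same jsonargparse pattern, with hypothetical names:

from jsonargparse import ArgumentParser, namespace_to_dict

def make_foo_parser():
    parser = ArgumentParser()
    parser.add_argument("--x", type=int, default=1)
    return parser

def foo(x):
    print("running foo with x =", x)

parser = ArgumentParser()
subcommands = parser.add_subcommands()
subcommands.add_subcommand("foo", make_foo_parser())

args = parser.parse_args(["foo", "--x", "3"])
kwargs = namespace_to_dict(args)[args.subcommand]
globals()[args.subcommand](**kwargs)  # dispatch by name, as in main()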
+
+
+def load_cohort_data(segments_file, feats_file):
+    segments = SegmentSet.load(segments_file)
+    feats_reader = DRF.create(feats_file)
+    x = feats_reader.read(segments["id"], squeeze=True)
+    return segments, x
+
+
+def eval_backend(
+    enroll_map_file,
+    ndx_file,
+    enroll_feats_file,
+    feats_file,
+    preproc_file,
+    plda_file,
+    llr_method,
+    score_file,
+    enroll_part_idx,
+    num_enroll_parts,
+    test_part_idx,
+    num_test_parts,
+    cohort_segments_file,
+    cohort_feats_file,
+    cohort_nbest,
+    avg_cohort_by,
+):
+    logging.info("loading data")
+    enroll_map, ndx, x_e, x_t = load_trial_data(
+        enroll_map_file,
+        ndx_file,
+        enroll_feats_file,
+        feats_file,
+        enroll_part_idx,
+        num_enroll_parts,
+        test_part_idx,
+        num_test_parts,
+    )
+    enroll_set, enroll_ids = np.unique(enroll_map["id"], return_inverse=True)
+    # Nvs1 scoring is needed when some model enrolls with more than one segment
+    is_Nvs1 = len(enroll_set) < len(enroll_ids)
+
+    t1 = time.time()
+
+    if preproc_file is not None:
+        logging.info("Loading Preprocessor")
+        preprocessor = TransformList.load(preproc_file)
+        x_e = preprocessor(x_e)
+        x_t = preprocessor(x_t)
+        if llr_method == PLDALLRNvsMMethod.vavg and isinstance(
+            preprocessor.transforms[-1], LNorm
+        ):
+            llr_method = PLDALLRNvsMMethod.lnorm_vavg
+
+    logging.info("Loading PLDA model")
+    plda_model = NPModel.auto_load(plda_file)
+    logging.info("computing score")
+    if is_Nvs1:
+        scores = plda_model.llr_Nvs1(x_e, x_t, ids1=enroll_ids, method=llr_method)
+    else:
+        scores = plda_model.llr_1vs1(x_e, x_t)
+
+    dt = time.time() - t1
+    num_trials = scores.shape[0] * scores.shape[1]
+    logging.info(
+        "scoring elapsed time: %.2f s. elapsed time per trial: %.2f ms.",
+        dt,
+        dt / num_trials * 1000,
+    )
+
+    if cohort_segments_file is not None:
+        t1 = time.time()
+        cohort_segments, x_coh = load_cohort_data(
+            cohort_segments_file, cohort_feats_file
+        )
+        if preproc_file is not None:
+            x_coh = preprocessor(x_coh)
+
+        if avg_cohort_by is not None:
+            cohort_class = cohort_segments[avg_cohort_by]
+            _, cohort_ids = np.unique(cohort_class, return_inverse=True)
+        else:
+            cohort_ids = None
+
+        logging.info("computing enroll vs cohort")
+        scores_enr_coh = plda_model.llr_NvsM(
+            x_e, x_coh, ids1=enroll_ids, ids2=cohort_ids, method=llr_method
+        )
+        logging.info("computing cohort vs test")
+        scores_coh_test = plda_model.llr_Nvs1(
+            x_coh, x_t, ids1=cohort_ids, method=llr_method
+        )
+        snorm = AdaptSNorm(cohort_nbest)
+        scores = snorm(scores, scores_coh_test, scores_enr_coh)
+        dt = time.time() - t1
+        logging.info(
+            "s-norm elapsed time: %.2f s. elapsed time per trial: %.2f ms.",
+            dt,
+            dt / num_trials * 1000,
+        )
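AdaptSNorm implements adaptive s-norm: each trial score is z-normalized against the statistics of its nbest best-matching cohort scores on the enroll side and on the test side, then the two normalized scores are averaged. The sketch below is one common top-nbest variant written from scratch for illustration; hyperion's AdaptSNorm may select the cohort subset differently:

import numpy as np

def adapt_snorm(scores, scores_coh_test, scores_enr_coh, nbest=100, eps=1e-8):
    # scores: (n_enroll, n_test), scores_enr_coh: (n_enroll, n_coh),
    # scores_coh_test: (n_coh, n_test)
    coh_e = np.sort(scores_enr_coh, axis=1)[:, -nbest:]   # top cohort per model
    mu_e = coh_e.mean(axis=1, keepdims=True)
    s_e = coh_e.std(axis=1, keepdims=True) + eps
    coh_t = np.sort(scores_coh_test, axis=0)[-nbest:, :]  # top cohort per test seg
    mu_t = coh_t.mean(axis=0, keepdims=True)
    s_t = coh_t.std(axis=0, keepdims=True) + eps
    return 0.5 * ((scores - mu_e) / s_e + (scores - mu_t) / s_t)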
+
+    if num_enroll_parts > 1 or num_test_parts > 1:
+        score_file = Path(score_file)
+        new_suffix = f".{enroll_part_idx}.{test_part_idx}{score_file.suffix}"
+        score_file = score_file.with_suffix(new_suffix)
+
+    logging.info("saving scores to %s", score_file)
+    # sort score rows to match the ndx model_set order
+    sort_idx = [np.nonzero(enroll_set == e)[0][0] for e in ndx.model_set]
+    scores = scores[sort_idx]
+    scores = TrialScores(ndx.model_set, ndx.seg_set, scores, ndx.trial_mask)
+    scores.save(score_file)
+
+
+def main():
+    parser = ArgumentParser(description="Eval PLDA LLR with optional AS-Norm")
+
+    parser.add_argument("--enroll-feats-file", default=None)
+    parser.add_argument("--feats-file", required=True)
+    parser.add_argument("--ndx-file", required=True)
+    parser.add_argument("--enroll-map-file", required=True)
+    parser.add_argument("--preproc-file", default=None)
+    parser.add_argument("--plda-file", required=True)
+    parser.add_argument(
+        "--llr-method",
+        default=PLDALLRNvsMMethod.vavg,
+        choices=PLDALLRNvsMMethod.choices(),
+    )
+    parser.add_argument("--cohort-segments-file", default=None)
+    parser.add_argument("--cohort-feats-file", default=None)
+    parser.add_argument("--cohort-nbest", type=int, default=1000)
+    parser.add_argument(
+        "--avg-cohort-by",
+        default=None,
+        help="segments file column to average vectors from the same class",
+    )
+    parser.add_argument("--score-file", required=True)
+    parser.add_argument(
+        "--enroll-part-idx", default=1, type=int, help="enroll part index"
+    )
+    parser.add_argument(
+        "--num-enroll-parts",
+        default=1,
+        type=int,
+        help="""number of parts in which we divide the enroll
+                list to run evaluation in parallel""",
+    )
+    parser.add_argument("--test-part-idx", default=1, type=int, help="test part index")
+    parser.add_argument(
+        "--num-test-parts",
+        default=1,
+        type=int,
+        help="""number of parts in which we divide the test list
+                to run evaluation in parallel""",
+    )
+
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int
+    )
+
+    args = parser.parse_args()
+    config_logger(args.verbose)
+    del args.verbose
+    logging.debug(args)
+
+    eval_backend(**namespace_to_dict(args))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hyperion/bin/train_plda.py b/hyperion/bin/train_plda.py
new file mode 100644
index 00000000..b33afa31
--- /dev/null
+++ b/hyperion/bin/train_plda.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+"""
+ Copyright 2024 Johns Hopkins University (Author: Jesus Villalba)
+ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+"""
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from jsonargparse import (
+    ActionConfigFile,
+    ActionParser,
+    ActionYesNo,
+    ArgumentParser,
+    namespace_to_dict,
+)
+
+from hyperion.hyp_defs import config_logger
+from hyperion.io import RandomAccessDataReaderFactory as DRF
+from hyperion.np.pdfs import PLDAFactory
+from hyperion.np.transforms import LDA, PCA, CentWhiten, LNorm, TransformList
+from hyperion.utils import SegmentSet
+
+
+def load_data(segments_file, feats_file, class_name):
+    logging.info("loading data")
+    segments = SegmentSet.load(segments_file)
+    reader = DRF.create(feats_file)
+    x = reader.read(segments["id"], squeeze=True)
+    _, y = np.unique(segments[class_name], return_inverse=True)
+    return segments, x, y
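load_data maps the string speaker labels in the segments table to the integer class ids that PLDA training expects, via np.unique(..., return_inverse=True). For example:

import numpy as np

spk = np.array(["spk2", "spk0", "spk2", "spk1"])
classes, y = np.unique(spk, return_inverse=True)
print(classes)  # ['spk0' 'spk1' 'spk2']
print(y)        # [2 0 2 1] -> integer class ids aligned with the segments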
logging.info("computing pca pca_var_r=%f", pca_var_r) + pca = None + pca_lnorm = None + if pca_var_r < 1: + if pca_lnorm: + logging.info("LNorm before PCA") + pca_lnorm = LNorm(name="pca_lnorm") + x = pca_lnorm(x) + + pca = PCA(**pca_args) + pca.fit(x) + x = pca(x) + logging.info("pca-dim=%d", x.shape[1]) + + return x, pca_lnorm, pca + + +def train_plda( + segments_file, + feats_file, + class_name, + preproc_file, + plda_file, + pca, + lda, + plda, + pca_lnorm, + do_lda, + lda_lnorm, + plda_lnorm, + plda_center, + plda_whiten, +): + segments, x, y = load_data(segments_file, feats_file, class_name) + transform_list = [] + + x, pca_lnorm, pca_model = train_pca(x, pca_lnorm, pca) + if pca_lnorm is not None: + transform_list.append(pca_lnorm) + + if pca_model is not None: + transform_list.append(pca_model) + + if do_lda and x.shape[1] > lda["lda_dim"]: + if lda_lnorm: + logging.info("LNorm before LDA") + t = LNorm(name="lda_lnorm") + x = t(x) + transform_list.append(t) + + logging.info("Training LDA") + lda_model = LDA(**lda) + lda_model.fit(x, y) + x = lda_model(x) + transform_list.append(lda_model) + + if plda_center or plda_whiten: + if plda_lnorm: + t = LNorm(update_mu=plda_center, update_T=plda_whiten, name="plda_lnorm") + else: + t = CentWhiten(update_mu=plda_center, update_T=plda_whiten, name="plda_cw") + + logging.info("Training Center/Whiten/LNorm") + t.fit(x) + logging.info("Center/Whiten/LNorm before PLDA") + x = t(x) + transform_list.append(t) + elif plda_lnorm: + logging.info("LNorm before PLDA") + t = LNorm(name="plda_lnorm") + x = t(x) + transform_list.append(t) + + logging.info("Training PLDA") + plda["y_dim"] = min(x.shape[1], plda["y_dim"]) + plda = PLDAFactory.create(**plda) + elbo, elbo_norm = plda.fit(x, y) + + logging.info("Saving Models") + if len(transform_list) > 0: + transform_list = TransformList(transform_list) + transform_list.save(preproc_file) + + plda.save(plda_file) + loss_file = Path(plda_file).with_suffix(".csv") + loss_df = pd.DataFrame( + {"epoch": np.arange(1, len(elbo) + 1), "elbo": elbo, "elbo_norm": elbo_norm} + ) + loss_df.to_csv(loss_file, index=False) + + +def main(): + parser = ArgumentParser(description="Trains PLDA model and embedding preprocessor") + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--feats-file", required=True) + parser.add_argument("--segments-file", required=True) + parser.add_argument("--class-name", default="speaker") + parser.add_argument("--preproc-file", required=True) + parser.add_argument("--plda-file", required=True) + PCA.add_class_args(parser, prefix="pca") + LDA.add_class_args(parser, prefix="lda") + PLDAFactory.add_class_args(parser, prefix="plda") + parser.add_argument("--pca-lnorm", default=False, action=ActionYesNo) + parser.add_argument("--lda-lnorm", default=False, action=ActionYesNo) + parser.add_argument("--do-lda", default=False, action=ActionYesNo) + parser.add_argument("--plda-lnorm", default=True, action=ActionYesNo) + parser.add_argument("--plda-center", default=True, action=ActionYesNo) + parser.add_argument("--plda-whiten", default=True, action=ActionYesNo) + + parser.add_argument( + "-v", "--verbose", dest="verbose", default=1, choices=[0, 1, 2, 3], type=int + ) + args = parser.parse_args() + config_logger(args.verbose) + logging.debug(args) + del args["verbose"] + del args["cfg"] + train_plda(**namespace_to_dict(args)) + + +if __name__ == "__main__": + main() diff --git a/hyperion/helpers/plda_factory.py b/hyperion/helpers/plda_factory.py index 16cf01c4..0b90b334 100644 
--- a/hyperion/helpers/plda_factory.py +++ b/hyperion/helpers/plda_factory.py @@ -3,16 +3,30 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from enum import Enum + import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..np.pdfs.plda import FRPLDA, PLDA, SPLDA +from ..utils.misc import filter_func_args + + +class PLDAType(str, Enum): + frplda = "frplda" + splda = "splda" + plda = "plda" + + @staticmethod + def choices(): + return [PLDAType.frplda, PLDAType.splda, PLDAType.plda] class PLDAFactory(object): """Class to create PLDA objects.""" @staticmethod - def create_plda( + def create( plda_type, y_dim=None, z_dim=None, @@ -27,8 +41,7 @@ def create_plda( name="plda", **kwargs ): - - if plda_type == "frplda": + if plda_type == PLDAType.frplda: return FRPLDA( fullcov_W=fullcov_W, update_mu=update_mu, @@ -37,7 +50,7 @@ def create_plda( name=name, **kwargs ) - if plda_type == "splda": + if plda_type == PLDAType.splda: return SPLDA( y_dim=y_dim, fullcov_W=fullcov_W, @@ -48,7 +61,7 @@ def create_plda( **kwargs ) - if plda_type == "plda": + if plda_type == PLDAType.plda: return PLDA( y_dim=y_dim, z_dim=z_dim, @@ -71,7 +84,9 @@ def load_plda(plda_type, model_file): return PLDA.load(model_file) @staticmethod - def filter_train_args(prefix=None, **kwargs): + def filter_args(**kwargs): + return filter_func_args(PLDAFactory.create, kwargs) + valid_args = ( "plda_type", "y_dim", @@ -109,7 +124,7 @@ def filter_train_args(prefix=None, **kwargs): "update_D", ) - for a, b in zip(ne_args1, neg_args2): + for a, b in zip(neg_args1, neg_args2): d[b] = not d[a] del d[a] @@ -117,63 +132,62 @@ def filter_train_args(prefix=None, **kwargs): @staticmethod def add_class_args(parser, prefix=None): - if prefix is None: - p1 = "--" - else: - p1 = "--" + prefix + "." + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") parser.add_argument( - p1 + "plda-type", - default="splda", - choices=["frplda", "splda", "plda"], + "--plda-type", + default=PLDAType.splda, + choices=PLDAType.choices(), help="PLDA type", ) parser.add_argument( - p1 + "y-dim", type=int, default=150, help="num. of eigenvoices" + "--y-dim", type=int, default=150, help="num. of eigenvoices" ) parser.add_argument( - p1 + "z-dim", type=int, default=400, help="num. of eigenchannels" + "--z-dim", type=int, default=400, help="num. 
of eigenchannels" ) parser.add_argument( - p1 + "diag-W", - default=False, - action="store_false", - help="use diagonal covariance W", + "--fullcov-W", + default=True, + action=ActionYesNo, + help="use full covariance W", ) parser.add_argument( - p1 + "no-update-mu", - default=False, - action="store_true", + "--update-mu", + default=True, + action=ActionYesNo, help="not update mu", ) parser.add_argument( - p1 + "no-update-V", default=False, action="store_true", help="not update V" + "--update-V", default=True, action=ActionYesNo, help="update V" ) parser.add_argument( - p1 + "no-update-U", default=False, action="store_true", help="not update U" + "--update-U", default=True, action=ActionYesNo, help="update U" ) parser.add_argument( - p1 + "no-update-B", default=False, action="store_true", help="not update B" + "--update-B", default=True, action=ActionYesNo, help="update B" ) parser.add_argument( - p1 + "no-update-W", default=False, action="store_true", help="not update W" + "--update-W", default=True, action=ActionYesNo, help="update W" ) parser.add_argument( - p1 + "no-update-D", default=False, action="store_true", help="not update D" + "--update-D", default=True, action=ActionYesNo, help="update D" ) parser.add_argument( - p1 + "floor-iD", + "--floor-iD", type=float, default=1e-5, help="floor for inverse of D matrix", ) - parser.add_argument(p1 + "epochs", type=int, default=40, help="num. of epochs") + parser.add_argument("--epochs", type=int, default=40, help="num. of epochs") parser.add_argument( - p1 + "ml-md", + "--ml-md", default="ml+md", choices=["ml+md", "ml", "md"], help=("optimization type"), @@ -187,7 +201,12 @@ def add_class_args(parser, prefix=None): help=("epochs in which we do MD, if None we do it in all the epochs"), ) - parser.add_argument(p1 + "name", default="plda", help="model name") + parser.add_argument("--name", default="plda", help="model name") + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + ) @staticmethod def filter_eval_args(prefix=None, **kwargs): diff --git a/hyperion/np/clustering/__init__.py b/hyperion/np/clustering/__init__.py index 0841d47e..80cfaa2c 100644 --- a/hyperion/np/clustering/__init__.py +++ b/hyperion/np/clustering/__init__.py @@ -4,4 +4,5 @@ """ from .ahc import AHC -from .kmeans import KMeans +from .kmeans import KMeans, KMeansInitMethod +from .spectral_clustering import SpectralClustering diff --git a/hyperion/np/clustering/kmeans.py b/hyperion/np/clustering/kmeans.py index abb88463..82d257d1 100644 --- a/hyperion/np/clustering/kmeans.py +++ b/hyperion/np/clustering/kmeans.py @@ -5,6 +5,8 @@ import logging import sys +from concurrent.futures import ThreadPoolExecutor, as_completed +from enum import Enum import h5py import numpy as np @@ -13,6 +15,15 @@ from ..np_model import NPModel +class KMeansInitMethod(str, Enum): + max_dist = "max_dist" + random = "random" + + @staticmethod + def choices(): + return [KMeansInitMethod.max_dist, KMeansInitMethod.random] + + class KMeans(NPModel): """K-Means clustering class. @@ -22,13 +33,30 @@ class KMeans(NPModel): rtol: minimum delta in loss function used as stopping criterion. 
""" - def __init__(self, num_clusters, mu=None, rtol=0.001, **kwargs): - super(KMeans, self).__init__(**kwargs) + def __init__( + self, + num_clusters, + mu=None, + rtol=0.001, + epochs=100, + init_method=KMeansInitMethod.max_dist, + num_workers=1, + verbose=True, + rng_seed=11235813, + **kwargs + ): + super().__init__(**kwargs) self.num_clusters = num_clusters self.mu = mu self.rtol = rtol + self.epochs = epochs + self.verbose = verbose + self.num_workers = num_workers + self.init_method = init_method + if self.init_method == KMeansInitMethod.random: + self.rng = np.random.default_rng(seed=rng_seed) - def fit(self, x, epochs=100): + def fit(self, x): """Performs the clustering. Args: @@ -39,22 +67,59 @@ def fit(self, x, epochs=100): loss: value of loss function (num_epochs,). cluster_index: clustering labels as int numpy array with shape=(num_samples,) """ - loss = np.zeros((epochs,), dtype=float_cpu()) - self.mu = self._choose_seeds(x) - cluster_index, err2 = self.predict(x) - for epoch in range(epochs): - self.mu = self._compute_centroids(x, cluster_index) - cluster_index, err2 = self.predict(x) + loss = np.zeros((self.epochs,), dtype=float_cpu()) + if self.init_method == KMeansInitMethod.max_dist: + if self.num_workers == 1: + self.mu = self._choose_seeds_max_dist(x) + else: + self.mu = self._choose_seeds_max_dist_multithread(x) + else: + self.mu = self._choose_seeds_random(x) + + cluster_index, err2 = self(x) + for epoch in range(self.epochs): + if self.num_workers == 1: + self.mu = self._compute_centroids(x, cluster_index) + else: + self.mu = self._compute_centroids_multithread(x, cluster_index) + cluster_index, err2 = self(x) loss[epoch] = np.mean(err2) if epoch > 0: - delta = np.abs(loss[epoch - 1] - loss[epoch]) / loss[epoch - 1] + delta = np.abs(loss[epoch - 1] - loss[epoch]) / ( + loss[epoch - 1] + 1e-10 + ) + if self.verbose: + logging.info( + "epoch: %d loss: %f rdelta: %f", epoch, loss[epoch], delta + ) if delta < self.rtol: loss = loss[: epoch + 1] break + else: + if self.verbose: + logging.info("epoch: %d loss: %f", epoch, loss[epoch]) return loss, cluster_index - def _choose_seeds(self, x): + def _choose_seeds_random(self, x): + """Chooses the initial seeds for the clustering randomly. + + Args: + x: input data (num_samples, feat_dim). + + Returns: + Initial centers (num_clusters, feat_dim) + """ + if self.verbose: + logging.info("choosing seeds") + + mu = self.rng.choice(x, size=(self.num_clusters,), replace=False, shuffle=False) + if self.verbose: + logging.info("%d seeds chosen", self.num_clusters) + + return mu + + def _choose_seeds_max_dist(self, x): """Chooses the initial seeds for the clustering. Args: @@ -63,6 +128,8 @@ def _choose_seeds(self, x): Returns: Initial centers (num_clusters, feat_dim) """ + if self.verbose: + logging.info("choosing seeds") mu = np.zeros((self.num_clusters, x.shape[-1]), dtype=float_cpu()) mu[0] = x[0] for i in range(1, self.num_clusters): @@ -73,6 +140,40 @@ def _choose_seeds(self, x): mu[i] = x[index] return mu + @staticmethod + def _compute_d2(x, mu): + return np.sum(np.square(x - mu), axis=-1) + + def _choose_seeds_max_dist_multithread(self, x): + """Chooses the initial seeds for the clustering. + + Args: + x: input data (num_samples, feat_dim). 
+ + Returns: + Initial centers (num_clusters, feat_dim) + """ + if self.verbose: + logging.info("choosing seeds") + + mu = np.zeros((self.num_clusters, x.shape[-1]), dtype=float_cpu()) + with ThreadPoolExecutor(max_workers=self.num_workers) as executor: + mu[0] = x[0] + for i in range(1, self.num_clusters): + d = np.zeros((x.shape[0],), dtype=float_cpu()) + + futures = { + executor.submit(KMeans._compute_d2, x, mu[j]): j for j in range(i) + } + for future in as_completed(futures): + d += future.result() + + index = np.argmax(d) + mu[i] = x[index] + if self.verbose and (i % 10 == 0 or i == self.num_clusters - 1): + logging.info("%d seeds chosen", i + 1) + return mu + def _compute_centroids(self, x, index): """Compute the centroids given cluster assigments. @@ -90,6 +191,38 @@ def _compute_centroids(self, x, index): mu[k] = np.mean(x[index == k], axis=0) return mu + @staticmethod + def _compute_centroid(x, index, k): + r = index == k + if np.sum(r) > 0: + return np.mean(x[index == k], axis=0) + else: + return None + + def _compute_centroids_multithread(self, x, index): + """Compute the centroids given cluster assigments. + + Args: + x: input data (num_samples, feat_dim) + index: cluster assignments as integers with shape=(num_samples,) + + Returns: + Cluster centroids (num_clusters, feat_dim) + """ + mu = np.zeros((self.num_clusters, x.shape[-1]), dtype=float_cpu()) + with ThreadPoolExecutor(max_workers=self.num_workers) as executor: + futures = { + executor.submit(KMeans._compute_centroid, x, index, k): k + for k in range(self.num_clusters) + } + for future in as_completed(futures): + k = futures[future] + mu_k = future.result() + if mu_k is not None: + mu[k] = mu_k + + return mu + def predict(self, x): """Compute the cluster labels for new data. @@ -106,3 +239,32 @@ def predict(self, x): index = np.argmin(err2, axis=-1) return index, err2[np.arange(x.shape[0]), index] + + def predict_multithread(self, x): + """Compute the cluster labels for new data. + + Args: + x: input data (num_samples, feat_dim) + + Returns: + Cluster assignments as integer array (num_samples,) + Square distance of each element to the center of its cluster. 
+ """ + err2 = np.zeros((x.shape[0], self.num_clusters), dtype=float_cpu()) + with ThreadPoolExecutor(max_workers=self.num_workers) as executor: + futures = { + executor.submit(KMeans._compute_d2, x, self.mu[k]): k + for k in range(self.num_clusters) + } + for future in as_completed(futures): + k = futures[future] + err2[:, k] = future.result() + + index = np.argmin(err2, axis=-1) + return index, err2[np.arange(x.shape[0]), index] + + def __call__(self, x): + if self.num_workers == 1: + return self.predict(x) + else: + return self.predict_multithread(x) diff --git a/hyperion/np/clustering/spectral_clustering.py b/hyperion/np/clustering/spectral_clustering.py new file mode 100644 index 00000000..ab2fad26 --- /dev/null +++ b/hyperion/np/clustering/spectral_clustering.py @@ -0,0 +1,312 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +from copy import copy +from enum import Enum +from typing import Any, Dict, Optional + +import h5py +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from scipy import sparse +from scipy.linalg import eigh +from scipy.sparse.csgraph import laplacian as csgraph_laplacian +from scipy.sparse.linalg import eigsh +from sklearn.metrics import completeness_score, homogeneity_score +from sklearn.preprocessing import normalize + +from ...hyp_defs import float_cpu +from ...utils import PathLike +from ..np_model import NPModel +from .kmeans import KMeans, KMeansInitMethod + + +class LaplacianType(str, Enum): + unnormalized = "unnormalized" + norm_sym = "norm_sym" + norm_rw = "norm_rw" + + @staticmethod + def choices(): + return [ + LaplacianType.unnormalized, + LaplacianType.norm_sym, + LaplacianType.norm_rw, + ] + + +class SpectralClusteringNumClassCriterion(str, Enum): + max_eigengap = "max_eigengap" + max_d_eig_vals = "max_d_eig_vals" + thr_eigengap = "thr_eigengap" + thr_d_eig_vals = "thr_d_eig_vals" + + @staticmethod + def choices(): + return [ + SpectralClusteringNumClassCriterion.max_eigengap, + SpectralClusteringNumClassCriterion.max_d_eig_vals, + SpectralClusteringNumClassCriterion.thr_eigengap, + SpectralClusteringNumClassCriterion.thr_d_eig_vals, + ] + + +class SpectralClustering(NPModel): + """Spectral Clustering class""" + + def __init__( + self, + laplacian: str = "norm_sym", + num_clusters: Optional[int] = None, + max_num_clusters: Optional[int] = None, + criterion: SpectralClusteringNumClassCriterion = SpectralClusteringNumClassCriterion.max_eigengap, + thr_eigengap: float = 1e-3, + kmeans_epochs: int = 100, + kmeans_init_method: KMeansInitMethod = KMeansInitMethod.max_dist, + num_workers: int = 1, + ): + self.laplacian = laplacian + self.num_clusters = num_clusters + self.max_num_clusters = max_num_clusters + self.criterion = criterion + self.kmeans_epochs = kmeans_epochs + self.thr_eigengap = thr_eigengap + self.kmeans_init_method = kmeans_init_method + self.num_workers = num_workers + + def spectral_embedding(self, x: np.ndarray): + num_nodes = x.shape[0] + if not sparse.issparse(x): + x.flat[:: num_nodes + 1] = 0 + r = num_nodes**2 / np.sum(x > 0) + if r > 4: + x = sparse.csr_matrix(x) + + D = None + if self.laplacian in LaplacianType.unnormalized: + L = csgraph_laplacian(x, normed=False) + elif self.laplacian == LaplacianType.norm_sym: + L = csgraph_laplacian(x, normed=True) + elif self.laplacian == LaplacianType.norm_rw: + L, dd = csgraph_laplacian(x, 
normed=False, return_diag=True) + if sparse.issparse(L): + D = sparse.diags(dd) + else: + D = np.diag(dd) + + max_num_clusters = num_nodes - 1 + if self.max_num_clusters is not None: + max_num_clusters = min(max_num_clusters, self.max_num_clusters) + if self.num_clusters is not None: + max_num_clusters = min(max_num_clusters, self.num_clusters) + + eig_vals, eig_vecs = eigsh(L, k=max_num_clusters, M=D, which="SM") + eig_vals = eig_vals[1:] + eig_vecs = eig_vecs[:, 1:] + return eig_vals, eig_vecs + + def spectral_embedding_0(self, x: np.ndarray): + num_nodes = x.shape[0] + x.flat[:: num_nodes + 1] = 0 + d = np.sum(x, axis=1) + D = None + if self.laplacian in LaplacianType.unnormalized: + L = np.diag(d) - x + elif self.laplacian == LaplacianType.norm_sym: + idsqrt = 1 / np.sqrt(d) + L = np.eye(num_nodes) - idsqrt[:, None] * x * idsqrt + elif self.laplacian == LaplacianType.norm_rw: + D = np.diag(d) + L = D - x + + max_num_clusters = num_nodes + if self.max_num_clusters is not None: + max_num_clusters = min(max_num_clusters, self.max_num_clusters) + if self.num_clusters is not None: + max_num_clusters = min(max_num_clusters, self.num_clusters) + + eig_vals, eig_vecs = eigh( + L, b=D, overwrite_a=True, subset_by_index=[1, max_num_clusters - 1] + ) + + return eig_vals, eig_vecs + + def compute_eigengap(self, eig_vals: np.ndarray): + eig_vals = np.concatenate(([0.0], eig_vals)) + eigengap = np.diff(np.concatenate(([0.0], eig_vals))) + filter = np.array([1 / 60, -3 / 20, 3 / 4, 0.0, -3 / 4, 3 / 20, -1 / 60]) + eig_vals_ext = np.concatenate((eig_vals, [eig_vals[-1]] * 3)) + d_eig_vals = np.convolve(eig_vals, filter)[3:-6] + k_max = np.argmax(eigengap) + gap_max = eigengap[k_max] + # k_relmax = [] + # gap_relmax = [] + # gap_norm_relmax = [] + # for k in range(len(eigengap)): + # if k == 0 and eigengap[k] > eigengap[k + 1]: + # k_relmax.append(k) + # gap_relmax.append(eigengap[k]) + # gap_norm_relmax.append(eigengap[k] / eigengap[k + 1]) + # elif k == len(eigengap) - 1 and eigengap[k] > eigengap[k - 1]: + # k_relmax.append(k) + # gap_relmax.append(eigengap[k]) + # gap_norm_relmax.append(eigengap[k] / eigengap[k - 1]) + # elif eigengap[k] > eigengap[k - 1] and eigengap[k] > eigengap[k + 1]: + # k_relmax.append(k) + # gap_relmax.append(eigengap[k]) + # gap_norm_relmax.append( + # 2 * eigengap[k] / (eigengap[k - 1] + eigengap[k + 1]) + # ) + + # idx = np.argmax(gap_norm_relmax) + # gap_norm_relmax_max = gap_norm_relmax[idx] + # k_relmax_max = k_relmax[idx] + eigengap_stats = { + "eig_vals": eig_vals, + "eigengap": eigengap, + "gap_max": gap_max, + "k_max": k_max, + # "gap_relmax": gap_relmax, + # "k_relmax": k_relmax, + # "gap_norm_relmax": gap_norm_relmax, + # "gap_norm_relmax_max": gap_norm_relmax_max, + # "k_relmax_max": k_relmax_max, + "d_eig_vals": d_eig_vals, + } + return eigengap_stats + + def predict_num_clusters(self, eigengap_stats: np.ndarray): + if self.num_clusters is not None: + num_clusters = self.num_clusters + + elif self.criterion == SpectralClusteringNumClassCriterion.max_eigengap: + num_clusters = eigengap_stats["k_max"] + 1 + elif self.criterion == SpectralClusteringNumClassCriterion.max_d_eig_vals: + num_clusters = np.argmax(eigengap_stats["d_eig_vals"]) + 1 + elif self.criterion == SpectralClusteringNumClassCriterion.thr_eigengap: + nz = (eigengap_stats["eigengap"] < self.thr_eigengap).nonzero()[0] + num_clusters = nz[nz > eigengap_stats["k_max"]][0] + 1 + elif self.criterion == SpectralClusteringNumClassCriterion.thr_d_eig_vals: + nz = (eigengap_stats["d_eig_vals"] < 
self.thr_eigengap).nonzero()[0] + num_clusters = nz[nz > eigengap_stats["k_max"]][0] + 1 + else: + raise ValueError(f"invalid num clusters criterion {self.criterion}") + return num_clusters + + def normalize_eigvecs(self, eig_vecs: np.ndarray): + if self.laplacian == LaplacianType.norm_sym: + return normalize(eig_vecs, axis=1) + else: + return eig_vecs + + def do_kmeans(self, x: np.ndarray, num_clusters: Optional[int] = None): + if num_clusters is None: + num_clusters = x.shape[1] + 1 + kmeans = KMeans( + num_clusters=num_clusters, + epochs=self.kmeans_epochs, + init_method=self.kmeans_init_method, + num_workers=self.num_workers, + ) + kmeans.fit(x) + y, _ = kmeans(x) + return y + + def fit(self, x: np.ndarray): + logging.info("compute spectral embeddings") + + eig_vals, eig_vecs = self.spectral_embedding(x) + if self.num_clusters is None: + logging.info("compute eigengap stats") + eigengap_stats = self.compute_eigengap(eig_vals) + else: + eigengap_stats = None + + logging.info("predicting number of clusters") + num_clusters = self.predict_num_clusters(eigengap_stats) + logging.info("predicted num_clusters=%d", num_clusters) + if num_clusters == 1: + return np.zeros((x.shape[0]), dtype=int), num_clusters, eigengap_stats + # minus one because we already removed the first eig vector + logging.info("normalizing embeddings") + eig_vecs = eig_vecs[:, : num_clusters - 1] + eig_vecs = self.normalize_eigvecs(eig_vecs) + logging.info("running k-means") + y = self.do_kmeans(eig_vecs, num_clusters) + return y, num_clusters, eigengap_stats + + def plot_eigengap_stats( + self, + eigengap_stats: Dict[str, Any], + num_clusters: int, + fig_file: Optional[PathLike] = None, + ): + fig, (ax0, ax1, ax2) = plt.subplots( + nrows=1, ncols=3, sharex=True, figsize=(12, 6) + ) + eig_vals = eigengap_stats["eig_vals"] + ax0.plot(np.arange(1, len(eig_vals) + 1), eig_vals, "b") + ax0.vlines( + num_clusters, ymin=np.min(eig_vals), ymax=np.max(eig_vals), colors="r" + ) + ax0.grid() + ax0.set_title("eigen_vals") + eigengap = eigengap_stats["eigengap"] + ax1.plot(np.arange(1, len(eigengap) + 1), eigengap, "b") + ax1.vlines( + num_clusters, ymin=np.min(eigengap), ymax=np.max(eigengap), colors="r" + ) + ax1.grid() + ax1.set_title("eigengap") + d_eig_vals = eigengap_stats["d_eig_vals"] + ax2.plot(np.arange(1, len(d_eig_vals) + 1), d_eig_vals, "b") + ax2.vlines( + num_clusters, ymin=np.min(d_eig_vals), ymax=np.max(d_eig_vals), colors="r" + ) + ax2.grid() + ax2.set_title("d_eigen_val") + if fig_file is not None: + fig.savefig(fig_file) + + @staticmethod + def add_class_args(parser, prefix=None): + """It adds the arguments corresponding to the class to jsonarparse. + Args: + parser: jsonargparse object + prefix: argument prefix. 
+ """ + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--laplacian", + default=LaplacianType.norm_sym, + choices=LaplacianType.choices(), + ) + parser.add_argument("--num-clusters", default=None, type=int) + parser.add_argument("--max-num-clusters", default=None, type=int) + parser.add_argument( + "--criterion", + default=SpectralClusteringNumClassCriterion.max_eigengap, + choices=SpectralClusteringNumClassCriterion.choices(), + ) + parser.add_argument("--thr-eigengap", default=1e-3, type=float) + parser.add_argument("--kmeans-epochs", default=100, type=int) + parser.add_argument( + "--kmeans-init-method", + default=KMeansInitMethod.max_dist, + choices=KMeansInitMethod.choices(), + ) + parser.add_argument("--num-workers", default=1, type=int) + + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + ) diff --git a/hyperion/np/np_model.py b/hyperion/np/np_model.py index aa635fc5..7b3b2e1c 100644 --- a/hyperion/np/np_model.py +++ b/hyperion/np/np_model.py @@ -10,6 +10,7 @@ import numpy as np from ..hyp_defs import float_cpu, float_save +from ..utils.misc import PathLike class NPModel(object): @@ -19,6 +20,12 @@ class NPModel(object): name: optional identifier for the model. """ + registry = {} + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + NPModel.registry[cls.__name__] = cls + def __init__(self, name=None, **kwargs): if name is None: name = self.__class__.__name__ @@ -227,3 +234,15 @@ def get_json_type(obj): def load_config_from_json(json_str): """Converts json string into dict.""" return json.loads(json_str) + + @staticmethod + def auto_load(file_path: PathLike, extra_objs: dict = {}): + class_name = NPModel.load_config(file_path)["class_name"] + if class_name in NPModel.registry: + class_obj = NPModel.registry[class_name] + elif class_name in extra_objs: + class_obj = extra_objs[class_name] + else: + raise Exception("unknown object with class_name=%s" % (class_name)) + + return class_obj.load(file_path) diff --git a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py index 7589243e..ecc7bad7 100644 --- a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py +++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py @@ -159,7 +159,7 @@ def norm_suff_stats(self, N, u_x, return_order2=False): F, S = self.unstack_suff_stats(u_x) F_norm = self.cholLambda * (F - N[:, None] * self.mu) if return_order2: - S = S - 2 * self.mu * F + N * self.mu ** 2 + S = S - 2 * self.mu * F + N * self.mu**2 S *= self.Lambda return N, self.stack_suff_stats(F_norm, S) @@ -179,9 +179,11 @@ def Mstep(self, N, u_x): self.mu = F / N[:, None] if self.update_Lambda: - S = S / N[:, None] - self.mu ** 2 + S = S / N[:, None] - self.mu**2 S_floor = self.var_floor * np.mean(S[N > self.min_N], axis=0) + S_floor = np.maximum(S_floor, 1e-10) S = np.maximum(S, S_floor) + print(np.min(S)) self.Lambda = 1 / S self._Sigma = S self._cholLambda = None @@ -212,7 +214,7 @@ def split_comp(self, K=2): num_comp = self.num_comp * K pi = np.repeat(self.pi, K) / K - Lambda = np.repeat(self.Lambda, K, axis=0) * (K ** 2) + Lambda = np.repeat(self.Lambda, K, axis=0) * (K**2) mu = np.repeat(self.mu, K, axis=0) if K == 2: diff --git a/hyperion/np/pdfs/plda/__init__.py b/hyperion/np/pdfs/plda/__init__.py index 13bc2d81..5961b71f 100644 --- a/hyperion/np/pdfs/plda/__init__.py +++ b/hyperion/np/pdfs/plda/__init__.py @@ -4,7 +4,8 @@ """ +from .factory import PLDAFactory, PLDAType from 
.frplda import FRPLDA from .plda import PLDA -from .plda_base import PLDABase +from .plda_base import PLDABase, PLDALLRNvsMMethod from .splda import SPLDA diff --git a/hyperion/np/pdfs/plda/factory.py b/hyperion/np/pdfs/plda/factory.py new file mode 100644 index 00000000..dd19ab9f --- /dev/null +++ b/hyperion/np/pdfs/plda/factory.py @@ -0,0 +1,204 @@ +""" + Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from enum import Enum + +import numpy as np +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from ....utils.misc import filter_func_args +from .frplda import FRPLDA +from .plda import PLDA +from .plda_base import PLDALLRNvsMMethod +from .splda import SPLDA + + +class PLDAType(str, Enum): + frplda = "frplda" + splda = "splda" + plda = "plda" + + @staticmethod + def choices(): + return [PLDAType.frplda, PLDAType.splda, PLDAType.plda] + + +class PLDAFactory(object): + """Class to create PLDA objects.""" + + @staticmethod + def create( + plda_type, + y_dim=None, + z_dim=None, + fullcov_W=True, + update_mu=True, + update_V=True, + update_U=True, + update_B=True, + update_W=True, + update_D=True, + floor_iD=1e-5, + name="plda", + **kwargs + ): + if plda_type == PLDAType.frplda: + return FRPLDA( + fullcov_W=fullcov_W, + update_mu=update_mu, + update_B=update_B, + update_W=update_W, + name=name, + **kwargs + ) + if plda_type == PLDAType.splda: + return SPLDA( + y_dim=y_dim, + fullcov_W=fullcov_W, + update_mu=update_mu, + update_V=update_V, + update_W=update_W, + name=name, + **kwargs + ) + + if plda_type == PLDAType.plda: + return PLDA( + y_dim=y_dim, + z_dim=z_dim, + floor_iD=floor_iD, + update_mu=update_mu, + update_V=update_V, + update_U=update_U, + update_D=update_D, + name=name, + **kwargs + ) + + @staticmethod + def load_plda(plda_type, model_file): + if plda_type == "frplda": + return FRPLDA.load(model_file) + elif plda_type == "splda": + return SPLDA.load(model_file) + elif plda_type == "plda": + return PLDA.load(model_file) + + @staticmethod + def filter_args(**kwargs): + return filter_func_args(PLDAFactory.create, kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--plda-type", + default=PLDAType.splda, + choices=PLDAType.choices(), + help="PLDA type", + ) + + parser.add_argument( + "--y-dim", type=int, default=150, help="num. of eigenvoices" + ) + parser.add_argument( + "--z-dim", type=int, default=400, help="num. of eigenchannels" + ) + + parser.add_argument( + "--fullcov-W", + default=True, + action=ActionYesNo, + help="use full covariance W", + ) + parser.add_argument( + "--update-mu", + default=True, + action=ActionYesNo, + help="not update mu", + ) + parser.add_argument( + "--update-V", default=True, action=ActionYesNo, help="update V" + ) + parser.add_argument( + "--update-U", default=True, action=ActionYesNo, help="update U" + ) + + parser.add_argument( + "--update-B", default=True, action=ActionYesNo, help="update B" + ) + parser.add_argument( + "--update-W", default=True, action=ActionYesNo, help="update W" + ) + parser.add_argument( + "--update-D", default=True, action=ActionYesNo, help="update D" + ) + parser.add_argument( + "--floor-iD", + type=float, + default=1e-5, + help="floor for inverse of D matrix", + ) + + parser.add_argument("--epochs", type=int, default=40, help="num. 
of epochs") + parser.add_argument( + "--ml-md", + default="ml+md", + choices=["ml+md", "ml", "md"], + help=("optimization type"), + ) + + parser.add_argument( + "--md-epochs", + default=None, + type=int, + nargs="+", + help=("epochs in which we do MD, if None we do it in all the epochs"), + ) + + parser.add_argument("--name", default="plda", help="model name") + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + ) + + @staticmethod + def filter_eval_args(**kwargs): + valid_args = "eval_method" + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_llr_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--llr-method", default="vavg", choices=PLDALLRNvsMMethod.choices() + ) + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + ) + + # @staticmethod + # def add_eval_args(parser, prefix=None): + # if prefix is None: + # p1 = "--" + # else: + # p1 = "--" + prefix + "." + + # parser.add_argument( + # p1 + "plda-type", + # default="splda", + # choices=["frplda", "splda", "plda"], + # help=("PLDA type"), + # ) + # parser.add_argument(p1 + "model-file", required=True, help=("model file")) diff --git a/hyperion/np/pdfs/plda/frplda.py b/hyperion/np/pdfs/plda/frplda.py index af8c5d8b..591948f9 100644 --- a/hyperion/np/pdfs/plda/frplda.py +++ b/hyperion/np/pdfs/plda/frplda.py @@ -36,9 +36,12 @@ def __init__( update_mu=True, update_B=True, update_W=True, + epochs=20, + ml_md="ml+md", + md_epochs=None, **kwargs ): - super().__init__(mu=mu, update_mu=update_mu, **kwargs) + super().__init__(mu=mu, update_mu=update_mu, epochs=epochs, **kwargs) if mu is not None: self.y_dim = mu.shape[0] self.B = B diff --git a/hyperion/np/pdfs/plda/plda.py b/hyperion/np/pdfs/plda/plda.py index 76299970..35b133c2 100644 --- a/hyperion/np/pdfs/plda/plda.py +++ b/hyperion/np/pdfs/plda/plda.py @@ -44,9 +44,20 @@ def __init__( update_V=True, update_U=True, update_D=True, + epochs=20, + ml_md="ml+md", + md_epochs=None, **kwargs ): - super().__init__(y_dim=y_dim, mu=mu, update_mu=update_mu, **kwargs) + super().__init__( + y_dim=y_dim, + mu=mu, + update_mu=update_mu, + epochs=epochs, + ml_md=ml_md, + md_epochs=md_epochs, + **kwargs + ) self.z_dim = z_dim if V is not None: self.y_dim = V.shape[0] @@ -526,7 +537,7 @@ def log_probx_g_yz(self, x, y, z): logp = ( -x.shape[-1] * np.log(2 * np.pi) + logD - - np.sum(self.D * delta ** 2, axis=-1) + - np.sum(self.D * delta**2, axis=-1) ) logp /= 2 return logp diff --git a/hyperion/np/pdfs/plda/plda_base.py b/hyperion/np/pdfs/plda/plda_base.py index 9dde58b1..09544cae 100644 --- a/hyperion/np/pdfs/plda/plda_base.py +++ b/hyperion/np/pdfs/plda/plda_base.py @@ -3,6 +3,8 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +from enum import Enum + import numpy as np from ....hyp_defs import float_cpu @@ -10,6 +12,22 @@ from ..core.pdf import PDF +class PLDALLRNvsMMethod(str, Enum): + vavg = "vavg" + lnorm_vavg = "lnorm-vavg" + savg = "savg" + book = "book" + + @staticmethod + def choices(): + return [ + PLDALLRNvsMMethod.vavg, + PLDALLRNvsMMethod.lnorm_vavg, + PLDALLRNvsMMethod.savg, + PLDALLRNvsMMethod.book, + ] + + class PLDABase(PDF): """Abstract Base class for different versions of Probabilistic Linear Discriminant Analysis (PLDA) models. @@ -21,7 +39,16 @@ class PLDABase(PDF): x_dim: data dimension. 
""" - def __init__(self, y_dim=None, mu=None, update_mu=True, **kwargs): + def __init__( + self, + y_dim=None, + mu=None, + update_mu=True, + epochs=20, + ml_md="ml+md", + md_epochs=None, + **kwargs + ): super().__init__(**kwargs) self.mu = mu self.y_dim = y_dim @@ -29,6 +56,10 @@ def __init__(self, y_dim=None, mu=None, update_mu=True, **kwargs): if mu is not None: self.x_dim = mu.shape[0] + self.epochs = epochs + self.ml_md = ml_md + self.md_epochs = md_epochs + def initialize(self, D): """initializes the model. @@ -55,8 +86,8 @@ def fit( class_ids_val=None, ptheta_val=None, sample_weight_val=None, - epochs=20, - ml_md="ml+md", + epochs=None, + ml_md=None, md_epochs=None, ): """Trains the model. @@ -80,6 +111,12 @@ def fit( log p(X) of the val. data, if present. log p(x) of the val. data per sample, if present. """ + if epochs is None: + epochs = self.epochs + if ml_md is None: + ml_md = self.ml_md + if md_epochs is None: + md_epochs = self.md_epochs use_ml = False if ml_md == "md" else True use_md = False if ml_md == "ml" else True @@ -107,7 +144,6 @@ def fit( elbo = np.zeros((epochs,), dtype=float_cpu()) elbo_val = np.zeros((epochs,), dtype=float_cpu()) for epoch in range(epochs): - stats = self.Estep(D) elbo[epoch] = self.elbo(stats) if x_val is not None: @@ -206,7 +242,6 @@ def fit_adapt_weighted_avg_model( elbo = np.zeros((epochs,), dtype=float_cpu()) elbo_val = np.zeros((epochs,), dtype=float_cpu()) for epoch in range(epochs): - stats = self.Estep(D) elbo[epoch] = self.elbo(stats) if x_val is not None: @@ -363,17 +398,17 @@ def llr_NvsM(self, x1, x2, ids1=None, ids2=None, method="vavg-lnorm"): Returns: Score matrix with shape (num_enrollment_sides, num_test_sides). """ - if method == "savg": + if method == PLDALLRNvsMMethod.savg: return self.llr_NvsM_savg(x1, ids1, x2, ids2) D1 = x1 if ids1 is None else self.compute_stats_hard(x1, class_ids=ids1) D2 = x2 if ids2 is None else self.compute_stats_hard(x2, class_ids=ids2) - if method == "book": + if method == PLDALLRNvsMMethod.book: return self.llr_NvsM_book(D1, D2) - if method == "vavg": + if method == PLDALLRNvsMMethod.vavg: return self.llr_NvsM_vavg(D1, D2, do_lnorm=False) - if method == "vavg-lnorm": + if method == PLDALLRNvsMMethod.lnorm_vavg: return self.llr_NvsM_vavg(D1, D2, do_lnorm=True) def llr_NvsM_vavg(self, D1, D2, do_lnorm=True): @@ -436,17 +471,17 @@ def llr_Nvs1(self, x1, x2, ids1=None, method="vavg-lnorm"): Returns: Score matrix with shape (num_enrollment_sides, num_test_sides). 
""" - if method == "savg": + if method == PLDALLRNvsMMethod.savg: return self.llr_Nvs1_savg(x1, ids1, x2) D1 = x1 if ids1 is None else self.compute_stats_hard(x1, class_ids=ids1) - if method == "book": + if method == PLDALLRNvsMMethod.book: D2 = self.compute_stats_hard(x2, np.arange(x2.shape[0])) return self.llr_NvsM_book(D1, D2) - if method == "vavg": + if method == PLDALLRNvsMMethod.vavg: return self.llr_Nvs1_vavg(D1, x2, do_lnorm=False) - if method == "vavg-lnorm": + if method == PLDALLRNvsMMethod.lnorm_vavg: return self.llr_Nvs1_vavg(D1, x2, do_lnorm=True) def llr_Nvs1_vavg(self, D1, x2, do_lnorm=True): diff --git a/hyperion/np/pdfs/plda/splda.py b/hyperion/np/pdfs/plda/splda.py index 5d397183..9e0c2a20 100644 --- a/hyperion/np/pdfs/plda/splda.py +++ b/hyperion/np/pdfs/plda/splda.py @@ -37,9 +37,20 @@ def __init__( update_mu=True, update_V=True, update_W=True, + epochs=20, + ml_md="ml+md", + md_epochs=None, **kwargs ): - super().__init__(y_dim=y_dim, mu=mu, update_mu=update_mu, **kwargs) + super().__init__( + y_dim=y_dim, + mu=mu, + update_mu=update_mu, + epochs=epochs, + ml_md=ml_md, + md_epochs=md_epochs, + **kwargs + ) if V is not None: self.y_dim = V.shape[0] self.V = V diff --git a/hyperion/np/preprocessing/__init__.py b/hyperion/np/preprocessing/__init__.py new file mode 100644 index 00000000..8cbe932a --- /dev/null +++ b/hyperion/np/preprocessing/__init__.py @@ -0,0 +1,6 @@ +""" + Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from .resampler import Resampler diff --git a/hyperion/np/preprocessing/resampler.py b/hyperion/np/preprocessing/resampler.py new file mode 100644 index 00000000..1c3e5901 --- /dev/null +++ b/hyperion/np/preprocessing/resampler.py @@ -0,0 +1,46 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + + +class Resampler: + def __init__(self, target_sample_freq: float): + self.target_sample_freq = target_sample_freq + self.resamplers = {} + + def _get_resampler(self, input_sample_freq): + if input_sample_freq in self.resamplers: + return self.resamplers[input_sample_freq] + + import torch + import torchaudio.transforms as tat + + try: + resampler = tat.Resample( + int(input_sample_freq), + int(self.target_sample_freq), + lowpass_filter_width=64, + rolloff=0.9475937167399596, + resampling_method="sinc_interp_kaiser", + beta=14.769656459379492, + ) + except: + resampler = tat.Resample( + int(input_sample_freq), + int(self.target_sample_freq), + lowpass_filter_width=64, + rolloff=0.9475937167399596, + resampling_method="kaiser_window", + beta=14.769656459379492, + ) + resampler_f = lambda x: resampler(torch.from_numpy(x)).numpy() + self.resamplers[fs] = resampler_f + return resampler_f + + def __call__(self, x, sample_freq: float): + if sample_freq == self.target_sample_freq: + return x, sample_freq + + resampler = self._get_resampler(sample_freq) + return resampler(x), self.target_sample_freq diff --git a/hyperion/np/transforms/lda.py b/hyperion/np/transforms/lda.py index fc886ede..b7a50f80 100644 --- a/hyperion/np/transforms/lda.py +++ b/hyperion/np/transforms/lda.py @@ -6,6 +6,7 @@ import h5py import numpy as np import scipy.linalg as la +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ..np_model import NPModel from .sb_sw import SbSw @@ -157,3 +158,38 @@ def save_mat(self, file_path): with h5py.File(file_path, "w") as f: f.create_dataset("mu", data=self.mu) 
f.create_dataset("T", data=self.T) + + @staticmethod + def filter_args(**kwargs): + valid_args = ("update_mu", "update_T", "name", "lda_dim") + return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--update-mu", + default=True, + action=ActionYesNo, + help=("updates centering parameter"), + ) + parser.add_argument( + "--update-T", + default=True, + action=ActionYesNo, + help=("updates projection parameter"), + ) + + parser.add_argument( + "--lda-dim", required=True, help=("output dimension of LDA") + ) + + parser.add_argument("--name", dest="name", default="lda") + if prefix is not None: + outer_parser.add_argument( + "--" + prefix, + action=ActionParser(parser=parser), + ) diff --git a/hyperion/np/transforms/pca.py b/hyperion/np/transforms/pca.py index aa25d8e9..98b6c192 100644 --- a/hyperion/np/transforms/pca.py +++ b/hyperion/np/transforms/pca.py @@ -91,7 +91,7 @@ def get_pca_dim_for_var_ratio(x, var_r=1, min_dim=2): rank = matrix_rank(np.dot(x.T, x)) else: sv = la.svd(x, compute_uv=False) - Ecc = np.cumsum(sv ** 2) + Ecc = np.cumsum(sv**2) Ecc = Ecc / Ecc[-1] rank = np.where(Ecc > var_r)[0][0] @@ -186,7 +186,11 @@ def load_params(cls, f, config): """ param_list = ["mu", "T"] params = cls._load_params_to_dict(f, config["name"], param_list) - return cls(mu=params["mu"], T=params["T"], **config,) + return cls( + mu=params["mu"], + T=params["T"], + **config, + ) @classmethod def load_mat(cls, file_path): @@ -202,12 +206,19 @@ def save_mat(self, file_path): @staticmethod def filter_args(**kwargs): - valid_args = ("update_mu", "update_T", "name", "pca_dim", "pca_var_r") + valid_args = ( + "update_mu", + "update_T", + "name", + "pca_dim", + "pca_var_r", + "pca_min_dim", + "whiten", + ) return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod def add_class_args(parser, prefix=None): - if prefix is not None: outer_parser = parser parser = ArgumentParser(prog="") @@ -242,10 +253,15 @@ def add_class_args(parser, prefix=None): help=("proportion of variance to keep when choosing the PCA dimension"), ) + parser.add_argument( + "--pca-min-dim", default=2, type=int, help=("min. 
output dimension of PCA") + ) + parser.add_argument("--name", dest="name", default="pca") if prefix is not None: outer_parser.add_argument( - "--" + prefix, action=ActionParser(parser=parser), + "--" + prefix, + action=ActionParser(parser=parser), ) add_argparse_args = add_class_args diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 2329d0b1..e19ec329 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -21,6 +21,7 @@ from ...io import RandomAccessAudioReader as AR from ...np.augment import SpeechAugment +from ...np.preprocessing import Resampler from ...utils.class_info import ClassInfo from ...utils.misc import filter_func_args from ...utils.segment_set import SegmentSet @@ -126,6 +127,7 @@ def __init__( self.target_sample_freq = target_sample_freq self.resamplers = {} + self.resampler = Resampler(target_sample_freq) def _load_legacy_durations(self, time_durs_file): if self.rank == 0: @@ -353,14 +355,19 @@ def _get_resampler(self, fs): return resampler_f def _resample(self, x, fs): - try: - if self.target_sample_freq is None or fs == self.target_sample_freq: - return x, fs - resampler = self._get_resampler(fs) - return resampler(x), self.target_sample_freq - except: + if self.target_sample_freq is None: return x, fs + return self.resampler(x, fs) + + # try: + # if self.target_sample_freq is None or fs == self.target_sample_freq: + # return x, fs + # resampler = self._get_resampler(fs) + # return resampler(x), self.target_sample_freq + # except: + # return x, fs + def __getitem__(self, segment): seg_id, start, duration = self._parse_segment_item(segment) x, fs = self._read_audio(seg_id, start, duration) @@ -368,39 +375,8 @@ def __getitem__(self, segment): data = {"seg_id": seg_id, "sample_freq": fs} x_augs = self._apply_augs(x, duration, fs) data.update(x_augs) - - # if self.augmenters: - # # augmentations - # if duration == 0: - # num_samples = len(x) - # else: - # num_samples = int(duration * fs) - - # reverb_context_samples = len(x) - num_samples - # x_augs = self._apply_augs(x, reverb_context_samples) - # data.update(x_augs) - - # # add original non augmented audio - # if self.return_orig: - # x_orig = x[reverb_context_samples:] - # data["x"] = x_orig - - # else: - # data["x"] = x - seg_info = self._get_segment_info(seg_id) data.update(seg_info) - # if np.any(~np.isfinite(data["x"])): - # print( - # "zzz", - # x.max(), - # x.min(), - # x.mean(), - # data["x"].max(), - # data["x"].min(), - # data["x"].mean(), - # flush=True, - # ) return data @staticmethod diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index 4587fbd2..d314490c 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -201,7 +201,7 @@ def forward(self, x, x_lengths=None, weights=None): # this can produce slightly negative variance when relu6 saturates in all time steps # add 1e-5 for stability s = torch.sqrt( - torch.mean(delta**2, dim=self.dim, keepdim=False).clamp(min=SQRT_EPS) + torch.mean(delta ** 2, dim=self.dim, keepdim=False).clamp(min=SQRT_EPS) ) mus = torch.cat((mu, s), dim=1) @@ -214,7 +214,7 @@ def forward(self, x, x_lengths=None, weights=None): wbar = torch.mean(weights, dim=self.dim, keepdim=True) mu = xbar / wbar delta = x - mu - var = torch.mean(weights * delta**2, dim=self.dim, keepdim=True) / wbar + var = torch.mean(weights * delta ** 2, dim=self.dim, keepdim=True) / wbar s = torch.sqrt(var.clamp(min=SQRT_EPS)) mu = mu.squeeze(self.dim) s 
= s.squeeze(self.dim) @@ -254,9 +254,9 @@ def _forward_slidwin_int(self, x, win_length, win_shift, snip_edges): c_x = torch.cumsum(x, dim=-1).view(-1, x.shape[-1]) m_x = (c_x[:, win_shift:] - c_x[:, :-win_shift]) / win_length - c_x = torch.cumsum(x**2, dim=-1).view(-1, x.shape[-1]) + c_x = torch.cumsum(x ** 2, dim=-1).view(-1, x.shape[-1]) m_x2 = (c_x[:, win_shift:] - c_x[:, :-win_shift]) / win_length - s_x = torch.sqrt(m_x2 - m_x**2).clamp(min=SQRT_EPS) + s_x = torch.sqrt(m_x2 - m_x ** 2).clamp(min=SQRT_EPS) mus = self._post_slidwin(m_x, s_x, out_shape) return mus @@ -265,7 +265,7 @@ def _forward_slidwin_float(self, x, win_length, win_shift, snip_edges): x, out_shape = self._pre_slidwin(x, win_length, win_shift, snip_edges) num_frames = out_shape[-1] c_x = torch.cumsum(x, dim=-1).view(-1, x.shape[-1]) - c_x2 = torch.cumsum(x**2, dim=-1).view(-1, x.shape[-1]) + c_x2 = torch.cumsum(x ** 2, dim=-1).view(-1, x.shape[-1]) # xx = x.view(-1, x.shape[-1]) # print(xx.shape[1]) @@ -309,7 +309,7 @@ def _forward_slidwin_float(self, x, win_length, win_shift, snip_edges): k += win_shift - var_x = (m_x2 - m_x**2).clamp(min=SQRT_EPS) + var_x = (m_x2 - m_x ** 2).clamp(min=SQRT_EPS) s_x = torch.sqrt(var_x) # idx = torch.isnan(s_x) #.any(dim=1) # if torch.sum(idx) > 0: @@ -400,14 +400,14 @@ def forward(self, x, x_lengths=None, weights=None): weights = self._standardize_weights(x, x_lengths, weights) if weights is None: mu = torch.mean(x, dim=self.dim, keepdim=self.keepdim) - x2bar = torch.mean(x**2, dim=self.dim, keepdim=self.keepdim) + x2bar = torch.mean(x ** 2, dim=self.dim, keepdim=self.keepdim) logvar = torch.log(x2bar - mu * mu + 1e-5) # for stability in case var=0 return torch.cat((mu, logvar), dim=-1) xbar = torch.mean(weights * x, dim=self.dim, keepdim=self.keepdim) wbar = torch.mean(weights, dim=self.dim, keepdim=self.keepdim) mu = xbar / wbar - x2bar = torch.mean(weights * x**2, dim=self.dim, keepdim=self.keepdim) / wbar + x2bar = torch.mean(weights * x ** 2, dim=self.dim, keepdim=self.keepdim) / wbar var = (x2bar - mu * mu).clamp(min=1e-5) logvar = torch.log(var) @@ -444,7 +444,7 @@ def __init__( if dist_pow == 1: self.dist_f = lambda x: torch.norm(x, p=2, dim=-1) else: - self.dist_f = lambda x: torch.sum(x**2, dim=-1) + self.dist_f = lambda x: torch.sum(x ** 2, dim=-1) self.size_multiplier = num_comp @@ -503,7 +503,7 @@ def forward(self, x, x_lengths=None, weights=None): delta = x - self.mu # (batch, time, num_comp, feat_dim) dist = self.dist_f(delta) # (batch, time, num_comp) - llk = -self.prec**2 * dist + self.bias + llk = -self.prec ** 2 * dist + self.bias r = nnf.softmax(llk, dim=-1) # (batch, time, num_comp) if weights is not None: r *= weights @@ -784,7 +784,7 @@ def forward(self, x, x_lengths=None, weights=None): assert not torch.any( torch.isinf(x_inner) ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(x))}" - # logging.info('x_inner1={} {}'.format(torch.sum(torch.isnan(x_inner)), torch.sum(torch.isinf(x_inner)))) + if self.use_global_context: global_mus = self.stats_pool(x, weights=weights) x_inner = x_inner + self.lin_global(global_mus).unsqueeze(-1) @@ -794,7 +794,7 @@ def forward(self, x, x_lengths=None, weights=None): assert not torch.any( torch.isinf(x_inner) ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(global_mus))}" - # logging.info('x_inner2={} {}'.format(torch.sum(torch.isnan(x_inner)), torch.sum(torch.isinf(x_inner)))) + attn = self.conv2( self.activation(self.norm_layer(x_inner)) ) # (batch, feat_dim, time) @@ -821,7 
+821,7 @@ def forward(self, x, x_lengths=None, weights=None): torch.isinf(attn) ), f"attn is inf {torch.sum(torch.isinf(attn))}" mus = self.stats_pool(x, weights=attn) - # logging.info('mus={} {}'.format(torch.sum(torch.isnan(mus)), torch.sum(torch.isinf(mus)))) + if self.keepdim: mus = mus.unsqueeze(self.dim) diff --git a/hyperion/torch/narchs/dino_head.py b/hyperion/torch/narchs/dino_head.py index a59434bf..87c8daae 100644 --- a/hyperion/torch/narchs/dino_head.py +++ b/hyperion/torch/narchs/dino_head.py @@ -2,8 +2,6 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - - from typing import Optional import torch @@ -17,53 +15,6 @@ from ..layers import NormLayer1dFactory as NLF from .net_arch import NetArch -# class DINOHead1(nn.Module): -# def __init__( -# self, -# in_dim, -# out_dim, -# use_bn=False, -# norm_last_layer=True, -# nlayers=3, -# hidden_dim=2048, -# bottleneck_dim=256, -# ): -# super().__init__() -# nlayers = max(nlayers, 1) -# if nlayers == 1: -# self.mlp = nn.Linear(in_dim, bottleneck_dim) -# else: -# layers = [nn.Linear(in_dim, hidden_dim)] -# if use_bn: -# layers.append(nn.BatchNorm1d(hidden_dim)) -# layers.append(nn.GELU()) -# for _ in range(nlayers - 2): -# layers.append(nn.Linear(hidden_dim, hidden_dim)) -# if use_bn: -# layers.append(nn.BatchNorm1d(hidden_dim)) -# layers.append(nn.GELU()) -# layers.append(nn.Linear(hidden_dim, bottleneck_dim)) -# self.mlp = nn.Sequential(*layers) -# self.apply(self._init_weights) -# self.last_layer = nn.utils.weight_norm( -# nn.Linear(bottleneck_dim, out_dim, bias=False) -# ) -# self.last_layer.weight_g.data.fill_(1) -# if norm_last_layer: -# self.last_layer.weight_g.requires_grad = False - -# def _init_weights(self, m): -# if isinstance(m, nn.Linear): -# nn.init.trunc_normal_(m.weight, std=0.02) -# if isinstance(m, nn.Linear) and m.bias is not None: -# nn.init.constant_(m.bias, 0) - -# def forward(self, x): -# x = self.mlp(x) -# x = nn.functional.normalize(x, dim=-1, p=2) -# x = self.last_layer(x) -# return x - class DINOHead(NetArch): """Classification Head for DINO x-vector style networks @@ -208,21 +159,21 @@ def _init_weights(self, m): def forward(self, x: torch.Tensor, y: Optional[torch.Tensor] = None): if self.use_in_norm: x = self.in_norm(x) - assert not torch.any( - torch.isnan(x) - ), f"x is nan {x.size()} {torch.sum(torch.isnan(x))}" + # assert not torch.any( + # torch.isnan(x) + # ), f"x is nan {x.size()} {torch.sum(torch.isnan(x))}" x = self.hid_layers(x) - assert not torch.any( - torch.isnan(x) - ), f"x_hid is nan {x.size()} {torch.sum(torch.isnan(x))}" + # assert not torch.any( + # torch.isnan(x) + # ), f"x_hid is nan {x.size()} {torch.sum(torch.isnan(x))}" x = nn.functional.normalize(x, dim=-1, p=2) - assert not torch.any( - torch.isnan(x) - ), f"x_l2 is nan {x.size()} {torch.sum(torch.isnan(x))}" + # assert not torch.any( + # torch.isnan(x) + # ), f"x_l2 is nan {x.size()} {torch.sum(torch.isnan(x))}" x = self.output(x) - assert not torch.any( - torch.isnan(x) - ), f"out is nan {x.size()} {torch.sum(torch.isnan(x))}" + # assert not torch.any( + # torch.isnan(x) + # ), f"out is nan {x.size()} {torch.sum(torch.isnan(x))}" return x def get_config(self): diff --git a/hyperion/torch/narchs/proj_head.py b/hyperion/torch/narchs/proj_head.py index 549f9e6a..e2838013 100644 --- a/hyperion/torch/narchs/proj_head.py +++ b/hyperion/torch/narchs/proj_head.py @@ -40,12 +40,7 @@ class ProjHead(NetArch): """ def __init__( - self, - in_feats, - out_feats=256, - 
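Backing up to the `global_pool.py` hunks above: `_forward_slidwin_int` obtains sliding-window means and standard deviations from two cumulative sums instead of looping over windows, and the variance is clamped at `SQRT_EPS` because `E[x^2] - E[x]^2` can come out slightly negative in floating point. A small self-contained check of that identity, using stride-1 windows for simplicity (the layer's actual indexing also handles `snip_edges` and fractional shifts):

```python
import torch

SQRT_EPS = 1e-5
x = torch.randn(2, 4, 100)                 # (batch, feat_dim, time)
w = 10                                     # window length

# O(T) windowed mean/std via cumulative sums, as in _forward_slidwin_int.
zero = torch.zeros_like(x[..., :1])
c1 = torch.cat((zero, torch.cumsum(x, dim=-1)), dim=-1)
c2 = torch.cat((zero, torch.cumsum(x ** 2, dim=-1)), dim=-1)
m1 = (c1[..., w:] - c1[..., :-w]) / w      # E[x] per window
m2 = (c2[..., w:] - c2[..., :-w]) / w      # E[x^2] per window
s = torch.sqrt((m2 - m1 ** 2).clamp(min=SQRT_EPS))

# Reference computation with explicit windows.
u = x.unfold(2, w, 1)                      # (batch, feat_dim, frames, w)
print(torch.allclose(m1, u.mean(dim=-1), atol=1e-5))                  # True
print(torch.allclose(s, u.std(dim=-1, unbiased=False), atol=1e-3))    # True
```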
norm_layer=None, - use_norm=True, - norm_before=True, + self, in_feats, out_feats=256, norm_layer=None, use_norm=True, norm_before=True, ): super().__init__() @@ -72,23 +67,21 @@ def __init__( def forward(self, x, y=None): if self.use_norm and self.norm_before: x = self._norm_layer(x) - assert not torch.any( - torch.isnan(x) - ), f"x before proj is nan {x.size()} {torch.sum(torch.isnan(x))}" + # assert not torch.any( + # torch.isnan(x) + # ), f"x before proj is nan {x.size()} {torch.sum(torch.isnan(x))}" x = self.proj(x) - assert not torch.any( - torch.isnan(x) - ), f"x after proj is nan {x.size()} {torch.sum(torch.isnan(x))}" + # assert not torch.any( + # torch.isnan(x) + # ), f"x after proj is nan {x.size()} {torch.sum(torch.isnan(x))}" if self.use_norm and not self.norm_before: x = self._norm_layer(x) - assert not torch.any( - torch.isnan(x) - ), f"x after bn is nan {x.size()} {torch.sum(torch.isnan(x))}" + # assert not torch.any( + # torch.isnan(x) + # ), f"x after bn is nan {x.size()} {torch.sum(torch.isnan(x))}" return x def get_config(self): - hid_act = AF.get_config(self.fc_blocks[0].activation) - config = { "in_feats": self.in_feats, "out_feats": self.out_feats, @@ -96,7 +89,6 @@ def get_config(self): "use_norm": self.use_norm, "norm_before": self.norm_before, } - base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/hyperion/torch/trainers/dino_xvector_trainer.py b/hyperion/torch/trainers/dino_xvector_trainer.py index e4051058..26d6a434 100644 --- a/hyperion/torch/trainers/dino_xvector_trainer.py +++ b/hyperion/torch/trainers/dino_xvector_trainer.py @@ -304,7 +304,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): logs = ODict((log_tag + k, v) for k, v in logs.items()) return logs - def _load_checkpoint(self, checkpoint): + def _old_load_checkpoint(self, checkpoint): self.teacher_model.load_state_dict(checkpoint["teacher_model_state_dict"]) # self.teacher_model.load_state_dict(checkpoint["teacher_state_dict"]) self.teacher_optimizer.load_state_dict( @@ -312,7 +312,7 @@ def _load_checkpoint(self, checkpoint): ) return super()._load_checkpoint(checkpoint) - def _new_load_checkpoint(self, checkpoint, teacher_checkpoint): + def _load_checkpoint(self, checkpoint, teacher_checkpoint): self.teacher_model.load_state_dict(teacher_checkpoint["model_state_dict"]) self.teacher_optimizer.load_state_dict( teacher_checkpoint["optimizer_state_dict"] @@ -322,7 +322,7 @@ def _new_load_checkpoint(self, checkpoint, teacher_checkpoint): def load_checkpoint(self, epoch, step): checkpoint = self.load_model_checkpoint("model", epoch, step) teacher_checkpoint = self.load_model_checkpoint("teacher_model", epoch, step) - return self._new_load_checkpoint(checkpoint, teacher_checkpoint) + return self._load_checkpoint(checkpoint, teacher_checkpoint) def checkpoint(self, logs=None): checkpoint = super().checkpoint(logs) diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index b3d6cb9f..36a9a43f 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -634,7 +634,7 @@ def _make_wd_sched(self, wd_sched, optim): assert isinstance(wd_sched, dict) args = WDSF.filter_args(**wd_sched) if self.rank == 0: - logging.info("wd scheduler args={args}") + logging.info(f"wd scheduler args={args}") wd_sched = WDSF.create(optim, **args) return wd_sched @@ -930,7 +930,7 @@ def find_last_checkpoint(self, model_name="model"): file_pattern = "%s/%s_ep[0-9]*.pth" % 
(self.exp_path, model_name) file_paths = sorted(glob.glob(file_pattern)) if len(file_paths) > 0: - last_epoch = int(re.search(r"ep[0-9]*", file_paths[-1])[2:]) + last_epoch = int(re.search(r"ep[0-9]*", file_paths[-1]).group()[2:]) file_pattern = "%s/%s_ep%04d_step[0-9]*.pth" % ( self.exp_path, @@ -939,7 +939,7 @@ def find_last_checkpoint(self, model_name="model"): ) file_paths = sorted(glob.glob(file_pattern)) if len(file_paths) > 0: - last_step = int(re.search(r"step[0-9]*", file_paths[-1])[4:]) + last_step = int(re.search(r"step[0-9]*", file_paths[-1]).group()[4:]) return last_epoch, last_step @@ -947,7 +947,7 @@ def load_last_checkpoint(self): """Loads the last training checkpoint in the experiment dir.""" last_epoch, last_step = self.find_last_checkpoint() if last_epoch > 0 or last_step > 0: - return self.new_load_checkpoint(last_epoch, last_step) + return self.load_checkpoint(last_epoch, last_step) return None @@ -964,11 +964,11 @@ def load_model_checkpoint(self, model_name="model", epoch=0, step=0): logging.info("loading %s from %s", model_name, file_path) return torch.load(file_path, map_location=torch.device("cpu")) - def new_load_checkpoint(self, epoch, step): + def load_checkpoint(self, epoch, step): checkpoint = self.load_model_checkpoint("model", epoch, step) return self._load_checkpoint(checkpoint) - def load_checkpoint(self, file_path): + def old_load_checkpoint(self, file_path): """Loads a training checkpoint from file. Args: diff --git a/hyperion/torch/wd_schedulers/factory.py b/hyperion/torch/wd_schedulers/factory.py index 3820daa2..dc72bd2c 100644 --- a/hyperion/torch/wd_schedulers/factory.py +++ b/hyperion/torch/wd_schedulers/factory.py @@ -31,7 +31,7 @@ def create( if wdsch_type == "none": return None - if wdsch_type == "cos_lr": + if wdsch_type == "cos_wd": return CosineWD( optimizer, initial_wd=initial_wd, diff --git a/hyperion/torch/wd_schedulers/wd_scheduler.py b/hyperion/torch/wd_schedulers/wd_scheduler.py index a3059edc..3a092c3d 100644 --- a/hyperion/torch/wd_schedulers/wd_scheduler.py +++ b/hyperion/torch/wd_schedulers/wd_scheduler.py @@ -64,6 +64,10 @@ def __init__( for group in optimizer.param_groups ] + if epoch == 0: + for group, wd in zip(optimizer.param_groups, self.initial_wds): + group["weight_decay"] = wd + self.warmup_steps = warmup_steps self.epoch = epoch self.step = step From 68cc0931f86e6380a0cf3d0600f58b2282b2e676 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 18 Jan 2024 18:06:45 -0500 Subject: [PATCH 127/154] added ecapa and lresnet 3.1 --- egs/voxceleb/v1.2/README.md | 26 +++++++ .../train_cfwseresnet34_xvec_stage1_v3.0.yaml | 9 ++- .../train_cfwseresnet34_xvec_stage2_v3.0.yaml | 13 ++-- .../train_cwseresnet34_xvec_stage1_v3.0.yaml | 9 ++- .../train_cwseresnet34_xvec_stage2_v3.0.yaml | 13 ++-- ...rain_ecapatdnn2048x4_xvec_stage1_v3.0.yaml | 9 ++- ...rain_ecapatdnn2048x4_xvec_stage2_v3.0.yaml | 13 ++-- .../train_fwseresnet34_xvec_stage1_v3.0.yaml | 9 ++- .../train_fwseresnet34_xvec_stage2_v3.0.yaml | 13 ++-- ...rain_idrnd_resnet100_xvec_stage1_v3.0.yaml | 9 ++- ...rain_idrnd_resnet100_xvec_stage2_v3.0.yaml | 14 ++-- .../train_lresnet34_xvec_stage2_v3.1.yaml | 73 +++++++++++++++++++ .../conf/train_resnet34_xvec_stage1_v3.0.yaml | 9 ++- .../conf/train_resnet34_xvec_stage2_v3.0.yaml | 13 ++-- .../train_tseresnet34_xvec_stage1_v3.0.yaml | 9 ++- .../train_tseresnet34_xvec_stage2_v3.0.yaml | 13 ++-- ...config_fbank80_stmn_ecapatdnn512x3.v3.1.sh | 46 ++++++++++++ .../config_fbank80_stmn_lresnet34.v3.1.sh | 44 +++++++++++ 
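Two of the `torch_trainer.py` fixes above are worth spelling out: `find_last_checkpoint` used to index the `re.Match` object directly (`re.search(...)[2:]`), which fails because `Match.__getitem__` selects capture groups rather than characters, and `load_last_checkpoint` still called the old `new_load_checkpoint` name after the rename. A quick illustration of the regex fix, with a hypothetical checkpoint filename:

```python
import re

path = "exp/xvector_nnets/model_ep0035.pth"  # hypothetical checkpoint file
m = re.search(r"ep[0-9]*", path)
# Old code did m[2:], which asks for a match *group* and raises;
# the fix slices the matched string instead.
print(m.group())            # 'ep0035'
print(int(m.group()[2:]))   # 35
```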
hyperion/torch/layer_blocks/res2net_blocks.py | 38 +++++++--- hyperion/torch/layer_blocks/resnet_blocks.py | 37 ++++++++-- .../torch/layer_blocks/seresnet_blocks.py | 26 +++++-- hyperion/torch/narchs/resnet.py | 16 +++- hyperion/torch/narchs/resnet_factory.py | 24 +++--- hyperion/torch/trainers/xvector_trainer.py | 47 +++++++++++- 24 files changed, 420 insertions(+), 112 deletions(-) create mode 100644 egs/voxceleb/v1.2/conf/train_lresnet34_xvec_stage2_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.1.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_lresnet34.v3.1.sh diff --git a/egs/voxceleb/v1.2/README.md b/egs/voxceleb/v1.2/README.md index 6e8ba07a..6a2502e6 100644 --- a/egs/voxceleb/v1.2/README.md +++ b/egs/voxceleb/v1.2/README.md @@ -96,9 +96,15 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.11 | 0.069 | 0.126 | | | | | Cosine + AS-Norm | 1.10 | 0.065 | 0.108 | | | | | Cosine + QMF | 0.95 | 0.059 | 0.084 | +| config_fbank80_stmn_ecapatdnn512x3.v3.1.sh | ECAPA-TDNN 512x3 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.02 | 0.067 | 0.109 | +| | | | Cosine + AS-Norm | 0.98 | 0.062 | 0.092 | +| | | | Cosine + QMF | 0.85 | 0.061 | 0.091 | | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.68 | 0.052 | 0.088 | | | | | Cosine + AS-Norm | 0.63 | 0.049 | 0.083 | | | | | Cosine + QMF | 0.57 | 0.037 | 0.071 | +| config_fbank80_stmn_lresnet34.v3.1.sh | Thin-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.59 | 0.1 | 0.172 | +| | | | Cosine + AS-Norm | 1.54 | 0.927 | 0.140 | +| | | | Cosine + QMF | 1.32 | 0.083 | 0.121 | | config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.77 | 0.048 | 0.071 | | | | | Cosine + AS-Norm | 0.70 | 0.039 | 0.048 | | | | | Cosine + QMF | 0.62 | 0.034 | 0.042 | @@ -129,9 +135,15 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.16 | 0.073 | 0.130 | | | | | Cosine + AS-Norm | 1.13 | 0.068 | 0.118 | | | | | Cosine + QMF | 1.06 | 0.064 | 0.112 | +| config_fbank80_stmn_ecapatdnn512x3.v3.1.sh | ECAPA-TDNN 512x3 | Stage2: SubCenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.06 | 0.066 | 0.116 | +| | | | Cosine + AS-Norm | 1.01 | 0.061 | 0.106 | +| | | | Cosine + QMF | 0.96 | 0.058 | 0.097 | | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.85 | 0.055 | 0.100 | | | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.087 | | | | | Cosine + QMF | 0.76 | 0.047 | 0.083 | +| config_fbank80_stmn_lresnet34.v3.1.sh | Thin-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.69 | 0.103 | 0.174 | +| | | | Cosine + AS-Norm | 1.62 | 0.096 | 0.156 | +| | | | Cosine + QMF | 1.51 | 0.091 | 0.152 | | config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.86 | 0.054 | 0.098 | | | | | Cosine + AS-Norm | 0.81 | 0.049 | 0.087 | | | | | Cosine + QMF | 0.77 | 0.046 | 0.082 | @@ -154,6 +166,8 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | | | | Cosine + AS-Norm | 
0.70 | 0.042 | 0.072 | | | | | Cosine + QMF | 0.68 | 0.040 | 0.069 | + + ### VoxCeleb 1 Hard-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -161,9 +175,15 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.10 | 0.128 | 0.209 | | | | | Cosine + AS-Norm | 1.99 | 0.118 | 0.190 | | | | | Cosine + QMF | 1.84 | 0.111 | 0.184 | +| config_fbank80_stmn_ecapatdnn512x3.v3.1.sh | ECAPA-TDNN 512x3 | Stage2: SubCenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.93 | 0.120 | 0.198 | +| | | | Cosine + AS-Norm | 1.84 | 0.113 | 0.184 | +| | | | Cosine + QMF | 1.73 | 0.108 | 0.177 | | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.66 | 0.103 | 0.168 | | | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.151 | | | | | Cosine + QMF | 1.44 | 0.087 | 0.145 | +| config_fbank80_stmn_lresnet34.v3.1.sh | Thin-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 2.84 | 0.167 | 0.267 | +| | | | Cosine + AS-Norm | 2.58 | 0.150 | 0.252 | +| | | | Cosine + QMF | 2.45 | 0.144 | 0.234 | | config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.62 | 0.098 | 0.164 | | | | | Cosine + AS-Norm | 1.45 | 0.085 | 0.142 | | | | | Cosine + QMF | 1.36 | 0.082 | 0.137 | @@ -194,9 +214,15 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn512x3.v3.0.sh | ECAPA-TDNN 512x3 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.87 | 0.185 | 0.304 | | | | | Cosine + AS-Norm | 2.84 | 0.182 | 0.304 | | | | | Cosine + QMF | 2.61 | 0.172 | 0.283 | +| config_fbank80_stmn_ecapatdnn512x3.v3.1.sh | ECAPA-TDNN 512x3 | Stage2: SubCenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 2.60 | 0.174 | 0.287 | +| | | | Cosine + AS-Norm | 2.58 | 0.172 | 0.291 | +| | | | Cosine + QMF | 2.44 | 0.161 | 0.274 | | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.33 | 0.156 | 0.260 | | | | | Cosine + AS-Norm | 2.19 | 0.144 | 0.263 | | | | | Cosine + QMF | 2.06 | 0.137 | 0.251 | +| config_fbank80_stmn_lresnet34.v3.1.sh | Thin-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 3.74 | 0.239 | 0.394 | +| | | | Cosine + AS-Norm | 3.45 | 0.225 | 0.377 | +| | | | Cosine + QMF | 3.27 | 0.213 | 0.356 | | config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.19 | 0.142 | 0.242 | | | | | Cosine + AS-Norm | 2.00 | 0.133 | 0.254 | | | | | Cosine + QMF | 1.86 | 0.126 | 0.229 | diff --git a/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml index f4306e2e..132438bf 100644 --- a/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage1_v3.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -17,11 +17,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: 
seg_chunk_sampler min_batch_size: 64 @@ -70,3 +70,4 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml index c0bd44e5..34c0801e 100644 --- a/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_cfwseresnet34_xvec_stage2_v3.0.yaml @@ -2,18 +2,18 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -21,18 +21,18 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -68,3 +68,4 @@ trainer: swa_start: 10 swa_lr: 1e-4 swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml index b5458f9d..f576e411 100644 --- a/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage1_v3.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -17,11 +17,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -70,3 +70,4 @@ trainer: log_interval: 1000 epochs: 25 eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml index c0bd44e5..34c0801e 100644 --- a/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_cwseresnet34_xvec_stage2_v3.0.yaml @@ -2,18 +2,18 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -21,18 +21,18 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -68,3 +68,4 @@ trainer: swa_start: 
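The v3.1 stage-2 recipes in this patch switch the output layer to `loss_type: subcenter-arc-softmax` with `num_subcenters: 2` plus `intertop_margin: 0.1`, which is the main change the new README rows track against the v3.0 ArcFace configs. A minimal sketch of what a sub-center ArcFace head computes, keeping only the closest of the K sub-centers per class before applying the angular margin; class and argument names here are illustrative, not hyperion's actual layer (which also implements the inter-top margin):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class SubcenterArcFaceHead(nn.Module):
    """Illustrative sub-center ArcFace output layer (not hyperion's API)."""

    def __init__(self, embed_dim, num_classes, num_subcenters=2,
                 cos_scale=30.0, margin=0.3):
        super().__init__()
        self.weight = nn.Parameter(
            torch.randn(num_classes * num_subcenters, embed_dim))
        self.num_classes = num_classes
        self.num_subcenters = num_subcenters
        self.cos_scale = cos_scale
        self.margin = margin

    def forward(self, x, y):
        # Cosine similarity to every sub-center: (batch, classes * subcenters).
        cos = F.linear(F.normalize(x, dim=-1), F.normalize(self.weight, dim=-1))
        # Keep the best-matching sub-center of each class: (batch, classes).
        cos = cos.view(-1, self.num_classes, self.num_subcenters).amax(dim=-1)
        # Additive angular margin on the target class only.
        theta = torch.acos(cos.clamp(-1 + 1e-7, 1 - 1e-7))
        target = F.one_hot(y, self.num_classes).bool()
        theta = torch.where(target, theta + self.margin, theta)
        return self.cos_scale * torch.cos(theta)


head = SubcenterArcFaceHead(192, 5994, num_subcenters=2)  # VoxCeleb2-sized
x = torch.randn(8, 192)
y = torch.randint(0, 5994, (8,))
loss = F.cross_entropy(head(x, y), y)
```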
10 swa_lr: 1e-4 swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml index 2cf31713..23f03de7 100644 --- a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -17,11 +17,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -95,3 +95,4 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml index c348e7c5..79d510ae 100644 --- a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.0.yaml @@ -2,18 +2,18 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -21,18 +21,18 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -69,3 +69,4 @@ trainer: swa_start: 10 swa_lr: 1e-4 swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml index 01b2cc50..41748978 100644 --- a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -17,11 +17,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -70,3 +70,4 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml index c0bd44e5..34c0801e 100644 --- a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.0.yaml @@ -2,18 +2,18 @@ data: train: dataset: class_names: - - 
class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -21,18 +21,18 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -68,3 +68,4 @@ trainer: swa_start: 10 swa_lr: 1e-4 swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml index 74553395..eff62765 100644 --- a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 32 @@ -17,11 +17,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 32 @@ -71,3 +71,4 @@ trainer: log_interval: 1000 epochs: 30 eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml index b14cfc75..571411ca 100644 --- a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.0.yaml @@ -2,18 +2,18 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 16 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -21,18 +21,18 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 16 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -68,3 +68,5 @@ trainer: swa_start: 10 swa_lr: 1e-4 swa_anneal_epochs: 2 + target_key: speaker + \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_lresnet34_xvec_stage2_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_lresnet34_xvec_stage2_v3.1.yaml new file mode 100644 index 00000000..ac859010 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_lresnet34_xvec_stage2_v3.1.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + 
sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.0.yaml index 6659b2f6..e35b273a 100644 --- a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -17,11 +17,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -69,3 +69,4 @@ trainer: log_interval: 1000 epochs: 35 eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml index c0bd44e5..34c0801e 100644 --- a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.0.yaml @@ -2,18 +2,18 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -21,18 +21,18 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -68,3 +68,4 @@ trainer: swa_start: 10 swa_lr: 1e-4 swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage1_v3.0.yaml 
b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage1_v3.0.yaml index 58d22733..f4c381d6 100644 --- a/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage1_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage1_v3.0.yaml @@ -2,11 +2,11 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -17,11 +17,11 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: seg_chunk_sampler min_batch_size: 64 @@ -70,3 +70,4 @@ trainer: log_interval: 1000 epochs: 25 eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml index c0bd44e5..34c0801e 100644 --- a/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml +++ b/egs/voxceleb/v1.2/conf/train_tseresnet34_xvec_stage2_v3.0.yaml @@ -2,18 +2,18 @@ data: train: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -21,18 +21,18 @@ data: val: dataset: class_names: - - class_id + - speaker aug_cfgs: - conf/reverb_noise_aug.yaml return_segment_info: - - class_id + - speaker sampler: sampler_type: class_weighted_random_seg_chunk_sampler min_batch_size: 64 max_chunk_length: 4.0 min_chunk_length: 4.0 num_chunks_per_seg_epoch: 6 - class_name: class_id + class_name: speaker seg_weight_mode: data-prior num_hard_prototypes: 8 data_loader: @@ -68,3 +68,4 @@ trainer: swa_start: 10 swa_lr: 1e-4 swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.1.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.1.sh new file mode 100644 index 00000000..05aa4033 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn512x3.v3.1.sh @@ -0,0 +1,46 @@ +# ECAPA-TDNN small + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn512x3.v3.1 + +nnet_s1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1_v3.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0030.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage2_v3.1.yaml +nnet_name=${feat_type}_ecapatdnn512x3.v3.1 +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0030.pth +nnet_s2=$nnet_s2_dir/swa_model_ep0036.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_lresnet34.v3.1.sh 
b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_lresnet34.v3.1.sh new file mode 100644 index 00000000..019ac827 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_lresnet34.v3.1.sh @@ -0,0 +1,44 @@ +# ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34.v3.1 + +nnet_s1_base_cfg=conf/train_lresnet34_xvec_stage1_v3.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_lresnet34_xvec_stage2_v3.1.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/hyperion/torch/layer_blocks/res2net_blocks.py b/hyperion/torch/layer_blocks/res2net_blocks.py index 8de700c4..55e35e5f 100644 --- a/hyperion/torch/layer_blocks/res2net_blocks.py +++ b/hyperion/torch/layer_blocks/res2net_blocks.py @@ -9,6 +9,7 @@ from torch.nn import BatchNorm2d, Conv2d, Dropout2d from ..layers import ActivationFactory as AF +from .resnet_blocks import FreqPosEnc from .se_blocks import CFwSEBlock2d, FwSEBlock2d, SEBlock2d, TSEBlock2d @@ -32,7 +33,6 @@ def _conv1x1(in_channels, out_channels, stride=1, bias=False): def _make_downsample(in_channels, out_channels, stride, norm_layer, norm_before): - if norm_before: return nn.Sequential( _conv1x1(in_channels, out_channels, stride, bias=False), @@ -61,8 +61,10 @@ class Res2NetBasicBlock(nn.Module): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. se_r: squeeze-excitation compression ratio. - time_se: If true, squeeze is done only in time dimension. - num_feats: Number of features in dimension 2, needed if time_se=True. + se_type: type of squeeze excitation in [t-se, cw-se, fw-se, cfw-se] + freq_pos_enc: use frequency wise positional encoder + num_feats: Number of features in dimension 2, needed if se_type!=cw-se or freq_pos_enc=True. 
+ time_se: (legacy deprecated) If true, use t-se """ expansion = 1 @@ -82,10 +84,10 @@ def __init__( self, norm_before=True, se_r=None, se_type="cw-se", - time_se=False, + freq_pos_enc=False, num_feats=None, + time_se=False, ): - super().__init__() self.in_channels = in_channels @@ -148,9 +150,13 @@ def __init__( self.context = dilation self.downsample_factor = stride + self.pos_enc = None + if freq_pos_enc: + self.pos_enc = FreqPosEnc(num_feats) + if se_r is not None: if time_se: - se_type = "cw-se" + se_type = "t-se" if se_type == "t-se": self.se_layer = TSEBlock2d(channels, num_feats, se_r, activation) @@ -182,6 +188,9 @@ def forward(self, x, x_mask=None): if self.downsample is not None: residual = self.downsample(residual) + if self.pos_enc is not None: + x = self.pos_enc(x) + split_size = [self.width_in for i in range(self.scale - 1)] split_size.append(self.in_channels % self.width_in + self.width_in) split_x = torch.split(x, split_size, 1) @@ -247,8 +256,10 @@ class Res2NetBNBlock(nn.Module): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. se_r: squeeze-excitation compression ratio. - time_se: If true, squeeze is done only in time dimension. - num_feats: Number of features in dimension 2, needed if time_se=True. + se_type: type of squeeze excitation in [t-se, cw-se, fw-se, cfw-se] + freq_pos_enc: use frequency wise positional encoder + num_feats: Number of features in dimension 2, needed if se_type!=cw-se or freq_pos_enc=True. + time_se: (legacy deprecated) If true, use t-se """ expansion = 4 @@ -268,10 +279,10 @@ def __init__( self, norm_before=True, se_r=None, se_type="cw-se", - time_se=False, + freq_pos_enc=False, num_feats=None, + time_se=False, ): - super().__init__() self.in_channels = in_channels @@ -329,6 +340,10 @@ def __init__( self.context = dilation self.downsample_factor = stride + self.pos_enc = None + if freq_pos_enc: + self.pos_enc = FreqPosEnc(num_feats) + if se_r is not None: if time_se: se_type = "t-se" @@ -364,6 +379,9 @@ def forward(self, x, x_mask=None): if self.downsample is not None: residual = self.downsample(residual) + if self.pos_enc is not None: + x = self.pos_enc(x) + x = self.conv1(x) if self.norm_before: x = self.bn1(x) diff --git a/hyperion/torch/layer_blocks/resnet_blocks.py b/hyperion/torch/layer_blocks/resnet_blocks.py index c077a54b..428d8139 100644 --- a/hyperion/torch/layer_blocks/resnet_blocks.py +++ b/hyperion/torch/layer_blocks/resnet_blocks.py @@ -2,7 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - +import torch import torch.nn as nn import torch.nn.functional as nnf from torch.nn import BatchNorm2d, Conv2d, Dropout2d @@ -30,7 +30,6 @@ def _conv1x1(in_channels, out_channels, stride=1, bias=False): def _make_downsample(in_channels, out_channels, stride, norm_layer, norm_before): - if norm_before: return nn.Sequential( _conv1x1(in_channels, out_channels, stride, bias=False), @@ -40,6 +39,15 @@ def _make_downsample(in_channels, out_channels, stride, norm_layer, norm_before) return _conv1x1(in_channels, out_channels, stride, bias=True) +class FreqPosEnc(nn.Module): + def __init__(self, num_feats): + super().__init__() + self.pos_enc = nn.Parameter(torch.zeros((num_feats, 1))) + + def forward(self, x): + return x + self.pos_enc + + class ResNetInputBlock(nn.Module): """Input block for ResNet architecture @@ -67,7 +74,6 @@ def __init__( self, norm_before=True, do_maxpool=True, ): -
super().__init__() padding = int((kernel_size - 1) / 2) @@ -96,7 +102,6 @@ def __init__( self.downsample_factor *= 2 def forward(self, x): - x = self.conv(x) if self.norm_before: x = self.bn(x) @@ -125,6 +130,9 @@ class ResNetBasicBlock(nn.Module): dilation: dilation factor of the conv. kernels. norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. + freq_pos_enc: use frequency wise positional encoder + num_feats: Number of features in dimension 2, needed if freq_pos_enc=True. + """ expansion = 1 @@ -140,8 +148,9 @@ def __init__( dilation=1, norm_layer=None, norm_before=True, + freq_pos_enc=False, + num_feats=None, ): - super().__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d @@ -174,6 +183,9 @@ def __init__( self.context = dilation + stride self.downsample_factor = stride + self.pos_enc = None + if freq_pos_enc: + self.pos_enc = FreqPosEnc(num_feats) @property def out_channels(self): @@ -193,6 +205,9 @@ def forward(self, x, x_mask=None): if self.downsample is not None: residual = self.downsample(residual) + if self.pos_enc is not None: + x = self.pos_enc(x) + x = self.conv1(x) if self.norm_before: x = self.bn1(x) @@ -232,6 +247,8 @@ class ResNetBNBlock(nn.Module): dilation: dilation factor of the conv. kernels. norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. + freq_pos_enc: use frequency wise positional encoder + num_feats: Number of features in dimension 2, needed if freq_pos_enc=True. """ expansion = 4 @@ -248,8 +265,9 @@ def __init__( dilation=1, norm_layer=None, norm_before=True, + freq_pos_enc=False, + num_feats=None, ): - super().__init__() self.in_channels = in_channels @@ -286,6 +304,9 @@ def __init__( self.context = dilation self.downsample_factor = stride + self.pos_enc = None + if freq_pos_enc: + self.pos_enc = FreqPosEnc(num_feats) @property def out_channels(self): @@ -305,6 +326,9 @@ def forward(self, x, x_mask=None): if self.downsample is not None: residual = self.downsample(residual) + if self.pos_enc is not None: + x = self.pos_enc(x) + x = self.conv1(x) if self.norm_before: x = self.bn1(x) @@ -369,7 +393,6 @@ def __init__( norm_layer=None, norm_before=True, ): - super().__init__() if norm_layer is None: diff --git a/hyperion/torch/layer_blocks/seresnet_blocks.py b/hyperion/torch/layer_blocks/seresnet_blocks.py index b13a7ff3..9c25055b 100644 --- a/hyperion/torch/layer_blocks/seresnet_blocks.py +++ b/hyperion/torch/layer_blocks/seresnet_blocks.py @@ -26,8 +26,10 @@ class SEResNetBasicBlock(ResNetBasicBlock): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. se_r: squeeze-excitation compression ratio. - time_se: If true, squeeze is done only in time dimension. + se_type: type of squeeze excitation in [t-se, cw-se, fw-se, cfw-se] + freq_pos_enc: use frequency wise positional encoder. num_feats: Number of features in dimension 2, needed if time_se=True. 
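The `FreqPosEnc` module introduced above (note the `super().__init__()` call, without which registering the `nn.Parameter` would raise an `AttributeError`) learns one offset per frequency bin. Because the parameter has shape `(num_feats, 1)`, adding it to a `(batch, channels, freq, time)` feature map broadcasts over batch, channels, and time; a quick shape check:

```python
import torch

num_feats, batch, channels, time = 80, 4, 64, 200
pos_enc = torch.zeros(num_feats, 1)        # as in FreqPosEnc.__init__
x = torch.randn(batch, channels, num_feats, time)
print((x + pos_enc).shape)                 # torch.Size([4, 64, 80, 200])
```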
+ time_se: (legacy deprecated) If true, use t-se """ def __init__( @@ -43,10 +45,10 @@ def __init__( norm_before=True, se_r=16, se_type="cw-se", - time_se=False, + freq_pos_enc=False, num_feats=None, + time_se=False, ): - super().__init__( in_channels, channels, @@ -57,6 +59,8 @@ def __init__( dilation=dilation, norm_layer=norm_layer, norm_before=norm_before, + freq_pos_enc=freq_pos_enc, + num_feats=num_feats, ) if time_se: @@ -84,6 +88,9 @@ def forward(self, x, x_mask=None): """ residual = x + if self.pos_enc is not None: + x = self.pos_enc(x) + x = self.conv1(x) if self.norm_before: x = self.bn1(x) @@ -129,8 +136,10 @@ class SEResNetBNBlock(ResNetBNBlock): norm_layer: normalization layer constructor, if None BatchNorm2d is used. norm_before: if True, normalization layer is before the activation, after otherwise. se_r=None: squeeze-excitation compression ratio. - time_se: If true, squeeze is done only in time dimension. + se_type: type of squeeze excitation in [t-se, cw-se, fw-se, cfw-se] + freq_pos_enc: use frequency wise positional encoder. num_feats: Number of features in dimension 2, needed if time_se=True. + time_se: (legacy deprecated) If true, use t-se """ def __init__( @@ -146,10 +155,10 @@ def __init__( norm_before=True, se_r=16, se_type="cw-se", - time_se=False, + freq_pos_enc=False, num_feats=None, + time_se=False, ): - super().__init__( in_channels, channels, @@ -160,6 +169,8 @@ def __init__( dilation=dilation, norm_layer=norm_layer, norm_before=norm_before, + freq_pos_enc=freq_pos_enc, + num_feats=num_feats, ) if time_se: @@ -190,6 +201,9 @@ def forward(self, x, x_mask=None): if self.downsample is not None: residual = self.downsample(residual) + if self.pos_enc is not None: + x = self.pos_enc(x) + x = self.conv1(x) if self.norm_before: x = self.bn1(x) diff --git a/hyperion/torch/narchs/resnet.py b/hyperion/torch/narchs/resnet.py index 5d3b9793..7abe4e54 100644 --- a/hyperion/torch/narchs/resnet.py +++ b/hyperion/torch/narchs/resnet.py @@ -5,7 +5,6 @@ import logging import numpy as np - import torch import torch.nn as nn from torch.nn import BatchNorm1d, Conv1d, Linear @@ -65,6 +64,7 @@ class ResNet(NetArch): required when time_se=True to calculcate the size of the squeeze excitation matrices. res2net_scale: Res2Net scale parameter res2net_width_factor: Res2Net multiplier for the width of the bottlneck layers. 
+ freq_pos_enc: use frequency wise positional encoder """ def __init__( @@ -96,8 +96,8 @@ def __init__( res2net_width_factor=1, resb_channels=None, time_se=False, + freq_pos_enc=False, ): - super().__init__() logging.info("{}".format(locals())) self.block = block @@ -128,6 +128,8 @@ def __init__( else: self._block = block + assert not self.has_se and not freq_pos_enc or in_feats is not None + self.num_layers = num_layers self.in_channels = in_channels self.conv_channels = conv_channels @@ -154,6 +156,7 @@ def __init__( self.multilevel = multilevel self.endpoint_channels = endpoint_channels + self.freq_pos_enc = freq_pos_enc self.norm_layer = norm_layer norm_groups = None @@ -195,7 +198,7 @@ def __init__( self._downsample_factor = self.in_block.downsample_factor if resb_channels is None: - resb_channels = [base_channels * (2 ** i) for i in range(4)] + resb_channels = [base_channels * (2**i) for i in range(4)] self.cur_in_channels = conv_channels self.layer1 = self._make_layer(self._block, resb_channels[0], num_layers[0]) @@ -308,6 +311,11 @@ def _make_layer(self, block, channels, num_blocks, stride=1, dilate=False): "num_feats": num_feats, } + if self.freq_pos_enc: + kwargs["freq_pos_enc"] = True + num_feats = int(self.in_feats / (self._downsample_factor * stride)) + kwargs["num_feats"] = num_feats + if self.is_res2net: kwargs["scale"] = self.res2net_scale kwargs["width_factor"] = self.res2net_width_factor @@ -595,6 +603,7 @@ def get_config(self): "res2net_scale": self.res2net_scale, "res2net_width_factor": self.res2net_width_factor, "resb_channels": self.resb_channels, + "freq_pos_enc": self.freq_pos_enc, } base_config = super().get_config() @@ -1106,6 +1115,7 @@ def __init__(self, in_channels, **kwargs): #################### Res2Net variants ######################## + # Standard Res2Nets class Res2Net18(ResNet): def __init__(self, in_channels, **kwargs): diff --git a/hyperion/torch/narchs/resnet_factory.py b/hyperion/torch/narchs/resnet_factory.py index 35ed9af0..ba9d21a5 100644 --- a/hyperion/torch/narchs/resnet_factory.py +++ b/hyperion/torch/narchs/resnet_factory.py @@ -162,8 +162,8 @@ def create( in_feats=None, res2net_scale=4, res2net_width_factor=1, + freq_pos_enc=False, ): - try: resnet_class = resnet_dict[resnet_type] except: @@ -190,15 +190,12 @@ def create( in_feats=in_feats, res2net_scale=res2net_scale, res2net_width_factor=res2net_width_factor, + freq_pos_enc=freq_pos_enc, ) return resnet def filter_args(**kwargs): - # if "norm_after" in kwargs: - # kwargs["norm_before"] = not kwargs["norm_after"] - # del kwargs["norm_after"] - if "no_maxpool" in kwargs: kwargs["do_maxpool"] = not kwargs["no_maxpool"] del kwargs["no_maxpool"] @@ -224,6 +221,7 @@ def filter_args(**kwargs): "se_r", "res2net_scale", "res2net_width_factor", + "freq_pos_enc", ) args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) @@ -319,9 +317,6 @@ def add_class_args(parser, prefix=None): help="Zero-initialize the last BN in each residual branch", ) - # parser.add_argument('--replace-stride-with-dilation', default=None, nargs='+', type=bool, - # help='replaces strides with dilations to increase context without downsampling') - parser.add_argument( "--se-r", default=16, @@ -353,12 +348,6 @@ def add_class_args(parser, prefix=None): help="batch normalizaton before activation", ) - # parser.add_argument( - # "--norm-after", - # default=False, - # action="store_true", - # help="batch normalizaton after activation", - # ) except: pass @@ -367,6 +356,13 @@ def add_class_args(parser, prefix=None): except: pass + 
parser.add_argument( + "--freq-pos-enc", + default=False, + action=ActionYesNo, + help="use frequency wise positional encoder", + ) + if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index a9000f38..666c9a9d 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -97,7 +97,7 @@ def train_epoch(self, data_loader): Args: data_loader: pytorch data loader returning features and class labels. """ - batch_keys = [self.input_key, self.target_key] + # batch_keys = [self.input_key, self.target_key] self.model.update_loss_margin(self.cur_epoch) metric_acc = MetricAcc(device=self.device) @@ -112,6 +112,7 @@ def train_epoch(self, data_loader): input_keys = self.get_augs_keys(data, self.input_key) loss_scale = self.grad_acc_steps * len(input_keys) + loss_acc = 0.0 for aug_key in input_keys: batch_keys = [aug_key, self.target_key] x, target = tensors_subset(data, batch_keys, self.device) @@ -119,6 +120,7 @@ with amp.autocast(enabled=self.use_amp): output = self.model(x, y=target) loss = self.loss(output, target) / loss_scale + loss_acc += loss.item() if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -130,7 +132,7 @@ self.update_model() self.save_checkpoint(partial=True) - batch_metrics["loss"] = loss.item() * loss_scale + batch_metrics["loss"] = loss_acc * self.grad_acc_steps for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) @@ -146,3 +148,44 @@ logs.update(lrs) logs.update(self._get_wds()) return logs + + def validation_epoch(self, data_loader, swa_update_bn=False): + """Validation epoch loop + + Args: + data_loader: PyTorch data loader returning input/output pairs. + swa_update_bn: whether or not to update batch-norm layers in SWA.
+ """ + # batch_keys = [self.input_key, self.target_key] + metric_acc = MetricAcc(self.device) + batch_metrics = ODict() + with torch.no_grad(): + if swa_update_bn: + log_tag = "train_" + self.model.train() + else: + log_tag = "val_" + self.model.eval() + + for batch, data in enumerate(data_loader): + input_keys = self.get_augs_keys(data, self.input_key) + loss_scale = len(input_keys) + loss_acc = 0.0 + for aug_key in input_keys: + batch_keys = [aug_key, self.target_key] + x, target = tensors_subset(data, batch_keys, self.device) + batch_size = x.size(0) + with amp.autocast(enabled=self.use_amp): + output = self.model(x) + loss = self.loss(output, target) / loss_scale + loss_acc += loss.item() + + batch_metrics["loss"] = loss_acc + for k, metric in self.metrics.items(): + batch_metrics[k] = metric(output, target) + + metric_acc.update(batch_metrics, batch_size) + + logs = metric_acc.metrics + logs = ODict((log_tag + k, v) for k, v in logs.items()) + return logs From ecdc31946552cfb0c755d18c91eadf81338a54ea Mon Sep 17 00:00:00 2001 From: System User Date: Thu, 18 Jan 2024 19:14:16 -0500 Subject: [PATCH 128/154] added some vox confs --- ...rain_ecapatdnn2048x4_xvec_stage1_v3.1.yaml | 101 ++++++++++++++++++ ...rain_ecapatdnn2048x4_xvec_stage2_v3.1.yaml | 74 +++++++++++++ .../train_fwseresnet34_xvec_stage1_v3.1.yaml | 76 +++++++++++++ .../train_fwseresnet34_xvec_stage2_v3.1.yaml | 73 +++++++++++++ ...rain_idrnd_resnet100_xvec_stage1_v3.1.yaml | 77 +++++++++++++ ...rain_idrnd_resnet100_xvec_stage2_v3.1.yaml | 74 +++++++++++++ .../conf/train_resnet34_xvec_stage1_v3.1.yaml | 75 +++++++++++++ .../conf/train_resnet34_xvec_stage2_v3.1.yaml | 73 +++++++++++++ ...onfig_fbank80_stmn_ecapatdnn2048x4.v3.1.sh | 44 ++++++++ .../config_fbank80_stmn_fwseresnet34.v3.1.sh | 44 ++++++++ ...onfig_fbank80_stmn_idrnd_resnet100.v3.1.sh | 44 ++++++++ .../config_fbank80_stmn_resnet34.v3.1.sh | 44 ++++++++ 12 files changed, 799 insertions(+) create mode 100644 egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.1.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.1.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.1.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.1.sh diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml new file mode 100644 index 00000000..b7fab34b --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml @@ -0,0 +1,101 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + num_augs: 4 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + 
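The `validation_epoch` added in the `xvector_trainer.py` hunk above mirrors `train_epoch`'s multi-view bookkeeping: each augmented view's loss is divided by the number of views before being accumulated, so `loss_acc` ends up as the mean per-view loss. In `train_epoch` the divisor additionally includes `grad_acc_steps` so that `backward()` accumulates a correctly scaled gradient, which is why the logged value is rescaled by `grad_acc_steps`. A toy check of that arithmetic:

```python
# Pure-arithmetic check of the loss bookkeeping above; values are hypothetical.
grad_acc_steps, per_view_losses = 4, [0.9, 1.1]

loss_scale = grad_acc_steps * len(per_view_losses)
loss_acc = sum(l / loss_scale for l in per_view_losses)
logged = loss_acc * grad_acc_steps       # what train_epoch reports
print(logged)                            # 1.0 == mean per-view loss
```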
num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 2048 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + - 1 + resb_channels: + - 2048 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + - 5 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 4096 + norm_before: false + dropout_rate: 0.2 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.2 + norm_before: false +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + #min_lr: 1.0e-05 + min_lr: 1.0e-06 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.1.yaml new file mode 100644 index 00000000..e147dbb3 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_ecapatdnn2048x4_xvec_stage2_v3.1.yaml @@ -0,0 +1,74 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + resnet_enc: + override_dropouts: true + dropout_rate: 0.25 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.1.yaml new file mode 100644 index 00000000..ca15bbba --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage1_v3.1.yaml @@ -0,0 +1,76 @@ +data: + 
train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + num_augs: 4 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: fwseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 4 +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.1.yaml new file mode 100644 index 00000000..ac859010 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_fwseresnet34_xvec_stage2_v3.1.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.1.yaml new file mode 100644 index 00000000..03897a19 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage1_v3.1.yaml @@ -0,0 +1,77 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - 
conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + num_augs: 4 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 32 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: fwseidrndresnet100 + in_channels: 1 + in_feats: 80 + conv_channels: 128 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.05 + se_r: 4 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 30 + eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.1.yaml new file mode 100644 index 00000000..3b8d716a --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.1.yaml @@ -0,0 +1,74 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + target_key: speaker + \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.1.yaml new file mode 100644 index 00000000..0ec78598 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage1_v3.1.yaml @@ -0,0 +1,75 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + 
num_augs: 4 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: resnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.1.yaml new file mode 100644 index 00000000..ac859010 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_resnet34_xvec_stage2_v3.1.yaml @@ -0,0 +1,73 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.0 +trainer: + optim: + opt_type: sgd + lr: 1e-3 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 15 + eff_batch_size: 256 + swa_start: 10 + swa_lr: 1e-4 + swa_anneal_epochs: 2 + target_key: speaker \ No newline at end of file diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.1.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.1.sh new file mode 100644 index 00000000..5a9b6028 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_ecapatdnn2048x4.v3.1.sh @@ -0,0 +1,44 @@ +# ECAPA-TDNN large + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg 
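+# nnet_type selects the model family: this ECAPA-TDNN is built from
+# hyperion's 1d-ResNet (resnet1d) x-vector class; the stage1/stage2 yamls
+# below are assumed to be consumed by the recipe's numbered training stages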
+nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn2048x4.v3.1 + +nnet_s1_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage1_v3.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_ecapatdnn2048x4_xvec_stage2_v3.1.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.1.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.1.sh new file mode 100644 index 00000000..12b86dd1 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.1.sh @@ -0,0 +1,44 @@ +# Freq-wise-SE ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_fwseresnet34.v3.1 + +nnet_s1_base_cfg=conf/train_fwseresnet34_xvec_stage1_v3.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_fwseresnet34_xvec_stage2_v3.1.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=false #true +do_qmf=false #true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.1.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.1.sh new file mode 100644 index 00000000..f06bcbea --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.1.sh @@ -0,0 +1,44 @@ +# IdRnd ResNet100 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_idrnd_resnet100.v3.1 + +nnet_s1_base_cfg=conf/train_idrnd_resnet100_xvec_stage1_v3.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0029.pth + +nnet_s2_base_cfg=conf/train_idrnd_resnet100_xvec_stage2_v3.1.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.1.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.1.sh new file mode 100644 index 00000000..e954b63d 
--- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_resnet34.v3.1.sh @@ -0,0 +1,44 @@ +# ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_resnet34.v3.1 + +nnet_s1_base_cfg=conf/train_resnet34_xvec_stage1_v3.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_resnet34_xvec_stage2_v3.1.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + From c26d17d9d72e35e16c5e6d1b2334d48aeb463c96 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Thu, 18 Jan 2024 19:16:55 -0500 Subject: [PATCH 129/154] pos enc in resnet xvector --- hyperion/torch/models/xvectors/resnet_xvector.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hyperion/torch/models/xvectors/resnet_xvector.py b/hyperion/torch/models/xvectors/resnet_xvector.py index efc24f27..a639bdb8 100644 --- a/hyperion/torch/models/xvectors/resnet_xvector.py +++ b/hyperion/torch/models/xvectors/resnet_xvector.py @@ -57,6 +57,7 @@ def __init__( se_r=16, res2net_scale=4, res2net_width_factor=1, + freq_pos_enc=False, bias_weight_decay=None, ): logging.info("making %s encoder network", resnet_type) @@ -80,6 +81,7 @@ def __init__( in_feats=in_feats, res2net_scale=res2net_scale, res2net_width_factor=res2net_width_factor, + freq_pos_enc=freq_pos_enc, ) super().__init__( @@ -168,6 +170,10 @@ def res2net_scale(self): def res2net_width_factor(self): return self.encoder_net.res2net_width_factor + @property + def freq_pos_enc(self): + return self.encoder_net.freq_pos_enc + def get_config(self): base_config = super().get_config() del base_config["encoder_cfg"] @@ -186,6 +192,7 @@ def get_config(self): "se_r": self.se_r, "res2net_scale": self.res2net_scale, "res2net_width_factor": self.res2net_width_factor, + "freq_pos_enc": self.freq_pos_enc, } config.update(base_config) From d99189e22d19b8561961a9376818b8509f2cd5b3 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 1 Mar 2024 19:03:23 -0500 Subject: [PATCH 130/154] saving progress in dino --- .../conf/train_ecapatdnn512x3_dino_v1.1.yaml | 115 ++++++++++++++++++ .../conf/train_lresnet34_dino_v1.0.yaml | 92 ++++++++++++++ .../conf/train_lresnet34_dino_v1.1.yaml | 92 ++++++++++++++ .../config_fbank80_stmn_lresnet34.v1.0.sh | 52 ++++++++ .../config_fbank80_stmn_lresnet34.v1.1.sh | 52 ++++++++ ...un_006_extract_dino_embeds_cluster_eval.sh | 54 ++++---- egs/voxceleb/v1.2/README.md | 65 +++++++++- hyperion/bin/cluster_embeddings.py | 11 ++ hyperion/torch/layers/global_pool.py | 36 +++--- .../torch/trainers/dino_xvector_trainer.py | 29 +++-- hyperion/torch/trainers/torch_trainer.py | 105 +++++----------- 11 files changed, 566 insertions(+), 137 deletions(-) create mode 100644 egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_dino_v1.1.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.0.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.1.yaml create mode 100644 
egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.0.sh create mode 100644 egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.1.sh diff --git a/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_dino_v1.1.yaml b/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_dino_v1.1.yaml new file mode 100644 index 00000000..5dec90f3 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_dino_v1.1.yaml @@ -0,0 +1,115 @@ +data: + train: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 + val: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 +student_model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + dropout_rate: 0.002 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + dropout_rate: 0.0 + norm_before: false + hid_act: swish + head_type: dino + embed_dim: 192 + num_embed_layers: 3 + loss_type: softmax + head_use_norm: true + head_hid_dim: 768 + head_bottleneck_dim: 192 + proj_head_use_norm: true + proj_head_norm_before: false +teacher_model: + xvector: + override_dropouts: true + dropout_rate: 0.0 +dino_loss: + num_classes: 65536 + temp_warmup_epochs: 0 + teacher_temp: 0.04 +trainer: + optim: + opt_type: adamw + lr: 0.005 + amsgrad: false + beta1: 0.9 + beta2: 0.99 + weight_decay: 1e-1 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 15000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + teacher_optim: + init_momentum: 0.996 + momentum: 1.0 + warmup_steps: 500000 + grad_clip: 25 + use_amp: false + log_interval: 1000 + epochs: 140 + eff_batch_size: 256 + train_mode: full + freeze_output_layer_steps: 1500 diff --git a/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.0.yaml b/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.0.yaml new file mode 100644 index 00000000..cb82c539 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.0.yaml @@ -0,0 +1,92 @@ +data: + train: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. 
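+      # v1.0 reuses the same chunks for teacher and student
+      # (same_teacher_student_chunks: true below) and draws shorter 4-8 s
+      # segments; v1.1 decouples the crops and lengthens segments to 6-12 s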
+ num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: true + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 8.0 + min_chunk_length: 4.0 + data_loader: + num_workers: 8 + val: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: true + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 8.0 + min_chunk_length: 4.0 + data_loader: + num_workers: 8 +student_model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: lresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + dropout_rate: 0.01 + norm_before: false + hid_act: swish + head_type: dino + embed_dim: 192 + num_embed_layers: 3 + loss_type: softmax + head_use_norm: true + head_hid_dim: 768 + head_bottleneck_dim: 192 + proj_head_use_norm: true + proj_head_norm_before: false +teacher_model: + xvector: + override_dropouts: true + dropout_rate: 0.0 +dino_loss: + num_classes: 65536 + temp_warmup_epochs: 0 + teacher_temp: 0.04 +trainer: + optim: + opt_type: adamw + lr: 0.0025 + amsgrad: false + beta1: 0.9 + beta2: 0.99 + weight_decay: 1e-1 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 15000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + teacher_optim: + init_momentum: 0.996 + momentum: 1.0 + warmup_steps: 500000 + grad_clip: 25 + use_amp: true + log_interval: 1000 + epochs: 60 + eff_batch_size: 256 + train_mode: full + freeze_output_layer_steps: 1500 diff --git a/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.1.yaml b/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.1.yaml new file mode 100644 index 00000000..ac185913 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.1.yaml @@ -0,0 +1,92 @@ +data: + train: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 + val: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. 
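+      # DINO multi-crop: 2 long (4 s) teacher chunks and 4 short (2 s) student
+      # chunks per example, assumed to be cut from the same 6-12 s segment
+      # drawn by the sampler below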
+ num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 +student_model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: lresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + dropout_rate: 0.01 + norm_before: false + hid_act: swish + head_type: dino + embed_dim: 192 + num_embed_layers: 3 + loss_type: softmax + head_use_norm: true + head_hid_dim: 768 + head_bottleneck_dim: 192 + proj_head_use_norm: true + proj_head_norm_before: false +teacher_model: + xvector: + override_dropouts: true + dropout_rate: 0.0 +dino_loss: + num_classes: 65536 + temp_warmup_epochs: 0 + teacher_temp: 0.04 +trainer: + optim: + opt_type: adamw + lr: 0.0025 + amsgrad: false + beta1: 0.9 + beta2: 0.99 + weight_decay: 1e-1 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 15000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + teacher_optim: + init_momentum: 0.996 + momentum: 1.0 + warmup_steps: 500000 + grad_clip: 25 + use_amp: true + log_interval: 1000 + epochs: 100 + eff_batch_size: 256 + train_mode: full + freeze_output_layer_steps: 1500 diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.0.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.0.sh new file mode 100644 index 00000000..0a621148 --- /dev/null +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.0.sh @@ -0,0 +1,52 @@ +# ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34_dino.v1.0 + +nnet_s1_base_cfg=conf/train_lresnet34_dino_v1.0.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/teacher_model_ep0060.pth + +nnet_s2_base_cfg=conf/train_resnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# clustering +cluster_method=cos_ahc +cluster_name=${cluster_method}_1 +cluster_cfg=conf/ahc.yaml + +# plda +plda_cfg=conf/plda.yaml + +# back-end +do_plda=false +# do_snorm=true +# do_qmf=true +# do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.1.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.1.sh new file mode 100644 index 00000000..752f7048 --- /dev/null +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.1.sh @@ -0,0 +1,52 @@ +# ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34_dino.v1.1 + +nnet_s1_base_cfg=conf/train_lresnet34_dino_v1.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/teacher_model_ep0080.pth + 
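+# s1 is the teacher (EMA) checkpoint, which is assumed to provide the
+# embeddings for the cos_ahc clustering defined below; s2 fine-tunes with the
+# supervised stage-2 recipe, presumably using the cluster labels as targets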
+nnet_s2_base_cfg=conf/train_resnet34_xvec_stage2_v3.0.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# clustering +cluster_method=cos_ahc +cluster_name=${cluster_method}_1 +cluster_cfg=conf/ahc.yaml + +# plda +plda_cfg=conf/plda.yaml + +# back-end +do_plda=false +# do_snorm=true +# do_qmf=true +# do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh b/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh index 8973483c..d848b466 100755 --- a/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh +++ b/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh @@ -7,7 +7,7 @@ . ./path.sh set -e -stage=2 +stage=1 nnet_stage=1 config_file=default_config.sh use_gpu=false @@ -38,31 +38,6 @@ score_cosine_dir=$score_dir/cosine score_plda_dir=$score_dir/${cluster_name}_plda if [ $stage -le 1 ]; then - # Extract xvectors for training LDA/PLDA - nj=100 - for name in voxceleb2cat_train - do - if [ -n "$vad_config" ];then - vad_args="--vad csv:data/$name/vad.csv" - fi - output_dir=$xvector_dir/$name - echo "Extracting x-vectors for $name" - $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ - hyp_utils/conda_env.sh --num-gpus $num_gpus \ - hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ - --part-idx JOB --num-parts $nj \ - --recordings-file data/$name/recordings.csv \ - --random-utt-length --min-utt-length 30 --max-utt-length 30 \ - --model-path $nnet \ - --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv - hyperion-tables cat \ - --table-type features \ - --output-file $output_dir/xvector.csv --num-tables $nj - - done -fi - -if [ $stage -le 2 ]; then # Extracts x-vectors for evaluation nj=100 if [ "$do_voxsrc22" == "true" ];then @@ -91,7 +66,7 @@ if [ $stage -le 2 ]; then done fi -if [ $stage -le 3 ];then +if [ $stage -le 2 ];then echo "Eval Voxceleb 1 with Cosine scoring" num_parts=8 @@ -127,6 +102,31 @@ if [ $stage -le 3 ];then exit fi +if [ $stage -le 3 ]; then + # Extract xvectors for training LDA/PLDA + nj=100 + for name in voxceleb2cat_train + do + if [ -n "$vad_config" ];then + vad_args="--vad csv:data/$name/vad.csv" + fi + output_dir=$xvector_dir/$name + echo "Extracting x-vectors for $name" + $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ + hyp_utils/conda_env.sh --num-gpus $num_gpus \ + hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ + --part-idx JOB --num-parts $nj \ + --recordings-file data/$name/recordings.csv \ + --random-utt-length --min-utt-length 30 --max-utt-length 30 \ + --model-path $nnet \ + --output-spec ark,csv:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.csv + hyperion-tables cat \ + --table-type features \ + --output-file $output_dir/xvector.csv --num-tables $nj + + done +fi + cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name if [ $stage -le 4 ];then diff --git a/egs/voxceleb/v1.2/README.md b/egs/voxceleb/v1.2/README.md index 6a2502e6..e1199a3b 100644 --- a/egs/voxceleb/v1.2/README.md +++ b/egs/voxceleb/v1.2/README.md @@ -85,10 +85,8 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr - `run_007_eval_be.sh` - Trains PLDA and evals PLDA and cosine scoring back-ends - ## 
Results - ### VoxCeleb 1 Original-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.68 | 0.052 | 0.088 | | | | | Cosine + AS-Norm | 0.63 | 0.049 | 0.083 | | | | | Cosine + QMF | 0.57 | 0.037 | 0.071 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.1.sh | ECAPA-TDNN 2048x4 | Stage2: Subcenter ArcFace m=0.3/intertop_m=0.1/centers=2 Dropout=0.25 | Cosine | 0.62 | 0.049 | 0.076 | +| | | | Cosine + AS-Norm | 0.61 | 0.044 | 0.075 | +| | | | Cosine + QMF | 0.53 | 0.037 | 0.076 | | config_fbank80_stmn_lresnet34.v3.1.sh | Thin-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.59 | 0.1 | 0.172 | | | | | Cosine + AS-Norm | 1.54 | 0.927 | 0.140 | | | | | Cosine + QMF | 1.32 | 0.083 | 0.121 | | config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.77 | 0.048 | 0.071 | | | | | Cosine + AS-Norm | 0.70 | 0.039 | 0.048 | | | | | Cosine + QMF | 0.62 | 0.034 | 0.042 | +| config_fbank80_stmn_resnet34.v3.1.sh | ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.68 | 0.039 | 0.048 | +| | | | Cosine + AS-Norm | 0.60 | 0.036 | 0.052 | +| | | | Cosine + QMF | 0.53 | 0.033 | 0.050 | | config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.76 | 0.048 | 0.071 | | | | | Cosine + AS-Norm | 0.70 | 0.041 | 0.061 | | | | | Cosine + QMF | 0.62 | 0.037 | 0.056 | | config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.77 | 0.048 | 0.077 | | | | | Cosine + AS-Norm | 0.68 | 0.040 | 0.062 | | | | | Cosine + QMF | 0.62 | 0.036 | 0.063 | +| config_fbank80_stmn_fwseresnet34.v3.1.sh | FwSE-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.66 | 0.046 | 0.060 | +| | | | Cosine + AS-Norm | 0.61 | 0.040 | 0.052 | +| | | | Cosine + QMF | 0.57 | 0.037 | 0.058 | +| config_fbank80_stmn_fwseresnet34pe.v3.1.sh | FwSE-ResNet34-FPE | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.73 | 0.042 | 0.053 | +| | | | Cosine + AS-Norm | 0.64 | 0.034 | 0.047 | +| | | | Cosine + QMF | 0.60 | 0.033 | 0.044 | | config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.78 | 0.053 | 0.082 | | | | | Cosine + AS-Norm | 0.70 | 0.043 | 0.076 | | | | | Cosine + QMF | 0.63 | 0.042 | 0.071 | @@ -123,6 +133,9 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.56 | 0.040 | 0.065 | | | | | Cosine + AS-Norm | 0.52 | 0.033 | 0.045 | | | | | Cosine + QMF | 0.45 | 0.027 | 0.043 | +| config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.50 | 0.035 | 0.038 | +| | | | Cosine + AS-Norm | 0.47 | 0.031 | 0.038 | +| | | | Cosine + QMF | 0.40 | 0.027 | 0.032 | | config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.60 | 0.043 | 0.071 | | | | | Cosine + AS-Norm | 0.53 | 0.034 | 0.063 | | | | | Cosine + QMF | 0.49 | 0.033 | 0.054 | @@ -141,18 +154,30 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 0.85 | 0.055 | 0.100 | | | | | Cosine + AS-Norm | 0.80 | 0.050 | 0.087 | | | | | Cosine + QMF | 0.76 | 0.047 | 0.083 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.1.sh | ECAPA-TDNN 2048x4 | Stage2: Subcenter ArcFace m=0.3/intertop_m=0.1/centers=2 Dropout=0.25 | Cosine | 0.83 | 0.052 | 0.096 | +| | | | Cosine + AS-Norm | 0.77 | 0.049 | 0.086 | +| | | | Cosine + QMF | 0.74 | 0.047 | 0.082 | | config_fbank80_stmn_lresnet34.v3.1.sh | Thin-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.69 | 0.103 | 0.174 | | | | | Cosine + AS-Norm | 1.62 | 0.096 | 0.156 | | | | | Cosine + QMF | 1.51 | 0.091 | 0.152 | | config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.86 | 0.054 | 0.098 | | | | | Cosine + AS-Norm | 0.81 | 0.049 | 0.087 | | | | | Cosine + QMF | 0.77 | 0.046 | 0.082 | +| config_fbank80_stmn_resnet34.v3.1.sh | ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.80 | 0.049 | 0.094 | +| | | | Cosine + AS-Norm | 0.76 | 0.046 | 0.081 | +| | | | Cosine + QMF | 0.70 | 0.043 | 0.074 | | config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.89 | 0.058 | 0.098 | | | | | Cosine + AS-Norm | 0.84 | 0.053 | 0.087| | | | | Cosine + QMF | 0.80 | 0.050 | 0.081 | | config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.83 | 0.053 | 0.098 | | | | | Cosine + AS-Norm | 0.78 | 0.047| 0.085 | | | | | Cosine + QMF | 0.74 | 0.045 | 0.081 | +| config_fbank80_stmn_fwseresnet34.v3.1.sh | FwSE-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.80 | 0.051 | 0.090 | +| | | | Cosine + AS-Norm | 0.74 | 0.046 | 0.081 | +| | | | Cosine + QMF | 0.70 | 0.044 | 0.076 | +| config_fbank80_stmn_fwseresnet34pe.v3.1.sh | FwSE-ResNet34-FPE | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.80 | 0.052 | 0.094 | +| | | | Cosine + AS-Norm | 0.76 | 0.047 | 0.081 | +| | | | Cosine + QMF | 0.72 | 0.045 | 0.076 | | config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 0.91 | 0.057 | 0.100 | | | | | Cosine + AS-Norm | 0.85 | 0.052 | 0.089 | | | | | Cosine + QMF | 0.81 | 0.049 | 0.085 | @@ -162,12 +187,14 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.71 | 0.044 | 0.076| | | | | Cosine + AS-Norm | 0.66 | 0.040 | 0.069 | | | | | Cosine + QMF | 0.63 | 0.037 | 0.067 | +| config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.69 | 0.043 | 0.074 | +| | | | Cosine + AS-Norm | 0.65 | 0.039 | 0.068 | +| | | | Cosine + QMF | 0.63 | 0.036 | 0.065 | | config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.75 | 0.047 | 0.077 | | | | | Cosine + AS-Norm | 0.70 | 0.042 | 0.072 | | | | | Cosine + QMF | 0.68 | 0.040 | 0.069 | - ### VoxCeleb 1 Hard-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 1.66 | 0.103 | 0.168 | | | | | Cosine + AS-Norm | 1.53 | 0.091 | 0.151 | | | | | Cosine + QMF | 1.44 | 0.087 | 0.145 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.1.sh | ECAPA-TDNN 2048x4 | Stage2: Subcenter ArcFace m=0.3/intertop_m=0.1/centers=2 Dropout=0.25 | Cosine | 1.65 | 0.101 | 0.169 | +| | | | Cosine + AS-Norm | 1.53 | 0.090 | 0.149 | +| | | | Cosine + QMF | 1.46 | 0.087 | 0.144 | | config_fbank80_stmn_lresnet34.v3.1.sh | Thin-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 2.84 | 0.167 | 0.267 | | | | | Cosine + AS-Norm | 2.58 | 0.150 | 0.252 | | | | | Cosine + QMF | 2.45 | 0.144 | 0.234 | | config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.62 | 0.098 | 0.164 | | | | | Cosine + AS-Norm | 1.45 | 0.085 | 0.142 | | | | | Cosine + QMF | 1.36 | 0.082 | 0.137 | +| config_fbank80_stmn_resnet34.v3.1.sh | ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.56 | 0.091 | 0.157 | +| | | | Cosine + AS-Norm | 1.40 | 0.080 | 0.135 | +| | | | Cosine + QMF | 1.33 | 0.076 | 0.128 | | config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.70 | 0.1 | 0.165 | | | | | Cosine + AS-Norm | 1.50 | 0.086 | 0.138 | | | | | Cosine + QMF | 1.44 | 0.085 | 0.139 | | config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.59 | 0.096 | 0.165 | | | | | Cosine + AS-Norm | 1.41 | 0.083 | 0.143 | | | | | Cosine + QMF | 1.34 | 0.079 | 0.136 | +| config_fbank80_stmn_fwseresnet34.v3.1.sh | FwSE-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.58 | 0.096 | 0.162 | +| | | | Cosine + AS-Norm | 1.43 | 0.083 | 0.140 | +| | | | Cosine + QMF | 1.34 | 0.079 | 0.134 | +| config_fbank80_stmn_fwseresnet34pe.v3.1.sh | FwSE-ResNet34-FPE | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.61 | 0.097 | 0.163 | +| | | | Cosine + AS-Norm | 1.44 | 0.085 | 0.138 | +| | | | Cosine + QMF | 1.37 | 0.080 | 0.132 | | config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 1.75 | 0.104 | 0.171 | | | | | Cosine + AS-Norm | 1.56 | 0.091 | 0.152 | | | | | Cosine + QMF | 1.50 | 0.087 | 0.145 | @@ -202,11 +241,15 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.30 | 0.076 | 0.125 | | | | | Cosine + AS-Norm | 1.15 | 0.066 | 0.109 | | | | | Cosine + QMF | 1.11 | 0.065 | 0.103 | +| config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.36 | 0.077 | 0.122 | +| | | | Cosine + AS-Norm | 1.23 | 0.069 | 0.112 | +| | | | Cosine + QMF | 1.17 | 0.065 | 0.110 | | config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.41 | 0.081 | 0.132 | | | | | Cosine + AS-Norm | 1.28 | 0.071 | 0.116 | | | | | Cosine + QMF | 1.21 | 0.069 | 0.113 | + ### VoxSRC2022 dev | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -220,18 +263,30 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_ecapatdnn2048x4.v3.0.sh | ECAPA-TDNN 2048x4 | Stage2: ArcFace m=0.3/intertop_m=0.1 Dropout=0.25 | Cosine | 2.33 | 0.156 | 0.260 | | | | | Cosine + AS-Norm | 2.19 | 0.144 | 0.263 | | | | | Cosine + QMF | 2.06 | 0.137 | 0.251 | +| config_fbank80_stmn_ecapatdnn2048x4.v3.1.sh | ECAPA-TDNN 2048x4 | Stage2: Subcenter ArcFace m=0.3/intertop_m=0.1/centers=2 Dropout=0.25 | Cosine | 2.34 | 0.152 | 0.275 | +| | | | Cosine + AS-Norm | 2.24 | 0.143 | 0.268 | +| | | | Cosine + QMF | 2.12 | 0.139 | 0.255 | | config_fbank80_stmn_lresnet34.v3.1.sh | Thin-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 3.74 | 0.239 | 0.394 | | | | | Cosine + AS-Norm | 3.45 | 0.225 | 0.377 | | | | | Cosine + QMF | 3.27 | 0.213 | 0.356 | | config_fbank80_stmn_resnet34.v3.0.sh | ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.19 | 0.142 | 0.242 | | | | | Cosine + AS-Norm | 2.00 | 0.133 | 0.254 | | | | | Cosine + QMF | 1.86 | 0.126 | 0.229 | +| config_fbank80_stmn_resnet34.v3.1.sh | ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 2.15 | 0.135 | 0.233 | +| | | | Cosine + AS-Norm | 1.98 | 0.126 | 0.245 | +| | | | Cosine + QMF | 1.86 | 0.119 | 0.222 | | config_fbank80_stmn_cwseresnet34.v3.0.sh | CwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.34 | 0.145 | 0.246 | | | | | Cosine + AS-Norm | 2.10 | 0.135 | 0.248 | | | | | Cosine + QMF | 2.01 | 0.127 | 0.218 | | config_fbank80_stmn_fwseresnet34.v3.0.sh | FwSE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.25 | 0.136 | 0.239 | | | | | Cosine + AS-Norm | 1.99 | 0.127 | 0.232 | | | | | Cosine + QMF | 1.87 | 0.119 | 0.216 | +| config_fbank80_stmn_fwseresnet34.v3.1.sh | FwSE-ResNet34 | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 2.14 | 0.134 | 0.228 | +| | | | Cosine + AS-Norm | 1.97 | 0.124 | 0.223 | +| | | | Cosine + QMF | 1.82 | 0.116 | 0.205 | +| config_fbank80_stmn_fwseresnet34pe.v3.1.sh | FwSE-ResNet34-FPE | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 2.27 | 0.138 | 0.238 | +| | | | Cosine + AS-Norm | 2.08 | 0.129 | 0.223 | +| | | | Cosine + QMF | 1.94 | 0.120 | 0.207 | | config_fbank80_stmn_tseresnet34.v3.0.sh | Time-SE-ResNet34 | Stage2: ArcFace m=0.3/intertop_m=0.1 | Cosine | 2.36 | 0.153 | 0.259 | | | | | Cosine + AS-Norm | 2.18 | 0.139 | 0.249 | | | | | Cosine + QMF | 2.08 | 0.128 | 0.222 | @@ -241,6 +296,10 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.0.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.92 | 0.124 | 0.208 | | | | | Cosine + AS-Norm | 1.71 | 0.109 | 0.212 | | | | | Cosine + QMF | 1.62 | 0.103 | 0.192 | +| config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 2.02 | 0.116 | 0.194 | +| | | | Cosine + AS-Norm | 1.81 | 0.107 | 0.199 | +| | | | Cosine + QMF | 1.72 | 0.099 | 0.186 | | config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.96 | 0.124 | 0.211 | | | | | Cosine + AS-Norm | 1.79 | 0.118 | 0.239 | | | | | Cosine + QMF | 1.68 | 0.114 | 0.216 | + diff --git a/hyperion/bin/cluster_embeddings.py b/hyperion/bin/cluster_embeddings.py index 998b1f17..10b6344e 100644 --- a/hyperion/bin/cluster_embeddings.py +++ b/hyperion/bin/cluster_embeddings.py @@ -9,6 +9,7 @@ import time from pathlib import Path +import matplotlib.pyplot as plt import numpy as np import pandas as pd from jsonargparse import ( @@ -124,6 +125,14 @@ def get_gmm_post(x, y): return p_max, p_2nd +def plot_score_hist(scores, fig_file): + mask = np.triu(np.ones_like(scores, dtype=bool)) + fig = plt.figure() + scores = scores[mask] + plt.hist(scores, bins=100, density=True) + fig.savefig(fig_file) + + def cos_ahc( segments_file, feats_file, @@ -155,6 +164,8 @@ def cos_ahc( x_lowprec = x_km scores = cosine_scoring(x_lowprec, x_lowprec) + fig_file = Path(output_file).parent / "score_hist.png" + plot_score_hist(scores, fig_file) logging.info("running AHC") ahc = AHC(method=linkage_method) diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py index d314490c..aa14f743 100644 --- a/hyperion/torch/layers/global_pool.py +++ b/hyperion/torch/layers/global_pool.py @@ -781,9 +781,9 @@ def forward(self, x, x_lengths=None, weights=None): assert not torch.any( torch.isnan(x_inner) ), f"xinner is nan {torch.sum(torch.isnan(x_inner))} {torch.sum(torch.isnan(x))} {torch.mean(x)} {torch.sum(torch.isinf(x))} {x.size()}" - assert not torch.any( - torch.isinf(x_inner) - ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(x))}" + # assert not torch.any( + # torch.isinf(x_inner) + # ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(x))}" if self.use_global_context: global_mus = self.stats_pool(x, weights=weights) @@ -791,9 +791,9 @@ def forward(self, x, x_lengths=None, weights=None): assert not torch.any( torch.isnan(x_inner) ), f"xinner is nan {torch.sum(torch.isnan(x_inner))} {torch.sum(torch.isnan(global_mus))}" - assert not torch.any( - torch.isinf(x_inner) - ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(global_mus))}" + # assert not torch.any( + # torch.isinf(x_inner) + # ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(global_mus))}" attn = self.conv2( self.activation(self.norm_layer(x_inner)) @@ -814,23 +814,23 @@ def forward(self, x, x_lengths=None, weights=None): if weights is not None: attn = attn * weights - assert not torch.any( - torch.isnan(attn) - ), f"attn is nan {torch.sum(torch.isnan(attn))}" - assert not torch.any( - torch.isinf(attn) - ), f"attn is inf {torch.sum(torch.isinf(attn))}" + # assert not torch.any( + # torch.isnan(attn) + # ), f"attn is nan {torch.sum(torch.isnan(attn))}" + # assert not torch.any( + # torch.isinf(attn) + # ), f"attn is inf {torch.sum(torch.isinf(attn))}" mus = self.stats_pool(x, weights=attn) if self.keepdim: mus = mus.unsqueeze(self.dim) - assert not torch.any( - torch.isnan(mus) - ), f"mus is nan {torch.sum(torch.isnan(mus))}" - assert not torch.any( - torch.isinf(mus) - ), f"mus is inf {torch.sum(torch.isinf(mus))}" + # assert not torch.any( + # torch.isnan(mus) + # ), f"mus is nan 
{torch.sum(torch.isnan(mus))}" + # assert not torch.any( + # torch.isinf(mus) + # ), f"mus is inf {torch.sum(torch.isinf(mus))}" return mus def get_config(self): diff --git a/hyperion/torch/trainers/dino_xvector_trainer.py b/hyperion/torch/trainers/dino_xvector_trainer.py index 26d6a434..16a15304 100644 --- a/hyperion/torch/trainers/dino_xvector_trainer.py +++ b/hyperion/torch/trainers/dino_xvector_trainer.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os from collections import OrderedDict as ODict @@ -15,7 +16,7 @@ from ...utils.misc import filter_func_args from ..optim import ExpMovingAvg as EMA from ..utils import MetricAcc, TorchDDP, tensors_subset -from .torch_trainer import DDPType, TorchTrainer +from .torch_trainer import AMPDType, DDPType, TorchTrainer class DINOXVectorTrainer(TorchTrainer): @@ -39,6 +40,7 @@ class DINOXVectorTrainer(TorchTrainer): loss: if None, it uses cross-entropy train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: float16 | bfloat16 log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -76,6 +78,7 @@ def __init__( train_mode="full", freeze_output_layer_steps=3000, use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, @@ -98,10 +101,7 @@ def __init__( def prepare_models_for_training(self): super().prepare_models_for_training() self.teacher_model, self.teacher_optimizer = self._prepare_model_for_ema( - self.teacher_model, - self.teacher_optim, - self.device, - self.ddp, + self.teacher_model, self.teacher_optim, self.device, self.ddp, ) def _prepare_model_for_ema(self, model, optim, device, ddp): @@ -177,25 +177,25 @@ def train_epoch(self, data_loader): teacher_keys = self.get_augs_keys(data, self.input_key, "teacher") student_keys = self.get_augs_keys(data, self.input_key, "student") - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): with torch.no_grad(): teacher_data = tensors_subset(data, teacher_keys, self.device) batch_size = teacher_data[0].size(0) num_teacher_crops = len(teacher_data) teacher_data = torch.cat(teacher_data, dim=0) teacher_out = self.teacher_model(teacher_data) + assert not torch.any(torch.isnan(teacher_out)), "teacher is nan" + assert not torch.any(torch.isinf(teacher_out)), "teacher is inf" if num_teacher_crops > 1: student_out1 = self.model(teacher_data) + assert not torch.any(torch.isnan(student_out1)), "s1 is nan" + assert not torch.any(torch.isinf(student_out1)), "s1 is inf" student_data = tensors_subset(data, student_keys, self.device) num_student_crops = len(student_data) student_data = torch.cat(student_data, dim=0) student_out2 = self.model(student_data) - assert not torch.any(torch.isnan(teacher_out)), "teacher is nan" - assert not torch.any(torch.isinf(teacher_out)), "teacher is inf" - assert not torch.any(torch.isnan(student_out1)), "s1 is nan" - assert not torch.any(torch.isinf(student_out1)), "s1 is inf" assert not torch.any(torch.isnan(student_out2)), "s2 is nan" assert not torch.any(torch.isinf(student_out2)), "s2 is inf" if num_teacher_crops > 1: @@ -261,7 +261,6 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.loss.eval() if swa_update_bn: - log_tag = "train_" self.model.train() else: log_tag = "val_" @@ -270,20 +269,26 @@ def 
validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): teacher_keys = self.get_augs_keys(data, self.input_key, "teacher") student_keys = self.get_augs_keys(data, self.input_key, "student") - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): teacher_data = tensors_subset(data, teacher_keys, self.device) batch_size = teacher_data[0].size(0) num_teacher_crops = len(teacher_data) teacher_data = torch.cat(teacher_data, dim=0) teacher_out = self.teacher_model(teacher_data) + assert not torch.any(torch.isnan(teacher_out)), "teacher is nan" + assert not torch.any(torch.isinf(teacher_out)), "teacher is inf" if num_teacher_crops > 1: student_out1 = self.model(teacher_data) + assert not torch.any(torch.isnan(student_out1)), "s1 is nan" + assert not torch.any(torch.isinf(student_out1)), "s1 is inf" student_data = tensors_subset(data, student_keys, self.device) num_student_crops = len(student_data) student_data = torch.cat(student_data, dim=0) student_out2 = self.model(student_data) + assert not torch.any(torch.isnan(student_out2)), "s2 is nan" + assert not torch.any(torch.isinf(student_out2)), "s2 is inf" if num_teacher_crops > 1: student_out = torch.cat((student_out1, student_out2), dim=0) num_student_crops += num_teacher_crops diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 36a9a43f..8bbdcb47 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -44,6 +44,23 @@ class DDPType(str, Enum): OSS_SHARDED_DDP = "oss_sharded_ddp" FULLY_SHARDED_DDP = "fully_sharded_ddp" + @staticmethod + def choices(): + return [o.value for o in DDPType] + + +class AMPDType(str, Enum): + FLOAT16 = "float16" + BFLOAT16 = "bfloat16" + + @staticmethod + def choices(): + return [o.value for o in AMPDType] + + @staticmethod + def to_dtype(dtype): + return torch.float16 if dtype == AMPDType.FLOAT16 else torch.bfloat16 + ddp_choices = [o.value for o in DDPType] @@ -67,6 +84,7 @@ class TorchTrainer(object): ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) train_mode: training mode in ['full', 'frozen'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. 
steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -101,6 +119,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, @@ -140,13 +159,13 @@ def __init__( self.device = device self.train_mode = train_mode self.use_amp = use_amp + self.amp_dtype = AMPDType.to_dtype(amp_dtype) self.grad_clip = grad_clip self.grad_clip_norm = grad_clip_norm self.swa_start = swa_start self.do_swa = swa_start > 0 self.swa_lr = swa_lr self.swa_anneal_epochs = swa_anneal_epochs - self.amp_args = {} self.input_key = input_key self.target_key = target_key self.ddp = ddp @@ -164,78 +183,6 @@ def __init__( self.set_train_mode() self.prepare_models_for_training() - # if device is not None: - # self.model.to(device) - # if loss is not None: - # self.loss.to(device) - - # if ddp: - # if ddp_type == DDPType.DDP or ddp_type == DDPType.OSS_DDP: - # self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) - # if self.rank == 0: - # logging.info( - # "training in multiple gpus with distributed-data-parallel" - # ) - # oss = False if ddp_type == DDPType.DDP else True - # self.optimizer = self._make_optimizer(optim, self.model, oss=oss) - # self.model = TorchDDP( - # self.model, - # device_ids=[device], - # output_device=device, - # ) - # elif ddp_type == DDPType.OSS_SHARDED_DDP: - # self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) - # if self.rank == 0: - # logging.info( - # "training in multiple gpus with fair sharded-distributed-data-parallel" - # ) - # self.optimizer = self._make_optimizer(optim, self.model, oss=True) - # self.model = FairShardedDDP(self.model, self.optimizer) - # else: - # if self.rank == 0: - # logging.info( - # "training in multiple gpus with fair fully-sharded-distributed-data-parallel" - # ) - # # syncbathcnorm is not supported here, it raises exception - # self.model = FairFullyShardedDDP( - # self.model, - # mixed_precision=self.use_amp, - # move_params_to_cpu=cpu_offload, - # ) - # self.optimizer = self._make_optimizer(optim, self.model, oss=False) - - # else: - # self.optimizer = self._make_optimizer(optim, self.model) - - # # make the learning rate scheduler - # self.lr_scheduler = self._make_lr_sched(lrsched, self.optimizer) - - # if self.use_amp: - # if ddp and ddp_type != DDPType.DDP: - # if self.rank == 0: - # logging.info( - # "using automatic mixed precision training with sharded-grad-scaler" - # ) - # self.grad_scaler = ShardedGradScaler() - # else: - # if self.rank == 0: - # logging.info( - # "using automatic mixed precision training with grad-scaler" - # ) - # self.grad_scaler = amp.GradScaler() - # self.amp_autocast = amp.autocast - # else: - # self.amp_autocast = contextlib.nullcontext - - # self.in_swa = False - # if self.do_swa: - # if self.rank == 0: - # logging.info("init SWA model") - # self.swa_model = AveragedModel(self.model) - # self.swa_scheduler = SWALR( - # self.optimizer, swa_lr=self.swa_lr, anneal_epochs=self.swa_anneal_epochs - # ) - def prepare_models_for_training(self): self.loss = self._prepare_loss_for_training(self.loss, self.device) ( @@ -329,6 +276,7 @@ def _prepare_model_for_training( # make weight decay scheduler if needed wd_scheduler = self._make_wd_sched(wdsched, optimizer) + grad_scaler = None if use_amp: if ddp and ddp_type != DDPType.DDP: if self.rank == 0: @@ -741,9 +689,9 @@ def checkpoint(self, logs=None): "model_cfg": self.model.get_config(), "model_state_dict": 
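Editorial note: the large commented-out constructor block deleted above has moved into prepare_models_for_training / _prepare_model_for_training, where grad_scaler is now initialized to None before a scaler is conditionally created (end of the hunk above). Condensed into a standalone sketch; the fairscale import path is an assumption inferred from the deleted comments:

    from torch.cuda import amp

    def make_grad_scaler(use_amp: bool, ddp: bool, ddp_type: str):
        # No scaler at all when mixed precision is off ...
        if not use_amp:
            return None
        # ... the sharded scaler for fairscale's sharded-DDP variants ...
        if ddp and ddp_type != "ddp":
            from fairscale.optim.grad_scaler import ShardedGradScaler
            return ShardedGradScaler()
        # ... and the standard scaler for plain DDP or single-GPU runs.
        return amp.GradScaler()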
self.model.state_dict(), "optimizer_state_dict": self.optimizer.state_dict(), - "loss_state_dict": self.loss.state_dict() - if self.loss is not None - else None, + "loss_state_dict": ( + self.loss.state_dict() if self.loss is not None else None + ), } if self.lr_scheduler is not None: checkpoint["lr_scheduler_state_dict"] = self.lr_scheduler.state_dict() @@ -1093,7 +1041,7 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): parser.add_argument( "--ddp-type", default="ddp", - choices=ddp_choices, + choices=DDPType.choices(), help="DDP type in {}".format(ddp_choices), ) parser.add_argument( @@ -1102,6 +1050,9 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): default=False, help="use mixed precision training", ) + parser.add_argument( + "--amp-dtype", default=AMPDType.FLOAT16, choices=AMPDType.choices() + ) parser.add_argument( "--cpu-offload", action=ActionYesNo, From 12a1bd9250e8ca210bf53290d10bc3e772816c65 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 23 Apr 2024 18:54:32 -0400 Subject: [PATCH 131/154] added ft steps to voxceleb/ssl.v1 --- ..._ecapatdnn512x3_v1.2_cos_ahc_plda_ahc.yaml | 18 ++ ...uster_ecapatdnn512x3_v1.2_ft1_cos_ahc.yaml | 12 + ...patdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml | 20 ++ ...uster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml | 18 ++ .../cluster_lresnet34_v1.2_ft1_cos_ahc.yaml | 12 + ...r_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml | 18 ++ egs/voxceleb/ssl.v1/conf/plda.yaml | 11 + .../conf/train_ecapatdnn512x3_dino_v1.2.yaml | 115 ++++++++ ...ain_ecapatdnn512x3_xvec_stage1.1_v1.2.yaml | 68 +++++ ...ain_ecapatdnn512x3_xvec_stage1.2_v1.2.yaml | 68 +++++ .../conf/train_fwseresnet34_dino_v1.2.yaml | 93 ++++++ .../conf/train_lresnet34_dino_v1.2.yaml | 92 ++++++ .../train_lresnet34_xvec_stage1.1_v1.2.yaml | 70 +++++ .../train_lresnet34_xvec_stage1.2_v1.2.yaml | 70 +++++ ...config_fbank80_stmn_ecapatdnn512x3.v1.2.sh | 68 +++++ ...config_fbank80_stmn_fwseresnet34.v1.2.1.sh | 66 +++++ .../config_fbank80_stmn_fwseresnet34.v1.2.sh | 66 +++++ .../config_fbank80_stmn_lresnet34.v1.1.sh | 52 ++-- .../config_fbank80_stmn_lresnet34.v1.2.1.sh | 65 +++++ .../config_fbank80_stmn_lresnet34.v1.2.sh | 65 +++++ ...un_006_extract_dino_embeds_cluster_eval.sh | 99 +++++-- egs/voxceleb/ssl.v1/run_007_train_xvector.sh | 57 ++-- ...08_extract_ft1_xvec_embeds_cluster_eval.sh | 25 ++ .../ssl.v1/run_009_finetune_xvector_s2.sh | 22 ++ ...10_extract_ft2_xvec_embeds_cluster_eval.sh | 25 ++ egs/voxceleb/v1.2/README.md | 15 +- hyperion/bin/cluster_embeddings.py | 266 ++++++++++++++++-- hyperion/bin/hyperion_dataset.py | 77 ++++- hyperion/bin/hyperion_tables.py | 90 +++++- hyperion/np/pdfs/plda/frplda.py | 7 +- hyperion/np/pdfs/plda/plda.py | 11 +- hyperion/np/pdfs/plda/plda_base.py | 12 +- hyperion/np/pdfs/plda/splda.py | 9 +- hyperion/torch/data/audio_dataset.py | 33 +-- .../torch/models/wav2xvectors/wav2xvector.py | 9 +- hyperion/torch/models/xvectors/xvector.py | 103 +++++-- hyperion/torch/narchs/conformer_encoder_v1.py | 7 +- hyperion/torch/narchs/dino_head.py | 6 +- hyperion/torch/narchs/proj_head.py | 25 +- hyperion/torch/trainers/ae_trainer.py | 8 +- hyperion/torch/trainers/dvae_trainer.py | 6 +- hyperion/torch/trainers/plda_trainer.py | 6 +- hyperion/torch/trainers/transducer_trainer.py | 5 +- hyperion/torch/trainers/vae_trainer.py | 8 +- hyperion/torch/trainers/vq_dvae_trainer.py | 7 +- hyperion/torch/trainers/vq_vae_trainer.py | 35 +-- .../torch/trainers/xvector_adv_trainer.py | 4 + .../trainers/xvector_adv_trainer_from_wav.py | 4 + 
hyperion/torch/trainers/xvector_trainer.py | 5 +- .../trainers/xvector_trainer_deep_feat_reg.py | 4 + .../xvector_trainer_deep_feat_reg_from_wav.py | 5 +- .../trainers/xvector_trainer_from_wav.py | 4 + hyperion/utils/dataset.py | 76 +++-- hyperion/utils/info_table.py | 39 ++- 54 files changed, 1926 insertions(+), 255 deletions(-) create mode 100644 egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_cos_ahc_plda_ahc.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/plda.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_dino_v1.2.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_xvec_stage1.1_v1.2.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_xvec_stage1.2_v1.2.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.2.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_lresnet34_xvec_stage1.1_v1.2.yaml create mode 100644 egs/voxceleb/ssl.v1/conf/train_lresnet34_xvec_stage1.2_v1.2.yaml create mode 100644 egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v1.2.sh create mode 100644 egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh create mode 100644 egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.sh create mode 100644 egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.1.sh create mode 100644 egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.sh create mode 100755 egs/voxceleb/ssl.v1/run_008_extract_ft1_xvec_embeds_cluster_eval.sh create mode 100755 egs/voxceleb/ssl.v1/run_009_finetune_xvector_s2.sh create mode 100755 egs/voxceleb/ssl.v1/run_010_extract_ft2_xvec_embeds_cluster_eval.sh diff --git a/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_cos_ahc_plda_ahc.yaml b/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_cos_ahc_plda_ahc.yaml new file mode 100644 index 00000000..fb6673df --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_cos_ahc_plda_ahc.yaml @@ -0,0 +1,18 @@ +pca: + pca_var_r: 0.995 +pre_kmeans: + samples_per_cluster: 4 + epochs: 10 + rtol: 0.01 + init_method: random +stop_criterion: threshold +threshold_stage_1: 0.875 +threshold_stage_2: -100. 
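Editorial note on the cluster_*.yaml files (the first of which continues just below): threshold_stage_1 cuts the first AHC pass on cosine affinities, which are roughly bounded by [-1, 1], while threshold_stage_2 cuts the second pass on PLDA log-likelihood-ratio scores, which is why values like -100, -400, 0, or 30 appear alongside thresholds near 0.8. Both cuts go through the same dendrogram logic; a sketch condensed from the do_ahc helper this patch set adds to hyperion/bin/cluster_embeddings.py further below:

    import numpy as np
    from hyperion.np.clustering import AHC

    def flat_clusters(scores: np.ndarray, linkage_method: str,
                      stop_criterion: str, threshold: float,
                      num_clusters: int) -> np.ndarray:
        # Agglomerate on the pairwise score matrix, then cut the tree
        # either at a score threshold or at a fixed number of clusters.
        ahc = AHC(method=linkage_method)
        ahc.fit(scores)
        if stop_criterion == "threshold":
            return ahc.get_flat_clusters_from_thr(threshold)
        return ahc.get_flat_clusters_from_num_clusters(num_clusters)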
+plda: + plda_type: splda + y_dim: 100 +max_samples_per_cluster: 50 +min_samples_per_cluster: 8 +ahc_precision: single +num_workers: 32 +filter_by_gmm_post: 0.9 diff --git a/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc.yaml b/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc.yaml new file mode 100644 index 00000000..c1bf8c94 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc.yaml @@ -0,0 +1,12 @@ +pca: + pca_var_r: 0.995 +pre_kmeans: + samples_per_cluster: 4 + epochs: 10 + rtol: 0.01 + init_method: random +stop_criterion: threshold +threshold: 0.8 +ahc_precision: single +num_workers: 32 +filter_by_gmm_post: 0.9 diff --git a/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml b/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml new file mode 100644 index 00000000..1a04d084 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml @@ -0,0 +1,20 @@ +pca: + pca_var_r: 0.995 +pre_kmeans: + samples_per_cluster: 4 + epochs: 10 + rtol: 0.01 + init_method: random +stop_criterion: threshold +#threshold_stage_1: 0.75 +#threshold_stage_2: 25 +threshold_stage_1: 0.8 +threshold_stage_2: 30 +plda: + plda_type: splda + y_dim: 100 +max_samples_per_cluster: 50 +min_samples_per_cluster: 8 +ahc_precision: single +num_workers: 32 +filter_by_gmm_post: 0.9 diff --git a/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml b/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml new file mode 100644 index 00000000..3740d0e7 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml @@ -0,0 +1,18 @@ +pca: + pca_var_r: 0.99 +pre_kmeans: + samples_per_cluster: 4 + epochs: 10 + rtol: 0.01 + init_method: random +stop_criterion: threshold +threshold_stage_1: 0.80 +threshold_stage_2: -400 +plda: + plda_type: splda + y_dim: 100 +max_samples_per_cluster: 50 +min_samples_per_cluster: 8 +ahc_precision: single +num_workers: 32 +filter_by_gmm_post: 0.9 diff --git a/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml b/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml new file mode 100644 index 00000000..9c446a2e --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml @@ -0,0 +1,12 @@ +pca: + pca_var_r: 0.99 +pre_kmeans: + samples_per_cluster: 4 + epochs: 10 + rtol: 0.01 + init_method: random +stop_criterion: threshold +threshold: 0.6 +ahc_precision: single +num_workers: 32 +filter_by_gmm_post: 0.9 diff --git a/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml b/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml new file mode 100644 index 00000000..cf3adf41 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml @@ -0,0 +1,18 @@ +pca: + pca_var_r: 0.99 +pre_kmeans: + samples_per_cluster: 4 + epochs: 10 + rtol: 0.01 + init_method: random +stop_criterion: threshold +threshold_stage_1: 0.6 +threshold_stage_2: 0 +plda: + plda_type: splda + y_dim: 100 +max_samples_per_cluster: 50 +min_samples_per_cluster: 8 +ahc_precision: single +num_workers: 32 +filter_by_gmm_post: 0.9 diff --git a/egs/voxceleb/ssl.v1/conf/plda.yaml b/egs/voxceleb/ssl.v1/conf/plda.yaml new file mode 100644 index 00000000..bbb8f051 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/plda.yaml @@ -0,0 +1,11 @@ +class_name: cluster +pca: + #pca_var_r: 0.975 + pca_var_r: 0.99 +do_lda: true +lda: + lda_dim: 120 
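Editorial note: conf/plda.yaml (concluded just below with its splda block) describes the supervised back-end trained on the discovered clusters: length normalization, PCA by variance ratio, optional LDA, then SPLDA. A hypothetical end-to-end driver using the hyperion.np classes this patch set already imports in cluster_embeddings.py; the exact constructor keyword names are assumptions taken from the YAML keys:

    import numpy as np
    from hyperion.np.transforms import LNorm, PCA
    from hyperion.np.pdfs import PLDAFactory

    def train_backend(x: np.ndarray, class_ids: np.ndarray):
        # Length-normalize, then reduce dimension keeping ~99% of variance.
        lnorm = LNorm()
        x = lnorm(x)
        pca = PCA(pca_var_r=0.99)  # assumed kwarg, named after the YAML key
        pca.fit(x)
        x = pca(x)
        # The LDA step (lda_dim: 120) is omitted here for brevity.
        plda = PLDAFactory.create(plda_type="splda",
                                  y_dim=min(100, x.shape[1]))
        plda.fit(x, class_ids=class_ids)  # same call train_plda() uses below
        return lnorm, pca, plda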
+plda: + plda_type: splda + y_dim: 100 + epochs: 20 diff --git a/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_dino_v1.2.yaml b/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_dino_v1.2.yaml new file mode 100644 index 00000000..ae9ab1fa --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_dino_v1.2.yaml @@ -0,0 +1,115 @@ +data: + train: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 + val: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 +student_model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_enc: + in_feats: 80 + in_conv_channels: 512 + in_kernel_size: 5 + in_stride: 1 + resb_type: seres2bn + resb_repeats: + - 1 + - 1 + - 1 + resb_channels: + - 512 + resb_kernel_sizes: + - 3 + resb_dilations: + - 2 + - 3 + - 4 + resb_strides: + - 1 + res2net_width_factor: 1 + res2net_scale: 8 + se_r: 4 + multilayer: true + multilayer_concat: true + endpoint_channels: 1536 + norm_before: false + dropout_rate: 0.002 + hid_act: swish + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + dropout_rate: 0.0 + norm_before: false + hid_act: swish + head_type: dino + embed_dim: 192 + num_embed_layers: 3 + loss_type: softmax + head_use_norm: true + head_hid_dim: 768 + head_bottleneck_dim: 192 + proj_head_use_norm: true + proj_head_norm_before: false +teacher_model: + xvector: + override_dropouts: true + dropout_rate: 0.0 +dino_loss: + num_classes: 65536 + temp_warmup_epochs: 0 + teacher_temp: 0.04 +trainer: + optim: + opt_type: adamw + lr: 0.005 + amsgrad: false + beta1: 0.9 + beta2: 0.99 + weight_decay: 1e-1 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 15000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + teacher_optim: + init_momentum: 0.996 + momentum: 1.0 + warmup_steps: 500000 + grad_clip: 15 + use_amp: false + log_interval: 1000 + epochs: 120 + eff_batch_size: 256 + train_mode: full + freeze_output_layer_steps: 1500 diff --git a/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_xvec_stage1.1_v1.2.yaml b/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_xvec_stage1.1_v1.2.yaml new file mode 100644 index 00000000..480ae04f --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_xvec_stage1.1_v1.2.yaml @@ -0,0 +1,68 @@ +data: + train: + dataset: + class_names: + - cluster + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - cluster + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: cluster + seg_weight_mode: data-prior + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - cluster + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - cluster + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + 
max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: cluster + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 10 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 16000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: false + log_interval: 1000 + epochs: 30 + eff_batch_size: 256 + target_key: cluster + train_mode: ft-embed-affine + \ No newline at end of file diff --git a/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_xvec_stage1.2_v1.2.yaml b/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_xvec_stage1.2_v1.2.yaml new file mode 100644 index 00000000..8a7a700c --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_ecapatdnn512x3_xvec_stage1.2_v1.2.yaml @@ -0,0 +1,68 @@ +data: + train: + dataset: + class_names: + - cluster + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - cluster + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: cluster + seg_weight_mode: data-prior + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - cluster + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - cluster + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: cluster + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 10 + intertop_margin: 0.1 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 100 + eff_batch_size: 256 + target_key: cluster + train_mode: full + \ No newline at end of file diff --git a/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.yaml b/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.yaml new file mode 100644 index 00000000..24d09678 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.yaml @@ -0,0 +1,93 @@ +data: + train: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 + val: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. 
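Editorial note: the stage1.x fine-tuning configs above swap the DINO head for a sub-center ArcFace output (2 sub-centers, cosine scale 30, margin 0.2 with warmup, inter-top margin 0.1); the fwseresnet34 DINO config continues just below. For orientation, a minimal sketch of plain sub-center ArcFace scoring, with inter-top margin and margin warmup omitted; this illustrates the standard technique, not hyperion's exact implementation:

    import torch
    import torch.nn.functional as F

    def subcenter_arcface_logits(x, weight, labels, num_subcenters=2,
                                 cos_scale=30.0, margin=0.2):
        # weight: (num_classes * num_subcenters, embed_dim). Keep the best
        # sub-center cosine per class, add the angular margin only to the
        # target class, then rescale.
        cos = F.linear(F.normalize(x), F.normalize(weight))
        cos = cos.view(x.size(0), -1, num_subcenters).max(dim=-1).values
        theta = torch.acos(cos.clamp(-1.0 + 1e-7, 1.0 - 1e-7))
        target = F.one_hot(labels, num_classes=cos.size(1)).bool()
        logits = torch.where(target, torch.cos(theta + margin), cos)
        return cos_scale * logits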
+ num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 +student_model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: fwseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + dropout_rate: 0.01 + norm_before: false + hid_act: swish + se_r: 4 + head_type: dino + embed_dim: 192 + num_embed_layers: 3 + loss_type: softmax + head_use_norm: true + head_hid_dim: 768 + head_bottleneck_dim: 192 + proj_head_use_norm: true + proj_head_norm_before: false +teacher_model: + xvector: + override_dropouts: true + dropout_rate: 0.0 +dino_loss: + num_classes: 65536 + temp_warmup_epochs: 0 + teacher_temp: 0.04 +trainer: + optim: + opt_type: adamw + lr: 0.005 + amsgrad: false + beta1: 0.9 + beta2: 0.99 + weight_decay: 1e-1 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 15000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + teacher_optim: + init_momentum: 0.996 + momentum: 1.0 + warmup_steps: 500000 + grad_clip: 15 + use_amp: true + log_interval: 1000 + epochs: 100 + eff_batch_size: 256 + train_mode: full + freeze_output_layer_steps: 1500 diff --git a/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.2.yaml b/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.2.yaml new file mode 100644 index 00000000..fa6466ce --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_lresnet34_dino_v1.2.yaml @@ -0,0 +1,92 @@ +data: + train: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 + val: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. 
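Editorial note: in all of the DINO configs, teacher_optim ramps the teacher's EMA momentum from init_momentum 0.996 toward 1.0 over 500k warmup steps, so the teacher tracks the student closely early in training and is nearly frozen at the end; the lresnet34 config continues just below. The actual schedule lives in the ExpMovingAvg optimizer the trainer imports; a linear ramp is assumed in this illustration:

    def ema_momentum(step, init_momentum=0.996, final_momentum=1.0,
                     warmup_steps=500_000):
        # Linear ramp (an assumption; DINO implementations often use cosine).
        t = min(step / warmup_steps, 1.0)
        return init_momentum + t * (final_momentum - init_momentum)

    # The teacher is then updated each step as
    #   p_teacher <- m * p_teacher + (1 - m) * p_student
    m = ema_momentum(15_000)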
+ num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 +student_model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: lresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + dropout_rate: 0.01 + norm_before: false + hid_act: swish + head_type: dino + embed_dim: 192 + num_embed_layers: 3 + loss_type: softmax + head_use_norm: true + head_hid_dim: 768 + head_bottleneck_dim: 192 + proj_head_use_norm: true + proj_head_norm_before: false +teacher_model: + xvector: + override_dropouts: true + dropout_rate: 0.0 +dino_loss: + num_classes: 65536 + temp_warmup_epochs: 0 + teacher_temp: 0.04 +trainer: + optim: + opt_type: adamw + lr: 0.0025 + amsgrad: false + beta1: 0.9 + beta2: 0.99 + weight_decay: 1e-1 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 15000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + teacher_optim: + init_momentum: 0.996 + momentum: 1.0 + warmup_steps: 500000 + grad_clip: 15 + use_amp: true + log_interval: 1000 + epochs: 100 + eff_batch_size: 256 + train_mode: full + freeze_output_layer_steps: 1500 diff --git a/egs/voxceleb/ssl.v1/conf/train_lresnet34_xvec_stage1.1_v1.2.yaml b/egs/voxceleb/ssl.v1/conf/train_lresnet34_xvec_stage1.1_v1.2.yaml new file mode 100644 index 00000000..945fd42b --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_lresnet34_xvec_stage1.1_v1.2.yaml @@ -0,0 +1,70 @@ +data: + train: + dataset: + class_names: + - cluster + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - cluster + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: cluster + seg_weight_mode: data-prior + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - cluster + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - cluster + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: cluster + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 10 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.1 +trainer: + optim: + opt_type: sgd + lr: 0.1 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 16000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 30 + eff_batch_size: 256 + target_key: cluster + train_mode: ft-embed-affine + \ No newline at end of file diff --git a/egs/voxceleb/ssl.v1/conf/train_lresnet34_xvec_stage1.2_v1.2.yaml b/egs/voxceleb/ssl.v1/conf/train_lresnet34_xvec_stage1.2_v1.2.yaml new file mode 100644 index 00000000..e8fd36a2 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_lresnet34_xvec_stage1.2_v1.2.yaml @@ -0,0 +1,70 @@ +data: + train: + dataset: + class_names: + - cluster + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - cluster + sampler: + sampler_type: 
class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: cluster + seg_weight_mode: data-prior + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - cluster + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - cluster + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: cluster + seg_weight_mode: data-prior + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.2 + margin_warmup_epochs: 10 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.01 +trainer: + optim: + opt_type: sgd + lr: 0.01 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 70 + eff_batch_size: 256 + target_key: cluster + train_mode: full + \ No newline at end of file diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v1.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v1.2.sh new file mode 100644 index 00000000..de643f1e --- /dev/null +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v1.2.sh @@ -0,0 +1,68 @@ +# ECAPA-TDNN 512x3 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet1d +nnet_name=${feat_type}_ecapatdnn512x3_dino.v1.2 + +nnet_s1_base_cfg=conf/train_ecapatdnn512x3_dino_v1.2.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/teacher_model_ep0120.pth + +# clustering of dino embeddings +cluster_method=cos_ahc_plda_ahc +cluster_cfg=conf/cluster_ecapatdnn512x3_v1.2_cos_ahc_plda_ahc.yaml +cluster_name=${cluster_method} +cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name + +# plda +plda_cfg=conf/plda.yaml + +# finetuning stage 1.1 +nnet_ft_s1_1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1.1_v1.2.yaml +nnet_ft_s1_1_name=$nnet_name.s1.ft.s1.1 +nnet_ft_s1_1_dir=exp/xvector_nnets/$nnet_ft_s1_1_name +nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0030.pth + +# finetuning stage 1.2 +nnet_ft_s1_2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1.2_v1.2.yaml +nnet_ft_s1_2_name=$nnet_name.s1.ft.s1.2 +nnet_ft_s1_2_dir=exp/xvector_nnets/$nnet_ft_s1_2_name +nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 1.2 +cluster_ft_s1_method=cos_ahc_plda_ahc +cluster_ft_s1_cfg=conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml +cluster_ft_s1_name=${cluster_method} +cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name + + +# finetuning stage 2.1 +nnet_ft_s2_1_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1.1_v1.2.yaml +nnet_ft_s2_1_name=$nnet_name.s1.ft.s2.1 +nnet_ft_s2_1_dir=exp/xvector_nnets/$nnet_ft_s2_1_name +nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0030.pth + +# finetuning stage 2.2 +nnet_ft_s2_2_base_cfg=conf/train_ecapatdnn512x3_xvec_stage1.2_v1.2.yaml +nnet_ft_s2_2_name=$nnet_name.s1.ft.s2.2 +nnet_ft_s2_2_dir=exp/xvector_nnets/$nnet_ft_s2_2_name +nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth + +# clustering of ft 
embeddings from stage 1.2 +cluster_ft_s2_method=cos_ahc_plda_ahc +cluster_ft_s2_cfg=conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml +cluster_ft_s2_name=${cluster_method} +cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name + + diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh new file mode 100644 index 00000000..102fbaef --- /dev/null +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh @@ -0,0 +1,66 @@ +# ECAPA-TDNN 512x3 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_fwseresnet34_dino.v1.2 + +nnet_s1_base_cfg=conf/train_fwseresnet34_dino_v1.2.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/teacher_model_ep0034.pth + +# clustering of dino embeddings +cluster_method=cos_ahc_plda_ahc +cluster_cfg=conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml +cluster_name=${cluster_method} +cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name + +# plda +plda_cfg=conf/plda.yaml + +# finetuning stage 1.1 +nnet_ft_s1_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s1_1_name=$nnet_name.s1.ft.s1.1 +nnet_ft_s1_1_dir=exp/xvector_nnets/$nnet_ft_s1_1_name +nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0030.pth + +# finetuning stage 1.2 +nnet_ft_s1_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s1_2_name=$nnet_name.s1.ft.s1.2 +nnet_ft_s1_2_dir=exp/xvector_nnets/$nnet_ft_s1_2_name +nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 1.2 +cluster_ft_s1_method=cos_ahc +cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml +cluster_ft_s1_name=${cluster_method_ft_s1_method} +cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name + +# finetuning stage 2.1 +nnet_ft_s2_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s2_1_name=$nnet_name.1.s1.ft.s2.1 +nnet_ft_s2_1_dir=exp/xvector_nnets/$nnet_ft_s2_1_name +nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0030.pth + +# finetuning stage 2.2 +nnet_ft_s2_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s2_2_name=$nnet_name.1.s1.ft.s2.2 +nnet_ft_s2_2_dir=exp/xvector_nnets/$nnet_ft_s2_2_name +nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 2.2 +cluster_ft_s2_method=cos_ahc +cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml +cluster_ft_s2_name=${cluster_method_ft_s2_method} +cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name + diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.sh new file mode 100644 index 00000000..b3a6e963 --- /dev/null +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.sh @@ -0,0 +1,66 @@ +# ECAPA-TDNN 512x3 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_fwseresnet34_dino.v1.2 + +nnet_s1_base_cfg=conf/train_fwseresnet34_dino_v1.2.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name 
+nnet_s1=$nnet_s1_dir/teacher_model_ep0034.pth + +# clustering of dino embeddings +cluster_method=cos_ahc_plda_ahc +cluster_cfg=conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml +cluster_name=${cluster_method} +cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name + +# plda +plda_cfg=conf/plda.yaml + +# finetuning stage 1.1 +nnet_ft_s1_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s1_1_name=$nnet_name.s1.ft.s1.1 +nnet_ft_s1_1_dir=exp/xvector_nnets/$nnet_ft_s1_1_name +nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0030.pth + +# finetuning stage 1.2 +nnet_ft_s1_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s1_2_name=$nnet_name.s1.ft.s1.2 +nnet_ft_s1_2_dir=exp/xvector_nnets/$nnet_ft_s1_2_name +nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 1.2 +cluster_ft_s1_method=cos_ahc_plda_ahc +cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml +cluster_ft_s1_name=${cluster_method} +cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name + +# finetuning stage 2.1 +nnet_ft_s2_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s2_1_name=$nnet_name.s1.ft.s2.1 +nnet_ft_s2_1_dir=exp/xvector_nnets/$nnet_ft_s2_1_name +nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0030.pth + +# finetuning stage 2.2 +nnet_ft_s2_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s2_2_name=$nnet_name.s1.ft.s2.2 +nnet_ft_s2_2_dir=exp/xvector_nnets/$nnet_ft_s2_2_name +nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 2.2 +cluster_ft_s2_method=cos_ahc_plda_ahc +cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml +cluster_ft_s2_name=${cluster_method} +cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name + diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.1.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.1.sh index 752f7048..18fafd95 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.1.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.1.sh @@ -19,11 +19,6 @@ nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name nnet_s1=$nnet_s1_dir/teacher_model_ep0080.pth -nnet_s2_base_cfg=conf/train_resnet34_xvec_stage2_v3.0.yaml -nnet_s2_name=${nnet_name}.s2 -nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name -nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth - # clustering cluster_method=cos_ahc cluster_name=${cluster_method}_1 @@ -32,21 +27,34 @@ cluster_cfg=conf/ahc.yaml # plda plda_cfg=conf/plda.yaml -# back-end -do_plda=false -# do_snorm=true -# do_qmf=true -# do_voxsrc22=true - -plda_aug_config=conf/reverb_noise_aug.yaml -plda_num_augs=0 -if [ $plda_num_augs -eq 0 ]; then - plda_data=voxceleb2cat_train -else - plda_data=voxceleb2cat_train_augx${plda_num_augs} -fi -plda_type=splda -lda_dim=200 -plda_y_dim=150 -plda_z_dim=200 +# finetuning stage 1.1 +nnet_ft_s1_1_base_cfg=conf/train_lresnet34_stage1.1_v1.1.yaml +nnet_ft_s1_1_name=$nnet_name.s1.ft.s1.1 +nnet_ft_s1_1_dir=exp/xvector_nnets/$nnet_ft_s1_1_name +nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0010.pth + +# finetuning stage 1.2 +nnet_ft_s1_2_base_cfg=conf/train_lresnet34_stage1.2_v1.1.yaml +nnet_ft_s1_2_name=$nnet_name.s1.ft.s1.2 +nnet_ft_s1_2_dir=exp/xvector_nnets/$nnet_ft_s1_2_name +nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0080.pth + + +# # back-end +# do_plda=false +# # do_snorm=true +# # do_qmf=true +# # do_voxsrc22=true + +# plda_aug_config=conf/reverb_noise_aug.yaml +# 
plda_num_augs=0 +# if [ $plda_num_augs -eq 0 ]; then +# plda_data=voxceleb2cat_train +# else +# plda_data=voxceleb2cat_train_augx${plda_num_augs} +# fi +# plda_type=splda +# lda_dim=200 +# plda_y_dim=150 +# plda_z_dim=200 diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.1.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.1.sh new file mode 100644 index 00000000..7b822cf4 --- /dev/null +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.1.sh @@ -0,0 +1,65 @@ +# ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34_dino.v1.2 + +nnet_s1_base_cfg=conf/train_lresnet34_dino_v1.2.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/teacher_model_ep0100.pth + +# clustering of dino embeddings +cluster_method=cos_ahc_plda_ahc +cluster_cfg=conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml +cluster_name=${cluster_method} +cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name + +# plda +plda_cfg=conf/plda.yaml + +# finetuning stage 1.1 +nnet_ft_s1_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s1_1_name=$nnet_name.s1.ft.s1.1 +nnet_ft_s1_1_dir=exp/xvector_nnets/$nnet_ft_s1_1_name +nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0030.pth + +# finetuning stage 1.2 +nnet_ft_s1_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s1_2_name=$nnet_name.s1.ft.s1.2 +nnet_ft_s1_2_dir=exp/xvector_nnets/$nnet_ft_s1_2_name +nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 1.2 +cluster_ft_s1_method=cos_ahc +cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml +cluster_ft_s1_name=${cluster_ft_s1_method} +cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name + +# finetuning stage 2.1 +nnet_ft_s2_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s2_1_name=$nnet_name.1.s1.ft.s2.1 +nnet_ft_s2_1_dir=exp/xvector_nnets/$nnet_ft_s2_1_name +nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0030.pth + +# finetuning stage 2.2 +nnet_ft_s2_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s2_2_name=$nnet_name.1.s1.ft.s2.2 +nnet_ft_s2_2_dir=exp/xvector_nnets/$nnet_ft_s2_2_name +nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 2.2 +cluster_ft_s2_method=cos_ahc +cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml +cluster_ft_s2_name=${cluster_ft_s2_method} +cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.sh new file mode 100644 index 00000000..788b3b4b --- /dev/null +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.sh @@ -0,0 +1,65 @@ +# ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_lresnet34_dino.v1.2 + +nnet_s1_base_cfg=conf/train_lresnet34_dino_v1.2.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/teacher_model_ep0100.pth + +# clustering of dino embeddings +cluster_method=cos_ahc_plda_ahc 
+cluster_cfg=conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml +cluster_name=${cluster_method} +cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name + +# plda +plda_cfg=conf/plda.yaml + +# finetuning stage 1.1 +nnet_ft_s1_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s1_1_name=$nnet_name.s1.ft.s1.1 +nnet_ft_s1_1_dir=exp/xvector_nnets/$nnet_ft_s1_1_name +nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0030.pth + +# finetuning stage 1.2 +nnet_ft_s1_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s1_2_name=$nnet_name.s1.ft.s1.2 +nnet_ft_s1_2_dir=exp/xvector_nnets/$nnet_ft_s1_2_name +nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 1.2 +cluster_ft_s1_method=cos_ahc_plda_ahc +cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml +cluster_ft_s1_name=${cluster_method} +cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name + +# finetuning stage 2.1 +nnet_ft_s2_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s2_1_name=$nnet_name.s1.ft.s2.1 +nnet_ft_s2_1_dir=exp/xvector_nnets/$nnet_ft_s2_1_name +nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0030.pth + +# finetuning stage 2.2 +nnet_ft_s2_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s2_2_name=$nnet_name.s1.ft.s2.2 +nnet_ft_s2_2_dir=exp/xvector_nnets/$nnet_ft_s2_2_name +nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 2.2 +cluster_ft_s2_method=cos_ahc_plda_ahc +cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml +cluster_ft_s2_name=${cluster_method} +cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name diff --git a/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh b/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh index d848b466..5bf085ae 100755 --- a/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh +++ b/egs/voxceleb/ssl.v1/run_006_extract_dino_embeds_cluster_eval.sh @@ -9,9 +9,11 @@ set -e stage=1 nnet_stage=1 +ft_stage=0 config_file=default_config.sh -use_gpu=false +use_gpu=true xvec_chunk_length=120.0 +do_clustering=true . parse_options.sh || exit 1; . 
$config_file @@ -24,14 +26,39 @@ else num_gpus=0 fi -if [ $nnet_stage -eq 1 ];then - nnet=$nnet_s1 - nnet_name=$nnet_s1_name -elif [ $nnet_stage -eq 2 ];then - nnet=$nnet_s2 - nnet_name=$nnet_s2_name +if [ $ft_stage -eq 0 ];then + if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name + elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name + fi +elif [ $ft_stage -eq 1 ];then + if [ $nnet_stage -eq 1 ];then + nnet=$nnet_ft_s1_1 + nnet_name=$nnet_ft_s1_1_name + elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_ft_s1_2 + nnet_name=$nnet_ft_s1_2_name + fi + cluster_method=$cluster_ft_s1_method + cluster_cfg=$cluster_ft_s1_cfg + cluster_name=$cluster_ft_s1_name + cluster_dir=$cluster_ft_s1_dir +elif [ $ft_stage -eq 2 ];then + if [ $nnet_stage -eq 1 ];then + nnet=$nnet_ft_s2_1 + nnet_name=$nnet_ft_s2_1_name + elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_ft_s2_2 + nnet_name=$nnet_ft_s2_2_name + fi + cluster_method=$cluster_ft_s2_method + cluster_cfg=$cluster_ft_s2_cfg + cluster_name=$cluster_ft_s2_name + cluster_dir=$cluster_ft_s2_dir fi - xvector_dir=exp/xvectors/$nnet_name score_dir=exp/scores/$nnet_name score_cosine_dir=$score_dir/cosine @@ -99,22 +126,22 @@ if [ $stage -le 2 ];then --output-file $score_cosine_dir/voxceleb1_results.csv cat $score_cosine_dir/voxceleb1_results.csv - exit +fi + +if [ "$do_clustering" == "false" ];then + exit 0 fi if [ $stage -le 3 ]; then # Extract xvectors for training LDA/PLDA nj=100 - for name in voxceleb2cat_train + for name in voxceleb2cat_train_filtered do - if [ -n "$vad_config" ];then - vad_args="--vad csv:data/$name/vad.csv" - fi output_dir=$xvector_dir/$name echo "Extracting x-vectors for $name" $xvec_cmd JOB=1:$nj $output_dir/log/extract_xvectors.JOB.log \ hyp_utils/conda_env.sh --num-gpus $num_gpus \ - hyperion-extract-wav2xvectors ${xvec_args} ${vad_args} \ + hyperion-extract-wav2xvectors ${xvec_args} \ --part-idx JOB --num-parts $nj \ --recordings-file data/$name/recordings.csv \ --random-utt-length --min-utt-length 30 --max-utt-length 30 \ @@ -128,32 +155,46 @@ if [ $stage -le 3 ]; then fi -cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name if [ $stage -le 4 ];then echo "Cluster Vox2" mkdir -p $cluster_dir $train_cmd --mem 50G --num-threads 32 $cluster_dir/clustering.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV \ hyperion-cluster-embeddings $cluster_method --cfg $cluster_cfg \ - --segments-file data/voxceleb2cat_train_xvector_train/segments.csv \ - --feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ - --output-file $cluster_dir/voxceleb2cat_train_xvector_train/segments.csv + --segments-file data/voxceleb2cat_train_filtered/segments.csv \ + --feats-file csv:$xvector_dir/voxceleb2cat_train_filtered/xvector.csv \ + --output-file $cluster_dir/voxceleb2cat_train/segments.csv fi if [ $stage -le 5 ];then + hyperion-dataset add_cols_to_segments \ + --dataset data/voxceleb2cat_train_filtered \ + --column-names cluster \ + --right-table $cluster_dir/voxceleb2cat_train/segments.csv \ + --output-dataset $cluster_dir/voxceleb2cat_train_clustered \ + --remove-missing --create-class-info + + hyperion-dataset remove_classes_few_toomany_segments \ + --dataset $cluster_dir/voxceleb2cat_train_clustered \ + --class-name cluster \ + --min-segs 10 \ + --max-segs 50 \ + --rebuild-idx \ + --output-dataset $cluster_dir/voxceleb2cat_train_clustered_filtered +fi + +if [ $stage -le 6 ];then echo "Train PLDA" $train_cmd $cluster_dir/plda.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV \ hyperion-train-plda --cfg $plda_cfg \ 
- --segments-file $cluster_dir/voxceleb2cat_train_xvector_train/segments.csv \ - --feats-file csv:$xvector_dir/voxceleb2cat_train/xvector.csv \ + --segments-file $cluster_dir/voxceleb2cat_train_clustered_filtered/segments.csv \ + --feats-file csv:$xvector_dir/voxceleb2cat_train_filtered/xvector.csv \ --preproc-file $cluster_dir/plda/preproc.h5 \ --plda-file $cluster_dir/plda/plda.h5 - - fi -if [ $stage -le 6 ];then +if [ $stage -le 7 ];then echo "Eval Voxceleb 1 with PLDA" num_parts=8 @@ -188,6 +229,14 @@ if [ $stage -le 6 ];then --output-file $score_plda_dir/voxceleb1_results.csv cat $score_plda_dir/voxceleb1_results.csv - exit fi -exit + +if [ $stage -le 8 ];then + hyperion-dataset split_train_val \ + --dataset $cluster_dir/voxceleb2cat_train_clustered_filtered \ + --val-prob 0.03 \ + --seed 1123581321 \ + --train-dataset $cluster_dir/voxceleb2cat_train_clustered_train \ + --val-dataset $cluster_dir/voxceleb2cat_train_clustered_val +fi + diff --git a/egs/voxceleb/ssl.v1/run_007_train_xvector.sh b/egs/voxceleb/ssl.v1/run_007_train_xvector.sh index 40aceb07..9732078a 100755 --- a/egs/voxceleb/ssl.v1/run_007_train_xvector.sh +++ b/egs/voxceleb/ssl.v1/run_007_train_xvector.sh @@ -7,6 +7,7 @@ . ./path.sh set -e +ft_stage=1 stage=1 ngpu=4 config_file=default_config.sh @@ -19,8 +20,27 @@ use_wandb=false . $config_file . datapath.sh -train_data_dir=data/${nnet_data}_xvector_train -val_data_dir=data/${nnet_data}_xvector_val +if [ $ft_stage -eq 1 ];then + nnet_s1_base_cfg=$nnet_ft_s1_1_base_cfg + nnet_s2_base_cfg=$nnet_ft_s1_2_base_cfg + nnet_s1_dir=$nnet_ft_s1_1_dir + nnet_s2_dir=$nnet_ft_s1_2_dir + nnet_s0=$nnet_s1 + nnet_s1=$nnet_ft_s1_1 + nnet_s2=$nnet_ft_s1_2 + train_data_dir=$cluster_dir/${nnet_data}_clustered_train + val_data_dir=$cluster_dir/${nnet_data}_clustered_val +elif [ $ft_stage -eq 2 ];then + nnet_s1_base_cfg=$nnet_ft_s2_1_base_cfg + nnet_s2_base_cfg=$nnet_ft_s2_2_base_cfg + nnet_s1_dir=$nnet_ft_s2_1_dir + nnet_s2_dir=$nnet_ft_s2_2_dir + nnet_s0=$nnet_ft_s1_2 + nnet_s1=$nnet_ft_s2_1 + nnet_s2=$nnet_ft_s2_2 + train_data_dir=$cluster_ft_s1_dir/${nnet_data}_clustered_train + val_data_dir=$cluster_ft_s1_dir/${nnet_data}_clustered_val +fi #add extra args from the command line arguments if [ -n "$num_workers" ];then @@ -30,45 +50,32 @@ if [ "$use_tb" == "true" ];then extra_args="$extra_args --trainer.use-tensorboard" fi if [ "$use_wandb" == "true" ];then - extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)" + extra_args="$extra_args --trainer.use-wandb --trainer.wandb.project voxceleb-ssl.v1.1 --trainer.wandb.name $nnet_name.$(date -Iminutes)" fi if [ "$interactive" == "true" ];then export cuda_cmd=run.pl fi -xvector_dir=exp/xvectors/$nnet_s1_name/voxceleb2cat_train -output_dir=exp/clustering/$nnet_s1_name/$cluster_method/voxceleb2cat_train_xvector_train -if [ $stage -le 1 ];then - mkdir -p $output_dir - $train_cmd --mem 50G --num-threads 32 $output_dir/clustering.log \ - hyp_utils/conda_env.sh --conda-env $HYP_ENV \ - hyperion-cluster-embeddings $cluster_method --cfg $cluster_cfg \ - --segments-file data/voxceleb2cat_train_xvector_train/segments.csv \ - --feats-file csv:$xvector_dir/xvector.csv \ - --output-file $output_dir/segments.csv -fi -exit -# Network Training -if [ $stage -le 2 ]; then - +# Fine-tune last layer and embedding projection +if [ $stage -le 1 ]; then mkdir -p $nnet_s1_dir/log $cuda_cmd \ --gpu $ngpu $nnet_s1_dir/log/train.log \ hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu 
\ - hyperion-train-wav2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ + hyperion-finetune-wav2xvector $nnet_type --cfg $nnet_s1_base_cfg $nnet_s1_args $extra_args \ --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ --data.train.dataset.segments-file $train_data_dir/segments.csv \ - --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.train.dataset.class-files $train_data_dir/cluster.csv \ --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ --data.val.dataset.segments-file $val_data_dir/segments.csv \ --trainer.exp-path $nnet_s1_dir \ - --num-gpus $ngpu \ - + --in-model-file $nnet_s0 \ + --num-gpus $ngpu fi -# Large Margin Fine-tuning +# Fine-tune full model if [ $stage -le 2 ]; then if [ "$use_wandb" == "true" ];then extra_args="$extra_args --trainer.wandb.name $nnet_s2_name.$(date -Iminutes)" @@ -80,11 +87,11 @@ if [ $stage -le 2 ]; then hyperion-finetune-wav2xvector $nnet_type --cfg $nnet_s2_base_cfg $nnet_s2_args $extra_args \ --data.train.dataset.recordings-file $train_data_dir/recordings.csv \ --data.train.dataset.segments-file $train_data_dir/segments.csv \ - --data.train.dataset.class-files $train_data_dir/speaker.csv \ + --data.train.dataset.class-files $train_data_dir/cluster.csv \ --data.val.dataset.recordings-file $val_data_dir/recordings.csv \ --data.val.dataset.segments-file $val_data_dir/segments.csv \ --in-model-file $nnet_s1 \ --trainer.exp-path $nnet_s2_dir \ - --num-gpus $ngpu \ + --num-gpus $ngpu fi diff --git a/egs/voxceleb/ssl.v1/run_008_extract_ft1_xvec_embeds_cluster_eval.sh b/egs/voxceleb/ssl.v1/run_008_extract_ft1_xvec_embeds_cluster_eval.sh new file mode 100755 index 00000000..71cab44a --- /dev/null +++ b/egs/voxceleb/ssl.v1/run_008_extract_ft1_xvec_embeds_cluster_eval.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=2 +config_file=default_config.sh +use_gpu=true +xvec_chunk_length=120.0 +do_clustering=true +. parse_options.sh || exit 1; + +./run_006_extract_dino_embeds_cluster_eval.sh \ + --config-file $config_file \ + --stage $stage \ + --nnet-stage $nnet_stage \ + --ft-stage 1 \ + --use-gpu $use_gpu \ + --xvec-chunk-length $xvec_chunk_length \ + --do-clustering $do_clustering diff --git a/egs/voxceleb/ssl.v1/run_009_finetune_xvector_s2.sh b/egs/voxceleb/ssl.v1/run_009_finetune_xvector_s2.sh new file mode 100755 index 00000000..ca7d058a --- /dev/null +++ b/egs/voxceleb/ssl.v1/run_009_finetune_xvector_s2.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Copyright +# 2019 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=4 +config_file=default_config.sh +interactive=false +. parse_options.sh || exit 1; + +./run_007_train_xvector.sh \ + --config-file $config_file \ + --ngpu $ngpu \ + --stage $stage \ + --ft-stage 2 \ + --interactive $interactive + diff --git a/egs/voxceleb/ssl.v1/run_010_extract_ft2_xvec_embeds_cluster_eval.sh b/egs/voxceleb/ssl.v1/run_010_extract_ft2_xvec_embeds_cluster_eval.sh new file mode 100755 index 00000000..4f09dfaf --- /dev/null +++ b/egs/voxceleb/ssl.v1/run_010_extract_ft2_xvec_embeds_cluster_eval.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nnet_stage=2 +config_file=default_config.sh +use_gpu=true +xvec_chunk_length=120.0 +do_clustering=true +. 
parse_options.sh || exit 1; + +./run_006_extract_dino_embeds_cluster_eval.sh \ + --config-file $config_file \ + --stage $stage \ + --nnet-stage $nnet_stage \ + --ft-stage 2 \ + --use-gpu $use_gpu \ + --xvec-chunk-length $xvec_chunk_length \ + --do-clustering $do_clustering diff --git a/egs/voxceleb/v1.2/README.md b/egs/voxceleb/v1.2/README.md index e1199a3b..f6fec0a6 100644 --- a/egs/voxceleb/v1.2/README.md +++ b/egs/voxceleb/v1.2/README.md @@ -136,11 +136,13 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.50 | 0.035 | 0.038 | | | | | Cosine + AS-Norm | 0.47 | 0.031 | 0.038 | | | | | Cosine + QMF | 0.40 | 0.027 | 0.032 | +| config_fbank80_stmn_idrnd_resnet100.v3.2.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.49 | 0.032 | 0.038 | +| | | | Cosine + AS-Norm | 0.43 | 0.025 | 0.034 | +| | | | Cosine + QMF | 0.37 | 0.024 | 0.033 | | config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.60 | 0.043 | 0.071 | | | | | Cosine + AS-Norm | 0.53 | 0.034 | 0.063 | | | | | Cosine + QMF | 0.49 | 0.033 | 0.054 | - ### VoxCeleb 1 Entire-Clean trial list | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -190,6 +192,9 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.69 | 0.043 | 0.074 | | | | | Cosine + AS-Norm | 0.65 | 0.039 | 0.068 | | | | | Cosine + QMF | 0.63 | 0.036 | 0.065 | +| config_fbank80_stmn_idrnd_resnet100.v3.2.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 0.66 | 0.040 | 0.072 | +| | | | Cosine + AS-Norm | 0.62 | 0.037 | 0.066 | +| | | | Cosine + QMF | 0.59 | 0.035 | 0.064 | | config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 0.75 | 0.047 | 0.077 | | | | | Cosine + AS-Norm | 0.70 | 0.042 | 0.072 | | | | | Cosine + QMF | 0.68 | 0.040 | 0.069 | @@ -244,12 +249,14 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.36 | 0.077 | 0.122 | | | | | Cosine + AS-Norm | 1.23 | 0.069 | 0.112 | | | | | Cosine + QMF | 1.17 | 0.065 | 0.110 | +| config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.27 | 0.072 | 0.121 | +| | | | Cosine + AS-Norm | 1.15 | 0.065 | 0.107 | +| | | | Cosine + QMF | 1.10 | 0.062 | 0.102 | | config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.41 | 0.081 | 0.132 | | | | | Cosine + AS-Norm | 1.28 | 0.071 | 0.116 | | | | | Cosine + QMF | 1.21 | 0.069 | 0.113 | - ### VoxSRC2022 dev | Config | Model Type | Model Details | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | @@ -299,7 +306,9 @@ run_007_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | config_fbank80_stmn_idrnd_resnet100.v3.1.sh | ResNet100 / BasicBlock 128-256 ch. 
| Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 2.02 | 0.116 | 0.194 | | | | | Cosine + AS-Norm | 1.81 | 0.107 | 0.199 | | | | | Cosine + QMF | 1.72 | 0.099 | 0.186 | +| config_fbank80_stmn_idrnd_resnet100.v3.2.sh | ResNet100 / BasicBlock 128-256 ch. | Stage2: Subcenter-ArcFace m=0.3/intertop_m=0.1/centers=2 | Cosine | 1.91 | 0.111 | 0.192 | +| | | | Cosine + AS-Norm | 1.75 | 0.105 | 0.194 | +| | | | Cosine + QMF | 1.64 | 0.098 | 0.181 | | config_fbank80_stmn_res2net50w26s8.v3.0.sh | Res2Net50 w26 scale=8 | Stage2: ArcFace m=0.4/intertop_m=0.1 | Cosine | 1.96 | 0.124 | 0.211 | | | | | Cosine + AS-Norm | 1.79 | 0.118 | 0239 | | | | | Cosine + QMF | 1.68 | 0.114 | 0.216 | - diff --git a/hyperion/bin/cluster_embeddings.py b/hyperion/bin/cluster_embeddings.py index 10b6344e..fb30fcae 100644 --- a/hyperion/bin/cluster_embeddings.py +++ b/hyperion/bin/cluster_embeddings.py @@ -24,15 +24,12 @@ from hyperion.hyp_defs import config_logger from hyperion.io import RandomAccessDataReaderFactory as DRF from hyperion.np.clustering import AHC, KMeans, KMeansInitMethod, SpectralClustering -from hyperion.np.pdfs import DiagGMM +from hyperion.np.pdfs import SPLDA, DiagGMM, PLDAFactory from hyperion.np.transforms import PCA, LNorm from hyperion.utils import SegmentSet from hyperion.utils.math_funcs import cosine_scoring -subcommand_list = [ - "cos_ahc", - "spectral_clustering", -] +subcommand_list = ["cos_ahc", "spectral_clustering", "cos_ahc_plda_ahc"] def add_common_args(parser): @@ -98,6 +95,59 @@ def do_kmeans(x, samples_per_cluster, epochs, rtol, init_method, num_workers): return x_km, idx_km +def change_precision(x, precision=None): + if precision == "single": + return x.astype(np.float32) + elif precision == "half": + return x.astype(np.float16) + else: + return x + + +def do_cosine_scoring(x, precision=None): + logging.info("compute cosine affinity matrix") + x = change_precision(x) + return cosine_scoring(x, x) + + +def train_plda(x, y, plda, min_samples_per_cluster, max_samples_per_cluster=None): + logging.info("Train Centering/Whitening + PLDA") + _, cluster_idx, counts = np.unique(y, return_inverse=True, return_counts=True) + max_samples_per_cluster = ( + np.max(counts) if max_samples_per_cluster is None else max_samples_per_cluster + ) + transforms = LNorm() + transforms.fit(x) + if plda["y_dim"] > x.shape[1]: + plda["y_dim"] = x.shape[1] + plda_model = PLDAFactory.create(**plda) + + counts = counts[cluster_idx] + keep = np.logical_and( + counts >= min_samples_per_cluster, counts <= max_samples_per_cluster + ) + x = x[keep] + cluster_idx = cluster_idx[keep] + _, cluster_idx = np.unique(cluster_idx, return_inverse=True) + plda_model.fit(x, class_ids=cluster_idx) + + return transforms, plda_model + + +def do_ahc(scores, linkage_method, stop_criterion, threshold, num_clusters): + logging.info( + f"running AHC stop_criterion: {stop_criterion} thr: {threshold} num_clusters: {num_clusters}", + ) + ahc = AHC(method=linkage_method) + ahc.fit(scores) + if stop_criterion == "threshold": + y = ahc.get_flat_clusters_from_thr(threshold) + else: + y = ahc.get_flat_clusters_from_num_clusters(num_clusters) + + return y + + def get_gmm_post(x, y): logging.info("computing cluster posteriors with gmm") num_comp = np.max(y) + 1 @@ -119,8 +169,6 @@ def get_gmm_post(x, y): gmm.Mstep(N, u_x) p = gmm.compute_pz(x, mode="std") p_max = p[np.arange(x.shape[0]), y] - zz = p_max < 0.5 - print(np.mean(p[zz]), np.max(p[zz]), p_max[zz]) p_2nd = np.sort(p, axis=1, kind="heapsort")[:, -2] return p_max, 
@@ -129,10 +177,32 @@ def plot_score_hist(scores, fig_file): mask = np.triu(np.ones_like(scores, dtype=bool)) fig = plt.figure() scores = scores[mask] + logging.info( + "score-mean=%f score-std=%f score-max=%f score-min=%f", + scores.mean(), + scores.std(), + scores.max(), + scores.min(), + ) + if np.any(scores < -1.1) or np.any(scores > 1.1): + # if scores come from PLDA, limit the max and min values + thr = 2 * np.std(scores) + scores = scores.copy() + scores[scores > thr] = thr + scores[scores < -thr] = -thr + plt.hist(scores, bins=100, density=True) fig.savefig(fig_file) +def plot_cluster_size_hist(y, fig_file): + _, counts = np.unique(y, return_counts=True) + fig = plt.figure() + bins = np.arange(1, np.max(counts) + 1) + plt.hist(counts, bins=bins, density=False) + fig.savefig(fig_file) + + def cos_ahc( segments_file, feats_file, @@ -148,34 +218,17 @@ num_workers, filter_by_gmm_post, ): + Path(output_file).parent.mkdir(exist_ok=True, parents=True) segments, x = load_data(segments_file, feats_file) if lnorm: x = LNorm()(x) x = do_pca(x, pca) x_km, idx_km = do_kmeans(x, num_workers=num_workers, **pre_kmeans) - - logging.info("compute affinity matrix") - if ahc_precision == "single": - x_lowprec = x_km.astype(np.float32) - elif ahc_precision == "half": - x_lowprec = x_km.astype(np.float16) - else: - x_lowprec = x_km - - scores = cosine_scoring(x_lowprec, x_lowprec) + scores = do_cosine_scoring(x_km, ahc_precision) fig_file = Path(output_file).parent / "score_hist.png" plot_score_hist(scores, fig_file) - - logging.info("running AHC") - ahc = AHC(method=linkage_method) - ahc.fit(scores) - if stop_criterion == "threshold": - y = ahc.get_flat_clusters_from_thr(threshold) - else: - y = ahc.get_flat_clusters_from_num_clusters(num_clusters) - - del ahc + y = do_ahc(scores, linkage_method, stop_criterion, threshold, num_clusters) if idx_km is not None: y = y[idx_km] del x_km @@ -189,6 +242,8 @@ segments = SegmentSet(segments.loc[idx]) segments.save(output_file) + fig_file = Path(output_file).parent / "cluster_size_hist.png" + plot_cluster_size_hist(segments["cluster"], fig_file) def make_cos_ahc_parser(): @@ -229,7 +284,158 @@ ) parser.add_argument("--pre_kmeans.epochs", default=100, type=int) parser.add_argument("--pre_kmeans.rtol", default=0.001, type=float) - parser.add_argument("--num_workers", default=1, type=int) + parser.add_argument("--num-workers", default=1, type=int) + return parser
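Before the full `cos_ahc_plda_ahc` implementation below, a condensed sketch of its two-stage flow may help. It reuses the `train_plda` helper and the AHC/LNorm/cosine-scoring utilities seen above, hard-codes average linkage and threshold stopping, and omits the k-means pre-clustering, precision control, and GMM-posterior filtering that the real subcommand adds:

```python
# Condensed sketch of the two-stage clustering idea (assumptions: average
# linkage, threshold stopping criterion); not the full subcommand.
from hyperion.np.clustering import AHC
from hyperion.np.transforms import LNorm
from hyperion.utils.math_funcs import cosine_scoring

def two_stage_cluster(x, plda_cfg, thr1=0.0, thr2=0.0):
    x = LNorm()(x)
    # stage 1: AHC over cosine scores -> initial pseudo-speaker labels
    ahc = AHC(method="average")
    ahc.fit(cosine_scoring(x, x))
    y0 = ahc.get_flat_clusters_from_thr(thr1)
    # stage 2: train PLDA on the stage-1 labels, re-cluster on PLDA LLRs
    transform, plda_model = train_plda(x, y0, plda_cfg, min_samples_per_cluster=8)
    x = transform(x)
    ahc = AHC(method="average")
    ahc.fit(plda_model.llr_1vs1(x, x))
    return ahc.get_flat_clusters_from_thr(thr2)
```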
+ + +def cos_ahc_plda_ahc( + segments_file, + feats_file, + output_file, + lnorm, + pca, + linkage_method, + stop_criterion, + num_clusters_stage_1, + threshold_stage_1, + num_clusters_stage_2, + threshold_stage_2, + min_samples_per_cluster, + max_samples_per_cluster, + plda, + ahc_precision, + pre_kmeans, + num_workers, + filter_by_gmm_post, +): + Path(output_file).parent.mkdir(exist_ok=True, parents=True) + segments, x = load_data(segments_file, feats_file) + if lnorm: + x = LNorm()(x) + + x = do_pca(x, pca) + + # stage 1 + x_km, idx_km = do_kmeans(x, num_workers=num_workers, **pre_kmeans) + scores = do_cosine_scoring(x_km, ahc_precision) + fig_file = Path(output_file).parent / "cosine_score_hist.png" + plot_score_hist(scores, fig_file) + y = do_ahc( + scores, linkage_method, stop_criterion, threshold_stage_1, num_clusters_stage_1 + ) + if idx_km is not None: + y = y[idx_km] + del x_km + + fig_file = Path(output_file).parent / "cosine_cluster_size_hist.png" + plot_cluster_size_hist(y, fig_file) + # stage 2 + transform, plda_model = train_plda( + x, y, plda, min_samples_per_cluster, max_samples_per_cluster + ) + x = transform(x) + z = plda_model.compute_py_g_x(x) + _, idx_km = do_kmeans(z, num_workers=num_workers, **pre_kmeans) + + if idx_km is None: + scores = plda_model.llr_1vs1(x, x) + else: + scores = plda_model.llr_NvsM(x, x, ids1=idx_km, ids2=idx_km) + + scores = change_precision(scores, ahc_precision) + fig_file = Path(output_file).parent / "plda_score_hist.png" + plot_score_hist(scores, fig_file) + y = do_ahc( + scores, linkage_method, stop_criterion, threshold_stage_2, num_clusters_stage_2 + ) + if idx_km is not None: + y = y[idx_km] + + p_max, p_2nd = get_gmm_post(x, y) + segments["cluster"] = y + segments["post_cluster"] = p_max + segments["post_cluster_2nd"] = p_2nd + if filter_by_gmm_post > 0: + idx = segments["post_cluster"] > filter_by_gmm_post + segments = SegmentSet(segments.loc[idx]) + + segments.save(output_file) + fig_file = Path(output_file).parent / "plda_cluster_size_hist.png" + plot_cluster_size_hist(segments["cluster"], fig_file) + + +def make_cos_ahc_plda_ahc_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + add_common_args(parser) + parser.add_argument("--lnorm", default=False, action=ActionYesNo) + PCA.add_class_args(parser, prefix="pca") + parser.add_argument( + "--linkage-method", + default="average", + choices=["single", "complete", "average", "weighted", "ward"], + help="linkage method", + ) + parser.add_argument( + "--stop-criterion", + default="threshold", + choices=["threshold", "num_clusters"], + help="stopping criterion", + ) + parser.add_argument( + "--num-clusters-stage-1", + default=None, + type=int, + help="number of AHC clusters for first stage", + ) + parser.add_argument( + "--threshold-stage-1", + default=0, + type=float, + help="stopping threshold for first stage", + ) + parser.add_argument( + "--num-clusters-stage-2", + default=None, + type=int, + help="number of AHC clusters for second stage", + ) + parser.add_argument( + "--threshold-stage-2", + default=0, + type=float, + help="stopping threshold for second stage", + ) + parser.add_argument( + "--ahc-precision", default="single", choices=["half", "single", "double"] + ) + parser.add_argument( + "--min-samples-per-cluster", + default=8, + type=int, + help="minimum samples/cluster for a cluster to be used to train PLDA", + ) + parser.add_argument( + "--max-samples-per-cluster", + default=50, + type=int, + help="maximum samples/cluster for a cluster to be used to train PLDA", + ) + PLDAFactory.add_class_args(parser, prefix="plda") + parser.add_argument( + "--pre_kmeans.samples-per-cluster", + default=1, + type=int, + help="first k-means is done to reduce the computing cost of AHC", + ) + parser.add_argument( + "--pre_kmeans.init_method", + default=KMeansInitMethod.max_dist, + choices=KMeansInitMethod.choices(), + ) + parser.add_argument("--pre_kmeans.epochs", default=100, type=int) + parser.add_argument("--pre_kmeans.rtol", default=0.001, type=float) + parser.add_argument("--num-workers", default=1, type=int) + return parser @@ -269,6 +475,7 @@ def spectral_clustering( spectral_clustering, filter_by_gmm_post, ): + Path(output_file).parent.mkdir(exist_ok=True, parents=True) segments, x = load_data(segments_file, feats_file) if lnorm: x = LNorm()(x) @@ -294,6 +501,9 @@ segments.save(output_file) output_file = Path(output_file) + fig_file = Path(output_file).parent / "cluster_size_hist.png" + plot_cluster_size_hist(segments["cluster"], fig_file) + fig_file = 
output_file.with_stem(output_file.stem + "_eigengap").with_suffix(".png") sc.plot_eigengap_stats(eigengap_stats, num_clusters, fig_file) diff --git a/hyperion/bin/hyperion_dataset.py b/hyperion/bin/hyperion_dataset.py index 17fff2ba..2bd01f2d 100755 --- a/hyperion/bin/hyperion_dataset.py +++ b/hyperion/bin/hyperion_dataset.py @@ -34,6 +34,7 @@ "remove_short_segments", "rebuild_class_idx", "remove_classes_few_segments", + "remove_classes_few_toomany_segments", "split_train_val", "copy", "add_cols_to_segments", @@ -281,6 +282,55 @@ def remove_classes_few_segments( dataset.save(output_dataset) +def make_remove_classes_few_toomany_segments_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--class-name", required=True, help="""name of the class type e.g.: speaker""" + ) + parser.add_argument( + "--min-segs", default=1, type=int, help="""min. num. of segments/class""" + ) + parser.add_argument( + "--max-segs", default=None, type=int, help="""max. num. of segments/class""" + ) + parser.add_argument( + "--rebuild-idx", + default=False, + action=ActionYesNo, + help="""regenerate class indexes from 0 to new_num_classes-1""", + ) + parser.add_argument( + "--output-dataset", + default=None, + help="""output dataset dir, if None, we use the same as input""", + ) + + add_common_args(parser) + return parser + + +def remove_classes_few_toomany_segments( + dataset: PathLike, + class_name: str, + min_segs: int, + max_segs: Union[int, None], + rebuild_idx: bool, + output_dataset: PathLike, +): + if output_dataset is None: + output_dataset = dataset + + dataset = Dataset.load(dataset, lazy=True) + dataset.remove_classes_few_toomany_segments( + class_name, min_segs, max_segs, rebuild_idx + ) + dataset.save(output_dataset) + + def make_split_train_val_parser(): parser = ArgumentParser() parser.add_argument("--cfg", action=ActionConfigFile) @@ -397,7 +447,7 @@ def make_add_cols_to_segments_parser(): "--right-table", required=True, help="table where the new data is" ) parser.add_argument( - "--columns", + "--column-names", required=True, nargs="+", help="""columns to copy to segments table""", @@ -421,6 +471,20 @@ def make_add_cols_to_segments_parser(): help="""output dataset dir, if None, we use the same as input""", ) + parser.add_argument( + "--remove-missing", + default=False, + action=ActionYesNo, + help="remove dataset entries that don't have a value in the right table", + ) + + parser.add_argument( + "--create-class-info", + default=False, + action=ActionYesNo, + help="creates class-info tables for the new columns added to the dataset", + ) + add_common_args(parser) return parser @@ -432,12 +496,21 @@ def add_cols_to_segments( on: List[str], right_on: List[str], output_dataset: PathLike, + remove_missing: bool = False, + create_class_info: bool = False, ): if output_dataset is None: output_dataset = dataset dataset = Dataset.load(dataset, lazy=True) - dataset.add_cols_to_segments(right_table, column_names, on, right_on) + dataset.add_cols_to_segments( + right_table, + column_names, + on, + right_on, + remove_missing=remove_missing, + create_class_info=create_class_info, + ) dataset.save(output_dataset) diff --git a/hyperion/bin/hyperion_tables.py b/hyperion/bin/hyperion_tables.py index 59472d83..3f847d29 100755 --- a/hyperion/bin/hyperion_tables.py +++ b/hyperion/bin/hyperion_tables.py @@ -7,9 +7,12 @@ from pathlib import Path from typing import 
List, Optional, Union +import numpy as np +import pandas as pd from jsonargparse import ( ActionConfigFile, ActionParser, + ActionYesNo, ArgumentParser, namespace_to_dict, ) @@ -25,7 +28,7 @@ SegmentSet, ) -subcommand_list = ["cat"] +subcommand_list = ["cat", "filter", "make_class_file_from_column"] table_dict = { "segments": SegmentSet, "recordings": RecordingSet, @@ -108,6 +111,91 @@ def cat( output_table.save(output_file) +def make_filter_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--input-file", required=True, help="input table file") + parser.add_argument( + "--filter-file", required=True, help="table file that we use as filter" + ) + parser.add_argument( + "--filter-by", default="id", help="column that we use to filter " + ) + parser.add_argument( + "--output-file", + required=True, + help="""output table file""", + ) + parser.add_argument( + "--raise-if-missing", + default=True, + action=ActionYesNo, + help="raise exception if filter values are not in input file", + ) + add_common_args(parser) + return parser + + +def filter( + table_type: str, + input_file: PathLike, + filter_file: PathLike, + output_file: PathLike, + filter_by: str, + raise_if_missing: bool, +): + + input_file = Path(input_file) + filter_file = Path(filter_file) + output_file = Path(output_file) + + table_class = table_dict[table_type] + input_table = table_class.load(input_file) + filter_table = table_class.load(filter_file) + output_table = input_table.filter( + items=filter_table[filter_by], by=filter_by, raise_if_missing=raise_if_missing + ) + output_table.save(output_file) + + +def make_make_class_file_from_column_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument("--input-file", required=True, help="input table file") + + parser.add_argument( + "--column", + required=True, + help="column that we want to use to create a class-file", + ) + parser.add_argument( + "--output-file", + required=True, + help="""output class-file table""", + ) + + add_common_args(parser) + return parser + + +def make_class_file_from_column( + table_type: str, + input_file: PathLike, + output_file: PathLike, + column: str, +): + + input_file = Path(input_file) + output_file = Path(output_file) + + table_class = table_dict[table_type] + input_table = table_class.load(input_file) + class_ids = np.unique(input_table[column]) + df = pd.DataFrame({"id": class_ids}) + output_table = ClassInfo(df) + output_table.save(output_file) + + def main(): parser = ArgumentParser(description="Tool to manipulates the Hyperion data tables") parser.add_argument("--cfg", action=ActionConfigFile) diff --git a/hyperion/np/pdfs/plda/frplda.py b/hyperion/np/pdfs/plda/frplda.py index 591948f9..84cf0ace 100644 --- a/hyperion/np/pdfs/plda/frplda.py +++ b/hyperion/np/pdfs/plda/frplda.py @@ -120,7 +120,12 @@ def compute_py_g_x( assert self.is_init - N, F, S = D + if isinstance(D, tuple): + N, F, S = D + else: + F = D + N = np.ones((F.shape[0],), dtype=F.dtype) + S = None M = F.shape[0] y_dim = self.y_dim diff --git a/hyperion/np/pdfs/plda/plda.py b/hyperion/np/pdfs/plda/plda.py index 35b133c2..92f77090 100644 --- a/hyperion/np/pdfs/plda/plda.py +++ b/hyperion/np/pdfs/plda/plda.py @@ -172,8 +172,13 @@ def compute_py_g_x( Py accumlator for MD step with shape (y_dim, y_dim) """ assert self.is_init + if isinstance(D, tuple): + N, F, S = D + else: + F = D + N = np.ones((F.shape[0],), dtype=F.dtype) + S = None - N, F, S = D Fc = F - 
self.mu M = F.shape[0] @@ -535,9 +540,7 @@ def log_probx_g_yz(self, x, y, z): logD = np.sum(np.log(self.D)) delta = x - self.mu - np.dot(y, self.V) - np.dot(z, self.U) logp = ( - -x.shape[-1] * np.log(2 * np.pi) - + logD - - np.sum(self.D * delta**2, axis=-1) + -x.shape[-1] * np.log(2 * np.pi) + logD - np.sum(self.D * delta**2, axis=-1) ) logp /= 2 return logp diff --git a/hyperion/np/pdfs/plda/plda_base.py b/hyperion/np/pdfs/plda/plda_base.py index 09544cae..a4a308e0 100644 --- a/hyperion/np/pdfs/plda/plda_base.py +++ b/hyperion/np/pdfs/plda/plda_base.py @@ -47,7 +47,7 @@ def __init__( epochs=20, ml_md="ml+md", md_epochs=None, - **kwargs + **kwargs, ): super().__init__(**kwargs) self.mu = mu @@ -380,7 +380,9 @@ def llr_NvsM_book(self, D1, D2): """ pass - def llr_NvsM(self, x1, x2, ids1=None, ids2=None, method="vavg-lnorm"): + def llr_NvsM( + self, x1, x2, ids1=None, ids2=None, method=PLDALLRNvsMMethod.lnorm_vavg + ): """log-likelihood ratio between target and non-target hypothesis for the case of N segments/enrollment-side and M segments/test-side @@ -411,6 +413,8 @@ def llr_NvsM(self, x1, x2, ids1=None, ids2=None, method="vavg-lnorm"): if method == PLDALLRNvsMMethod.lnorm_vavg: return self.llr_NvsM_vavg(D1, D2, do_lnorm=True) + raise ValueError(f"wrong llr {method}") + def llr_NvsM_vavg(self, D1, D2, do_lnorm=True): """log-likelihood ratio between target and non-target hypothesis for the case of N segments/enrollment-side and M segments/test-side @@ -455,7 +459,7 @@ def llr_NvsM_savg(self, x1, ids1, x2, ids2): scores = F.T / N return scores - def llr_Nvs1(self, x1, x2, ids1=None, method="vavg-lnorm"): + def llr_Nvs1(self, x1, x2, ids1=None, method=PLDALLRNvsMMethod.lnorm_vavg): """log-likelihood ratio between target and non-target hypothesis for the case of N segments/enrollment-side and M segments/test-side @@ -484,6 +488,8 @@ def llr_Nvs1(self, x1, x2, ids1=None, method="vavg-lnorm"): if method == PLDALLRNvsMMethod.lnorm_vavg: return self.llr_Nvs1_vavg(D1, x2, do_lnorm=True) + raise ValueError(f"wrong llr {method}") + def llr_Nvs1_vavg(self, D1, x2, do_lnorm=True): """log-likelihood ratio between target and non-target hypothesis for the case of N segments/enrollment-side and M segments/test-side diff --git a/hyperion/np/pdfs/plda/splda.py b/hyperion/np/pdfs/plda/splda.py index 9e0c2a20..32fc4628 100644 --- a/hyperion/np/pdfs/plda/splda.py +++ b/hyperion/np/pdfs/plda/splda.py @@ -2,6 +2,7 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import numpy as np from scipy import linalg as sla @@ -122,7 +123,13 @@ def compute_py_g_x( Ry accumlator for ML step with shape (y_dim, y_dim) Py accumlator for MD step with shape (y_dim, y_dim) """ - N, F, S = D + if isinstance(D, tuple): + N, F, S = D + else: + F = D + N = np.ones((F.shape[0],), dtype=F.dtype) + S = None + Fc = F - self.mu M = F.shape[0] diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index e19ec329..83f314e1 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -360,17 +360,12 @@ def _resample(self, x, fs): return self.resampler(x, fs) - # try: - # if self.target_sample_freq is None or fs == self.target_sample_freq: - # return x, fs - # resampler = self._get_resampler(fs) - # return resampler(x), self.target_sample_freq - # except: - # return x, fs - def __getitem__(self, segment): seg_id, start, duration = self._parse_segment_item(segment) x, fs = self._read_audio(seg_id, 
start, duration) + assert ( + len(x) > 0 + ), f"read audio empty seg_id={seg_id}, start={start}, dur={duration}" x, fs = self._resample(x, fs) data = {"seg_id": seg_id, "sample_freq": fs} x_augs = self._apply_augs(x, duration, fs) @@ -384,28 +379,6 @@ def filter_args(**kwargs): args = filter_func_args(AudioDataset.__init__, kwargs) return args - # @staticmethod - # def filter_args(**kwargs): - - # ar_args = AR.filter_args(**kwargs) - # valid_args = ( - # "recordings_file", - # "segments_file", - # "aug_cfgs", - # "num_augs", - # "class_names", - # "class_files", - # "bpe_model", - # "text_file", - # "return_segment_info", - # "return_orig", - # "time_durs_file", - # "target_sample_freq", - # ) - # args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - # args.update(ar_args) - # return args - @staticmethod def add_class_args(parser, prefix=None, skip=set()): if prefix is not None: diff --git a/hyperion/torch/models/wav2xvectors/wav2xvector.py b/hyperion/torch/models/wav2xvectors/wav2xvector.py index 501fa7f8..69e7b3ca 100644 --- a/hyperion/torch/models/wav2xvectors/wav2xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2xvector.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import contextlib import logging @@ -155,14 +156,18 @@ def trainable_param_groups(self): def set_train_mode(self, mode): if mode == self._train_mode: return - + logging.info("setting Wav2XVector train mode to %s", mode) if mode == "full-feats-grad": self._feats_context = contextlib.nullcontext() xvector_mode = "full" else: logging.info("using torch.no_grad for feats") self._feats_context = torch.no_grad() + xvector_mode = mode + logging.info( + "setting Wav2XVector XVector object train mode to %s", xvector_mode + ) self.xvector.set_train_mode(xvector_mode) self._train_mode = mode @@ -173,7 +178,7 @@ def _train(self, train_mode: str): elif train_mode in ["full-feats-grad", "full"]: self.xvector._train("full") elif train_mode == "ft-embed-affine": - self.xvector._train("ft-embed_affine") + self.xvector._train(train_mode) else: raise ValueError(f"invalid train_mode={train_mode}") diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index b4926533..c20f5520 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -2,10 +2,10 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import logging -# from enum import Enum +import logging from dataclasses import dataclass +from enum import Enum from typing import List, Optional import torch @@ -21,6 +21,15 @@ from ...utils import eval_nnet_by_chunks, scale_seq_lengths +class XVectorHeadType(str, Enum): + XVECTOR = "x-vector" + DINO = "dino" + + @staticmethod + def choices(): + return [o.value for o in XVectorHeadType] + + @dataclass class XVectorOutput(HypDataClass): loss: torch.Tensor @@ -63,7 +72,7 @@ def __init__( embed_layer=0, in_feats=None, proj_feats=None, - head_type="x-vector", + head_type=XVectorHeadType.XVECTOR, bias_weight_decay=None, ): super().__init__(bias_weight_decay=bias_weight_decay) @@ -143,7 +152,7 @@ def __init__( self.proj_head_norm_before = proj_head_norm_before self.dropout_rate = dropout_rate self.embed_layer = embed_layer - if self.head_type == "x-vector": + if self.head_type == XVectorHeadType.XVECTOR: self.proj_head_net = None self.classif_net = ClassifHead( pool_feats, @@ 
-164,10 +173,11 @@ def __init__( dropout_rate=dropout_rate, use_in_norm=head_use_in_norm, ) - elif self.head_type == "dino": + elif self.head_type == XVectorHeadType.DINO: self.proj_head_net = ProjHead( pool_feats, embed_dim, + norm_layer=head_norm_layer, use_norm=proj_head_use_norm, norm_before=proj_head_norm_before, ) @@ -199,53 +209,53 @@ def num_classes(self): @property def cos_scale(self): - if self.head_type == "x-vector": + if self.head_type == XVectorHeadType.XVECTOR: return self.classif_net.cos_scale - elif self.head_type == "dino": + elif self.head_type == XVectorHeadType.DINO: return 1 else: raise ValueError @property def margin(self): - if self.head_type == "x-vector": + if self.head_type == XVectorHeadType.XVECTOR: return self.classif_net.margin else: return 0.0 @property def margin_warmup_epochs(self): - if self.head_type == "x-vector": + if self.head_type == XVectorHeadType.XVECTOR: return self.classif_net.margin_warmup_epochs else: return 0 @property def intertop_k(self): - if self.head_type == "x-vector": + if self.head_type == XVectorHeadType.XVECTOR: return self.classif_net.intertop_k else: return 0 @property def intertop_margin(self): - if self.head_type == "x-vector": + if self.head_type == XVectorHeadType.XVECTOR: return self.classif_net.intertop_margin else: return 0.0 @property def num_subcenters(self): - if self.head_type == "x-vector": + if self.head_type == XVectorHeadType.XVECTOR: return self.classif_net.num_subcenters else: return 0 @property def loss_type(self): - if self.head_type == "x-vector": + if self.head_type == XVectorHeadType.XVECTOR: return self.classif_net.loss_type - elif self.head_type == "dino": + elif self.head_type == XVectorHeadType.DINO: return self.classif_net.output_type else: raise ValueError() @@ -260,13 +270,13 @@ def loss_type(self): # return new_self # def before_cloning(self): - # if self.head_type == "dino": + # if self.head_type == XVectorHeadType.DINO: # return self.classif_net.before_cloning() # else: # return None, None # def after_cloning(self, output): - # if self.head_type == "dino": + # if self.head_type == XVectorHeadType.DINO: # self.classif_net.after_cloning(output) def _make_pool_net(self, pool_net, enc_feats=None): @@ -643,6 +653,7 @@ def change_config( intertop_k=5, intertop_margin=0.0, num_subcenters=2, + head_type=XVectorHeadType.XVECTOR, ): logging.info("changing x-vector config") if override_output: @@ -655,6 +666,7 @@ def change_config( intertop_k=intertop_k, intertop_margin=intertop_margin, num_subcenters=num_subcenters, + head_type=head_type, ) if override_dropouts: @@ -672,7 +684,60 @@ def rebuild_output_layer( intertop_k=5, intertop_margin=0.0, num_subcenters=2, + head_type=XVectorHeadType.XVECTOR, ): + + if head_type != self.head_type: + # only from dino to x-vector + assert self.head_type == XVectorHeadType.DINO + logging.info("transforming dino head into x-vector head") + self.num_embed_layers = 1 + self.head_use_in_norm = ( + self.proj_head_use_norm and self.proj_head_norm_before + ) + self.head_use_norm = ( + self.proj_head_use_norm and not self.proj_head_norm_before + ) + self.classif_net = ClassifHead( + self.proj_head_net.in_feats, + num_classes, + embed_dim=self.proj_head_net.out_feats, + num_embed_layers=1, + hid_act=None, + loss_type=loss_type, + cos_scale=cos_scale, + margin=margin, + margin_warmup_epochs=margin_warmup_epochs, + intertop_k=intertop_k, + intertop_margin=intertop_margin, + num_subcenters=num_subcenters, + norm_layer=self.head_norm_layer, + use_norm=self.proj_head_use_norm, + 
norm_before=self.norm_before, + dropout_rate=self.dropout_rate, + use_in_norm=self.head_use_in_norm, + ) + + if ( + self.classif_net.fc_blocks[0].linear.bias is not None + and self.proj_head_net.proj.bias is not None + ): + self.classif_net.fc_blocks[0].linear.bias.data.copy_( + self.proj_head_net.proj.bias.data + ) + + self.classif_net.fc_blocks[0].linear.weight.data.copy_( + self.proj_head_net.proj.weight.data + ) + if self.head_use_norm: + self.classif_net.fc_blocks[0].bn1.load_state_dict( + self.proj_head_net._norm_layer.state_dict() + ) + del self.proj_head_net + self.proj_head_net = None + self.head_type = XVectorHeadType.XVECTOR + return + if ( (self.num_classes is not None and self.num_classes != num_classes) or (self.loss_type != loss_type) @@ -733,7 +798,7 @@ def set_train_mode(self, mode): else: raise ValueError(f"invalid train_mode={mode}") - if self.head_type == "dino": + if self.head_type == XVectorHeadType.DINO: self.classif_net.freeze_output_g() self._train_mode = mode @@ -780,8 +845,8 @@ def add_class_args(parser, prefix=None, skip=set()): parser.add_argument( "--head-type", - default="x-vector", - choices=["x-vector", "dino"], + default=XVectorHeadType.XVECTOR, + choices=XVectorHeadType.choices(), help="type of classification head in [x-vector, dino]", ) diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index f232c986..ff36096b 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -221,9 +221,8 @@ def _make_in_layer(self): else: raise Exception(f"wrong pos-enc-type={self.pos_enc_type}") - hid_act = AF.create(self.hid_act) - if self.in_layer_type == "linear": + hid_act = AF.create(self.hid_act) self.in_layer = nn.Sequential( nn.Linear(in_feats, d_model), nn.LayerNorm(d_model), @@ -235,7 +234,7 @@ def _make_in_layer(self): self.in_layer = Conv2dSubsampler( in_feats, d_model, - hid_act, + self.hid_act, self.in_stride, pos_enc, time_dim=self.in_time_dim, @@ -244,7 +243,7 @@ def _make_in_layer(self): self.in_layer = Conv1dSubsampler( in_feats, d_model, - hid_act, + self.hid_act, self.in_stride, pos_enc, time_dim=self.in_time_dim, diff --git a/hyperion/torch/narchs/dino_head.py b/hyperion/torch/narchs/dino_head.py index 87c8daae..9f05aa7f 100644 --- a/hyperion/torch/narchs/dino_head.py +++ b/hyperion/torch/narchs/dino_head.py @@ -2,6 +2,7 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + from typing import Optional import torch @@ -80,7 +81,8 @@ def __init__( if num_hid_layers == 1: self.fc_layers = nn.Linear(in_feats, bottleneck_feats) else: - layers = [nn.Linear(in_feats, hid_feats)] + use_bias = False if use_norm and norm_before else True + layers = [nn.Linear(in_feats, hid_feats, bias=use_bias)] if use_norm and norm_before: layers.append(self._norm_layer(hid_feats)) layers.append(AF.create(hid_act)) @@ -90,7 +92,7 @@ def __init__( layers.append(nn.Dropout(self.dropout_rate)) for _ in range(num_hid_layers - 2): - layers.append(nn.Linear(hid_feats, hid_feats)) + layers.append(nn.Linear(hid_feats, hid_feats, bias=use_bias)) if use_norm and norm_before: layers.append(self._norm_layer(hid_feats)) layers.append(AF.create(hid_act)) diff --git a/hyperion/torch/narchs/proj_head.py b/hyperion/torch/narchs/proj_head.py index e2838013..63a5e128 100644 --- a/hyperion/torch/narchs/proj_head.py +++ b/hyperion/torch/narchs/proj_head.py @@ -3,7 +3,6 @@ Apache 2.0 
(http://www.apache.org/licenses/LICENSE-2.0) """ - import torch import torch.nn as nn from jsonargparse import ActionParser, ActionYesNo, ArgumentParser @@ -40,7 +39,12 @@ class ProjHead(NetArch): """ def __init__( - self, in_feats, out_feats=256, norm_layer=None, use_norm=True, norm_before=True, + self, + in_feats, + out_feats=256, + norm_layer=None, + use_norm=True, + norm_before=True, ): super().__init__() @@ -49,7 +53,7 @@ def __init__( self.norm_layer = norm_layer self.use_norm = use_norm self.norm_before = norm_before - + use_bias = True if use_norm: norm_groups = None if norm_layer == "group-norm": @@ -59,26 +63,21 @@ def __init__( self._norm_layer = _norm_layer(in_feats) else: self._norm_layer = _norm_layer(out_feats) + use_bias = False else: self._norm_layer = None - self.proj = nn.Linear(in_feats, out_feats) + self.proj = nn.Linear(in_feats, out_feats, bias=use_bias) def forward(self, x, y=None): if self.use_norm and self.norm_before: x = self._norm_layer(x) - # assert not torch.any( - # torch.isnan(x) - # ), f"x before proj is nan {x.size()} {torch.sum(torch.isnan(x))}" + x = self.proj(x) - # assert not torch.any( - # torch.isnan(x) - # ), f"x after proj is nan {x.size()} {torch.sum(torch.isnan(x))}" + if self.use_norm and not self.norm_before: x = self._norm_layer(x) - # assert not torch.any( - # torch.isnan(x) - # ), f"x after bn is nan {x.size()} {torch.sum(torch.isnan(x))}" + return x def get_config(self): diff --git a/hyperion/torch/trainers/ae_trainer.py b/hyperion/torch/trainers/ae_trainer.py index 9939797e..4004a565 100644 --- a/hyperion/torch/trainers/ae_trainer.py +++ b/hyperion/torch/trainers/ae_trainer.py @@ -14,7 +14,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset -from .torch_trainer import TorchTrainer +from .torch_trainer import AMPDType, TorchTrainer class AETrainer(TorchTrainer): @@ -36,6 +36,7 @@ class AETrainer(TorchTrainer): ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -70,6 +71,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, @@ -173,7 +175,9 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - super().add_class_args(parser, train_modes, skip=skip.union({"target_key"})) + TorchTrainer.add_class_args( + parser, train_modes, skip=skip.union({"target_key"}) + ) if "target_key" not in skip: parser.add_argument( "--target-key", default="x", help="dict. 
key for nnet targets" diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index f128db44..10bc2edc 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -14,7 +14,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset -from .torch_trainer import TorchTrainer +from .torch_trainer import AMPDType, TorchTrainer class DVAETrainer(TorchTrainer): @@ -35,6 +35,7 @@ class DVAETrainer(TorchTrainer): ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -68,6 +69,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, @@ -209,7 +211,7 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - super().add_class_args( + TorchTrainer.add_class_args( parser, train_modes, skip=skip.union({"input_key", "target_key"}) ) if "input_key" not in skip: diff --git a/hyperion/torch/trainers/plda_trainer.py b/hyperion/torch/trainers/plda_trainer.py index 71845a4b..cd0b17e8 100644 --- a/hyperion/torch/trainers/plda_trainer.py +++ b/hyperion/torch/trainers/plda_trainer.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os from collections import OrderedDict as ODict @@ -14,7 +15,7 @@ from ..losses import BCEWithLLR from ..utils import MetricAcc, tensors_subset from ..utils.misc import get_selfsim_tarnon -from .torch_trainer import TorchTrainer +from .torch_trainer import AMPDType, TorchTrainer class PLDATrainer(TorchTrainer): @@ -36,9 +37,9 @@ class PLDATrainer(TorchTrainer): ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) loss: if None, it uses cross-entropy loss_weights: dictionary with weights for multiclass and binary cross-entropies - train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. 
steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -75,6 +76,7 @@ def __init__( p_tar=0.5, train_mode="train", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 1d4665cf..808cce3e 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -2,6 +2,7 @@ Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os from collections import OrderedDict as ODict @@ -14,7 +15,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset -from .torch_trainer import TorchTrainer +from .torch_trainer import AMPDType, TorchTrainer class TransducerTrainer(TorchTrainer): @@ -37,6 +38,7 @@ class TransducerTrainer(TorchTrainer): loss: if None, it uses cross-entropy train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -69,6 +71,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index 79526122..dbf5dfdd 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -14,7 +14,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset -from .torch_trainer import TorchTrainer +from .torch_trainer import AMPDType, TorchTrainer class VAETrainer(TorchTrainer): @@ -35,6 +35,7 @@ class VAETrainer(TorchTrainer): ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger @@ -68,6 +69,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, @@ -211,7 +213,9 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - super().add_class_args(parser, train_modes, skip=skip.union({"target_key"})) + TorchTrainer.add_class_args( + parser, train_modes, skip=skip.union({"target_key"}) + ) if "target_key" not in skip: parser.add_argument( "--target-key", default="x", help="dict. 
key for nnet targets" diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index ff3f85cc..43aa59a5 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -2,6 +2,7 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import math import os @@ -14,7 +15,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset -from .dvae_trainer import DVAETrainer +from .dvae_trainer import AMPDType, DVAETrainer class VQDVAETrainer(DVAETrainer): @@ -35,6 +36,7 @@ class VQDVAETrainer(DVAETrainer): ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -68,6 +70,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, @@ -174,7 +177,7 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - super().add_class_args( + DVAETrainer.add_class_args( parser, train_modes, skip=skip.union({"input_key", "target_key"}) ) if "input_key" not in skip: diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index 4ec04fde..64db2e64 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -2,6 +2,7 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import math import os @@ -14,7 +15,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset -from .vae_trainer import VAETrainer +from .vae_trainer import AMPDType, VAETrainer class VQVAETrainer(VAETrainer): @@ -35,6 +36,7 @@ class VQVAETrainer(VAETrainer): ddp_type: type of distributed data parallel in (ddp, oss_ddp, oss_shared_ddp) train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. 
steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -68,6 +70,7 @@ def __init__( ddp_type="ddp", train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, @@ -85,34 +88,6 @@ def __init__( super_args = filter_func_args(super().__init__, locals()) super().__init__(**super_args) - # super().__init__( - # model, - # optim, - # epochs, - # exp_path, - # cur_epoch=cur_epoch, - # grad_acc_steps=grad_acc_steps, - # eff_batch_size=eff_batch_size, - # device=device, - # metrics=metrics, - # lrsched=lrsched, - # loggers=loggers, - # ddp=ddp, - # ddp_type=ddp_type, - # train_mode=train_mode, - # use_amp=use_amp, - # log_interval=log_interval, - # use_tensorboard=use_tensorboard, - # use_wandb=use_wandb, - # wandb=wandb, - # grad_clip=grad_clip, - # grad_clip_norm=grad_clip_norm, - # swa_start=swa_start, - # swa_lr=swa_lr, - # swa_anneal_epochs=swa_anneal_epochs, - # cpu_offload=cpu_offload, - # ) - def train_epoch(self, data_loader): batch_keys = [self.input_key, self.target_key] metric_acc = MetricAcc(device=self.device) @@ -203,7 +178,7 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - super().add_class_args(parser, train_modes, skip=skip.union({"target_key"})) + VAETrainer.add_class_args(parser, train_modes, skip=skip.union({"target_key"})) if "target_key" not in skip: parser.add_argument( "--target-key", default="x", help="dict. key for nnet targets" diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index e19945d1..b9dd67d7 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os import time @@ -14,6 +15,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset +from .torch_trainer import AMPDType from .xvector_trainer import XVectorTrainer @@ -39,6 +41,7 @@ class XVectorAdvTrainer(XVectorTrainer): loss: if None, it uses cross-entropy train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs log_interval: number of optim. 
steps between log outputs use_tensorboard: use tensorboard logger @@ -76,6 +79,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index ad6a3262..8ece7de2 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os import time @@ -14,6 +15,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset +from .torch_trainer import AMPDType from .xvector_trainer_from_wav import XVectorTrainerFromWav @@ -41,6 +43,7 @@ class XVectorAdvTrainerFromWav(XVectorTrainerFromWav): loss: if None, it uses cross-entropy train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -78,6 +81,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 666c9a9d..2902f23d 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os from collections import OrderedDict as ODict @@ -13,7 +14,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset -from .torch_trainer import TorchTrainer +from .torch_trainer import AMPDType, TorchTrainer class XVectorTrainer(TorchTrainer): @@ -36,6 +37,7 @@ class XVectorTrainer(TorchTrainer): loss: if None, it uses cross-entropy train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -70,6 +72,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index d80f03f1..98bc404d 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os from collections import OrderedDict as ODict @@ -13,6 +14,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset +from .torch_trainer import AMPDType from .xvector_trainer import XVectorTrainer @@ -41,6 +43,7 @@ class XVectorTrainerDeepFeatReg(XVectorTrainer): reg_loss: nn.Module loss used for regularization, if None it uses L1 loss. 
train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -81,6 +84,7 @@ def __init__( reg_loss=None, train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index cf956dc7..1005435f 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os from collections import OrderedDict as ODict @@ -12,7 +13,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, tensors_subset -from .torch_trainer import TorchTrainer +from .torch_trainer import AMPDType from .xvector_trainer_deep_feat_reg import XVectorTrainerDeepFeatReg @@ -41,6 +42,7 @@ class XVectorTrainerDeepFeatRegFromWav(XVectorTrainerDeepFeatReg): reg_loss: nn.Module loss used for regularization, if None it uses L1 loss. train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -82,6 +84,7 @@ def __init__( reg_loss=None, train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=10, use_tensorboard=False, use_wandb=False, diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 89c9b9a7..2a238a06 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import os from collections import OrderedDict as ODict @@ -12,6 +13,7 @@ from ...utils.misc import filter_func_args from ..utils import MetricAcc, TorchDDP, tensors_subset +from .torch_trainer import AMPDType from .xvector_trainer import XVectorTrainer @@ -35,6 +37,7 @@ class XVectorTrainerFromWav(XVectorTrainer): loss: if None, it uses cross-entropy train_mode: training mode in ['train', 'ft-full', 'ft-last-layer'] use_amp: uses mixed precision training. + amp_dtype: "float16" | "bfloat16" log_interval: number of optim. 
steps between log outputs use_tensorboard: use tensorboard logger use_wandb: use wandb logger @@ -70,6 +73,7 @@ def __init__( loss=None, train_mode="full", use_amp=False, + amp_dtype=AMPDType.FLOAT16, log_interval=1000, use_tensorboard=False, use_wandb=False, diff --git a/hyperion/utils/dataset.py b/hyperion/utils/dataset.py index 1b35364d..e485f1a5 100644 --- a/hyperion/utils/dataset.py +++ b/hyperion/utils/dataset.py @@ -2,6 +2,7 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import math from copy import deepcopy @@ -619,8 +620,6 @@ def save_all( def update_from_disk(self): self.segments() self.recordings() - # for k, v in self.recordings(): - # pass for k, v in self.features(): pass @@ -810,18 +809,6 @@ def remove_recordings( self._recordings = None self._recordings_path = None - # def remove_recordings( - # self, - # recordings_name: str, - # ): - # if self._recordingsr_paths[recordings_name] is not None: - # file_path = Path(self._recordings_paths[recordings_name]) - # if file_path.is_file(): - # file_path.unlink() - - # del self._recordings[recordings_name] - # del self._recordings_paths[recordings_name] - def remove_classes(self, classes_name: str): if self._classes_paths[classes_name] is not None: self._files_to_delete.append(self._class_paths[classes_name]) @@ -855,6 +842,8 @@ def add_cols_to_segments( column_names: Union[None, str, List[str], np.ndarray] = None, on: Union[str, List[str], np.ndarray] = "id", right_on: Union[None, str, List[str], np.ndarray] = None, + remove_missing: bool = False, + create_class_info: bool = False, ): if isinstance(right_table, (str, Path)): file_path = Path(right_table) @@ -871,30 +860,50 @@ def add_cols_to_segments( raise ValueError("%s not found", right_table) segments = self.segments(keep_loaded=True) - segments.add_columns(right_table, column_names, on=on, right_on=right_on) + num_segs_0 = len(segments) + segments.add_columns( + right_table, + column_names, + on=on, + right_on=right_on, + remove_missing=remove_missing, + ) + if remove_missing and len(segments) < num_segs_0: + self.clean() + + if create_class_info and column_names is not None: + self.create_class_info_from_col(column_names) + + def create_class_info_from_col( + self, + column_names: Union[str, List[str], np.ndarray], + ): + if isinstance(column_names, str): + column_names = [column_names] + + for col in column_names: + if col not in self._classes: + df = pd.DataFrame( + {"id": np.unique(self.segments(keep_loaded=True)[col])} + ) + class_info = ClassInfo(df) + self.add_classes(col, class_info) def clean(self, rebuild_class_idx=False): + rec_ids = self.segments().recordings() - # for k, table in self.recordings(): - # # table = table.loc[table["id"].isin(rec_ids)].copy() - # # self._recordings[k] = RecordingSet(table) self._recordings = self.recordings().filter(lambda df: df["id"].isin(rec_ids)) ids = self.segments()["id"].values for k, table in self.features(): self._features[k] = table.filter(lambda df: df["id"].isin(ids)) - # table = table.loc[table["id"].isin(ids)].copy() - # self._features[k] = FeatureSet(table) for k, table in self.classes(): class_ids = self.segments()[k].unique() self._classes[k] = table.filter(lambda df: df["id"].isin(class_ids)) - # table = table[table["id"].isin(class_ids)].copy() - # self._classes[k] = ClassInfo(table) remove_keys = [] for k, table in self.enrollments(): - # table = table.loc[table["segmentid"].isin(ids)].copy() table = 
table.filter(lambda df: df["segmentid"].isin(ids)) if len(table) > 0: self._enrollments[k] = table @@ -1054,6 +1063,27 @@ def remove_classes_few_segments( class_info = self.classes_value(class_name) class_info.add_class_idx() + def remove_classes_few_toomany_segments( + self, + class_name: str, + min_segs: int, + max_segs: int, + rebuild_idx: bool = False, + ): + segments = self.segments() + classes, counts = np.unique(segments[class_name], return_counts=True) + if max_segs is None: + keep_classes = classes[counts >= min_segs] + else: + keep_classes = classes[ + np.logical_and(counts >= min_segs, counts <= max_segs) + ] + self._segments = segments.filter(lambda df: df[class_name].isin(keep_classes)) + self.clean() + if rebuild_idx: + class_info = self.classes_value(class_name) + class_info.add_class_idx() + def rebuild_class_idx(self, class_name: str): class_info = self.classes_value(class_name) class_info.add_class_idx() diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index b94d9752..ea03f058 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -8,7 +8,7 @@ from collections import OrderedDict from copy import deepcopy from pathlib import Path -from typing import Optional, Union, List +from typing import List, Optional, Union import numpy as np import pandas as pd @@ -194,7 +194,14 @@ def cat(cls, tables): return cls(df) def filter( - self, predicate=None, items=None, iindex=None, columns=None, by="id", keep=True + self, + predicate=None, + items=None, + iindex=None, + columns=None, + by="id", + keep=True, + raise_if_missing=True, ): """Filters the table and produce a new table with the elements to keep @@ -243,15 +250,20 @@ def filter( elif items is not None: if by != "id": missing = [False if v in df[by] else True for v in items] - if any(missing): + if any(missing) and raise_if_missing: raise Exception(f"{items[missing]} not found in table") items = [True if v in items else False for v in df[by]] + elif not raise_if_missing: + items = [item for item in items if item in df.index] if columns is None: df = df.loc[items] else: df = df.loc[items, columns] else: + if not raise_if_missing: + iindex = iindex[iindex < len(df)] + if iindex is not None: df = self.df.iloc[iindex] @@ -327,6 +339,7 @@ def add_columns( column_names: Union[None, str, List[str], np.ndarray] = None, on: Union[str, List[str], np.ndarray] = "id", right_on: Union[None, str, List[str], np.ndarray] = None, + remove_missing: bool = False, ): if isinstance(right_table, InfoTable): right_table = right_table.df @@ -337,7 +350,25 @@ def add_columns( if right_on is None: right_on = on - self.df = self.df.merge(right_table, how="left", left_on=on, right_on=right_on) + how = "inner" if remove_missing else "left" + left_index = False + right_index = False + if on == "id" or on == ["id"]: + on = None + left_index = True + + if (right_on == "id" or right_on == ["id"]) and "id" in right_table: + right_on = None + right_index = True + + self.df = self.df.merge( + right_table, + how=how, + left_on=on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + ) # def __len__(self): From ab8c2859fc3531da01cb7003e2881f9271130ba6 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 26 Apr 2024 16:17:15 -0400 Subject: [PATCH 132/154] added results to README in voxceleb/ssl.v1 --- egs/voxceleb/ssl.v1/README.md | 188 ++++++++++++++++++ egs/voxceleb/ssl.v1/cmd.sh | 2 +- egs/voxceleb/ssl.v1/default_config.sh | 1 + hyperion/bin/hyperion_dataset.py | 60 +++++- 
.../split_dataset_into_trials_and_cohort.py | 4 +- hyperion/data_prep/musan.py | 7 +- hyperion/data_prep/rirs.py | 18 +- hyperion/data_prep/voxceleb1.py | 7 +- hyperion/data_prep/voxceleb2.py | 7 +- hyperion/data_prep/voxsrc22.py | 7 +- hyperion/torch/lr_schedulers/cos_lr.py | 6 +- hyperion/torch/lr_schedulers/lr_scheduler.py | 2 +- hyperion/utils/__init__.py | 4 +- hyperion/utils/{dataset.py => hyp_dataset.py} | 107 ++++++++-- 14 files changed, 369 insertions(+), 51 deletions(-) create mode 100644 egs/voxceleb/ssl.v1/README.md create mode 120000 egs/voxceleb/ssl.v1/default_config.sh rename hyperion/utils/{dataset.py => hyp_dataset.py} (91%) diff --git a/egs/voxceleb/ssl.v1/README.md b/egs/voxceleb/ssl.v1/README.md new file mode 100644 index 00000000..03b2e1c4 --- /dev/null +++ b/egs/voxceleb/ssl.v1/README.md @@ -0,0 +1,188 @@ +# VoxCeleb SSL V1 + +Recipe for the Unsupervised VoxCeleb Speaker Verification Task: + - Trains embedding extractor using DINO + - Clusters embeddings of VoxCeleb2 to get pseudo-speaker labels + - Embedding model is fine-tuned with Large Margin Softmax loss on the pseudo-speaker labels + - Repeats embedding clustering to get new pseudo-speaker labels + - Embedding model is fine-tuned with Large Margin Softmax loss on the new pseudo-speaker labels + +## Citing + +If you use our DINO implementation, please cite these works: + +``` +@ARTICLE{9852303, + author={Cho, Jaejin and Villalba, Jesús and Moro-Velazquez, Laureano and Dehak, Najim}, + journal={IEEE Journal of Selected Topics in Signal Processing}, + title={Non-Contrastive Self-Supervised Learning for Utterance-Level Information Extraction From Speech}, + year={2022}, + volume={16}, + number={6}, + pages={1284-1295}, + keywords={Alzheimer's disease;Transfer learning;Speech processing;Feature extraction;Self-supervised learning;Training;Emotion recognition;Self-supervised learning;transfer learning;speaker verification;emotion recognition;Alzheimer's disease;distillation;non-contrastive}, + doi={10.1109/JSTSP.2022.3197315}} + +@inproceedings{cho22c_interspeech, + author={Jaejin Cho and Raghavendra Pappagari and Piotr Żelasko and Laureano Moro Velazquez and Jesus Villalba and Najim Dehak}, + title={{Non-contrastive self-supervised learning of utterance-level speech representations}}, + year=2022, + booktitle={Proc. Interspeech 2022}, + pages={4028--4032}, + doi={10.21437/Interspeech.2022-11141} +} +``` + +## Training Data + + - The x-vector network is trained on VoxCeleb2 dev + test with augmentations + - MUSAN noise + - RIR reverberation + +## Test Data + + - Test data is VoxCeleb 1 + - We evaluate the 3 conditions (with cleaned lists): + - VoxCeleb-O (Original): Original VoxCeleb test set with 40 speakers + - VoxCeleb-E (Entire): List using all utterances of VoxCeleb1 + - VoxCeleb-H (Hard): List of hard trials between all utterances of VoxCeleb1, same gender and nationality trials. + +## Usage + + - Run the run_0*.sh scripts in sequence + - By default, the scripts use config global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh + - To use other configs: +```bash +run_xxx_xxxx.sh --config-file global_conf/other_config.sh +```
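As a concrete example, a hypothetical invocation overriding the default config for the DINO training stage; `--config-file` and `--stage` are the options these run scripts typically parse via `parse_options.sh`, but check each script's header for the exact set:

```bash
# Hypothetical invocation (verify the available flags in the script header).
./run_005_train_dino.sh \
    --config-file global_conf/config_fbank80_stmn_lresnet34.v1.2.sh \
    --stage 1
```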
+ - Creates Babble noise from MUSAN speech to be used by the SpeechAugment class. + - Prepares RIRs by compacting them into HDF5 files, to be used by the SpeechAugment class. + + - `run_004_prepare_xvec_train_data.sh` + - Transforms all the audio files that we are going to use to train the x-vector into a common format, e.g., .flac. + - Removes silence from the audio files + - Removes utterances shorter than 4 seconds and speakers with fewer than 8 utterances. + - Creates training and validation lists for x-vector training + + - `run_005_train_dino.sh` + - Trains DINO embeddings + + - `run_006_extract_dino_embeds_cluster_eval.sh` + - Extracts DINO embeddings for Vox2 and Vox1 + - Evaluates SV metrics in Vox1-O/E/H using Cosine Scoring + - Clusters Vox2 Embeddings into pseudo-speakers + - Trains PLDA on Vox2 pseudo-speakers + - Evaluates SV metrics in Vox1-O/E/H using PLDA + + - `run_007_train_xvector.sh` + - Fine-tunes the DINO model in x-vector style using pseudo-labels from the previous step + - First, it fine-tunes the x-vector projection and output layer with the rest of the network frozen + - Second, it fine-tunes the full network + + - `run_008_extract_ft1_xvec_embeds_cluster_eval.sh` + - Extracts X-Vector embeddings for Vox2 and Vox1 + - Evaluates SV metrics in Vox1-O/E/H using Cosine Scoring + - Clusters Vox2 Embeddings into pseudo-speakers + - Trains PLDA on Vox2 pseudo-speakers + - Evaluates SV metrics in Vox1-O/E/H using PLDA + + - `run_009_finetune_xvector_s2.sh` + - Fine-tunes the x-vector model using pseudo-labels from the previous step + - First, it fine-tunes the x-vector projection and output layer with the rest of the network frozen + - Second, it fine-tunes the full network + + - `run_010_extract_ft2_xvec_embeds_cluster_eval.sh` + - Extracts X-Vector embeddings for Vox2 and Vox1 + - Evaluates SV metrics in Vox1-O/E/H using Cosine Scoring + - Clusters Vox2 Embeddings into pseudo-speakers + - Trains PLDA on Vox2 pseudo-speakers + - Evaluates SV metrics in Vox1-O/E/H using PLDA + + +## Results + +### VoxCeleb 1 Original-Clean trial list + +| Config | Model Type | DINO Clustering | X-Vector Clustering | Stage | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | --------------- | ------------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_lresnet34.v1.2.sh | LResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 3.96 | 0.276 | 0.423 | +| | | | | | PLDA | 3.18 | 0.182 | 0.273 | +| | | | | FT-1 | Cosine | 1.97 | 0.139 | 0.214 | +| | | | | FT-2 | Cosine | 1.80 | 0.133 | 0.200 | +| config_fbank80_stmn_lresnet34.v1.2.1.sh | LResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 1.75 | 0.124 | 0.197 | +| config_fbank80_stmn_ecapatdnn512x3.v1.2.sh | ECAPA-TDNN 512x3 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 4.14 | 0.274 | 0.405 | +| | | | | | PLDA | 4.16 | 0.225 | 0.361 | +| | | | | FT-1 | Cosine | 2.68 | 0.173 | 0.258 | +| | | | | FT-2 | Cosine | 2.57 | 0.151 | 0.244 | +| config_fbank80_stmn_ecapatdnn512x3.v1.2.1.sh | ECAPA-TDNN 512x3 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 2.71 | 0.169 | 0.243 | +| config_fbank80_stmn_fwseresnet34.v1.2.sh | FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 4.57 | 0.344 | 0.553 | +| | | | | | PLDA | 2.92 | 0.232 | 0.410 | +| | | | | FT-1 | Cosine | 2.11 | 0.135 | 0.223 | +| | | | | FT-1 | PLDA | 1.75 | 0.137 | 0.236 | +| | | | | FT-2 | Cosine | 1.65 | 0.116 | 0.168 | +| | | | | FT-2 | PLDA | 1.67 | 0.137 | 0.193 | +| config_fbank80_stmn_fwseresnet34.v1.2.1.sh |
FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 1.49 | 0.101 | 0.161 | +| | | | | FT-2 | PLDA | 1.53 | 0.109 | 0.168| + + +### VoxCeleb 1 Entire-Clean trial list + +| Config | Model Type | DINO Clustering | X-Vector Clustering | Stage | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | --------------- | ------------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_lresnet34.v1.2.sh | LResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 4.94 | 0.304 | 0.483 | +| | | | | | PLDA | 3.72 | 0.184 | 0.300 | +| | | | | FT-1 | Cosine | 2.35 | 0.136 | 0.217 | +| | | | | FT-2 | Cosine | 2.02 | 0.118 | 0.195 | +| config_fbank80_stmn_lresnet34.v1.2.1.sh | LResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 1.98 | 0.116 | 0.185 | +| config_fbank80_stmn_ecapatdnn512x3.v1.2.sh | ECAPA-TDNN 512x3 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 4.61 | 0.293 | 0.455| +| | | | | | PLDA | 3.91 | 0.223 | 0.356 | +| | | | | FT-1 | Cosine | 3.04 | 0.168 | 0.263 | +| | | | | FT-2 | Cosine | 2.83 | 0.155 | 0.248 | +| config_fbank80_stmn_ecapatdnn512x3.v1.2.1.sh| ECAPA-TDNN 512x3 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 3.06 | 0.164 | 0.256 | +| config_fbank80_stmn_fwseresnet34.v1.2.sh | FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 5.50 | 0.426 | 0.664 | +| | | | | | PLDA | 3.33 | 0.245 | 0.425 | +| | | | | FT-1 | Cosine | 2.42 | 0.147 | 0.243 | +| | | | | FT-1 | PLDA | 2.03 | 0.144 | 0.255 | +| | | | | FT-2 | Cosine | 1.86 | 0.112 | 0.186 | +| | | | | FT-2 | PLDA | 1.77 | 0.121 | 0.208 | +| config_fbank80_stmn_fwseresnet34.v1.2.1.sh | FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 1.83 | 0.106 | 0.170 | +| | | | | FT-2 | PLDA | 1.68 | 0.109 | 0.188 | + + +### VoxCeleb 1 Hard-Clean trial list + +| Config | Model Type | DINO Clustering | X-Vector Clustering | Stage | Back-end | EER(%) | MinDCF(p=0.05) | MinDCF(p=0.01) | +| ------ | ---------- | ------------- | --------------- | ------------------- | -------- | :----: | :------------: | :------------: | +| config_fbank80_stmn_lresnet34.v1.2.sh | LResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 8.33 | 0.462 | 0.664 | +| | | | | | PLDA | 5.91 | 0.304 | 0.481 | +| | | | | FT-1 | Cosine | 3.89 | 0.215 | 0.340 | +| | | | | FT-2 | Cosine | 3.44 | 0.192 | 0.303 | +| config_fbank80_stmn_lresnet34.v1.2.1.sh | LResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 3.33 | 0.185 | 0.290 | +| config_fbank80_stmn_ecapatdnn512x3.v1.2.sh | ECAPA-TDNN 512x3 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 8.38 | 0.458 | 0.635 | +| | | | | | PLDA | 6.48 | 0.360 | 0.532 | +| | | | | FT-1 | Cosine | 4.93 | 0.259 | 0.383 | +| | | | | FT-2 | Cosine | 4.73 | 0.251 | 0.375 | +| config_fbank80_stmn_ecapatdnn512x3.v1.2.1.sh| ECAPA-TDNN 512x3 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 4.90 | 0.251 | 0.378 | +| config_fbank80_stmn_fwseresnet34.v1.2.sh | FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC+PLDA+AHC | DINO | Cosine | 10.9 | 0.644 | 0.822 | +| | | | | | PLDA | 6.86 | 0.481 | 0.745 | +| | | | | FT-1 | Cosine | 4.35 | 0.25 | 0.393 | +| | | | | FT-1 | PLDA | 4.21 | 0.281 | 0.452 +| | | | | FT-2 | Cosine | 3.37 | 0.194 | 0.309 | +| | | | | FT-2 | PLDA | 3.51 | 0.219 | 0.351 | +| config_fbank80_stmn_fwseresnet34.v1.2.1.sh | FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 3.11 | 0.172 | 0.270 | +| | | | | FT-2 | PLDA | 3.15 | 0.186 | 0.294 | + diff --git a/egs/voxceleb/ssl.v1/cmd.sh 
b/egs/voxceleb/ssl.v1/cmd.sh index 040f458b..4efc96e1 100755 --- a/egs/voxceleb/ssl.v1/cmd.sh +++ b/egs/voxceleb/ssl.v1/cmd.sh @@ -14,7 +14,7 @@ if [ "$(hostname -d)" == "cm.gemini" ];then #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" - #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" + export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 40G" export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" diff --git a/egs/voxceleb/ssl.v1/default_config.sh b/egs/voxceleb/ssl.v1/default_config.sh new file mode 120000 index 00000000..f8aa12d5 --- /dev/null +++ b/egs/voxceleb/ssl.v1/default_config.sh @@ -0,0 +1 @@ +global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh \ No newline at end of file diff --git a/hyperion/bin/hyperion_dataset.py b/hyperion/bin/hyperion_dataset.py index 2bd01f2d..3bb61fb0 100755 --- a/hyperion/bin/hyperion_dataset.py +++ b/hyperion/bin/hyperion_dataset.py @@ -18,9 +18,9 @@ from hyperion.hyp_defs import config_logger from hyperion.utils import ( ClassInfo, - Dataset, EnrollmentMap, FeatureSet, + HypDataset, InfoTable, PathLike, RecordingSet, @@ -81,7 +81,7 @@ def add_features( if output_dataset is None: output_dataset = dataset - dataset = Dataset.load(dataset, lazy=True) + dataset = HypDataset.load(dataset, lazy=True) dataset.add_features(features_name, features_file) dataset.save(output_dataset) @@ -128,7 +128,7 @@ def set_recordings( if output_dataset is None: output_dataset = dataset - dataset = Dataset.load(dataset, lazy=True) + dataset = HypDataset.load(dataset, lazy=True) dataset.set_recordings(recordings_file, update_seg_durs) if remove_features is not None: for features_name in remove_features: @@ -161,7 +161,7 @@ def make_from_recordings( rec_df = pd.read_csv(recordings_file) seg_df = rec_df[["id"]] segments = SegmentSet(seg_df) - dataset = Dataset(segments, recordings=recordings_file) + dataset = HypDataset(segments, recordings=recordings_file) dataset.save(output_dataset) @@ -202,7 +202,7 @@ def remove_short_segments( if output_dataset is None: output_dataset = dataset - dataset = Dataset.load(dataset, lazy=True) + dataset = HypDataset.load(dataset, lazy=True) dataset.remove_short_segments(min_length, length_name) dataset.save(output_dataset) @@ -234,7 +234,7 @@ def rebuild_class_idx( if output_dataset is None: output_dataset = dataset - dataset = Dataset.load(dataset, lazy=True) + dataset = HypDataset.load(dataset, lazy=True) dataset.rebuild_class_idx(class_name) dataset.save(output_dataset) @@ -277,7 +277,7 @@ def remove_classes_few_segments( if output_dataset is None: output_dataset = dataset - dataset = Dataset.load(dataset, lazy=True) + dataset = HypDataset.load(dataset, lazy=True) dataset.remove_classes_few_segments(class_name, min_segs, rebuild_idx) dataset.save(output_dataset) @@ -324,7 +324,7 @@ def remove_classes_few_toomany_segments( if output_dataset is None: output_dataset = dataset - dataset = Dataset.load(dataset, lazy=True) + dataset = HypDataset.load(dataset, lazy=True) dataset.remove_classes_few_toomany_segments( class_name, min_segs, max_segs, rebuild_idx ) @@ -394,7 +394,7 @@ def split_train_val( train_dataset: PathLike, val_dataset: PathLike, ): - dataset = Dataset.load(dataset, lazy=True) + 
dataset = HypDataset.load(dataset, lazy=True) train_ds, val_ds = dataset.split_train_val( val_prob, joint_classes, disjoint_classes, min_train_samples, seed ) @@ -433,7 +433,7 @@ def copy( dataset: PathLike, output_dataset: PathLike, ): - dataset = Dataset.load(dataset, lazy=True) + dataset = HypDataset.load(dataset, lazy=True) dataset.save(output_dataset) @@ -502,7 +502,7 @@ def add_cols_to_segments( if output_dataset is None: output_dataset = dataset - dataset = Dataset.load(dataset, lazy=True) + dataset = HypDataset.load(dataset, lazy=True) dataset.add_cols_to_segments( right_table, column_names, @@ -514,6 +514,44 @@ def add_cols_to_segments( dataset.save(output_dataset) +def make_from_lhotse_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--cuts-file", + default=None, + help="lhotse cuts file", + ) + parser.add_argument( + "--recordings-file", + default=None, + help="lhotse recordings set file", + ) + parser.add_argument( + "--supervisions-file", + default=None, + help="lhotse supervisions file", + ) + + +def from_lhotse( + dataset: PathLike, + cuts_file: Optional[PathLike] = None, + recordings_file: Optional[PathLike] = None, + supervisions_file: Optional[PathLike] = None, +): + + assert cuts_file is not None or supervisions_file is not None + dataset_dir = dataset + dataset = HypDataset.from_lhotse( + cuts=cuts_file, recordings=recordings_file, supervisions=supervisions_file + ) + dataset.save(dataset_dir) + + def main(): parser = ArgumentParser(description="Tool to manipulates the Hyperion dataset") parser.add_argument("--cfg", action=ActionConfigFile) diff --git a/hyperion/bin/split_dataset_into_trials_and_cohort.py b/hyperion/bin/split_dataset_into_trials_and_cohort.py index 50c2f1f2..a5935910 100755 --- a/hyperion/bin/split_dataset_into_trials_and_cohort.py +++ b/hyperion/bin/split_dataset_into_trials_and_cohort.py @@ -15,7 +15,7 @@ ) from hyperion.hyp_defs import config_logger -from hyperion.utils import Dataset +from hyperion.utils import HypDataset def main(): @@ -63,7 +63,7 @@ def main(): del args.trials_dir args = namespace_to_dict(args) - dataset = Dataset.load(data_dir) + dataset = HypDataset.load(data_dir) trials_dataset, cohort_dataset = dataset.split_into_trials_and_cohort(**args) trials_dataset.save(trials_dir) cohort_dataset.save(cohort_dir) diff --git a/hyperion/data_prep/musan.py b/hyperion/data_prep/musan.py index abf7a46c..b14785b8 100644 --- a/hyperion/data_prep/musan.py +++ b/hyperion/data_prep/musan.py @@ -2,8 +2,9 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import logging + import glob +import logging import re from concurrent.futures import ThreadPoolExecutor from pathlib import Path @@ -13,7 +14,7 @@ from jsonargparse import ActionYesNo from tqdm import tqdm -from ..utils import Dataset, RecordingSet, SegmentSet +from ..utils import HypDataset, RecordingSet, SegmentSet from ..utils.misc import PathLike, urlretrieve_progress from .data_prep import DataPrep @@ -95,7 +96,7 @@ def prepare(self): segments = SegmentSet(segments) segments.sort() logging.info("making dataset") - dataset = Dataset( + dataset = HypDataset( segments, recordings=recs, ) diff --git a/hyperion/data_prep/rirs.py b/hyperion/data_prep/rirs.py index 066819a8..accf7bad 100644 --- a/hyperion/data_prep/rirs.py +++ b/hyperion/data_prep/rirs.py @@ -2,8
+2,9 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import logging + import glob +import logging import re from concurrent.futures import ThreadPoolExecutor from pathlib import Path @@ -13,7 +14,7 @@ from jsonargparse import ActionYesNo from tqdm import tqdm -from ..utils import Dataset, RecordingSet, SegmentSet +from ..utils import HypDataset, RecordingSet, SegmentSet from ..utils.misc import PathLike, urlretrieve_progress from .data_prep import DataPrep @@ -88,16 +89,23 @@ def prepare(self): logging.info("making SegmentsSet") segments = pd.DataFrame( - {"id": rec_ids, "duration": recs.loc[rec_ids, "duration"].values,} + { + "id": rec_ids, + "duration": recs.loc[rec_ids, "duration"].values, + } ) if room_ids is not None: segments["room_id"] = room_ids segments = SegmentSet(segments) segments.sort() logging.info("making dataset") - dataset = Dataset(segments, recordings=recs,) + dataset = HypDataset( + segments, + recordings=recs, + ) logging.info("saving dataset at %s", self.output_dir) dataset.save(self.output_dir) logging.info( - "datasets containts %d segments", len(segments), + "dataset contains %d segments", + len(segments), ) diff --git a/hyperion/data_prep/voxceleb1.py b/hyperion/data_prep/voxceleb1.py index 025fad37..56cf0c59 100644 --- a/hyperion/data_prep/voxceleb1.py +++ b/hyperion/data_prep/voxceleb1.py @@ -2,18 +2,19 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + +import glob import logging import re from concurrent.futures import ThreadPoolExecutor from pathlib import Path -import glob import numpy as np import pandas as pd from jsonargparse import ActionYesNo from tqdm import tqdm -from ..utils import ClassInfo, Dataset, RecordingSet, SegmentSet +from ..utils import ClassInfo, HypDataset, RecordingSet, SegmentSet from ..utils.misc import PathLike, urlretrieve_progress from .data_prep import DataPrep @@ -328,7 +329,7 @@ def prepare(self): enrollments, trials = self.make_trials() logging.info("making dataset") - dataset = Dataset( + dataset = HypDataset( segments, classes={"speaker": speakers, "language_est": languages}, recordings=recs, diff --git a/hyperion/data_prep/voxceleb2.py b/hyperion/data_prep/voxceleb2.py index 969f2228..550af3a8 100644 --- a/hyperion/data_prep/voxceleb2.py +++ b/hyperion/data_prep/voxceleb2.py @@ -2,8
(http://www.apache.org/licenses/LICENSE-2.0) """ -import logging + import glob +import logging import re from concurrent.futures import ThreadPoolExecutor from pathlib import Path @@ -13,7 +14,7 @@ from jsonargparse import ActionYesNo from tqdm import tqdm -from ..utils import ClassInfo, Dataset, RecordingSet, SegmentSet +from ..utils import ClassInfo, HypDataset, RecordingSet, SegmentSet from ..utils.misc import PathLike, urlretrieve_progress from .data_prep import DataPrep @@ -149,7 +150,7 @@ def prepare_track12_dev(self): segments.sort() logging.info("making dataset") - dataset = Dataset( + dataset = HypDataset( segments, recordings=recs, enrollments=enrollments, diff --git a/hyperion/torch/lr_schedulers/cos_lr.py b/hyperion/torch/lr_schedulers/cos_lr.py index b9e7d069..c2ea8ec3 100644 --- a/hyperion/torch/lr_schedulers/cos_lr.py +++ b/hyperion/torch/lr_schedulers/cos_lr.py @@ -3,7 +3,6 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ - import logging import math @@ -32,7 +31,6 @@ class CosineLR(LRScheduler): optimizer: Pytorch optimizer object. T: period of the cycle. T_mul: period multiplier, after each cycle the period is multiplied by T_mul. - hold_steps: number of steps until the lr starts decaying. min_lr: minimum learning rate. warmup_steps: number of warm up steps to get the lr from 0 to the maximum lr. warm_restarts: whether or not to do warm restarts. @@ -103,7 +101,7 @@ def get_lr(self, step): else: return self.min_lrs - alpha = self.gamma ** self.num_restarts + alpha = self.gamma**self.num_restarts r = math.pi / self.T return [ @@ -182,7 +180,7 @@ def get_lr(self, step): else: return self.min_lrs - alpha = self.gamma ** self.num_restarts + alpha = self.gamma**self.num_restarts r = math.pi / self.T return [ diff --git a/hyperion/torch/lr_schedulers/lr_scheduler.py b/hyperion/torch/lr_schedulers/lr_scheduler.py index 5008e1be..d609bf26 100644 --- a/hyperion/torch/lr_schedulers/lr_scheduler.py +++ b/hyperion/torch/lr_schedulers/lr_scheduler.py @@ -90,7 +90,7 @@ def load_state_dict(self, state_dict): def get_warmup_lr(self): x = self.step return [ - (base_lr - min_lr) / self.warmup_steps * x + min_lr + (base_lr - min(min_lr, 1e-8)) / self.warmup_steps * x + min(min_lr, 1e-8) for base_lr, min_lr in zip(self.base_lrs, self.min_lrs) ] diff --git a/hyperion/utils/__init__.py b/hyperion/utils/__init__.py index e8ad5056..9bc51181 100644 --- a/hyperion/utils/__init__.py +++ b/hyperion/utils/__init__.py @@ -3,12 +3,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .info_table import InfoTable from .class_info import ClassInfo -from .dataset import Dataset from .enrollment_map import EnrollmentMap from .feature_set import FeatureSet from .hyp_dataclass import HypDataClass +from .hyp_dataset import HypDataset +from .info_table import InfoTable from .kaldi_matrix import KaldiCompressedMatrix, KaldiMatrix from .misc import PathLike from .recording_set import RecordingSet diff --git a/hyperion/utils/dataset.py b/hyperion/utils/hyp_dataset.py similarity index 91% rename from hyperion/utils/dataset.py rename to hyperion/utils/hyp_dataset.py index e485f1a5..dbf268da 100644 --- a/hyperion/utils/dataset.py +++ b/hyperion/utils/hyp_dataset.py @@ -26,7 +26,7 @@ from .trial_ndx import TrialNdx -class Dataset: +class HypDataset: """Class that contains all objects (segments, recordings, features, class_infos) that conform a dataset @@ -390,7 +390,7 @@ def save_changed( self.table_sep = table_sep table_ext = ".tsv" if table_sep == "\t" else ".csv" - dataset_dir, 
dataset_file = Dataset.resolve_dataset_path(dataset_path) + dataset_dir, dataset_file = HypDataset.resolve_dataset_path(dataset_path) dataset = {} file_name = f"segments{table_ext}" dataset["segments"] = file_name @@ -536,7 +536,7 @@ def save_all( self.table_sep = table_sep table_ext = ".tsv" if table_sep == "\t" else ".csv" - dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path) + dataset_dir, dataset_file = HypDataset.resolve_dataset_path(dataset_path) dataset = {} file_name = f"segments{table_ext}" dataset["segments"] = file_name @@ -647,12 +647,12 @@ def load( sparse_trials: load trial keys using the SparseTrialKey class instead of TrialKey class """ - dataset_dir, dataset_file = Dataset.resolve_dataset_path(dataset_path) + dataset_dir, dataset_file = HypDataset.resolve_dataset_path(dataset_path) with open(dataset_file, "r") as f: dataset = yaml.safe_load(f) assert "segments" in dataset - segments = Dataset.resolve_file_path(dataset_dir, dataset["segments"]) + segments = HypDataset.resolve_file_path(dataset_dir, dataset["segments"]) classes = None recordings = None features = None @@ -661,28 +661,30 @@ def load( if "classes" in dataset: classes = {} for k, v in dataset["classes"].items(): - classes[k] = Dataset.resolve_file_path(dataset_dir, v) + classes[k] = HypDataset.resolve_file_path(dataset_dir, v) if "recordings" in dataset: - recordings = Dataset.resolve_file_path(dataset_dir, dataset["recordings"]) + recordings = HypDataset.resolve_file_path( + dataset_dir, dataset["recordings"] + ) # recordings = {} # for k, v in dataset["recordings"].items(): - # recordings[k] = Dataset.resolve_file_path(dataset_dir, v) + # recordings[k] = HypDataset.resolve_file_path(dataset_dir, v) if "features" in dataset: features = {} for k, v in dataset["features"].items(): - features[k] = Dataset.resolve_file_path(dataset_dir, v) + features[k] = HypDataset.resolve_file_path(dataset_dir, v) if "enrollments" in dataset: enrollments = {} for k, v in dataset["enrollments"].items(): - enrollments[k] = Dataset.resolve_file_path(dataset_dir, v) + enrollments[k] = HypDataset.resolve_file_path(dataset_dir, v) if "trials" in dataset: trials = {} for k, v in dataset["trials"].items(): - trials[k] = Dataset.resolve_file_path(dataset_dir, v) + trials[k] = HypDataset.resolve_file_path(dataset_dir, v) dataset = cls( segments, @@ -991,8 +993,8 @@ def split_into_trials_and_cohort( intra_gender: if True, no cross gender trials are done. Returns: - Dataset used for trials with trial list. - Dataset used for cohort. + HypDataset used for trials with trial list. + HypDataset used for cohort. """ num_tar_trials = num_1k_tar_trials * 1000 if intra_gender: @@ -1258,4 +1260,83 @@ def from_lhotse( recordings: Optional[Union[lhotse.RecordingSet, PathLike]] = None, supervisions: Optional[Union[lhotse.SupervisionSet, PathLike]] = None, ): + """Creates a Hyperion Dataset from a lhotse CutSet or + from a lhotse RecordingSet + SupervisionSet + + Args: + cuts: lhotse CutSet manifest or file + recordings: lhotse RecordingSet manifest or file + supervisions: lhotse SupervisionSet manifest or file. 
+ + Returns: + HypDataset object + """ + assert cuts is not None or supervisions is not None + if cuts is not None: + if isinstance(cuts, (str, Path)): + cuts = lhotse.CutSet.from_file(cuts) + else: + if isinstance(supervisions, (str, Path)): + supervisions = lhotse.SupervisionSet.from_file(supervisions) + + if recordings is not None and isinstance(recordings, (str, Path)): + recordings = lhotse.RecordingSet.from_file(recordings) + + cuts = lhotse.CutSet.from_manifests( + recordings=recordings, supervisions=supervisions + ) + + from lhotse import MonoCut, Recording, SupervisionSegment + + supervision_keys = ["speaker", "gender", "language", "text", "duration"] + recs_df = [] + segs_df = [] + for cut in cuts: + supervision = cut.supervisions[0] + recording = cut.recording + seg_dict = {"id": cut.id} + recording = cut.recording + if recording is not None: + if recording.id != cut.id: + seg_dict["recording_id"] = recording.id + + rec_dict = { + "id": recording.id, + "sampling_rate": recording.sampling_rate, + "duration": recording.duration, + } + source = recording.sources[0] + assert len(recording.sources) == 1 + assert source.type in ["file", "command"] + rec_dict["storage_path"] = source.source + assert recording.transforms is None, f"{recording.transforms}" + recs_df.append(rec_dict) + + for key in supervision_keys: + if hasattr(supervision, key): + val = getattr(supervision, key) + if val is not None: + seg_dict[key] = val + + if supervision.custom is not None: + for key, val in supervision.custom.items(): + if val is not None: + seg_dict[key] = val + + segs_df.append(seg_dict) + + recs_df = pd.DataFrame(recs_df) + segs_df = pd.DataFrame(segs_df) + recordings = RecordingSet(recs_df) + segments = SegmentSet(segs_df) + class_names = ["speaker", "language", "emotion", "gender"] + classes = {} + for key in class_names: + if key in segments: + uniq_classes = np.unique(segments[key]) + classes[key] = pd.DataFrame({"id": uniq_classes}) + + dataset = cls(segments=segments, classes=classes, recordings=recordings) + return dataset + return None
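Editor's note: a hypothetical sketch (not part of the patch) of how the new lhotse interoperability is meant to be used, per the `from_lhotse` signature above; the manifest file names are made up for illustration:

```python
from hyperion.utils import HypDataset

# From a lhotse CutSet manifest file:
dataset = HypDataset.from_lhotse(cuts="data/lhotse/cuts.jsonl.gz")

# Or from RecordingSet + SupervisionSet manifest files:
dataset = HypDataset.from_lhotse(
    recordings="data/lhotse/recordings.jsonl.gz",
    supervisions="data/lhotse/supervisions.jsonl.gz",
)
dataset.save("data/my_dataset")
```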
From dcabebe78e2ba201e56227aa6094942646bbbcfa Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 3 May 2024 14:57:58 -0400 Subject: [PATCH 133/154] added cosine loss to dino --- .../conf/train_fwseresnet34_dino_v1.2.2.yaml | 96 +++++++++++ ...config_fbank80_stmn_fwseresnet34.v1.2.2.sh | 67 +++++++++ hyperion/bin/train_dino_wav2xvector.py | 20 ++- hyperion/torch/losses/__init__.py | 2 +- hyperion/torch/losses/dino_loss.py | 81 +++++++++++ hyperion/torch/models/xvectors/xvector.py | 109 ++++++++++++-- .../torch/trainers/dino_xvector_trainer.py | 134 ++++++++++++++---- .../torch/trainers/xvector_adv_trainer.py | 6 +- .../trainers/xvector_adv_trainer_from_wav.py | 9 +- hyperion/torch/trainers/xvector_trainer.py | 4 +- .../trainers/xvector_trainer_deep_feat_reg.py | 7 +- .../xvector_trainer_deep_feat_reg_from_wav.py | 8 +- .../trainers/xvector_trainer_from_wav.py | 4 +- 13 files changed, 481 insertions(+), 66 deletions(-) create mode 100644 egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml create mode 100644 egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh diff --git a/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml b/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml new file mode 100644 index 00000000..6d6e60a9 --- /dev/null +++ b/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml @@ -0,0 +1,96 @@ +data: + train: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 + val: + dataset: + teacher_aug_cfg: conf/teacher_reverb_noise_aug.yaml + student_aug_cfg: conf/reverb_noise_aug.yaml + student_chunk_length: 2. + teacher_chunk_length: 4. + num_teacher_chunks: 2 + num_student_chunks: 4 + same_teacher_student_chunks: false + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 12.0 + min_chunk_length: 6.0 + data_loader: + num_workers: 8 +student_model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: fwseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + dropout_rate: 0.01 + norm_before: false + hid_act: swish + se_r: 4 + head_type: dino + embed_dim: 192 + num_embed_layers: 3 + loss_type: softmax + head_use_norm: true + head_hid_dim: 768 + head_bottleneck_dim: 192 + proj_head_use_norm: true + proj_head_norm_before: false +teacher_model: + xvector: + override_dropouts: true + dropout_rate: 0.0 +dino_loss: + num_classes: 65536 + temp_warmup_epochs: 0 + teacher_temp: 0.04 +cosine_loss: + warmup_epochs: 20 + scale: 0.1 +trainer: + optim: + opt_type: adamw + lr: 0.005 + amsgrad: false + beta1: 0.9 + beta2: 0.99 + weight_decay: 1e-1 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 60000 + hold_steps: 15000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + teacher_optim: + init_momentum: 0.996 + momentum: 1.0 + warmup_steps: 500000 + grad_clip: 15 + use_amp: true + log_interval: 1000 + epochs: 100 + eff_batch_size: 256 + train_mode: full + freeze_output_layer_steps: 1500 diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh new file mode 100644 index 00000000..13a72732 --- /dev/null +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh @@ -0,0 +1,67 @@ +# FW-SE ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_fwseresnet34_dino.v1.2.2 + +nnet_s1_base_cfg=conf/train_fwseresnet34_dino_v1.2.2.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/teacher_model_ep0034.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0025.pth + +# clustering of dino embeddings +cluster_method=cos_ahc_plda_ahc +cluster_cfg=conf/cluster_lresnet34_v1.2_cos_ahc_plda_ahc.yaml +cluster_name=${cluster_method} +cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name + +# plda +plda_cfg=conf/plda.yaml + +# finetuning stage 1.1 +nnet_ft_s1_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s1_1_name=$nnet_name.s1.ft.s1.1 +nnet_ft_s1_1_dir=exp/xvector_nnets/$nnet_ft_s1_1_name +nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0030.pth + +# finetuning stage 1.2 +nnet_ft_s1_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s1_2_name=$nnet_name.s1.ft.s1.2 +nnet_ft_s1_2_dir=exp/xvector_nnets/$nnet_ft_s1_2_name +nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 1.2
+cluster_ft_s1_method=cos_ahc +cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml +cluster_ft_s1_name=${cluster_ft_s1_method} +cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name + +# finetuning stage 2.1 +nnet_ft_s2_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s2_1_name=$nnet_name.s1.ft.s2.1 +nnet_ft_s2_1_dir=exp/xvector_nnets/$nnet_ft_s2_1_name +nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0030.pth + +# finetuning stage 2.2 +nnet_ft_s2_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s2_2_name=$nnet_name.s1.ft.s2.2 +nnet_ft_s2_2_dir=exp/xvector_nnets/$nnet_ft_s2_2_name +nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth + +# clustering of ft embeddings from stage 2.2 +cluster_ft_s2_method=cos_ahc +cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml +cluster_ft_s2_name=${cluster_ft_s2_method} +cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name + diff --git a/hyperion/bin/train_dino_wav2xvector.py b/hyperion/bin/train_dino_wav2xvector.py index d1cd108e..cb541f55 100755 --- a/hyperion/bin/train_dino_wav2xvector.py +++ b/hyperion/bin/train_dino_wav2xvector.py @@ -19,7 +19,7 @@ from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import DINOAudioDataset as AD from hyperion.torch.data import SegSamplerFactory -from hyperion.torch.losses import DINOLoss +from hyperion.torch.losses import CosineDINOLoss, DINOLoss from hyperion.torch.metrics import CategoricalAccuracy # from hyperion.torch.models import EfficientNetXVector as EXVec @@ -109,6 +109,21 @@ def init_dino_loss(rank, **kwargs): return loss +def init_cosine_loss(rank, **kwargs): + loss_args = kwargs["cosine_loss"] + if rank == 0: + logging.info(f"cosine loss args={loss_args}") + + if loss_args["scale"] <= 0: + return None + + loss = CosineDINOLoss(**loss_args) + if rank == 0: + logging.info(f"cosine-loss={loss}") + + return loss + + def train_xvec(gpu_id, args): config_logger(args.verbose) del args.verbose @@ -126,6 +141,7 @@ def train_xvec(gpu_id, args): val_loader = init_data(partition="val", **kwargs) dino_loss = init_dino_loss(**kwargs) + cosine_loss = init_cosine_loss(**kwargs) student_model = init_student_xvector(num_classes=dino_loss.num_classes, **kwargs) kwargs["student_model"] = student_model teacher_model = init_teacher_xvector(**kwargs) @@ -138,6 +154,7 @@ def train_xvec(gpu_id, args): student_model, teacher_model, dino_loss, + cosine_loss=cosine_loss, device=device, metrics=metrics, ddp=world_size > 1, @@ -185,6 +202,7 @@ def make_parser(xvec_class): xvec_class.add_class_args(parser, prefix="student_model") xvec_class.add_dino_teacher_args(parser, prefix="teacher_model") DINOLoss.add_class_args(parser, prefix="dino_loss") + CosineDINOLoss.add_class_args(parser, prefix="cosine_loss") Trainer.add_class_args( parser, prefix="trainer", train_modes=xvec_class.valid_train_modes() ) diff --git a/hyperion/torch/losses/__init__.py b/hyperion/torch/losses/__init__.py index 6f68ad45..56ad2a5d 100644 --- a/hyperion/torch/losses/__init__.py +++ b/hyperion/torch/losses/__init__.py @@ -4,4 +4,4 @@ """ from .bce_with_llr import BCEWithLLR -from .dino_loss import DINOLoss +from .dino_loss import CosineDINOLoss, DINOLoss diff --git a/hyperion/torch/losses/dino_loss.py b/hyperion/torch/losses/dino_loss.py index 55f8e846..c5f499c8 100644 --- a/hyperion/torch/losses/dino_loss.py +++ b/hyperion/torch/losses/dino_loss.py @@ -2,6 +2,7 @@ Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0
(http://www.apache.org/licenses/LICENSE-2.0) """ + import logging import torch @@ -162,3 +163,83 @@ def add_class_args(parser, prefix=None): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + +class CosineDINOLoss(nn.Module): + """Cosine Loss to regularize DINO + and enforce DINO embeddings to be suitable for cosine scoring + + """ + + def __init__( + self, + scale: float = 1.0, + warmup_epochs: int = 30, + ): + super().__init__() + self.scale = scale + self.warmup_epochs = warmup_epochs + self.cur_scale = scale + + def update_scale(self, epoch: int): + if epoch < self.warmup_epochs: + self.cur_scale = self.scale * epoch / self.warmup_epochs + logging.info("updating cosine-loss scale=%.3f", self.cur_scale) + else: + self.cur_scale = self.scale + + def forward( + self, + student_embed: torch.Tensor, + teacher_embed: torch.Tensor, + num_student_crops: int, + num_teacher_crops: int, + ): + """ + Cosine scoring between embeddings of the teacher and student networks. + """ + if self.scale == 0: + return torch.zeros(1, device=student_embed.device), torch.zeros(1, device=student_embed.device) + + student_embed = torch.nn.functional.normalize(student_embed, dim=-1) + teacher_embed = torch.nn.functional.normalize(teacher_embed, dim=-1) + student_embed = student_embed.chunk(num_student_crops) + teacher_embed = teacher_embed.detach() + teacher_embed = teacher_embed.chunk(num_teacher_crops) + + total_loss = 0 + n_loss_terms = 0 + for iq, q in enumerate(teacher_embed): + for ip, p in enumerate(student_embed): + if ip == iq and num_teacher_crops > 1: + # we skip cases where student and teacher operate on the same view + continue + loss = 1 - torch.sum(q * p, dim=-1) + total_loss += loss.mean() + n_loss_terms += 1 + total_loss /= n_loss_terms + + return self.cur_scale * total_loss, total_loss + + @staticmethod + def filter_args(**kwargs): + return filter_func_args(CosineDINOLoss.__init__, kwargs) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + parser.add_argument( + "--scale", default=0, type=float, help="Scale of Cosine loss to reg. DINO" + ) + parser.add_argument( + "--warmup-epochs", + default=30, + type=int, + help="warmup epochs for the scale", + ) + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))
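Editor's note: `CosineDINOLoss` above averages `1 - cosine(teacher, student)` over teacher/student crop pairs, skipping pairs where both networks saw the same view, and returns both the scaled and unscaled values. A minimal usage sketch (the batch size, crop counts, and embedding dimension are made up for illustration):

```python
import torch
from hyperion.torch.losses import CosineDINOLoss

loss_fn = CosineDINOLoss(scale=0.1, warmup_epochs=20)
loss_fn.update_scale(epoch=10)  # scale ramps up linearly during warmup

# 2 teacher crops and 4 student crops of a batch of 8 utterances,
# concatenated along the batch dimension, embedding dim 192
teacher_embed = torch.randn(2 * 8, 192)
student_embed = torch.randn(4 * 8, 192)
scaled_loss, loss = loss_fn(
    student_embed, teacher_embed, num_student_crops=4, num_teacher_crops=2
)
```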
diff --git a/hyperion/torch/models/xvectors/xvector.py b/hyperion/torch/models/xvectors/xvector.py index c20f5520..17d77116 100644 --- a/hyperion/torch/models/xvectors/xvector.py +++ b/hyperion/torch/models/xvectors/xvector.py @@ -384,11 +384,15 @@ class logits tensor with shape=(batch, num_classes). x = x[0] x, x_lengths = self._post_enc(x, x_lengths, max_in_length) p = self.pool_net(x, x_lengths=x_lengths) + xvector = None if self.proj_head_net is not None: p = self.proj_head_net(p) - y = self.classif_net(p, y) + xvector = p - return y + logits = self.classif_net(p, y) + # return logits + output = XVectorOutput(None, logits, xvector) + return output def forward_hid_feats( self, @@ -432,14 +436,67 @@ def forward_hid_feats( ) if return_logits: h_classif, y_pred = h_classif - output["h_classif"] = h_classif - output["logits"] = y_pred - return output + else: + y_pred = None + + if h_classif is not None: + xvector = h_classif[0] + else: + xvector = None - output["h_classif"] = h_classif + output = XVectorOutput(None, y_pred, xvector, h_enc, h_classif) return output - def extract_embed( + # def forward_hid_feats( + # self, + # x, + # x_lengths=None, + # y=None, + # return_enc_layers=None, + # return_classif_layers=None, + # return_logits=False, + # ): + # """forwards hidden representations in the x-vector network + + # Args: + # x: input features tensor with shape=(batch, in_feats, time). + # x_lengths: time lengths of the features with shape=(batch,). + # y: target classes torch.long tensor with shape=(batch,). + # return_enc_layers: list of integers indicating, which encoder layers + # we should return. If None, no encoder layers are returned. + # return_enc_layers: list of integers indicating, which classification head layers + # we should return. If None, no head layers are returned. + # return_logits: if True, it adds the logits to the output dictionary. + # Returns: + # Dictionary with "logits", "h_enc" (list of hidden encoder layers), + # "h_classif" (list hidden classification head layers). + # """ + # max_in_length = x.size(-1) + # x = self._pre_enc(x) + # h_enc, x = self.encoder_net.forward_hid_feats( + # x, return_enc_layers, return_output=True + # ) + # output = {"h_enc": h_enc} + # if not return_logits and return_classif_layers is None: + # return output + + # x, x_lengths = self._post_enc(x, x_lengths, max_in_length) + # p = self.pool_net(x, x_lengths=x_lengths) + # if self.proj_head_net is not None: + # p = self.proj_head_net(p) + # h_classif = self.classif_net.forward_hid_feats( + # p, y, return_classif_layers, return_logits=return_logits + # ) + # if return_logits: + # h_classif, y_pred = h_classif + # output["h_classif"] = h_classif + # output["logits"] = y_pred + # return output + + # output["h_classif"] = h_classif + # return output + + def extract_embed_impl( self, x, x_lengths=None, chunk_length=0, embed_layer=None, detach_chunks=False ): if embed_layer is None: @@ -447,12 +504,17 @@ def extract_embed( max_in_length = x.size(-1) x = self._pre_enc(x) - x = eval_nnet_by_chunks( - x, self.encoder_net, chunk_length, detach_chunks=detach_chunks - ) + if max_in_length <= chunk_length or chunk_length == 0: + x = self.encoder_net(x, x_lengths=x_lengths) + if isinstance(x, tuple): + x = x[0] + else: + x = eval_nnet_by_chunks( + x, self.encoder_net, chunk_length, detach_chunks=detach_chunks + ) - if x.device != self.device: - x = x.to(self.device) + if x.device != self.device: + x = x.to(self.device) x, x_lengths = self._post_enc(x, x_lengths, max_in_length) p = self.pool_net(x, x_lengths=x_lengths) @@ -462,6 +524,29 @@ def extract_embed( y = self.classif_net.extract_embed(p, embed_layer) return y + def extract_embed( + self, x, x_lengths=None, chunk_length=0, embed_layer=None, detach_chunks=False + ): + + if x.size(-1) <= chunk_length or chunk_length == 0: + return
self.extract_embed_impl(x, x_lengths, 0, embed_layer) + else: + e = [] + for i in range(x.size(0)): + x_i = x[i : i + 1] + if x_lengths is not None: + x_i = x_i[..., : x_lengths[i]] + + e_i = self.extract_embed_impl( + x_i, + chunk_length=chunk_length, + embed_layer=embed_layer, + detach_chunks=detach_chunks, + ) + e.append(e_i) + + return torch.cat(e, dim=0) + def extract_embed_slidwin( self, x, diff --git a/hyperion/torch/trainers/dino_xvector_trainer.py b/hyperion/torch/trainers/dino_xvector_trainer.py index 16a15304..6573c21a 100644 --- a/hyperion/torch/trainers/dino_xvector_trainer.py +++ b/hyperion/torch/trainers/dino_xvector_trainer.py @@ -63,6 +63,7 @@ def __init__( loss, optim, teacher_optim, + cosine_loss=None, epochs=100, exp_path="./train", cur_epoch=0, @@ -96,12 +97,16 @@ def __init__( self.teacher_model = teacher_model self.teacher_optim = teacher_optim self.freeze_output_layer_steps = freeze_output_layer_steps + self.cosine_loss = cosine_loss super().__init__(student_model, **super_args) def prepare_models_for_training(self): super().prepare_models_for_training() self.teacher_model, self.teacher_optimizer = self._prepare_model_for_ema( - self.teacher_model, self.teacher_optim, self.device, self.ddp, + self.teacher_model, + self.teacher_optim, + self.device, + self.ddp, ) def _prepare_model_for_ema(self, model, optim, device, ddp): @@ -168,6 +173,8 @@ def train_epoch(self, data_loader): self.teacher_model.train() self.loss.update_temp(self.cur_epoch) self.loss.train() + if self.cosine_loss is not None: + self.cosine_loss.update_scale(self.cur_epoch) for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) @@ -184,32 +191,54 @@ def train_epoch(self, data_loader): num_teacher_crops = len(teacher_data) teacher_data = torch.cat(teacher_data, dim=0) teacher_out = self.teacher_model(teacher_data) - assert not torch.any(torch.isnan(teacher_out)), "teacher is nan" - assert not torch.any(torch.isinf(teacher_out)), "teacher is inf" + assert not torch.any( + torch.isnan(teacher_out.logits) + ), "teacher is nan" + assert not torch.any( + torch.isinf(teacher_out.logits) + ), "teacher is inf" if num_teacher_crops > 1: student_out1 = self.model(teacher_data) - assert not torch.any(torch.isnan(student_out1)), "s1 is nan" - assert not torch.any(torch.isinf(student_out1)), "s1 is inf" + assert not torch.any(torch.isnan(student_out1.logits)), "s1 is nan" + assert not torch.any(torch.isinf(student_out1.logits)), "s1 is inf" student_data = tensors_subset(data, student_keys, self.device) num_student_crops = len(student_data) student_data = torch.cat(student_data, dim=0) student_out2 = self.model(student_data) - assert not torch.any(torch.isnan(student_out2)), "s2 is nan" - assert not torch.any(torch.isinf(student_out2)), "s2 is inf" + assert not torch.any(torch.isnan(student_out2.logits)), "s2 is nan" + assert not torch.any(torch.isinf(student_out2.logits)), "s2 is inf" if num_teacher_crops > 1: - student_out = torch.cat((student_out1, student_out2), dim=0) + student_out_logits = torch.cat( + (student_out1.logits, student_out2.logits), dim=0 + ) + if self.cosine_loss is not None: + student_out_embeds = torch.cat( + (student_out1.xvector, student_out2.xvector), dim=0 + ) num_student_crops += num_teacher_crops else: - student_out = student_out2 - - loss = ( - self.loss( - student_out, teacher_out, num_student_crops, num_teacher_crops - ) - / self.grad_acc_steps + student_out_logits = student_out2.logits + student_out_embeds = student_out2.xvector + + loss_dino = self.loss( +
student_out_logits, + teacher_out.logits, + num_student_crops, + num_teacher_crops, ) + loss = loss_dino + if self.cosine_loss is not None: + scaled_loss_cosine, loss_cosine = self.cosine_loss( + student_out_embeds, + teacher_out.xvector, + num_student_crops, + num_teacher_crops, + ) + loss = loss_dino + scaled_loss_cosine + + loss = loss / self.grad_acc_steps assert not torch.isnan( loss ), f"loss is nan {batch} {torch.mean(teacher_out)} {torch.mean(student_out1)} {torch.mean(student_out2)}" @@ -229,8 +258,9 @@ def train_epoch(self, data_loader): self.save_checkpoint(partial=True) batch_metrics["loss"] = loss.item() * self.grad_acc_steps - # for k, metric in self.metrics.items(): - # batch_metrics[k] = metric(output, target) + if self.cosine_loss is not None: + batch_metrics["loss_dino"] = loss_dino.item() + batch_metrics["loss_cosine"] = loss_cosine.item() metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -275,31 +305,53 @@ def validation_epoch(self, data_loader, swa_update_bn=False): num_teacher_crops = len(teacher_data) teacher_data = torch.cat(teacher_data, dim=0) teacher_out = self.teacher_model(teacher_data) - assert not torch.any(torch.isnan(teacher_out)), "teacher is nan" - assert not torch.any(torch.isinf(teacher_out)), "teacher is inf" + assert not torch.any(torch.isnan(teacher_out.logits)), "teacher is nan" + assert not torch.any(torch.isinf(teacher_out.logits)), "teacher is inf" if num_teacher_crops > 1: student_out1 = self.model(teacher_data) - assert not torch.any(torch.isnan(student_out1)), "s1 is nan" - assert not torch.any(torch.isinf(student_out1)), "s1 is inf" + assert not torch.any(torch.isnan(student_out1.logits)), "s1 is nan" + assert not torch.any(torch.isinf(student_out1.logits)), "s1 is inf" student_data = tensors_subset(data, student_keys, self.device) num_student_crops = len(student_data) student_data = torch.cat(student_data, dim=0) student_out2 = self.model(student_data) - assert not torch.any(torch.isnan(student_out2)), "s2 is nan" - assert not torch.any(torch.isinf(student_out2)), "s2 is inf" + assert not torch.any(torch.isnan(student_out2.logits)), "s2 is nan" + assert not torch.any(torch.isinf(student_out2.logits)), "s2 is inf" if num_teacher_crops > 1: - student_out = torch.cat((student_out1, student_out2), dim=0) + student_out_logits = torch.cat( + (student_out1.logits, student_out2.logits), dim=0 + ) + if self.cosine_loss is not None: + student_out_embeds = torch.cat( + (student_out1.xvector, student_out2.xvector), dim=0 + ) num_student_crops += num_teacher_crops else: - student_out = student_out2 - - loss = self.loss( - student_out, teacher_out, num_student_crops, num_teacher_crops + student_out_logits = student_out2.logits + student_out_embeds = student_out2.xvector + + loss_dino = self.loss( + student_out_logits, + teacher_out.logits, + num_student_crops, + num_teacher_crops, ) + loss = loss_dino + if self.cosine_loss is not None: + scaled_loss_cosine, loss_cosine = self.cosine_loss( + student_out_embeds, + teacher_out.xvector, + num_student_crops, + num_teacher_crops, + ) + loss = loss_dino + scaled_loss_cosine batch_metrics["loss"] = loss.item() + if self.cosine_loss is not None: + batch_metrics["loss_dino"] = loss_dino.item() + batch_metrics["loss_cosine"] = loss_cosine.item() # for k, metric in self.metrics.items(): # batch_metrics[k] = metric(output, target) @@ -317,17 +369,26 @@ def _old_load_checkpoint(self, checkpoint): ) return super()._load_checkpoint(checkpoint) - def _load_checkpoint(self, checkpoint, 
teacher_checkpoint): + def _load_checkpoint(self, checkpoint, teacher_checkpoint, loss_checkpoint=None): self.teacher_model.load_state_dict(teacher_checkpoint["model_state_dict"]) self.teacher_optimizer.load_state_dict( teacher_checkpoint["optimizer_state_dict"] ) + if loss_checkpoint is not None: + self.loss.load_state_dict(loss_checkpoint["model_state_dict"]) return super()._load_checkpoint(checkpoint) def load_checkpoint(self, epoch, step): checkpoint = self.load_model_checkpoint("model", epoch, step) teacher_checkpoint = self.load_model_checkpoint("teacher_model", epoch, step) - return self._load_checkpoint(checkpoint, teacher_checkpoint) + try: + loss_checkpoint = self.load_model_checkpoint("dino_loss", epoch, step) + except: + logging.warning( + "dino loss checkpoint not found, initial center will be zero-vector" + ) + loss_checkpoint = None + return self._load_checkpoint(checkpoint, teacher_checkpoint, loss_checkpoint) def checkpoint(self, logs=None): checkpoint = super().checkpoint(logs) @@ -357,6 +418,16 @@ def teacher_checkpoint(self, logs=None): return checkpoint + def dino_loss_checkpoint(self, logs=None): + self.loss.train() + checkpoint = { + "epoch": self.cur_epoch, + "batch": self.cur_batch, + "global_step": self.global_step, + "model_state_dict": self.loss.state_dict(), + } + return checkpoint + def save_checkpoint(self, logs=None, partial: bool = False): """Saves a checkpoint of the training status @@ -386,6 +457,9 @@ def save_checkpoint(self, logs=None, partial: bool = False): teacher_checkpoint = self.teacher_checkpoint(logs) self.save_model_checkpoint("teacher_model", teacher_checkpoint, partial=partial) + loss_checkpoint = self.dino_loss_checkpoint() + self.save_model_checkpoint("dino_loss", loss_checkpoint, partial=partial) + @staticmethod def filter_args(**kwargs): args = filter_func_args(DINOXVectorTrainer.__init__, kwargs) diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index b9dd67d7..8603b22a 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -140,7 +140,7 @@ def train_epoch(self, data_loader): with amp.autocast(enabled=self.use_amp): output = self.model(input_data, target) - loss = self.loss(output, target).mean() / self.grad_acc_steps + loss = self.loss(output.logits, target) / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -194,9 +194,9 @@ def validation_epoch(self, data_loader, swa_update_bn=False): with torch.no_grad(): with amp.autocast(enabled=self.use_amp): output = self.model(data, **self.amp_args) - loss = self.loss(output, target) + loss = self.loss(output.logits, target) - batch_metrics["loss"] = loss.mean().item() + batch_metrics["loss"] = loss.item() for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index 8ece7de2..ccafecdd 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -135,9 +135,6 @@ def train_epoch(self, data_loader): self.model.eval() data_adv = self.attack.generate(input_data, target) max_delta = torch.max(torch.abs(data_adv - data)).item() - # z = torch.abs(data_adv-data) > 100 - # logging.info('zz {} {}'.format(data[z], data_adv[z])) - # logging.info('adv attack max perturbation=%f' % (max_delta)) input_data = data_adv self.model.train() @@ 
-148,7 +145,7 @@ def train_epoch(self, data_loader): with amp.autocast(enabled=self.use_amp): output = self.model(feats, y=target) - loss = self.loss(output, target).mean() / self.grad_acc_steps + loss = self.loss(output.logits, target) / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -202,9 +199,9 @@ def validation_epoch(self, data_loader, swa_update_bn=False): feats = self.feat_extractor(input_data) with amp.autocast(enabled=self.use_amp): output = self.model(feats) - loss = self.loss(output, target) + loss = self.loss(output.logits, target) - batch_metrics["loss"] = loss.mean().item() + batch_metrics["loss"] = loss.item() for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 2902f23d..151993e0 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -122,7 +122,7 @@ def train_epoch(self, data_loader): batch_size = x.size(0) with amp.autocast(enabled=self.use_amp): output = self.model(x, y=target) - loss = self.loss(output, target) / loss_scale + loss = self.loss(output.logits, target) / loss_scale loss_acc += loss.item() if self.use_amp: @@ -180,7 +180,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_size = x.size(0) with amp.autocast(enabled=self.use_amp): output = self.model(x) - loss = self.loss(output, target) / loss_scale + loss = self.loss(output.logits, target) / loss_scale loss_acc += loss.item() batch_metrics["loss"] = loss_acc diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 98bc404d..1c9209f6 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -147,12 +147,10 @@ def train_epoch(self, data_loader): h_enc, h_classif, output = ( outputs["h_enc"], outputs["h_classif"], - outputs["output"], + outputs["logits"], ) - loss = self.loss( - output, target - ).mean() # you need to take the mean here because of the multi-gpu training + loss = self.loss(output, target) batch_metrics["loss-classif"] = loss.item() prior_outputs = self.prior_model( @@ -269,4 +267,3 @@ def add_class_args(parser, prefix=None, skip=[]): if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) - # help='trainer options') diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 1005435f..4b1d23ba 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -140,12 +140,12 @@ def train_epoch(self, data_loader): h_enc, h_classif, output = ( outputs["h_enc"], outputs["h_classif"], - outputs["output"], + outputs["logits"], ) loss = self.loss( output, target - ).mean() # you need to take the mean here because of the multi-gpu training + ) batch_metrics["loss-classif"] = loss.item() prior_outputs = self.prior_model( @@ -231,9 +231,9 @@ def validation_epoch(self, data_loader, swa_update_bn=False): feats = self.feat_extractor(input_data) with amp.autocast(enabled=self.use_amp): output = self.model(feats) - loss = self.loss(output, target) + loss = self.loss(output.logits, target) - batch_metrics["loss"] = loss.mean().item()
+ batch_metrics["loss"] = loss.item() for k, metric in self.metrics.items(): batch_metrics[k] = metric(output, target) diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index 2a238a06..f46b2109 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -119,7 +119,7 @@ def train_epoch(self, data_loader): with amp.autocast(enabled=self.use_amp): output = self.model(feats, feats_lengths, y=target) - loss = self.loss(output, target).mean() / self.grad_acc_steps + loss = self.loss(output.logits, target) / self.grad_acc_steps if self.use_amp: self.grad_scaler.scale(loss).backward() @@ -173,7 +173,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): feats, feats_lengths = self.feat_extractor(audio) with amp.autocast(enabled=self.use_amp): output = self.model(feats, feats_lengths) - loss = self.loss(output, target) + loss = self.loss(output.logits, target) batch_metrics["loss"] = loss.mean().item() for k, metric in self.metrics.items(): From 2b707d058eead25d434b7932cd2df306937d6694 Mon Sep 17 00:00:00 2001 From: System User Date: Fri, 3 May 2024 15:05:17 -0400 Subject: [PATCH 134/154] xxx --- ...rain_fwseres2net50s8_xvec_stage1_v1.1.yaml | 78 ------- .../open.v2.8k/run_030_extract_xvectors.sh | 217 ++++++++++++++++++ egs/voxceleb/v1.2/cmd.sh | 2 +- ...train_fwseresnet34pe_xvec_stage1_v3.1.yaml | 78 +++++++ ...rain_idrnd_resnet100_xvec_stage2_v3.2.yaml | 74 ++++++ .../config_fbank80_stmn_fwseresnet34.v3.1.sh | 4 +- ...config_fbank80_stmn_fwseresnet34pe.v3.1.sh | 44 ++++ ...onfig_fbank80_stmn_idrnd_resnet100.v3.2.sh | 45 ++++ egs/voxceleb/v1.2/run_007_eval_be.sh | 4 +- hyperion/torch/layer_blocks/resnet_blocks.py | 3 +- 10 files changed, 465 insertions(+), 84 deletions(-) delete mode 100644 egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml create mode 100755 egs/lre22/open.v2.8k/run_030_extract_xvectors.sh create mode 100644 egs/voxceleb/v1.2/conf/train_fwseresnet34pe_xvec_stage1_v3.1.yaml create mode 100644 egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.2.yaml create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34pe.v3.1.sh create mode 100644 egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.2.sh diff --git a/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml b/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml deleted file mode 100644 index 28b1f641..00000000 --- a/egs/lre22/fixed.v1.8k/conf/train_fwseres2net50s8_xvec_stage1_v1.1.yaml +++ /dev/null @@ -1,78 +0,0 @@ -data: - train: - dataset: - class_names: - - class_id - aug_cfgs: - - conf/reverb_noise_aug.yaml - return_segment_info: - - class_id - sampler: - sampler_type: seg_chunk_sampler - min_batch_size: 24 - max_chunk_length: 3.0 - min_chunk_length: 3.0 - data_loader: - num_workers: 8 - val: - dataset: - class_names: - - class_id - aug_cfgs: - - conf/reverb_noise_aug.yaml - return_segment_info: - - class_id - sampler: - sampler_type: seg_chunk_sampler - min_batch_size: 24 - max_chunk_length: 3.0 - min_chunk_length: 3.0 - data_loader: - num_workers: 8 -feats: fbank64_specaug1_stmn_8k.yaml -model: - resnet_type: fwseres2net50 - in_channels: 1 - in_feats: 64 - in_kernel_size: 3 - in_stride: 1 - no_maxpool: true - res2net_width_factor: 3.25 - res2net_scale: 8 - se_r: 4 - pool_net: - pool_type: ch-wise-att-mean+stddev - inner_feats: 128 - embed_dim: 192 - loss_type: 
subcenter-arc-softmax - num_subcenters: 2 - cos_scale: 30.0 - margin: 0.0 - intertop_margin: 0.0 - margin_warmup_epochs: 3.0 - dropout_rate: 0.1 - norm_before: false - hid_act: swish -trainer: - optim: - opt_type: adam - lr: 0.01 - amsgrad: true - beta1: 0.9 - beta2: 0.95 - weight_decay: 2.0e-05 - lrsched: - lrsch_type: exp_lr - decay_rate: 0.5 - decay_steps: 40000 - hold_steps: 65000 - warmup_steps: 15000 - min_lr: 1.0e-06 - update_lr_on_opt_step: true - use_amp: true - swa_start: 12 - swa_lr: 1e-5 - swa_anneal_epochs: 2 - log_interval: 1000 - epochs: 15 - eff_batch_size: 256 diff --git a/egs/lre22/open.v2.8k/run_030_extract_xvectors.sh b/egs/lre22/open.v2.8k/run_030_extract_xvectors.sh new file mode 100755 index 00000000..d7e2775b --- /dev/null +++ b/egs/lre22/open.v2.8k/run_030_extract_xvectors.sh @@ -0,0 +1,217 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=2 +nnet_stage=1 +config_file=default_config.sh +use_gpu=false +do_tsne=false +split_dev=false +hf_chunk_length=120 #seconds +xvec_chunk_length=120 #seconds +. parse_options.sh || exit 1; +. $config_file + +if [ "$use_gpu" == "true" ];then + xvec_args="--use-gpu true --xvec-chunk-length $xvec_chunk_length --hf-chunk-length $hf_chunk_length" + xvec_cmd="$cuda_eval_cmd --mem 6G" +else + xvec_cmd="$train_cmd --mem 12G" +fi +if [ $nnet_stages -lt $nnet_stage ];then + nnet_stage=$nnet_stages +fi + +if [ $nnet_stage -eq 1 ];then + nnet=$nnet_s1 + nnet_name=$nnet_s1_name +elif [ $nnet_stage -eq 2 ];then + nnet=$nnet_s2 + nnet_name=$nnet_s2_name +elif [ $nnet_stage -eq 3 ];then + nnet=$nnet_s3 + nnet_name=$nnet_s3_name +elif [ $nnet_stage -eq 4 ];then + nnet=$nnet_s4 + nnet_name=$nnet_s4_name +elif [ $nnet_stage -eq 5 ];then + nnet=$nnet_s5 + nnet_name=$nnet_s5_name +elif [ $nnet_stage -eq 6 ];then + nnet=$nnet_s6 + nnet_name=$nnet_s6_name +fi + +xvector_dir=exp/xvectors/$nnet_name + +# if [ $stage -le 1 ]; then +# # Extract xvectors for training +# for name in lre17_proc_audio_no_sil \ +# voxlingua107_codecs_proc_audio_no_sil \ +# babel_sre_proc_audio_no_sil \ +# cv_codecs_proc_audio_no_sil \ +# others_afr_proc_audio_no_sil +# do +# steps_xvec/extract_wav2vec2xvectors.sh \ +# --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ +# --use-bin-vad false \ +# --random-utt-length true --min-utt-length 3 --max-utt-length 30 \ +# $nnet data/${name} \ +# $xvector_dir/${name} +# done +# fi + +if [ $stage -le 2 ]; then + # Extract xvectors for training + for name in lre22_dev + do + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd" --nj 100 ${xvec_args} \ + --use-bin-vad true --num-augs 10 --aug-config conf/reverb_noise_aug.yaml \ + --random-utt-length true --min-utt-length 3 --max-utt-length 30 \ + $nnet data/${name} \ + $xvector_dir/${name}_aug \ + data/${name}_aug + done +fi + + +if [ $stage -le 3 ]; then + # Extracts x-vectors for dev and eval + for name in lre22_dev lre22_eval + do + num_spk=$(wc -l data/$name/spk2utt | awk '{ print $1}') + nj=$(($num_spk < 100 ? 
$num_spk:100)) + steps_xvec/extract_wav2vec2xvectors.sh \ + --cmd "$xvec_cmd --mem 6G" --nj $nj ${xvec_args} \ + $nnet data/$name \ + $xvector_dir/$name + done +fi + + +if [ $stage -le 4 ]; then + for name in lre22_dev + do + if [ "$do_tsne" == "true" ] || [ "$split_dev" == "true" ];then + $train_cmd \ + $xvector_dir/$name/tsne/tsne.log \ + hyp_utils/conda_env.sh \ + plot_embedding_tsne.py \ + --train-list data/$name/utt2lang \ + --train-v-file scp:$xvector_dir/$name/xvector.scp \ + --output-dir $xvector_dir/$name/tsne \ + --pca-var-r 0.975 \ + --lnorm \ + --prob-plot 1. \ + --tsne.metric cosine \ + --tsne.early-exaggeration 12 --tsne.perplexity 30 + + $train_cmd \ + $xvector_dir/$name/tsne_per_class/tsne.log \ + hyp_utils/conda_env.sh \ + plot_embedding_tsne_per_class.py \ + --train-list data/$name/utt2lang \ + --train-v-file scp:$xvector_dir/$name/xvector.scp \ + --output-dir $xvector_dir/$name/tsne_per_class \ + --pca-var-r 0.975 \ + --lnorm \ + --prob-plot 1. \ + --tsne.metric cosine \ + --tsne.early-exaggeration 12 --tsne.perplexity 30 \ + --do-ahc --cluster-tsne --ahc-thr -5 + + if [ "$split_dev" == "true" ];then + hyp_utils/conda_env.sh \ + local/split_dev.py \ + --segs-file $xvector_dir/$name/tsne_per_class/segments.csv \ + --output-dir ./resources/dev_splits \ + --num-folds 2 + + # delete the split data dirs so they are regenerated later + rm -rf data/lre22_dev_p{1,2} + + fi + fi + done +fi + +if [ $stage -le 5 ]; then + if [ ! -d data/lre22_dev_p1 ];then + awk -F "," '$1!="id" { print $1}' \ + ./resources/dev_splits/fold_0/train_segments.csv \ + > p1.lst + awk -F "," '$1!="id" { print $1}' \ + ./resources/dev_splits/fold_0/test_segments.csv \ + > p2.lst + + for p in p1 p2 + do + utils/subset_data_dir.sh \ + --utt-list $p.lst \ + data/lre22_dev data/lre22_dev_$p + done + fi +fi + +if [ $stage -le 6 ]; then + if [ -d data/lre22_dev_aug ] && [ ! -d data/lre22_dev_aug_p1 ];then + awk -v fsegs=./resources/dev_splits/fold_0/train_segments.csv ' +BEGIN{FS=","; +getline; +while(getline < fsegs) +{ + segs[$1] +} +FS=" "; +} +{ if($2 in segs){ print $1}}' data/lre22_dev_aug/augm2clean \ + > p1.lst + + awk -v fsegs=./resources/dev_splits/fold_0/test_segments.csv ' +BEGIN{FS=","; +getline; +while(getline < fsegs) +{ + segs[$1]=1; +} +FS=" "; +} +{ if($2 in segs){ print $1}}' data/lre22_dev_aug/augm2clean \ + > p2.lst + + for p in p1 p2 + do + utils/subset_data_dir.sh \ + --utt-list $p.lst \ + data/lre22_dev_aug data/lre22_dev_aug_$p + done + fi +fi + +if [ $stage -le 7 ];then + if [ -f $xvector_dir/lre22_dev_aug/xvector.scp ];then + mkdir -p $xvector_dir/lre22_dev_aug_clean + cat $xvector_dir/lre22_dev/xvector.scp \ + $xvector_dir/lre22_dev_aug/xvector.scp \ + > $xvector_dir/lre22_dev_aug_clean/xvector.scp + + for p in "" _p1 _p2 + do + if [ ! 
-d data/lre22_dev_aug_clean$p ]; then + utils/combine_data.sh \ + data/lre22_dev_aug_clean$p \ + data/lre22_dev$p \ + data/lre22_dev_aug$p + fi + done + fi +fi + +exit diff --git a/egs/voxceleb/v1.2/cmd.sh b/egs/voxceleb/v1.2/cmd.sh index 040f458b..381b14e0 100755 --- a/egs/voxceleb/v1.2/cmd.sh +++ b/egs/voxceleb/v1.2/cmd.sh @@ -14,8 +14,8 @@ if [ "$(hostname -d)" == "cm.gemini" ];then #export train_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" export train_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" export cuda_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 20G" - #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" export cuda_cmd="queue.pl --config conf/coe_gpu_rtx.conf --mem 40G" + #export cuda_cmd="queue.pl --config conf/coe_gpu_v100.conf --mem 20G" export cuda_eval_cmd="queue.pl --config conf/coe_gpu_short.conf --mem 4G" # export cuda_eval_cmd="queue.pl --config conf/coe_gpu_long.conf --mem 4G" else diff --git a/egs/voxceleb/v1.2/conf/train_fwseresnet34pe_xvec_stage1_v3.1.yaml b/egs/voxceleb/v1.2/conf/train_fwseresnet34pe_xvec_stage1_v3.1.yaml new file mode 100644 index 00000000..efa601c0 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_fwseresnet34pe_xvec_stage1_v3.1.yaml @@ -0,0 +1,78 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + num_augs: 4 + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: seg_chunk_sampler + min_batch_size: 64 + max_chunk_length: 2.0 + min_chunk_length: 2.0 + data_loader: + num_workers: 8 +model: + feats: fbank80_specaug1_stmn_16k.yaml + xvector: + resnet_type: fwseresnet34 + in_channels: 1 + in_feats: 80 + in_kernel_size: 3 + in_stride: 1 + no_maxpool: true + pool_net: + pool_type: ch-wise-att-mean+stddev + inner_feats: 128 + embed_dim: 192 + cos_scale: 30.0 + loss_type: subcenter-arc-softmax + num_subcenters: 2 + margin: 0.2 + margin_warmup_epochs: 5.0 + dropout_rate: 0.1 + norm_before: false + hid_act: swish + se_r: 4 + freq_pos_enc: true +trainer: + optim: + opt_type: adam + lr: 0.01 + amsgrad: true + beta1: 0.9 + beta2: 0.99 + weight_decay: 2.0e-05 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 40000 + hold_steps: 65000 + min_lr: 1.0e-05 + warmup_steps: 15000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 35 + eff_batch_size: 256 + target_key: speaker +master_port: 4567 \ No newline at end of file diff --git a/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.2.yaml b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.2.yaml new file mode 100644 index 00000000..99fbf196 --- /dev/null +++ b/egs/voxceleb/v1.2/conf/train_idrnd_resnet100_xvec_stage2_v3.2.yaml @@ -0,0 +1,74 @@ +data: + train: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 + val: + dataset: + class_names: + - speaker + aug_cfgs: + - conf/reverb_noise_aug.yaml + return_segment_info: + - speaker + 
sampler: + sampler_type: class_weighted_random_seg_chunk_sampler + min_batch_size: 16 + max_chunk_length: 4.0 + min_chunk_length: 4.0 + num_chunks_per_seg_epoch: 6 + class_name: speaker + seg_weight_mode: data-prior + num_hard_prototypes: 8 + data_loader: + num_workers: 8 +model: + xvector: + override_output: true + loss_type: subcenter-arc-softmax + num_subcenters: 2 + cos_scale: 30.0 + margin: 0.3 + margin_warmup_epochs: 0 + intertop_margin: 0.1 + override_dropouts: true + dropout_rate: 0.025 +trainer: + optim: + opt_type: sgd + lr: 1e-4 + momentum: 0.9 + weight_decay: 2e-5 + lrsched: + lrsch_type: exp_lr + decay_rate: 0.5 + decay_steps: 32000 + hold_steps: 16000 + min_lr: 1.0e-6 + warmup_steps: 8000 + update_lr_on_opt_step: true + grad_clip: 250 + use_amp: true + log_interval: 1000 + epochs: 11 + eff_batch_size: 256 + swa_start: 20 + swa_lr: 1e-5 + swa_anneal_epochs: 2 + target_key: speaker + \ No newline at end of file diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.1.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.1.sh index 12b86dd1..19f90be6 100644 --- a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.1.sh +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34.v3.1.sh @@ -26,8 +26,8 @@ nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth # back-end do_plda=false -do_snorm=false #true -do_qmf=false #true +do_snorm=true +do_qmf=true do_voxsrc22=true plda_aug_config=conf/reverb_noise_aug.yaml diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34pe.v3.1.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34pe.v3.1.sh new file mode 100644 index 00000000..62092708 --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_fwseresnet34pe.v3.1.sh @@ -0,0 +1,44 @@ +# Freq-wise-SE ResNet34 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_fwseresnet34pe.v3.1 + +nnet_s1_base_cfg=conf/train_fwseresnet34pe_xvec_stage1_v3.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name +nnet_s1=$nnet_s1_dir/model_ep0035.pth + +nnet_s2_base_cfg=conf/train_fwseresnet34_xvec_stage2_v3.1.yaml +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/swa_model_ep0016.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.2.sh b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.2.sh new file mode 100644 index 00000000..4dbee17d --- /dev/null +++ b/egs/voxceleb/v1.2/global_conf/config_fbank80_stmn_idrnd_resnet100.v3.2.sh @@ -0,0 +1,45 @@ +# IdRnd ResNet100 + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train + +# x-vector cfg +nnet_type=resnet +nnet_name=${feat_type}_idrnd_resnet100.v3.1 + +nnet_s1_base_cfg=conf/train_idrnd_resnet100_xvec_stage1_v3.1.yaml +nnet_s1_name=$nnet_name.s1 +nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name 
+nnet_s1=$nnet_s1_dir/model_ep0029.pth + +nnet_s2_base_cfg=conf/train_idrnd_resnet100_xvec_stage2_v3.2.yaml +nnet_name=${feat_type}_idrnd_resnet100.v3.2 +nnet_s2_name=${nnet_name}.s2 +nnet_s2_dir=exp/xvector_nnets/$nnet_s2_name +nnet_s2=$nnet_s2_dir/model_ep0011.pth + +# back-end +do_plda=false +do_snorm=true +do_qmf=true +do_voxsrc22=true + +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=0 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1.2/run_007_eval_be.sh b/egs/voxceleb/v1.2/run_007_eval_be.sh index 53621488..bd436644 100755 --- a/egs/voxceleb/v1.2/run_007_eval_be.sh +++ b/egs/voxceleb/v1.2/run_007_eval_be.sh @@ -157,7 +157,7 @@ if [ "$do_snorm" == "true" ];then do for((j=1;j<=$num_parts;j++)); do - $train_cmd $score_cosine_snorm_dir/log/voxsrc22_dev_${i}_${j}.log \ + $train_cmd --mem 22G $score_cosine_snorm_dir/log/voxsrc22_dev_${i}_${j}.log \ hyp_utils/conda_env.sh \ hyperion-eval-cosine-scoring-backend \ --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \ @@ -278,7 +278,7 @@ if [ "$do_qmf" == "true" ];then do for((j=1;j<=$num_parts;j++)); do - $train_cmd $score_cosine_qmf_dir/log/voxsrc22_dev_${i}_${j}.log \ + $train_cmd --mem 22G $score_cosine_qmf_dir/log/voxsrc22_dev_${i}_${j}.log \ hyp_utils/conda_env.sh \ hyperion-eval-cosine-scoring-backend-with-qmf \ --feats-file csv:$xvector_dir/voxsrc22_dev/xvector.csv \ diff --git a/hyperion/torch/layer_blocks/resnet_blocks.py b/hyperion/torch/layer_blocks/resnet_blocks.py index 428d8139..17b6ce25 100644 --- a/hyperion/torch/layer_blocks/resnet_blocks.py +++ b/hyperion/torch/layer_blocks/resnet_blocks.py @@ -41,6 +41,7 @@ def _make_downsample(in_channels, out_channels, stride, norm_layer, norm_before) class FreqPosEnc(nn.Module): def __init__(self, num_feats): + super().__init__() self.pos_enc = nn.Parameter(torch.zeros((num_feats, 1))) def forward(self, x): @@ -185,7 +186,7 @@ def __init__( self.downsample_factor = stride self.pos_enc = None if freq_pos_enc: - self.pos_enc = FreqPosEnc(num_feats) + self.pos_enc = FreqPosEnc(num_feats*stride) @property def out_channels(self): From 5a6240e90cb317bb60f5ead68a74dd01b9ea2a78 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 10 May 2024 19:14:59 -0400 Subject: [PATCH 135/154] asr conformer runs --- README.md | 3 + .../v1/conf/fbank80_specaug1_mn_16k.yaml | 3 + egs/librispeech/v1/conf/sp_unigram_1000.yaml | 9 + egs/librispeech/v1/conf/sp_unigram_512.yaml | 9 + ...mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml | 4 +- ...nk80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh | 12 +- egs/librispeech/v1/run_001_prepare_data.sh | 25 +++ .../v1/run_002_prepare_noises_rirs.sh | 102 ++++++++++ .../v1/run_003_train_tokenizers.sh | 25 +++ egs/librispeech/v1/run_004_train_asr.sh | 47 +++++ .../conf/train_fwseresnet34_dino_v1.2.2.yaml | 3 +- ...config_fbank80_stmn_fwseresnet34.v1.2.2.sh | 10 +- hyperion/bin/hyperion_dataset.py | 58 +++++- hyperion/bin/train_dino_wav2xvector.py | 4 +- hyperion/bin/train_tokenizer.py | 174 ++++++++++++++++ hyperion/bin/train_wav2rnn_transducer.py | 19 +- hyperion/bin/train_wav2xvector.py | 2 +- hyperion/np/augment/speed_augment.py | 4 +- hyperion/torch/data/bucketing_seg_sampler.py | 33 ++-- hyperion/torch/data/dino_audio_dataset.py | 38 ++-- hyperion/torch/data/seg_sampler.py | 107 +++++----- hyperion/torch/data/seg_sampler_factory.py | 42 ++-- .../torch/layer_blocks/transformer_input.py | 3 +- 
hyperion/torch/layers/mvn.py | 4 + hyperion/torch/layers/spec_augment.py | 71 ++++--- hyperion/torch/models/__init__.py | 5 +- hyperion/torch/models/transducer/__init__.py | 9 +- .../torch/models/transducer/rnn_transducer.py | 2 + .../{subsampling.py => subsampling0.py} | 0 .../{transducer.py => transducer0.py} | 0 .../{transformer.py => transformer0.py} | 7 +- .../torch/models/wav2transducer/__init__.py | 7 +- .../wav2transducer/hf_wav2rnn_transducer.py | 98 ++++----- .../hf_wav2vec2rnn_transducer.py | 24 +-- .../wav2conformer_v1_rnn_transducer.py | 73 +++++++ .../wav2transducer/wav2rnn_rnn_transducer.py | 71 +++++++ .../wav2transducer/wav2rnn_transducer.py | 27 +-- .../wav2xvectors/wav2resnet1d_xvector.py | 1 - hyperion/torch/narchs/conformer_encoder_v1.py | 2 +- hyperion/torch/trainers/dvae_trainer.py | 4 +- hyperion/torch/trainers/torch_trainer.py | 2 +- hyperion/torch/trainers/transducer_trainer.py | 5 +- hyperion/torch/trainers/vae_trainer.py | 4 +- hyperion/torch/trainers/vq_dvae_trainer.py | 4 +- hyperion/torch/trainers/vq_vae_trainer.py | 4 +- .../torch/trainers/xvector_adv_trainer.py | 4 +- .../trainers/xvector_adv_trainer_from_wav.py | 4 +- hyperion/torch/trainers/xvector_trainer.py | 4 +- .../trainers/xvector_trainer_deep_feat_reg.py | 2 +- .../xvector_trainer_deep_feat_reg_from_wav.py | 4 +- .../trainers/xvector_trainer_from_wav.py | 4 +- hyperion/torch/utils/masking.py | 13 +- hyperion/utils/class_info.py | 12 +- hyperion/utils/hyp_dataset.py | 187 ++++++++++++++++-- hyperion/utils/info_table.py | 24 ++- hyperion/utils/recording_set.py | 39 ++++ 56 files changed, 1153 insertions(+), 304 deletions(-) create mode 100644 egs/librispeech/v1/conf/sp_unigram_1000.yaml create mode 100644 egs/librispeech/v1/conf/sp_unigram_512.yaml create mode 100755 egs/librispeech/v1/run_002_prepare_noises_rirs.sh create mode 100755 egs/librispeech/v1/run_003_train_tokenizers.sh create mode 100755 egs/librispeech/v1/run_004_train_asr.sh create mode 100644 hyperion/bin/train_tokenizer.py rename hyperion/torch/models/transducer/{subsampling.py => subsampling0.py} (100%) rename hyperion/torch/models/transducer/{transducer.py => transducer0.py} (100%) rename hyperion/torch/models/transducer/{transformer.py => transformer0.py} (98%) create mode 100644 hyperion/torch/models/wav2transducer/wav2conformer_v1_rnn_transducer.py create mode 100644 hyperion/torch/models/wav2transducer/wav2rnn_rnn_transducer.py diff --git a/README.md b/README.md index d56406d7..71a0fbd3 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,9 @@ The full API is described in the documentation page [https://hyperion-ml.readthe conda create --name ${your_env} python=3.11 conda activate ${your_env} conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia +# If using k2 for ASR +wget https://huggingface.co/csukuangfj/k2/resolve/main/ubuntu-cuda/k2-1.24.4.dev20240223+cuda11.8.torch2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl +pip install k2-1.24.4.dev20240223+cuda11.8.torch2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ``` For systems with cuda 10.2 driver: diff --git a/egs/librispeech/v1/conf/fbank80_specaug1_mn_16k.yaml b/egs/librispeech/v1/conf/fbank80_specaug1_mn_16k.yaml index 99f202bb..e6def26c 100644 --- a/egs/librispeech/v1/conf/fbank80_specaug1_mn_16k.yaml +++ b/egs/librispeech/v1/conf/fbank80_specaug1_mn_16k.yaml @@ -23,3 +23,6 @@ spec_augment: mask_method: mean mvn: norm_var: false + left_context: 0 + right_context: 0 + diff --git 
a/egs/librispeech/v1/conf/sp_unigram_1000.yaml b/egs/librispeech/v1/conf/sp_unigram_1000.yaml new file mode 100644 index 00000000..2a9b1b1e --- /dev/null +++ b/egs/librispeech/v1/conf/sp_unigram_1000.yaml @@ -0,0 +1,9 @@ +vocab_size: 1000 +model_type: unigram +char_coverage: 1.0 +unk_id: 2 +user_defined_symbols: +- <blk> +- <sos/eos> +uppercase_text: true + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/sp_unigram_512.yaml b/egs/librispeech/v1/conf/sp_unigram_512.yaml new file mode 100644 index 00000000..116e6d22 --- /dev/null +++ b/egs/librispeech/v1/conf/sp_unigram_512.yaml @@ -0,0 +1,9 @@ +vocab_size: 512 +model_type: unigram +char_coverage: 1.0 +unk_id: 2 +user_defined_symbols: +- <blk> +- <sos/eos> +uppercase_text: true + \ No newline at end of file diff --git a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml index ed622adb..a142349b 100644 --- a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml +++ b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml @@ -8,7 +8,7 @@ data: - text sampler: sampler_type: bucketing_seg_sampler - max_batch_length: 625. + max_batch_length: 1500. min_batch_size: 1 drop_last: false data_loader: @@ -20,7 +20,7 @@ data: - text sampler: sampler_type: bucketing_seg_sampler - max_batch_length: 625 + max_batch_length: 1500. min_batch_size: 1 drop_last: true data_loader: diff --git a/egs/librispeech/v1/global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh b/egs/librispeech/v1/global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh index ee8c2b55..62817852 100644 --- a/egs/librispeech/v1/global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh +++ b/egs/librispeech/v1/global_conf/config_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.sh @@ -1,17 +1,19 @@ # Conformer + RNN-T # training data -nnet_train_data=train_960h -nnet_val__data=dev_all +nnet_train_data=librispeech_train-960 +nnet_val_data=librispeech_dev # tokenizer -bpe_model=data/lang_bpe_1000/bpe.model +token_train_data=librispeech_train-960 +token_cfg=conf/sp_unigram_512.yaml +token_dir=data/token_${token_train_data}_unigram_512 +token_model=$token_dir/tokenizer.model # rnn-t cfg nnet_type=conformer_v1_rnn_transducer nnet_name=fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p -nnet_s1_base_cfg=conf/train_${nnet_name}.s1.yaml -nnet_s1_args="" +nnet_s1_cfg=conf/train_${nnet_name}.s1.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/asr_nnets/$nnet_s1_name diff --git a/egs/librispeech/v1/run_001_prepare_data.sh b/egs/librispeech/v1/run_001_prepare_data.sh index 3a4ef221..1ca8b585 100755 --- a/egs/librispeech/v1/run_001_prepare_data.sh +++ b/egs/librispeech/v1/run_001_prepare_data.sh @@ -41,3 +41,28 @@ if [ $stage -le 1 ]; then touch data/lhotse_librispeech/.librispeech.done fi fi + +if [ $stage -le 2 ];then + echo "Stage 2: Convert Manifest to Hyperion Datasets" + for data in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other + do + hyperion-dataset from_lhotse \ + --recordings-file data/lhotse_librispeech/librispeech_recordings_${data}.jsonl.gz \ + --supervisions-file data/lhotse_librispeech/librispeech_supervisions_${data}.jsonl.gz \ + --dataset data/librispeech_${data} + done + +fi + +if [ $stage -le 3 ];then + echo "Stage 3: Merge Librispeech train sets" + hyperion-dataset merge \ + --input-datasets data/librispeech_train-{clean-100,clean-360,other-500} \ + --dataset data/librispeech_train-960 
+ + echo "Stage 3: Merge Librispeech dev sets" + hyperion-dataset merge \ + --input-datasets data/librispeech_dev-{clean,other} \ + --dataset data/librispeech_dev + +fi diff --git a/egs/librispeech/v1/run_002_prepare_noises_rirs.sh b/egs/librispeech/v1/run_002_prepare_noises_rirs.sh new file mode 100755 index 00000000..73c7ed82 --- /dev/null +++ b/egs/librispeech/v1/run_002_prepare_noises_rirs.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nj=10 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +# We prepare the noise files and RIR for online speech augmentation +if [ $stage -le 1 ]; then + for name in noise music speech + do + hyperion-prepare-data musan \ + --corpus-dir $musan_root \ + --subset $name \ + --output-dir data/musan_$name + done +fi + +if [ $stage -le 2 ]; then + # # Prepare to distribute data over multiple machines + # # This only does something at CLSP grid + # hyp_utils/create_data_split_dirs.sh $vad_dir $USER/hyp-data/voxceleb/v1.2/vad $nodes + + for name in musan_noise musan_music + do + input_data_dir=data/$name + output_data_dir=data/${name}_proc_audio + output_dir=exp/proc_audio/$name + $train_cmd JOB=1:$nj $output_dir/log/preproc_audios_${name}.JOB.log \ + hyp_utils/conda_env.sh \ + hyperion-preprocess-audio-files \ + --audio-format flac \ + --part-idx JOB --num-parts $nj \ + --recordings-file $input_data_dir/recordings.csv \ + --output-path $output_dir \ + --output-recordings-file $output_dir/recordings.JOB.csv + + hyperion-tables cat \ + --table-type recordings \ + --output-file $output_dir/recordings.csv --num-tables $nj + hyperion-dataset set_recordings \ + --dataset $input_data_dir \ + --recordings-file $output_dir/recordings.csv \ + --output-dataset $output_data_dir + + + done +fi + +if [ $stage -le 3 ]; then + # Create Babble noise from MUSAN speech files + for name in musan_speech + do + input_data_dir=data/$name + output_data_dir=data/${name}_babble + output_dir=exp/proc_audio/${name}_babble + $train_cmd $output_dir/log/make_babble_noise_${name}.log \ + hyp_utils/conda_env.sh \ + hyperion-make-babble-noise-audio-files \ + --audio-format flac \ + --min-spks 3 --max-spks 10 --num-reuses 5 \ + --recordings-file $input_data_dir/recordings.csv \ + --output-path $output_dir \ + --output-recordings-file $output_data_dir/recordings.csv + hyperion-dataset make_from_recordings \ + --dataset $output_data_dir \ + --recordings-file $output_data_dir/recordings.csv + done +fi + +if [ $stage -le 4 ]; then + if [ ! 
-d "RIRS_NOISES" ]; then + # Download the package that includes the real RIRs, simulated RIRs, isotropic noises and point-source noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip + fi + hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/simulated_rirs/smallroom --output-dir data/rirs_smallroom + hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/simulated_rirs/mediumroom --output-dir data/rirs_mediumroom + hyperion-prepare-data rirs --corpus-dir RIRS_NOISES/real_rirs_isotropic_noises --output-dir data/rirs_real + for rirs in rirs_smallroom rirs_mediumroom rirs_real + do + output_dir=exp/rirs/$rirs + data_dir=data/$rirs + $train_cmd $output_dir/log/pack_rirs_${name}.log \ + hyp_utils/conda_env.sh \ + hyperion-pack-wav-rirs ${args} --input $data_dir/recordings.csv \ + --output h5,csv:$output_dir/rirs.h5,$output_dir/rirs.csv || exit 1; + hyperion-dataset add_features --dataset $data_dir \ + --features-name rirs --features-file $output_dir/rirs.csv + + done +fi + diff --git a/egs/librispeech/v1/run_003_train_tokenizers.sh b/egs/librispeech/v1/run_003_train_tokenizers.sh new file mode 100755 index 00000000..35ae7da2 --- /dev/null +++ b/egs/librispeech/v1/run_003_train_tokenizers.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright +# 2020 Johns Hopkins University (Author: Jesus Villalba) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +nj=10 +config_file=default_config.sh +. parse_options.sh || exit 1; +. $config_file +. datapath.sh + +if [ $stage -le 1 ];then + $train_cmd \ + $token_dir/train_sp.log \ + hyperion-train-tokenizer sentencepiece \ + --cfg $token_cfg \ + --segments-file data/$token_train_data/segments.csv \ + --tokenizer-path $token_dir + +fi diff --git a/egs/librispeech/v1/run_004_train_asr.sh b/egs/librispeech/v1/run_004_train_asr.sh new file mode 100755 index 00000000..d158689e --- /dev/null +++ b/egs/librispeech/v1/run_004_train_asr.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright +# 2022 Johns Hopkins University (Author: Yen-Ju Lu) +# Apache 2.0. +# +. ./cmd.sh +. ./path.sh +set -e + +stage=1 +ngpu=2 +config_file=default_config.sh +interactive=false +num_workers="" +use_tb=false +use_wandb=false + +. parse_options.sh || exit 1; +. $config_file +. 
datapath.sh + +train_dir=data/${nnet_train_data} +val_dir=data/${nnet_val_data} + +if [ "$interactive" == "true" ];then + export cuda_cmd=run.pl +fi + +# Network Training +if [ $stage -le 1 ]; then + + mkdir -p $nnet_s1_dir/log + $cuda_cmd \ + --gpu $ngpu $nnet_s1_dir/log/train.log \ + hyp_utils/conda_env.sh --conda-env $HYP_ENV --num-gpus $ngpu \ + hyperion-train-wav2rnn-transducer $nnet_type \ + --cfg $nnet_s1_cfg \ + --data.train.dataset.recordings-file $train_dir/recordings.csv \ + --data.train.dataset.segments-file $train_dir/segments.csv \ + --data.train.dataset.bpe-model $token_model \ + --data.val.dataset.recordings-file $val_dir/recordings.csv \ + --data.val.dataset.segments-file $val_dir/segments.csv \ + --trainer.exp-path $nnet_s1_dir $args \ + --num-gpus $ngpu + +fi + diff --git a/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml b/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml index 6d6e60a9..37bada36 100644 --- a/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml +++ b/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml @@ -87,7 +87,8 @@ trainer: init_momentum: 0.996 momentum: 1.0 warmup_steps: 500000 - grad_clip: 15 + # grad_clip: 15 + grad_clip: 5 use_amp: true log_interval: 1000 epochs: 100 diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh index 13a72732..8a8b58a3 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh @@ -18,7 +18,15 @@ nnet_s1_base_cfg=conf/train_fwseresnet34_dino_v1.2.2.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name nnet_s1=$nnet_s1_dir/teacher_model_ep0034.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0025.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0038.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0043.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0044.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0046.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0049.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0054.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0058.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0064.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0067.pth # clustering of dino embeddings cluster_method=cos_ahc_plda_ahc diff --git a/hyperion/bin/hyperion_dataset.py b/hyperion/bin/hyperion_dataset.py index 3bb61fb0..f5db8ada 100755 --- a/hyperion/bin/hyperion_dataset.py +++ b/hyperion/bin/hyperion_dataset.py @@ -38,6 +38,9 @@ "split_train_val", "copy", "add_cols_to_segments", + "merge", + "from_lhotse", + "from_kaldi", ] @@ -514,6 +517,30 @@ def add_cols_to_segments( dataset.save(output_dataset) +def make_merge_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--input-datasets", required=True, nargs="+", help="input datasets" + ) + add_common_args(parser) + return parser + + +def merge(dataset: PathLike, input_datasets: List[PathLike]): + input_dataset_paths = input_datasets + dataset_path = dataset + input_datasets = [] + for dset_file in input_dataset_paths: + input_datasets.append(HypDataset.load(dset_file)) + + dataset = HypDataset.merge(input_datasets) + dataset.save(dataset_path) + + def make_from_lhotse_parser(): parser = ArgumentParser() parser.add_argument("--cfg", action=ActionConfigFile) @@ -535,6 +562,8 @@ def 
make_from_lhotse_parser(): default=None, help="lhotse supervisions file", ) + add_common_args(parser) + return parser def from_lhotse( @@ -545,11 +574,36 @@ def from_lhotse( ): assert cuts_file is not None or supervisions_file is not None - dataset_dir = dataset + dataset_path = dataset dataset = HypDataset.from_lhotse( cuts=cuts_file, recordings=recordings_file, supervisions=supervisions_file ) - dataset.save(dataset) + dataset.save(dataset_path) + + +def make_from_kaldi_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--dataset", required=True, help="""dataset dir or .yaml file""" + ) + parser.add_argument( + "--kaldi-data-dir", + required=True, + help="Kaldi data directory", + ) + add_common_args(parser) + return parser + + +def from_kaldi( + dataset: PathLike, + kaldi_data_dir: PathLike, +): + + dataset_path = dataset + dataset = HypDataset.from_kaldi(kaldi_data_dir) + dataset.save(dataset_path) def main(): diff --git a/hyperion/bin/train_dino_wav2xvector.py b/hyperion/bin/train_dino_wav2xvector.py index cb541f55..88d3a556 100755 --- a/hyperion/bin/train_dino_wav2xvector.py +++ b/hyperion/bin/train_dino_wav2xvector.py @@ -237,8 +237,8 @@ def main(): try: config_file = Path(args_sc.trainer.exp_path) / "config.yaml" parser.save(args, str(config_file), format="yaml", overwrite=True) - except: - pass + except Exception as err: + logging.warning(f"failed saving {args} to {config_file} with {err}") args_sc.xvec_class = xvec_dict[xvec_type] # torch docs recommend using forkserver diff --git a/hyperion/bin/train_tokenizer.py b/hyperion/bin/train_tokenizer.py new file mode 100644 index 00000000..b3d28923 --- /dev/null +++ b/hyperion/bin/train_tokenizer.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python +""" + Copyright 2024 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" +import logging +import os +from pathlib import Path +from typing import Dict, List + +import sentencepiece as spm +from jsonargparse import ( + ActionConfigFile, + ActionParser, + ActionYesNo, + ArgumentParser, + namespace_to_dict, +) + +from hyperion.hyp_defs import config_logger +from hyperion.utils import PathLike, SegmentSet + +tokenizer_list = ["sentencepiece"] + + +def add_common_args(parser): + parser.add_argument( + "--segments-file", + required=True, + help="input segments file with sentence transcriptions", + ) + parser.add_argument( + "--text-column", default="text", help="text column in segments file" + ) + parser.add_argument("--tokenizer-path", required=True, help="tokenizer model dir") + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + default=1, + choices=[0, 1, 2, 3], + type=int, + ) + + +def train_sentencepiece( + segments_file: PathLike, + text_column: str, + vocab_size: int, + model_type: str, + char_coverage: str, + sentence_size: int, + user_defined_symbols: List[str], + unk_id: int, + sos_id: int, + eos_id: int, + pad_id: int, + unk_piece: str, + sos_piece: str, + eos_piece: str, + pad_piece: str, + uppercase_text: bool, + tokenizer_path: PathLike, +): + + tokenizer_path = Path(tokenizer_path) + tokenizer_path.mkdir(exist_ok=True, parents=True) + + text_file = tokenizer_path / "text" + if not text_file.is_file(): + segments = SegmentSet.load(segments_file) + with open(text_file, "w", encoding="utf-8") as f_text: + for text in segments[text_column]: + if uppercase_text: + text = text.upper() + f_text.write(f"{text}\n") + + model_prefix = tokenizer_path / 
"tokenizer" + model_file = model_prefix.with_suffix(".model") + if not model_file.is_file(): + spm.SentencePieceTrainer.train( + input=text_file, + vocab_size=vocab_size, + model_type=model_type, + model_prefix=str(model_prefix), + input_sentence_size=sentence_size, + character_coverage=char_coverage, + user_defined_symbols=user_defined_symbols, + unk_id=unk_id, + bos_id=sos_id, + eos_id=eos_id, + pad_id=pad_id, + unk_piece=unk_piece, + bos_piece=sos_piece, + eos_piece=eos_piece, + pad_piece=pad_piece, + ) + + generate_sentencepiece_tokens(model_file, tokenizer_path) + + +def generate_sentencepiece_tokens(model_file: PathLike, tokenizer_path: PathLike): + sp = spm.SentencePieceProcessor() + sp.load(str(model_file)) + token2id: Dict[str, int] = {sp.id_to_piece(i): i for i in range(sp.vocab_size())} + with open(tokenizer_path / "tokens.txt", "w", encoding="utf-8") as f: + for sym, i in token2id.items(): + f.write(f"{sym} {i}\n") + + +def make_sentencepiece_parser(): + parser = ArgumentParser() + parser.add_argument("--cfg", action=ActionConfigFile) + parser.add_argument( + "--vocab-size", default=1000, type=int, help="output vocabulary size" + ) + parser.add_argument( + "--model-type", default="unigram", choices=["unigram", "bpe", "char", "word"] + ) + parser.add_argument("--char-coverage", default=1.0, type=float) + parser.add_argument("--sentence-size", default=100000000, type=int) + parser.add_argument( + "--user-defined-symbols", + default=["", ""], + nargs="+", + help="user defined symbols", + ) + parser.add_argument("--unk-id", default=2, type=int) + parser.add_argument("--sos-id", default=-1, type=int) + parser.add_argument("--eos-id", default=-1, type=int) + parser.add_argument("--pad-id", default=-1, type=int) + parser.add_argument("--unk-piece", default="") + parser.add_argument("--sos-piece", default="") + parser.add_argument("--eos-piece", default="") + parser.add_argument("--pad-piece", default="") + parser.add_argument("--uppercase-text", default=True, action=ActionYesNo) + + add_common_args(parser) + return parser + + +def main(): + parser = ArgumentParser(description="Train sentence piece tokenizer") + parser.add_argument("--cfg", action=ActionConfigFile) + + subcommands = parser.add_subcommands() + for subcommand in tokenizer_list: + parser_func = f"make_{subcommand}_parser" + subparser = globals()[parser_func]() + subcommands.add_subcommand(subcommand, subparser) + + args = parser.parse_args() + try: + gpu_id = int(os.environ["LOCAL_RANK"]) + except: + gpu_id = 0 + + subcommand = f"train_{args.subcommand}" + kwargs = namespace_to_dict(args)[args.subcommand] + if gpu_id == 0: + try: + config_file = Path(kwargs["tokenizer_path"]) / "config.yaml" + parser.save(args, str(config_file), format="yaml", overwrite=True) + except Exception as err: + logging.warning(f"failed saving {args} err={err}") + + config_logger(kwargs["verbose"]) + del kwargs["verbose"] + del kwargs["cfg"] + globals()[subcommand](**kwargs) + + +if __name__ == "__main__": + main() diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py index 6d947d24..ebd23845 100755 --- a/hyperion/bin/train_wav2rnn_transducer.py +++ b/hyperion/bin/train_wav2rnn_transducer.py @@ -25,12 +25,13 @@ from hyperion.hyp_defs import config_logger, set_float_cpu from hyperion.torch.data import AudioDataset as AD from hyperion.torch.data import SegSamplerFactory -from hyperion.torch.models import Wav2RNNRNNTransducer +from hyperion.torch.models import Wav2ConformerV1RNNTransducer, 
Wav2RNNRNNTransducer from hyperion.torch.trainers import TransducerTrainer as Trainer from hyperion.torch.utils import ddp model_dict = { "rnn_rnn_transducer": Wav2RNNRNNTransducer, + "conformer_v1_rnn_transducer": Wav2ConformerV1RNNTransducer, } @@ -38,6 +39,14 @@ def transducer_collate(batch): audio = [] audio_length = [] target = [] + for record in batch: + audio_length.append(record["x"].shape[0]) + audio_length = torch.as_tensor(audio_length) + if not torch.all(audio_length[:-1] >= audio_length[1:]): + sort_idx = torch.argsort(audio_length, descending=True) + batch = [batch[i] for i in sort_idx] + + audio_length = [] for record in batch: wav = torch.as_tensor(record["x"]) audio.append(wav) @@ -109,7 +118,7 @@ def train_model(gpu_id, args): set_float_cpu("float32") # torch.backends.cudnn.deterministic = True # torch.backends.cudnn.benchmark = False - torch.backends.cudnn.enabled = False + # torch.backends.cudnn.enabled = False ddp_args = ddp.filter_ddp_args(**kwargs) device, rank, world_size = ddp.ddp_init(gpu_id, **ddp_args) @@ -200,7 +209,7 @@ def make_parser(model_class): def main(): - parser = ArgumentParser(description="Train RNN Transducer model from audio files") + parser = ArgumentParser(description="Train Transducer model from audio files") parser.add_argument("--cfg", action=ActionConfigFile) subcommands = parser.add_subcommands() @@ -222,8 +231,8 @@ def main(): try: config_file = Path(args_sc.trainer.exp_path) / "config.yaml" parser.save(args, str(config_file), format="yaml", overwrite=True) - except: - pass + except Exception as err: + logging.warning(f"{err}") args_sc.model_class = model_dict[model_type] # torch docs recommend using forkserver diff --git a/hyperion/bin/train_wav2xvector.py b/hyperion/bin/train_wav2xvector.py index 2c4684c3..bb4a3913 100755 --- a/hyperion/bin/train_wav2xvector.py +++ b/hyperion/bin/train_wav2xvector.py @@ -192,7 +192,7 @@ def main(): config_file = Path(args_sc.trainer.exp_path) / "config.yaml" parser.save(args, str(config_file), format="yaml", overwrite=True) except: - pass + logging.warning(f"failed saving {args} to {config_file}") args_sc.xvec_class = xvec_dict[xvec_type] # torch docs recommend using forkserver diff --git a/hyperion/np/augment/speed_augment.py b/hyperion/np/augment/speed_augment.py index a648190d..95127084 100644 --- a/hyperion/np/augment/speed_augment.py +++ b/hyperion/np/augment/speed_augment.py @@ -96,11 +96,11 @@ def forward(self, x): # change speed r = self.speed_ratios[speed_idx] info = {"speed_ratio": r} - y = time_stretch(x, r) + y = time_stretch(x, rate=r) # print(f"1 r={r} {x.shape} {y.shape}", flush=True) if self.keep_length: if r > 1: - dither = np.max(x) / 2 ** 15 # we add some dither in the padding + dither = np.max(x) / 2**15 # we add some dither in the padding pad_y = dither * np.ones((x.shape[-1] - y.shape[-1],), dtype=y.dtype) y = np.concatenate((y, pad_y), axis=-1) elif r < 1: diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index c890627e..aa02661c 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -7,7 +7,6 @@ import math import numpy as np - import torch import torch.distributed as dist @@ -17,13 +16,15 @@ class BucketingSegSampler(HypSampler): - def __init__(self, - seg_set, - base_sampler=SegSampler, - num_buckets=10, - length_column="duration", - seed=1234, - **base_kwargs): + def __init__( + self, + seg_set, + base_sampler=SegSampler, + num_buckets=10, + length_column="duration", + 
seed=1234, + **base_kwargs + ): super().__init__(shuffle=False, seed=seed) self.seg_set = seg_set self.base_sampler = base_sampler @@ -33,13 +34,12 @@ def __init__(self, self.length_column = length_column self._create_bucket_samplers() self._compute_len() - self.depleted_buckets = torch.zeros((num_buckets, ), dtype=torch.bool) + self.depleted_buckets = torch.zeros((num_buckets,), dtype=torch.bool) def create_buckets(self): sort_idx = np.argsort(self.seg_set[self.length_column].values) sorted_seg_set = self.seg_set.iloc[sort_idx] - cum_lengths = np.cumsum(sorted_seg_set[self.length_column].values, - axis=0) + cum_lengths = np.cumsum(sorted_seg_set[self.length_column].values, axis=0) bucket_length = cum_lengths[-1] / self.num_buckets buckets = [] for i in range(self.num_buckets): @@ -67,9 +67,9 @@ def _compute_len(self): for i in range(self.num_buckets): self._len += len(self.bucket_samplers[i]) - def set_epoch(self, epoch): + def set_epoch(self, epoch, batch=0): for i in range(self.num_buckets): - self.bucket_samplers[i].set_epoch(epoch) + self.bucket_samplers[i].set_epoch(epoch, batch) def __iter__(self): super().__iter__() @@ -88,10 +88,9 @@ def __next__(self): raise StopIteration while True: - bucket_idx = torch.randint(low=0, - high=self.num_buckets, - size=(1, ), - generator=self.rng).item() + bucket_idx = torch.randint( + low=0, high=self.num_buckets, size=(1,), generator=self.rng + ).item() if self.depleted_buckets[bucket_idx]: continue diff --git a/hyperion/torch/data/dino_audio_dataset.py b/hyperion/torch/data/dino_audio_dataset.py index bb0a93a5..15eaca4b 100644 --- a/hyperion/torch/data/dino_audio_dataset.py +++ b/hyperion/torch/data/dino_audio_dataset.py @@ -236,12 +236,12 @@ def _split_audio_into_teacher_student_chunks(self, x, duration, fs): x_student, student_samples, ) = self._split_audio_into_teacher_student_disjoint(x, duration, fs) - assert ( - len(x_teacher) >= 64000 and len(x_teacher) <= 136000 - ), f"{len(x_teacher)}, {len(x_student)} {len(x)} {duration*fs}, {teacher_samples}, {student_samples}" - assert ( - len(x_student) >= 32000 and len(x_student) <= 136000 - ), f"{len(x_teacher)}, {len(x_student)}, {len(x)} {duration*fs}, {teacher_samples}, {student_samples}" + # assert ( + # len(x_teacher) >= 64000 and len(x_teacher) <= 136000 + # ), f"{len(x_teacher)}, {len(x_student)} {len(x)} {duration*fs}, {teacher_samples}, {student_samples}" + # assert ( + # len(x_student) >= 32000 and len(x_student) <= 136000 + # ), f"{len(x_teacher)}, {len(x_student)}, {len(x)} {duration*fs}, {teacher_samples}, {student_samples}" xs_teacher = self._split_audio_into_chunks( x_teacher, teacher_samples, @@ -254,14 +254,14 @@ def _split_audio_into_teacher_student_chunks(self, x, duration, fs): int(fs * self.student_chunk_length), self.num_student_chunks, ) - for xx in xs_teacher: - assert ( - len(xx) >= 64000 and len(xx) <= 72000 - ), f"{[len(t) for t in xs_teacher]} {len(x_teacher)} {len(x)}" - for xx in xs_student: - assert ( - len(xx) >= 32000 and len(xx) <= 40000 - ), f"{[len(t) for t in xs_student]} {len(x_student)} {len(x)}" + # for xx in xs_teacher: + # assert ( + # len(xx) >= 64000 and len(xx) <= 72000 + # ), f"{[len(t) for t in xs_teacher]} {len(x_teacher)} {len(x)}" + # for xx in xs_student: + # assert ( + # len(xx) >= 32000 and len(xx) <= 40000 + # ), f"{[len(t) for t in xs_student]} {len(x_student)} {len(x)}" return xs_teacher, xs_student @@ -284,16 +284,6 @@ def __getitem__(self, segment): ) data.update(x_augs_teacher) data.update(x_augs_student) - # print(data, flush=True) - # 
for ll in [ - # "x_teacher_0", - # "x_teacher_1", - # "x_student_0", - # "x_student_1", - # "x_student_2", - # "x_student_3", - # ]: - # print("zzz ", ll, data[ll].shape, flush=True) seg_info = self._get_segment_info(seg_id) data.update(seg_info) return data diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 39d1eed2..a280c87e 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -7,10 +7,10 @@ import math import numpy as np -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from ...utils.misc import filter_func_args from .hyp_sampler import HypSampler @@ -24,6 +24,7 @@ def __init__( length_name="duration", shuffle=False, drop_last=False, + sort_by_length=True, seed=1234, ): super().__init__(shuffle=shuffle, seed=seed) @@ -33,21 +34,20 @@ def __init__( self.max_batch_length = max_batch_length self.var_batch_size = max_batch_length is not None self.length_name = length_name + self.sort_by_length = sort_by_length if self.var_batch_size: - avg_batch_size = max_batch_length / np.mean( - self.seg_set[self.length_name]) + avg_batch_size = max_batch_length / np.mean(self.seg_set[self.length_name]) else: avg_batch_size = min_batch_size self.avg_batch_size = avg_batch_size if drop_last: - self._len = int( - len(self.seg_set) / (avg_batch_size * self.world_size)) + self._len = int(len(self.seg_set) / (avg_batch_size * self.world_size)) else: self._len = int( - math.ceil( - (len(self.seg_set) // self.world_size) / avg_batch_size)) + math.ceil((len(self.seg_set) // self.world_size) / avg_batch_size) + ) self._permutation = None @@ -55,8 +55,9 @@ def __len__(self): return self._len def _shuffle_segs(self): - self._permutation = torch.randperm(len(self.seg_set), - generator=self.rng).numpy() + self._permutation = torch.randperm( + len(self.seg_set), generator=self.rng + ).numpy() def __iter__(self): super().__iter__() @@ -82,37 +83,49 @@ def __next__(self): else: idx = self.start - max_length = max(max_length, self.seg_set.iloc[idx, - column_idx]) + max_length = max(max_length, self.seg_set.iloc[idx, column_idx]) if max_length * (batch_size + 1) > self.max_batch_length: break idxs.append(idx) self.start = (self.start + self.world_size) % len(self.seg_set) batch_size += 1 - if (self.max_batch_size is not None - and batch_size >= self.max_batch_size): + if ( + self.max_batch_size is not None + and batch_size >= self.max_batch_size + ): break - assert len( - idxs - ) >= 1, f"increase max_batch_length {self.max_batch_length} >= {max_length}" + assert ( + len(idxs) >= 1 + ), f"increase max_batch_length {self.max_batch_length} >= {max_length}" else: - stop = min(self.start + self.world_size * self.min_batch_size, - len(self.seg_set)) + stop = min( + self.start + self.world_size * self.min_batch_size, len(self.seg_set) + ) if self.shuffle: - idxs = self._permutation[self.start:stop:self.world_size] + idxs = self._permutation[self.start : stop : self.world_size] else: idxs = slice(self.start, stop, self.world_size) self.start += self.world_size * self.min_batch_size + ids = self.seg_set.iloc[idxs].id.values + if self.sort_by_length: + lengths = self.seg_set.loc[ids, self.length_name].values + sort_idx = np.argsort(lengths)[::-1] + ids = ids[sort_idx] + if "chunk_start" in self.seg_set: - chunks = self.seg_set.iloc[idxs] - seg_ids = [(id, s, d) for id, s, d in zip( - chunks.seg_id, chunks.chunk_start, chunks[self.length_name])] + chunks 
= self.seg_set.loc[ids] + seg_ids = [ + (id, s, d) + for id, s, d in zip( + chunks.seg_id, chunks.chunk_start, chunks[self.length_name] + ) + ] else: - seg_ids = self.seg_set.iloc[idxs].id.values + seg_ids = ids if self.batch == 0: logging.info("batch 0 seg_ids=%s", str(seg_ids[:10])) @@ -122,18 +135,19 @@ def __next__(self): @staticmethod def filter_args(**kwargs): + return filter_func_args(SegSampler.__init__, kwargs, skip={"seg_set"}) - valid_args = ( - "min_batch_size", - "max_batch_size", - "max_batch_length", - "length_name", - "shuffle", - "drop_last", - "seed", - ) + # valid_args = ( + # "min_batch_size", + # "max_batch_size", + # "max_batch_length", + # "length_name", + # "shuffle", + # "drop_last", + # "seed", + # ) - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + # return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod def add_class_args(parser, prefix=None): @@ -151,31 +165,28 @@ def add_class_args(parser, prefix=None): "--max-batch-size", type=int, default=None, - help= - ("maximum batch size per gpu, if None, estimated from max_batch_length" - ), + help=( + "maximum batch size per gpu, if None, estimated from max_batch_length" + ), ) parser.add_argument( "--max-batch-duration", type=float, default=None, - help= - ("maximum accumlated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths" - ), + help=( + "maximum accumulated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths" + ), ) parser.add_argument( - "--drop-last", - action=ActionYesNo, - help="drops the last batch of the epoch", + "--drop-last", action=ActionYesNo, help="drops the last batch of the epoch", ) parser.add_argument( "--shuffle", action=ActionYesNo, - help= - "shuffles the segments or chunks at the beginning of the epoch", + help="shuffles the segments or chunks at the beginning of the epoch", ) parser.add_argument( @@ -188,9 +199,7 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--length-name", default="duration", - help= - "which column in the segment table indicates the duration of the file", + help="which column in the segment table indicates the duration of the file", ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/data/seg_sampler_factory.py b/hyperion/torch/data/seg_sampler_factory.py index 35973f50..8f6501b5 100644 --- a/hyperion/torch/data/seg_sampler_factory.py +++ b/hyperion/torch/data/seg_sampler_factory.py @@ -2,6 +2,7 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging from typing import Optional, Union @@ -9,15 +10,13 @@ from .audio_dataset import AudioDataset from .bucketing_seg_sampler import BucketingSegSampler -from .class_weighted_seg_chunk_sampler import \ - ClassWeightedRandomSegChunkSampler +from .class_weighted_seg_chunk_sampler import ClassWeightedRandomSegChunkSampler from .feat_seq_dataset import FeatSeqDataset from .seg_chunk_sampler import SegChunkSampler from .seg_sampler import SegSampler sampler_dict = { - "class_weighted_random_seg_chunk_sampler": - ClassWeightedRandomSegChunkSampler, + "class_weighted_random_seg_chunk_sampler": ClassWeightedRandomSegChunkSampler, "seg_sampler": SegSampler, "seg_chunk_sampler": SegChunkSampler, "bucketing_seg_sampler": BucketingSegSampler, @@ -28,6 +27,7 @@ class SegSamplerFactory(object): """Factory class to create different types of samplers for sequencial data like audio or acoustic features. """ + @staticmethod def create( dataset: Union[AudioDataset, FeatSeqDataset], @@ -91,6 +91,7 @@ def filter_args(**kwargs): "batch_size", "shuffle", "drop_last", + "sort_by_length", "seed", ) @@ -113,8 +114,7 @@ def add_class_args(parser, prefix=None): "--base-sampler-type", choices=["seg_sampler", "bucketing_seg_sampler"], default="seg_sampler", - help= - "base sampler used for seg_chunk_sampler or bucketing_seg_sampler", + help="base sampler used for seg_chunk_sampler or bucketing_seg_sampler", ) parser.add_argument( @@ -141,9 +141,9 @@ def add_class_args(parser, prefix=None): "--max-batch-size", type=int, default=None, - help= - ("maximum batch size per gpu, if None, estimated from max_batch_length" - ), + help=( + "maximum batch size per gpu, if None, estimated from max_batch_length" + ), ) parser.add_argument( @@ -157,9 +157,9 @@ def add_class_args(parser, prefix=None): "--max-batch-length", type=float, default=None, - help= - ("maximum accumlated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths" - ), + help=( + "maximum accumulated duration of the batch, if None estimated from the min/max_batch_size and min/max_chunk_lengths" + ), ) parser.add_argument( @@ -225,8 +225,7 @@ def add_class_args(parser, prefix=None): "--shuffle", action=ActionYesNo, - help= - "shuffles the segments or chunks at the beginning of the epoch", + help="shuffles the segments or chunks at the beginning of the epoch", ) parser.add_argument( "--seed", @@ -238,16 +237,19 @@ def add_class_args(parser, prefix=None): parser.add_argument( "--length-name", default="duration", - help= - "which column in the segment table indicates the duration of the segment", + help="which column in the segment table indicates the duration of the segment", ) parser.add_argument( "--class-name", default="class_id", - help= - "which column in the segment table indicates the class of the segment", + help="which column in the segment table indicates the class of the segment", + ) + parser.add_argument( + "--sort-by-length", + default=True, + action=ActionYesNo, + help="sort sequences in the batch by duration", ) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/layer_blocks/transformer_input.py b/hyperion/torch/layer_blocks/transformer_input.py index 6c5de188..e55071b9 100644 --- a/hyperion/torch/layer_blocks/transformer_input.py +++ b/hyperion/torch/layer_blocks/transformer_input.py @@ -2,6 +2,7 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import math import torch @@ -80,7 +81,7 @@ def forward(self, x, x_mask=None): if x_mask is None: return x, None - return x, x_mask[:, :, :: self.stride] + return x, x_mask[..., :: self.stride] class TransformerConv1dSubsampler(nn.Module): diff --git a/hyperion/torch/layers/mvn.py b/hyperion/torch/layers/mvn.py index a46ce20d..5a92e89a 100644 --- a/hyperion/torch/layers/mvn.py +++ b/hyperion/torch/layers/mvn.py @@ -2,6 +2,7 @@ Copyright 2020 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import torch import torch.nn as nn from jsonargparse import ActionParser, ActionYesNo, ArgumentParser 
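The mvn.py hunks above and below add length-aware normalization to MeanVarianceNorm: the sequence lengths are turned into a boolean mask of valid frames, and the statistics are computed over those frames only, which is why the masking helper called in the second hunk now receives ndim=x.dim(). A minimal sketch of the underlying idea, illustrative only and not the hyperion API, assuming inputs of shape (batch, time, feats):

    import torch

    def masked_mean_norm(x: torch.Tensor, x_lengths: torch.Tensor) -> torch.Tensor:
        # x: (batch, time, feats); x_lengths: (batch,) valid frames per sequence
        time_idx = torch.arange(x.size(1), device=x.device)
        # mask: (batch, time, 1), True on valid frames, False on padding
        mask = (time_idx.unsqueeze(0) < x_lengths.unsqueeze(1)).unsqueeze(-1)
        num_valid = x_lengths.view(-1, 1, 1).to(x.dtype)
        # per-sequence mean over valid frames; padded frames contribute zero
        mean = (x * mask).sum(dim=1, keepdim=True) / num_valid
        # subtract the mean and keep the padding at zero
        return (x - mean) * mask

Without the mask, the padded frames of shorter sequences would bias the mean toward zero, so normalization would depend on how a sequence happened to be batched.
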
@@ -53,6 +54,8 @@ def forward(self, x, x_lengths=None, x_mask=None): Args: x: feature tensor. + x_lengths: lengths of x sequences + x_mask: mask of valid frames, if present, x_lengths is ignored. Returns: Normalized feature tensor. @@ -69,6 +72,7 @@ def forward(self, x, x_lengths=None, x_mask=None): x_lengths, max_length, dtype=x.dtype, + ndim=x.dim(), none_if_all_max=True, ) diff --git a/hyperion/torch/layers/spec_augment.py b/hyperion/torch/layers/spec_augment.py index f4e03842..9ef71f5f 100644 --- a/hyperion/torch/layers/spec_augment.py +++ b/hyperion/torch/layers/spec_augment.py @@ -2,13 +2,15 @@ Copyright 2021 Johns Hopkins University (Author: Jesus Villalba, Nanxin Chen) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -import logging -from jsonargparse import ActionParser, ArgumentParser +import logging import torch import torch.nn as nn import torch.nn.functional as nnf +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser + +from ...utils.misc import filter_func_args count = 0 @@ -22,8 +24,9 @@ class AxisMasker(nn.Module): max_width: maximum width of the mask. min_num_mask: minimum number of masks. max_num_mask: maximum number of masks. - dim: axis where we apply the mask - mask_value: masking value + dim: axis where we apply the mask. + mask_value: masking value. + use_num_masks_percentage: if True, num_masks are per 100 frames, if False they are absolute. """ def __init__( @@ -35,6 +38,7 @@ def __init__( dim=-1, mask_method="constant", mask_value=0, + use_num_masks_percentage=False, ): super().__init__() assert min_width >= 0 @@ -44,17 +48,22 @@ def __init__( self.min_width = min_width self.max_width = max_width + if not use_num_masks_percentage: + min_num_masks = int(min_num_masks) + max_num_masks = int(max_num_masks) + self.min_num_masks = min_num_masks self.max_num_masks = max_num_masks self.dim = dim self.mask_method = mask_method self.mask_value = mask_value + self.use_num_masks_percentage = use_num_masks_percentage def __repr__(self): s = ( "{}(min_width={}, max_width={}, " "min_num_masks={}, max_num_masks={}, " - "dim={}, mask_method={}, mask_value={})" + "dim={}, mask_method={}, mask_value={}, use_num_masks_percentage={})" ).format( self.__class__.__name__, self.min_width, @@ -64,6 +73,7 @@ def __repr__(self): self.dim, self.mask_method, self.mask_value, + self.use_num_masks_percentage, ) return s @@ -86,9 +96,16 @@ def forward(self, x): batch_size = x.shape[0] masked_dim_length = x.shape[self.dim] + if self.use_num_masks_percentage: + min_num_masks = int(round(self.min_num_masks * masked_dim_length / 100)) + max_num_masks = int(round(self.max_num_masks * masked_dim_length / 100)) + else: + min_num_masks = self.min_num_masks + max_num_masks = self.max_num_masks + # select how many masks num_masks = torch.randint( - self.min_num_masks, self.max_num_masks + 1, size=(1,), device=x.device + min_num_masks, max_num_masks + 1, size=(1,), device=x.device )[0] # (batch, num_mask, 1) widths = torch.randint( @@ -156,7 +173,7 @@ def forward(self, x, x_lengths=None): Args: x: spectrogram shape= (batch, *, time, freq) - lengths: time lengths of the sequences. + x_lengths: time lengths of the sequences. 
Returns: warped spectrogram shape = (batch, *, time, freq) """ @@ -184,11 +201,10 @@ def forward(self, x, x_lengths=None): if dim == -1 or x_lengths is None: warp_length = x.shape[-2] else: - warp_length = int(x.shape[-2] * torch.min(x_lengths)) + warp_length = int(torch.min(x_lengths)) center = torch.randint(self.window, warp_length - self.window, (1,))[0] warped = torch.randint(center - self.window, center + self.window, (1,))[0] + 1 - # (batch, C, warped, freq) left = nnf.interpolate( x[:, :, :center], (warped, x.shape[3]), mode=self.mode, align_corners=False @@ -210,6 +226,9 @@ def forward(self, x, x_lengths=None): if dim == -1: x = x.transpose(-1, -2) + if ndim == 3: + x = x.squeeze(1) + x = x.view(in_shape) return x @@ -231,6 +250,7 @@ class SpecAugment(nn.Module): time_max_width: maximum width of the time mask. time_min_num_mask: minimum number of time masks. time_max_num_mask: maximum number of time masks. + time_use_num_masks_percentage: if True, num_masks are per 100 frames, if False they are absolute. freq_mask_prob: probability of applying frequency masking. freq_min_width: minimum width of the frequency mask. freq_max_width: maximum width of the frequency mask. @@ -249,6 +269,7 @@ def __init__( time_mask_max_width=100, time_mask_min_num_masks=1, time_mask_max_num_masks=2, + time_use_num_masks_percentage=False, freq_mask_prob=0, freq_mask_min_width=0, freq_mask_max_width=20, @@ -287,6 +308,7 @@ def __init__( dim=-2, mask_method=mask_method, mask_value=mask_value, + use_num_masks_percentage=time_use_num_masks_percentage, ) if self.freq_mask_prob > 0: @@ -368,26 +390,7 @@ def filter_args(**kwargs): Returns: Dictionary with SpecAugment options. """ - valid_args = ( - "time_warp_prob", - "time_warp_window", - "time_warp_mode", - "time_mask_prob", - "time_mask_max_width", - "time_mask_min_width", - "time_mask_max_num_masks", - "time_mask_min_num_masks", - "freq_mask_prob", - "freq_mask_max_width", - "freq_mask_min_width", - "freq_mask_max_num_masks", - "freq_mask_min_num_masks", - "mask_value", - "mask_method", - ) - - d = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - return d + return filter_func_args(SpecAugment.__init__, kwargs) @staticmethod def add_class_args(parser, prefix=None): @@ -437,16 +440,22 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( "--time-mask-min-num-masks", - type=int, + type=float, default=1, help="min. number of time mask", ) parser.add_argument( "--time-mask-max-num-masks", - type=int, + type=float, default=2, help="max. 
number of time mask", ) + parser.add_argument( + "--time-use-num-masks-percentage", + default=False, + action=ActionYesNo, + help="if True, num_masks are per 100 frames, if False they are absolute.", + ) parser.add_argument( "--freq-mask-prob", diff --git a/hyperion/torch/models/__init__.py b/hyperion/torch/models/__init__.py index 7292dbad..77a2543f 100644 --- a/hyperion/torch/models/__init__.py +++ b/hyperion/torch/models/__init__.py @@ -7,11 +7,12 @@ from .transducer import RNNRNNTransducer, RNNTransducer from .vae.vae import VAE from .vae.vq_vae import VQVAE -from .wav2transducer import ( +from .wav2transducer import ( # HFWav2Vec2Transducer, HFWav2Vec2ConformerV1RNNTransducer, HFWav2Vec2RNNRNNTransducer, HFWav2Vec2RNNTransducer, - HFWav2Vec2Transducer, + Wav2ConformerV1RNNTransducer, + Wav2RNNRNNTransducer, ) from .wav2xvectors import ( HFHubert2ConformerV1XVector, diff --git a/hyperion/torch/models/transducer/__init__.py b/hyperion/torch/models/transducer/__init__.py index 984e15ec..331e3ef0 100644 --- a/hyperion/torch/models/transducer/__init__.py +++ b/hyperion/torch/models/transducer/__init__.py @@ -7,8 +7,9 @@ from .conformer_v1_rnn_transducer import ConformerV1RNNTransducer from .rnn_rnn_transducer import RNNRNNTransducer from .rnn_transducer import RNNTransducer, RNNTransducerOutput -from .transducer import Transducer -#from .conformer import Conformer -#from .decoder import Decoder -#from .joiner import Joiner +# from .transducer import Transducer + +# from .conformer import Conformer +# from .decoder import Decoder +# from .joiner import Joiner diff --git a/hyperion/torch/models/transducer/rnn_transducer.py b/hyperion/torch/models/transducer/rnn_transducer.py index b8e7fe74..a9fa5830 100644 --- a/hyperion/torch/models/transducer/rnn_transducer.py +++ b/hyperion/torch/models/transducer/rnn_transducer.py @@ -44,6 +44,8 @@ def __init__( self, encoder: Union[TorchModel, None], decoder: Union[Dict, RNNTransducerDecoder], + ctc_weight: float = 0.0, + rnnt_weight: float = 1.0, ): super().__init__() if encoder is not None: diff --git a/hyperion/torch/models/transducer/subsampling.py b/hyperion/torch/models/transducer/subsampling0.py similarity index 100% rename from hyperion/torch/models/transducer/subsampling.py rename to hyperion/torch/models/transducer/subsampling0.py diff --git a/hyperion/torch/models/transducer/transducer.py b/hyperion/torch/models/transducer/transducer0.py similarity index 100% rename from hyperion/torch/models/transducer/transducer.py rename to hyperion/torch/models/transducer/transducer0.py diff --git a/hyperion/torch/models/transducer/transformer.py b/hyperion/torch/models/transducer/transformer0.py similarity index 98% rename from hyperion/torch/models/transducer/transformer.py rename to hyperion/torch/models/transducer/transformer0.py index a354b5f5..0beb405f 100644 --- a/hyperion/torch/models/transducer/transformer.py +++ b/hyperion/torch/models/transducer/transformer0.py @@ -20,10 +20,11 @@ import torch import torch.nn as nn + from hyperion.utils.text import make_pad_mask from .encoder_interface import EncoderInterface -from .subsampling import Conv2dSubsampling, VggSubsampling +from .subsampling0 import Conv2dSubsampling, VggSubsampling class Transformer(EncoderInterface): @@ -250,9 +251,7 @@ def _get_activation_fn(activation: str): elif activation == "gelu": return nn.functional.gelu - raise RuntimeError( - "activation should be relu/gelu, not {}".format(activation) - ) + raise RuntimeError("activation should be relu/gelu, not {}".format(activation)) 
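To make the new percentage semantics above concrete: with --time-use-num-masks-percentage, the min/max mask counts are rates per 100 frames that AxisMasker converts to absolute counts at run time. A standalone re-statement of that conversion (a hypothetical helper, not part of the patch):

def mask_count_range(min_per_100, max_per_100, num_frames):
    # mirrors AxisMasker.forward: per-100-frame rates -> absolute counts
    min_masks = int(round(min_per_100 * num_frames / 100))
    max_masks = int(round(max_per_100 * num_frames / 100))
    return min_masks, max_masks

print(mask_count_range(0.5, 1.0, 400))  # (2, 4): draw between 2 and 4 time masks for 400 frames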
class PositionalEncoding(nn.Module): diff --git a/hyperion/torch/models/wav2transducer/__init__.py b/hyperion/torch/models/wav2transducer/__init__.py index 71e82b98..e57b36ff 100644 --- a/hyperion/torch/models/wav2transducer/__init__.py +++ b/hyperion/torch/models/wav2transducer/__init__.py @@ -4,8 +4,9 @@ """ -from .hf_wav2vec2_transducer import HFWav2Vec2Transducer -from .hf_wav2vec2conformer_v1_rnn_transducer import \ - HFWav2Vec2ConformerV1RNNTransducer +# from .hf_wav2vec2_transducer import HFWav2Vec2Transducer +from .hf_wav2vec2conformer_v1_rnn_transducer import HFWav2Vec2ConformerV1RNNTransducer from .hf_wav2vec2rnn_rnn_transducer import HFWav2Vec2RNNRNNTransducer from .hf_wav2vec2rnn_transducer import HFWav2Vec2RNNTransducer +from .wav2conformer_v1_rnn_transducer import Wav2ConformerV1RNNTransducer +from .wav2rnn_rnn_transducer import Wav2RNNRNNTransducer diff --git a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py index 1d16675c..c4f65ba6 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2rnn_transducer.py @@ -2,15 +2,15 @@ Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import contextlib import logging from dataclasses import dataclass from typing import Dict, List, Union -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...torch_model import TorchModel from ...utils import remove_silence @@ -18,7 +18,7 @@ class HFWav2RNNTransducer(TorchModel): - """Abstract Base class for x-vector models that use a Hugging Face Model as feature extractor. + """Abstract Base class for RNN-T transducer models that use a Hugging Face Model as feature extractor. Attributes: hf_feats: hugging face model wrapper object. @@ -29,11 +29,13 @@ class HFWav2RNNTransducer(TorchModel): than one layer is used. """ - def __init__(self, - hf_feats: TorchModel, - transducer: Union[Dict, TorchModel], - feat_fusion_start: int = 0, - feat_fusion_method: str = "weighted-avg"): + def __init__( + self, + hf_feats: TorchModel, + transducer: Union[Dict, TorchModel], + feat_fusion_start: int = 0, + feat_fusion_method: str = "weighted-avg", + ): super().__init__() self.hf_feats = hf_feats @@ -66,12 +68,9 @@ def _make_fuser(self): self.feat_fuser = nn.Parameter(torch.zeros(num_layers)) elif self.feat_fusion_method == "linear": self.feat_fuser = nn.Linear(num_layers, 1, bias=False) - self.feat_fuser.weight.data = torch.ones(1, - num_layers) / num_layers + self.feat_fuser.weight.data = torch.ones(1, num_layers) / num_layers elif self.feat_fusion_method == "cat": - self.feat_fuser = nn.Linear(num_layers * layer_dim, - layer_dim, - bias=False) + self.feat_fuser = nn.Linear(num_layers * layer_dim, layer_dim, bias=False) def _fuse_hid_feats(self, hid_feats): """Fuses the hidden features from the Wav2Vec model. 
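For intuition, the "weighted-avg" fusion set up in _make_fuser above reduces to a softmax-weighted sum over the wav2vec hidden layers; a self-contained sketch (layer count and shapes invented for illustration):

import torch
import torch.nn as nn

num_layers, batch, time, dim = 13, 2, 50, 768
hid_feats = [torch.randn(batch, time, dim) for _ in range(num_layers)]

feat_fuser = nn.Parameter(torch.zeros(num_layers))  # one learnable weight per layer
stacked = torch.stack(hid_feats, dim=-1)            # (batch, time, dim, num_layers)
norm_weights = torch.softmax(feat_fuser, dim=-1)    # uniform at init, learned during training
fused = torch.sum(stacked * norm_weights, dim=-1)   # (batch, time, dim)
assert fused.shape == (batch, time, dim)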
@@ -86,7 +85,7 @@ def _fuse_hid_feats(self, hid_feats): # There is only one layer of features return hid_feats[0] - hid_feats = hid_feats[self.feat_fusion_start:] + hid_feats = hid_feats[self.feat_fusion_start :] if self.feat_fusion_method == "weighted-avg": hid_feats = torch.stack(hid_feats, dim=-1) norm_weights = nn.functional.softmax(self.feat_fuser, dim=-1) @@ -102,14 +101,14 @@ def _fuse_hid_feats(self, hid_feats): return feats - def forward_feats(self, - x, - x_lengths, - return_feat_layers=None, - chunk_length=0, - detach_chunks=False): - return_hid_states = (False if return_feat_layers is None - and self.feat_fusion_method == "last" else True) + def forward_feats( + self, x, x_lengths, return_feat_layers=None, chunk_length=0, detach_chunks=False + ): + return_hid_states = ( + False + if return_feat_layers is None and self.feat_fusion_method == "last" + else True + ) with self._hf_context: hf_output = self.hf_feats( x, @@ -131,7 +130,8 @@ def forward_feats(self, # add hidden feats from wav2vec to the output. We transpose to be (batch, C, time) # as the hidden features of the x-vector encoder. hid_feats = [ - f.transpose(1, 2) for i, f in enumerate(hid_feats) + f.transpose(1, 2) + for i, f in enumerate(hid_feats) if i in return_feat_layers ] else: @@ -167,7 +167,8 @@ def forward( "h_feats" (wav2vec features) """ feats, hid_feats, feat_lengths = self.forward_feats( - x, x_lengths, return_feat_layers) + x, x_lengths, return_feat_layers + ) feats = feats.permute(0, 2, 1) # (N, C, T) ->(N, T, C) output = self.transducer( @@ -181,13 +182,15 @@ def forward( return output - def infer(self, - x: torch.Tensor, - x_lengths: torch.Tensor, - decoding_method="time_sync_beam_search", - beam_width: int = 5, - max_sym_per_frame: int = 3, - max_sym_per_utt: int = 1000): + def infer( + self, + x: torch.Tensor, + x_lengths: torch.Tensor, + decoding_method="time_sync_beam_search", + beam_width: int = 5, + max_sym_per_frame: int = 3, + max_sym_per_utt: int = 1000, + ): """ ASR tokens inference Args: @@ -204,12 +207,14 @@ def infer(self, feats = feats.permute(0, 2, 1) # (N, C, T) ->(N, T, C) - y = self.transducer.infer(feats, - feat_lengths, - decoding_method=decoding_method, - beam_width=beam_width, - max_sym_per_frame=max_sym_per_frame, - max_sym_per_utt=max_sym_per_utt) + y = self.transducer.infer( + feats, + feat_lengths, + decoding_method=decoding_method, + beam_width=beam_width, + max_sym_per_frame=max_sym_per_frame, + max_sym_per_utt=max_sym_per_utt, + ) return y def freeze_feat_fuser(self): @@ -265,11 +270,11 @@ def _train(self, train_mode: str): if train_mode in ["full", "frozen"]: super()._train(train_mode) elif train_mode in [ - "ft-transducer", - "hf-feats-frozen", - "ft-transducer-nograd", - "hf-feats-frozen-nograd", - "hf-feat-extractor-frozen", + "ft-transducer", + "hf-feats-frozen", + "ft-transducer-nograd", + "hf-feats-frozen-nograd", + "hf-feat-extractor-frozen", ]: self.hf_feats.train() self.transducer._train("full") @@ -340,8 +345,10 @@ def add_class_args(parser, prefix=None, skip=set()): "--feat-fusion-method", default="weighted-avg", choices=["weighted-avg", "linear", "cat", "last"], - help=("method to fuse the hidden layers from the wav2vec model " - "in [weighted-avg, linear, cat, last]"), + help=( + "method to fuse the hidden layers from the wav2vec model " + "in [weighted-avg, linear, cat, last]" + ), ) if prefix is not None: @@ -359,8 +366,7 @@ def add_infer_args(parser, prefix=None): RNNTransducer.add_infer_args(parser) if prefix is not None: - outer_parser.add_argument("--" + 
prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) @staticmethod def filter_infer_args(**kwargs): diff --git a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py index fe82f734..dac8c776 100644 --- a/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/hf_wav2vec2rnn_transducer.py @@ -2,13 +2,13 @@ Copyright 2022 Johns Hopkins University (Author: Yen-Ju Lu) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging from typing import Dict, Optional, Union -from jsonargparse import ActionParser, ArgumentParser - import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...tpm import HFWav2Vec2 from ..transducer import RNNTransducer @@ -44,19 +44,7 @@ def __init__( else: assert isinstance(hf_feats, HFWav2Vec2) - # if isinstance(transducer, dict): - # transducer["decoder"]["in_feats"] = hf_feats.hidden_size - # transducer["joiner"]["in_feats"] = hf_feats.hidden_size - # if "class_name" in transducer: - # del transducer["class_name"] - # transducer = Transducer(**transducer) - # else: - # assert isinstance(transducer, Transducer) - # assert transducer.decoder.in_feats == hf_feats.hidden_size - # assert transducer.joiner.in_feats == hf_feats.hidden_size - - super().__init__(hf_feats, transducer, feat_fusion_start, - feat_fusion_method) + super().__init__(hf_feats, transducer, feat_fusion_start, feat_fusion_method) @staticmethod def filter_args(**kwargs): @@ -78,8 +66,7 @@ def add_class_args(parser, prefix=None): HFWav2RNNTransducer.add_class_args(parser) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) @staticmethod def filter_finetune_args(**kwargs): @@ -100,5 +87,4 @@ def add_finetune_args(parser, prefix=None): RNNTransducer.add_finetune_args(parser, prefix="transducer") if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2transducer/wav2conformer_v1_rnn_transducer.py b/hyperion/torch/models/wav2transducer/wav2conformer_v1_rnn_transducer.py new file mode 100644 index 00000000..330aea3b --- /dev/null +++ b/hyperion/torch/models/wav2transducer/wav2conformer_v1_rnn_transducer.py @@ -0,0 +1,73 @@ +""" + Copyright 2024 Johns Hopkins University (Author: Yen-Ju Lu) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +from typing import Dict, Optional, Union + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser + +from ...tpm import HFWav2Vec2 +from ..transducer import ConformerV1RNNTransducer +from .wav2rnn_transducer import Wav2RNNTransducer + + +class Wav2ConformerV1RNNTransducer(Wav2RNNTransducer): + """Class for RNN-T with ConformerV1 Encoder and acoustic feature input + + Attributes: + feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. + transducer: Transducer configuration dictionary or object. + """ + + def __init__( + self, + feats: Union[Dict, HFWav2Vec2], + transducer: Union[Dict, ConformerV1RNNTransducer], + ): + + if isinstance(transducer, dict): + if "class_name" in transducer: + del transducer["class_name"] + + transducer = ConformerV1RNNTransducer(**transducer) + else: + assert isinstance(transducer, ConformerV1RNNTransducer) + + super().__init__(feats, transducer) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + Wav2RNNTransducer.add_class_args(parser) + ConformerV1RNNTransducer.add_class_args(parser, prefix="transducer") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = ConformerV1RNNTransducer.filter_finetune_args( + **kwargs["transducer"] + ) + base_args["transducer"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + ConformerV1RNNTransducer.add_finetune_args(parser, prefix="transducer") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2transducer/wav2rnn_rnn_transducer.py b/hyperion/torch/models/wav2transducer/wav2rnn_rnn_transducer.py new file mode 100644 index 00000000..25890d78 --- /dev/null +++ b/hyperion/torch/models/wav2transducer/wav2rnn_rnn_transducer.py @@ -0,0 +1,71 @@ +""" + Copyright 2024 Johns Hopkins University (Author: Yen-Ju Lu) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +import logging +from typing import Dict, Optional, Union + +import torch +import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser + +from ...tpm import HFWav2Vec2 +from ..transducer import RNNRNNTransducer +from .wav2rnn_transducer import Wav2RNNTransducer + + +class Wav2RNNRNNTransducer(Wav2RNNTransducer): + """Class for RNN-T with LSTM encoder and acoustic feature input + + Attributes: + feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. + transducer: Transducer configuration dictionary or object. + """ + + def __init__( + self, + feats: Union[Dict, HFWav2Vec2], + transducer: Union[Dict, RNNRNNTransducer], + ): + + if isinstance(transducer, dict): + if "class_name" in transducer: + del transducer["class_name"] + + transducer = RNNRNNTransducer(**transducer) + else: + assert isinstance(transducer, RNNRNNTransducer) + + super().__init__(feats, transducer) + + @staticmethod + def add_class_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + Wav2RNNTransducer.add_class_args(parser) + RNNRNNTransducer.add_class_args(parser, prefix="transducer") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) + + @staticmethod + def filter_finetune_args(**kwargs): + base_args = {} + child_args = RNNRNNTransducer.filter_finetune_args(**kwargs["transducer"]) + base_args["transducer"] = child_args + return base_args + + @staticmethod + def add_finetune_args(parser, prefix=None): + if prefix is not None: + outer_parser = parser + parser = ArgumentParser(prog="") + + RNNRNNTransducer.add_finetune_args(parser, prefix="transducer") + + if prefix is not None: + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py b/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py index 4b2f235b..bce8e368 100644 --- a/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py +++ b/hyperion/torch/models/wav2transducer/wav2rnn_transducer.py @@ -2,13 +2,18 @@ Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging from typing import Dict, Optional, Tuple, Union -from jsonargparse import ActionParser, ArgumentParser +try: + import k2 +except ModuleNotFoundError: + from ...utils import dummy_k2 as k2 import torch import torch.nn as nn +from jsonargparse import ActionParser, ArgumentParser from ...narchs import AudioFeatsMVN from ...torch_model import TorchModel @@ -16,11 +21,12 @@ class Wav2RNNTransducer(TorchModel): - """Base class for models that integrate the acoustic feature extractor and and x-vector model that takes acoustic features as input. + """Base class for models that integrate the acoustic feature extractor and an + RNN-T Transducer that takes acoustic features as input. Attributes: feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. - xvector: x-vector model object.
+ transducer: RNN-T transducer model """ def __init__(self, feats, transducer): @@ -29,7 +35,7 @@ def __init__(self, feats, transducer): if isinstance(feats, dict): feats = AudioFeatsMVN.filter_args(**feats) - feats["trans"] = True + feats["trans"] = False feats = AudioFeatsMVN(**feats) else: assert isinstance(feats, AudioFeatsMVN) @@ -43,7 +49,7 @@ def forward( x_lengths: torch.Tensor, y: k2.RaggedTensor, vad_samples: Optional[torch.Tensor] = None, - vad_feats: Optional[torch.Tensor] = None + vad_feats: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: if vad_samples is not None: @@ -59,17 +65,17 @@ def set_train_mode(self, mode): def get_config(self): feat_cfg = self.feats.get_config() - xvector_cfg = self.xvector.get_config() + xvector_cfg = self.transducer.get_config() config = { "feats": feat_cfg, - "xvector": xvector_cfg, + "transducer": xvector_cfg, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) @staticmethod - def filter_args(*kwargs): + def filter_args(**kwargs): """Filters Wav2XVector class arguments from arguments dictionary. Args: @@ -80,7 +86,7 @@ def filter_args(*kwargs): """ valid_args = ( "feats", - "xvector", + "transducer", ) return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @@ -100,5 +106,4 @@ def add_class_args(parser, prefix=None): AudioFeatsMVN.add_class_args(parser, prefix="feats") if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py index aa01850f..5a8b14b8 100644 --- a/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py +++ b/hyperion/torch/models/wav2xvectors/wav2resnet1d_xvector.py @@ -19,7 +19,6 @@ class Wav2ResNet1dXVector(Wav2XVector): ResNet1dXVector extractor. Attributes: - Attributes: feats: feature extractor object of class AudioFeatsMVN or dictionary of options to instantiate AudioFeatsMVN object. xvector: ResNet1dXVector configuration dictionary or object. 
""" diff --git a/hyperion/torch/narchs/conformer_encoder_v1.py b/hyperion/torch/narchs/conformer_encoder_v1.py index ff36096b..72f50f82 100644 --- a/hyperion/torch/narchs/conformer_encoder_v1.py +++ b/hyperion/torch/narchs/conformer_encoder_v1.py @@ -317,7 +317,7 @@ def forward( Tensor with mask if return_mask is True """ max_in_length = x.size(self.in_time_dim) - x_mask = self._make_masks(x, x_lengths, x_mask) + x_mask = self._make_masks(max_in_length, x_lengths, x_mask) x, x_mask = self._forward_input(x, x_mask) if isinstance(x, tuple): x, pos_emb = x diff --git a/hyperion/torch/trainers/dvae_trainer.py b/hyperion/torch/trainers/dvae_trainer.py index 10bc2edc..6b391912 100644 --- a/hyperion/torch/trainers/dvae_trainer.py +++ b/hyperion/torch/trainers/dvae_trainer.py @@ -135,7 +135,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_target=target, return_x_mean=True) elbo = output["elbo"].mean() @@ -190,7 +190,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index 8bbdcb47..bb0df6b6 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -1051,7 +1051,7 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): help="use mixed precision training", ) parser.add_argument( - "--amp-dtype", default=AMPDType.FLOAT16, choices=AMPDType.choices() + "--amp-dtype", default=AMPDType.FLOAT16.value, choices=AMPDType.choices() ) parser.add_argument( "--cpu-offload", diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 808cce3e..3c52b8bf 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -8,6 +8,7 @@ from collections import OrderedDict as ODict import torch +import torch.cuda.amp as amp import torch.nn as nn import torchaudio from jsonargparse import ActionParser, ArgumentParser @@ -118,7 +119,7 @@ def train_epoch(self, data_loader): ) batch_size = input_data.shape[0] - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_lengths=input_lengths, y=target) loss = output.loss loss = loss.mean() / self.grad_acc_steps @@ -183,7 +184,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): # data, target = data.to(self.device), target.to(self.device) # batch_size = data.shape[0] - with self.amp_autocast(): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_lengths=input_lengths, y=target) for k, v in output.items(): diff --git a/hyperion/torch/trainers/vae_trainer.py b/hyperion/torch/trainers/vae_trainer.py index dbf5dfdd..27d485ff 100644 --- a/hyperion/torch/trainers/vae_trainer.py +++ b/hyperion/torch/trainers/vae_trainer.py @@ -136,7 +136,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, 
batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_target=target, return_x_mean=True) elbo = output["elbo"].mean() loss = -elbo / self.grad_acc_steps @@ -191,7 +191,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] diff --git a/hyperion/torch/trainers/vq_dvae_trainer.py b/hyperion/torch/trainers/vq_dvae_trainer.py index 43aa59a5..1488f5e5 100644 --- a/hyperion/torch/trainers/vq_dvae_trainer.py +++ b/hyperion/torch/trainers/vq_dvae_trainer.py @@ -101,7 +101,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_target=target, return_x_mean=True) loss = output["loss"] / self.grad_acc_steps x_hat = output["x_mean"] @@ -152,7 +152,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] diff --git a/hyperion/torch/trainers/vq_vae_trainer.py b/hyperion/torch/trainers/vq_vae_trainer.py index 64db2e64..2331a2b8 100644 --- a/hyperion/torch/trainers/vq_vae_trainer.py +++ b/hyperion/torch/trainers/vq_vae_trainer.py @@ -101,7 +101,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_target=target, return_x_mean=True) loss = output["loss"] x_hat = output["x_mean"] @@ -153,7 +153,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): for batch, data in enumerate(data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, x_target=target, return_x_mean=True) x_hat = output["x_mean"] diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py b/hyperion/torch/trainers/xvector_adv_trainer.py index 8603b22a..9d5a8bae 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -138,7 +138,7 @@ def train_epoch(self, data_loader): self.optimizer.zero_grad() - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(input_data, target) loss = self.loss(output.logits, target) / self.grad_acc_steps @@ -192,7 +192,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): self.model.train() with torch.no_grad(): - with amp.autocast(enabled=self.use_amp): + with 
amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(data, **self.amp_args) loss = self.loss(output.logits, target) diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index ccafecdd..f63c532b 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -143,7 +143,7 @@ def train_epoch(self, data_loader): with torch.no_grad(): feats = self.feat_extractor(input_data) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(feats, y=target) loss = self.loss(output.logits, target) / self.grad_acc_steps @@ -197,7 +197,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): with torch.no_grad(): feats = self.feat_extractor(input_data) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(feats) loss = self.loss(output.logits, target) diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 151993e0..15c5bd42 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -120,7 +120,7 @@ def train_epoch(self, data_loader): batch_keys = [aug_key, self.target_key] x, target = tensors_subset(data, batch_keys, self.device) batch_size = x.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(x, y=target) loss = self.loss(output.logits, target) / loss_scale loss_acc += loss.item() @@ -178,7 +178,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_keys = [aug_key, self.target_key] x, target = tensors_subset(data, batch_keys, self.device) batch_size = x.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(x) loss = self.loss(output.logits, target) / loss_scale loss_acc += loss.item() diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py index 1c9209f6..f230372c 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg.py @@ -136,7 +136,7 @@ def train_epoch(self, data_loader): input_data, target = tensors_subset(data, batch_keys, self.device) batch_size = input_data.size(0) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): outputs = self.model( input_data, y=target, diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 4b1d23ba..98c74af3 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -129,7 +129,7 @@ def train_epoch(self, data_loader): with torch.no_grad(): feats = self.feat_extractor(input_data) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): outputs = self.model( feats, y=target, @@ -229,7 +229,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_size = input_data.size(0) feats = self.feat_extractor(input_data) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): 
output = self.model(feats) loss = self.loss(output.logits, target) diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py b/hyperion/torch/trainers/xvector_trainer_from_wav.py index f46b2109..ada74bb6 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -117,7 +117,7 @@ def train_epoch(self, data_loader): with torch.no_grad(): feats, feats_lengths = self.feat_extractor(audio) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(feats, feats_lengths, y=target) loss = self.loss(output.logits, target) / self.grad_acc_steps @@ -171,7 +171,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_size = audio.size(0) feats, feats_lengths = self.feat_extractor(audio) - with amp.autocast(enabled=self.use_amp): + with amp.autocast(enabled=self.use_amp, dtype=self.amp_dtype): output = self.model(feats, feats_lengths) loss = self.loss(output.logits, target) diff --git a/hyperion/torch/utils/masking.py b/hyperion/torch/utils/masking.py index c7095b31..1a240976 100644 --- a/hyperion/torch/utils/masking.py +++ b/hyperion/torch/utils/masking.py @@ -21,7 +21,7 @@ def scale_seq_lengths(lengths, max_out_length, max_in_length=None): def seq_lengths_to_mask( - lengths, max_length=None, dtype=None, time_dim=1, none_if_all_max=False + lengths, max_length=None, dtype=None, time_dim=1, ndim=None, none_if_all_max=False ): """Creates a binary masks indicating the valid values in a sequence. @@ -33,9 +33,11 @@ def seq_lengths_to_mask( return a view of the mask which will adapt to the shape of the tensor where we want to apply the mask. This has to be a positive integer. + ndim: number of dimensions in the mask tensor, if None, it is equal to time_dim + 1. + none_if_all_max: if True and all lengths are equal to max. length, it returns None Returns: - Binary mask with shape=(batch,...,max_length) or None + Binary mask with shape=(batch,...,max_length,...) 
or None """ if lengths is None: return None @@ -54,9 +56,12 @@ def seq_lengths_to_mask( # compute mask shape=(batch, max_length) mask = idx.unsqueeze(0) < lengths.unsqueeze(1) + if ndim is None: + ndim = time_dim + 1 + # view to match the tensor where we want to apply the mask - if time_dim > 1: - shape = [1] * (time_dim + 1) + if ndim > 1: + shape = [1] * ndim shape[0] = lengths.size(0) shape[time_dim] = -1 mask = mask.view(*shape) diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py index 4d4dd55a..3cb03659 100644 --- a/hyperion/utils/class_info.py +++ b/hyperion/utils/class_info.py @@ -2,6 +2,8 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + +import logging from pathlib import Path import numpy as np @@ -92,13 +94,19 @@ def cat(cls, tables): """ df_list = [table.df for table in tables] df = pd.concat(df_list) - assert df["id"].is_unique, """there are duplicated ids in original tables""" + if not df["id"].is_unique: + logging.warning( + """there are duplicated ids in original tables, + removing duplicated rows""" + ) + df.drop_duplicates(subset="id", keep="first", inplace=True) + if not df["class_idx"].is_unique: logging.warning( """class_idx in concat tables are not unique, we will assign new class_idx""" ) - df["class_idx"].drop(columns=["class_idx"], inplace=True) + df.drop(columns=["class_idx"], inplace=True) return cls(df) def filter( diff --git a/hyperion/utils/hyp_dataset.py b/hyperion/utils/hyp_dataset.py index dbf268da..ba137b65 100644 --- a/hyperion/utils/hyp_dataset.py +++ b/hyperion/utils/hyp_dataset.py @@ -67,14 +67,14 @@ def __init__( self._segments_path = Path(segments) self._classes, self._classes_paths = self._parse_dict_args(classes, ClassInfo) - - if isinstance(recordings, RecordingSet): - self._recordings = recordings - self._recordings_path = None - else: - assert isinstance(recordings, (str, Path)) - self._recordings = None - self._recordings_path = Path(recordings) + if recordings is not None: + if isinstance(recordings, RecordingSet): + self._recordings = recordings + self._recordings_path = None + else: + assert isinstance(recordings, (str, Path)) + self._recordings = None + self._recordings_path = Path(recordings) # self._recordings, self._recordings_paths = self._parse_dict_args( # recordings, RecordingSet @@ -183,8 +183,8 @@ def recordings(self, keep_loaded: bool = True): def features_keys(self): if self._features is not None: return self._features.keys() - elif self._features_path is not None: - return self._features_path.keys() + elif self._features_paths is not None: + return self._features_paths.keys() else: return {} @@ -857,7 +857,7 @@ def add_cols_to_segments( elif right_table in self.features_keys(): right_table = self.features_value(right_table) elif right_table in self.classes_keys(): - right_table = self.classes_value + right_table = self.classes_value(right_table) else: raise ValueError("%s not found", right_table) @@ -1254,6 +1254,64 @@ def split_train_val( return train_ds, val_ds + @classmethod + def merge(cls, datasets): + segments = [] + for dset in datasets: + segs_dset = dset.segments(keep_loaded=False) + if segs_dset is not None: + segments.append(segs_dset) + + segments = SegmentSet.cat(segments) + dataset = cls(segments) + + classes_keys = [] + for dset in datasets: + classes_dset = list(dset.classes_keys()) + classes_keys.extend(classes_dset) + + classes_keys = list(set(classes_keys)) + for key in classes_keys: + classes = [] + for dset 
in datasets: + if key in dset.classes_keys(): + classes_key = dset.classes_value(key, keep_loaded=False) + classes.append(classes_key) + + classes = ClassInfo.cat(classes) + dataset.add_classes(classes_name=key, classes=classes) + + recordings = [] + for dset in datasets: + recs_i = dset.recordings(keep_loaded=False) + if recs_i is not None: + recordings.append(recs_i) + + if recordings: + recordings = RecordingSet.cat(recordings) + dataset.set_recordings(recordings) + + features_keys = [] + for dset in datasets: + features_dset = list(dset.features_keys()) + features_keys.extend(features_dset) + + features_keys = list(set(features_keys)) + for key in features_keys: + features = [] + for dset in datasets: + if key in dset.features_keys(): + features_key = dset.features_value(key, keep_loaded=False) + features.append(features_key) + + features = FeatureSet.cat(features) + dataset.add_features(features_name=key, features=features) + + # TODO: merge enrollments and trials + # Usually you don't need that + return dataset + + @classmethod def from_lhotse( cls, cuts: Optional[Union[lhotse.CutSet, PathLike]] = None, @@ -1288,7 +1346,14 @@ def from_lhotse( from lhotse import MonoCut, Recording, SupervisionSegment - supervision_keys = ["speaker", "gender", "language", "text", "duration"] + supervision_keys = [ + "speaker", + "gender", + "language", + "emotion", + "text", + "duration", + ] recs_df = [] segs_df = [] for cut in cuts: @@ -1297,16 +1362,16 @@ def from_lhotse( seg_dict = {"id": cut.id} recording = cut.recording if recording is not None: - if recording.id != cut.id: - seg_dict["recording_id"] = recording.id + # if recording.id != cut.id: + # seg_dict["recording_id"] = recording.id rec_dict = { - "id": recording.id, + "id": cut.id, "sampling_rate": recording.sampling_rate, "duration": recording.duration, } source = recording.sources[0] - assert len(recording.source) == 1 + assert len(recording.sources) == 1 assert source.type in ["file", "command"] rec_dict["storage_path"] = source.source assert recording.transforms is None, f"{recording.transforms}" @@ -1323,7 +1388,7 @@ def from_lhotse( if val is not None: seg_dict[key] = val - segs_df = seg_dict + segs_df.append(seg_dict) recs_df = pd.DataFrame(recs_df) segs_df = pd.DataFrame(segs_df) @@ -1334,9 +1399,93 @@ def from_lhotse( for key in class_names: if key in segments: uniq_classes = np.unique(segments[key]) - classes[key] = pd.DataFrame({"id": uniq_classes}) + classes[key] = ClassInfo(pd.DataFrame({"id": uniq_classes})) + + if not classes: + classes = None dataset = cls(segments=segments, classes=classes, recordings=recordings) return dataset - return None + @classmethod + def from_kaldi( + cls, + kaldi_data_dir: PathLike, + ): + """Creates a Hyperion Dataset from a Kaldi data dir + + Args: + kaldi_data_dir: Kaldi data directory + + Returns: + HypDataset object + """ + kaldi_data_dir = Path(kaldi_data_dir) + + kaldi_files = ["utt2lang", "utt2dur", "utt2text"] + attributes = ["language", "duration", "text"] + + k_file = kaldi_data_dir / "utt2spk" + from .utt2info import Utt2Info + + utt2spk = Utt2Info.load(k_file) + df_segs = pd.DataFrame({"id": utt2spk.key, "speaker": utt2spk.info}) + segments = SegmentSet(df_segs) + del utt2spk + + for k_file, att in zip(kaldi_files, attributes): + k_file = kaldi_data_dir / k_file + if k_file.is_file(): + u2i = Utt2Info.load(k_file) + segments.loc[u2i.key, att] = u2i.info + + k_file = kaldi_data_dir / "spk2gender" + if k_file.is_file(): + segments["gender"] = "N/A" + s2g = Utt2Info.load(k_file) + for spk in s2g.key: + g = s2g[spk] + segments.loc[segments["speaker"] == spk, "gender"] = g + + kaldi_files = ["feats.scp", "vad.scp"] + attributes = ["feats", "vad"] + features = None + from .scp_list import SCPList + + for k_file, att in zip(kaldi_files, attributes): + k_file = kaldi_data_dir / k_file + if k_file.is_file(): + scp = SCPList.load(k_file) + feats_dict = {"id": scp.key, "storage_path": scp.file_path} + if scp.offset is not None: + feats_dict["storage_byte"] = scp.offset + df_feats = pd.DataFrame(feats_dict) + if features is None: + features = {} + features[att] = FeatureSet(df_feats) + + recordings = None + k_file = kaldi_data_dir / "wav.scp" + if k_file.is_file(): + scp = SCPList.load(k_file) + wav_dict = {"id": scp.key, "storage_path": scp.file_path} + df_recs = pd.DataFrame(wav_dict) + recordings = RecordingSet(df_recs) + recordings.get_durations() + if "duration" not in segments: + segments["duration"] = recordings.loc[segments["id"], "duration"] + + class_names = ["speaker", "language", "emotion", "gender"] + classes = {} + for key in class_names: + if key in segments: + uniq_classes = np.unique(segments[key]) + classes[key] = ClassInfo(pd.DataFrame({"id": uniq_classes})) + + if not classes: + classes = None + + dataset = cls( + segments=segments, classes=classes, recordings=recordings, features=features + ) + return dataset diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index ea03f058..a813a467 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -12,6 +12,7 @@ import numpy as np import pandas as pd +from pandas.api.types import infer_dtype from .list_utils import split_list, split_list_group_by_key @@ -25,10 +26,15 @@ class InfoTable: """ def __init__(self, df): - self.df = df assert "id" in df, f"info_table={df}" + self.df = df + self.fix_dtypes() self.df.set_index("id", drop=False, inplace=True) + def fix_dtypes(self): + if infer_dtype(self.df.id) != "string": + self.df.loc[:, "id"] = self.df["id"].apply(str) + def copy(self): """Makes a copy of the object.""" return deepcopy(self) @@ -145,7 +151,19 @@ def load(cls, file_path, sep=None, name="class_id"): if sep is None: sep = "\t" if ".tsv" in ext else "," - df = pd.read_csv(file_path, sep=sep) + fixed_dtypes = { + "id": str, + "speaker": str, + "language": str, + "gender": str, + "duration": float, + "storage_path": str, + "storage_byte": int, + "num_frames": int, + "video_ids": str, + "language_est": str, + } + df = pd.read_csv(file_path, sep=sep, dtype=fixed_dtypes) return cls(df) @@ -213,7 +231,7 @@ def filter( iindex: filters the table based on integer index with pandas command: df.iloc[iiindex], used if predicate and items are None columns: columns to keep of remove.
- by: column id to use with itmes criterion + by: column id to use with items criterion keep: if True, the criterion is used to keep rows, if False it is used to remove rows diff --git a/hyperion/utils/recording_set.py b/hyperion/utils/recording_set.py index 8346315c..b266e514 100644 --- a/hyperion/utils/recording_set.py +++ b/hyperion/utils/recording_set.py @@ -3,6 +3,7 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ +import logging from pathlib import Path import numpy as np @@ -59,3 +60,41 @@ def load(cls, file_path, sep=None): return cls(df) return super().load(file_path, sep) + + @staticmethod + def _get_durations(recordings, i, n): + from ..io import SequentialAudioReader as AR + + durations = [] + fss = [] + with AR(recordings, part_idx=i + 1, num_parts=n) as reader: + for data in reader: + key, x, fs = data + duration = x.shape[0] / fs + fss.append(fs) + durations.append(duration) + + return fss, durations + + def get_durations(self, num_threads: int = 16): + + import itertools + from concurrent.futures import ThreadPoolExecutor + + from tqdm import tqdm + + futures = [] + num_threads = min(num_threads, len(self.df)) + logging.info("submitting threads...") + with ThreadPoolExecutor(max_workers=num_threads) as pool: + for i in tqdm(range(num_threads)): + future = pool.submit(RecordingSet._get_durations, self, i, num_threads) + futures.append(future) + + logging.info("waiting for threads...") + res = [f.result() for f in tqdm(futures)] + fss = list(itertools.chain(*[r[0] for r in res])) + durations = list(itertools.chain(*[r[1] for r in res])) + + self.df["duration"] = durations + self.df["sample_freq"] = fss From ad0561e8d4cba96d74e6a315afc72f3a4cdcec4a Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 13 May 2024 17:17:51 -0400 Subject: [PATCH 136/154] add max_batches arg to samplers --- ...mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml | 4 +- ...config_fbank80_stmn_fwseresnet34.v1.2.2.sh | 3 + hyperion/torch/data/bucketing_seg_sampler.py | 30 ++--- .../data/class_weighted_embed_sampler.py | 68 ++++++----- .../data/class_weighted_seg_chunk_sampler.py | 112 ++++++++++-------- hyperion/torch/data/embed_sampler.py | 52 +++++--- hyperion/torch/data/hyp_sampler.py | 9 +- hyperion/torch/data/seg_chunk_sampler.py | 42 ++++--- hyperion/torch/data/seg_sampler.py | 37 ++++-- hyperion/torch/trainers/torch_trainer.py | 6 +- 10 files changed, 226 insertions(+), 137 deletions(-) diff --git a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml index a142349b..683f85ee 100644 --- a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml +++ b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml @@ -62,7 +62,9 @@ trainer: min_lr: 1e-6 warmup_steps: 25000 update_lr_on_opt_step: true - grad_clip: 100 + # grad_clip: 100 + # grad_clip: 20 + grad_clip: 1 use_amp: true log_interval: 1000 epochs: 120 diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh index 8a8b58a3..e56d97cc 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh @@ -27,6 +27,9 @@ nnet_s1=$nnet_s1_dir/teacher_model_ep0054.pth nnet_s1=$nnet_s1_dir/teacher_model_ep0058.pth nnet_s1=$nnet_s1_dir/teacher_model_ep0064.pth
nnet_s1=$nnet_s1_dir/teacher_model_ep0067.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0071.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0077.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0083.pth # clustering of dino embeddings cluster_method=cos_ahc_plda_ahc diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index aa02661c..e73e7e44 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -5,11 +5,13 @@ import logging import math +from typing import Optional, Type import numpy as np import torch import torch.distributed as dist +from ...utils import SegmentSet from .hyp_sampler import HypSampler from .seg_sampler import SegSampler @@ -18,14 +20,17 @@ class BucketingSegSampler(HypSampler): def __init__( self, - seg_set, - base_sampler=SegSampler, - num_buckets=10, - length_column="duration", - seed=1234, + seg_set: SegmentSet, + base_sampler: Type[HypSampler] = SegSampler, + num_buckets: int = 10, + length_column: str = "duration", + max_batches_per_epoch: Optional[int] = None, + seed: int = 1234, **base_kwargs ): - super().__init__(shuffle=False, seed=seed) + super().__init__( + max_batches_per_epoch=max_batches_per_epoch, shuffle=False, seed=seed + ) self.seg_set = seg_set self.base_sampler = base_sampler self.base_kwargs = base_kwargs @@ -67,6 +72,9 @@ def _compute_len(self): for i in range(self.num_buckets): self._len += len(self.bucket_samplers[i]) + if self.max_batches_per_epoch is not None: + self._len = min(self._len, self.max_batches_per_epoch) + def set_epoch(self, epoch, batch=0): for i in range(self.num_buckets): self.bucket_samplers[i].set_epoch(epoch, batch) @@ -120,12 +128,4 @@ def avg_batch_size(self): @staticmethod def filter_args(**kwargs): - - valid_args = ( - "num_buckets", - "length_column", - "shuffle", - "seed", - ) - - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return kwargs diff --git a/hyperion/torch/data/class_weighted_embed_sampler.py b/hyperion/torch/data/class_weighted_embed_sampler.py index edf1c00d..264e561c 100644 --- a/hyperion/torch/data/class_weighted_embed_sampler.py +++ b/hyperion/torch/data/class_weighted_embed_sampler.py @@ -6,13 +6,15 @@ import logging import math import time +from typing import Optional import numpy as np import pandas as pd -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from ...utils import ClassInfo +from ...utils.misc import filter_func_args from .hyp_sampler import HypSampler @@ -20,18 +22,21 @@ class ClassWeightedEmbedSampler(HypSampler): def __init__( self, embed_set, - class_info, - batch_size=1, - num_embeds_per_class=1, - weight_exponent=1.0, - weight_mode="custom", - num_hard_prototypes=0, - affinity_matrix=None, - class_name="class_id", - shuffle=False, - seed=1234, + class_info: ClassInfo, + batch_size: int = 1, + num_embeds_per_class: int = 1, + weight_exponent: float = 1.0, + weight_mode: str = "custom", + num_hard_prototypes: int = 0, + affinity_matrix: Optional[torch.Tensor] = None, + class_name: str = "class_id", + max_batches_per_epoch: Optional[int] = None, + shuffle: bool = False, + seed: int = 1234, ): - super().__init__(shuffle=shuffle, seed=seed) + super().__init__( + max_batches_per_epoch=max_batches_per_epoch, shuffle=shuffle, seed=seed + ) self.class_name = class_name self.embed_set = embed_set self.class_info = class_info @@ -70,6 +75,8 @@ def _compute_len(self): self._len = int(
math.ceil(len(self.embed_set) / self.avg_batch_size / self.world_size) ) + if self.max_batches_per_epoch is not None: + self._len = min(self._len, self.max_batches_per_epoch) def __len__(self): return self._len @@ -147,7 +154,9 @@ def _compute_num_classes_per_batch(self): num_classes /= self.num_hard_prototypes self.num_classes_per_batch = int(math.ceil(num_classes)) - def _get_class_weights(self,): + def _get_class_weights( + self, + ): return torch.as_tensor(self.class_info["weights"].values) def _sample_classes(self): @@ -208,19 +217,19 @@ def __next__(self): @staticmethod def filter_args(**kwargs): - - valid_args = ( - "batch_size", - "num_embeds_per_class", - "weight_exponent", - "weight_mode", - "num_hard_prototypes", - "class_name", - "shuffle", - "seed", - ) - - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + return filter_func_args(ClassWeightedEmbedSampler.__init__, kwargs) + # valid_args = ( + # "batch_size", + # "num_embeds_per_class", + # "weight_exponent", + # "weight_mode", + # "num_hard_prototypes", + # "class_name", + # "shuffle", + # "seed", + # ) + + # return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod def add_class_args(parser, prefix=None): @@ -229,7 +238,10 @@ def add_class_args(parser, prefix=None): parser = ArgumentParser(prog="") parser.add_argument( - "--batch-size", type=int, default=1, help=("batch size per gpu"), + "--batch-size", + type=int, + default=1, + help=("batch size per gpu"), ) parser.add_argument( diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 6ee00307..0bb78901 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -6,42 +6,47 @@ import logging import math import time +from typing import Optional, Union import numpy as np import pandas as pd -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from ...utils import ClassInfo, SegmentSet +from ...utils.misc import filter_func_args from .hyp_sampler import HypSampler class ClassWeightedRandomSegChunkSampler(HypSampler): def __init__( self, - seg_set, - class_info, - min_chunk_length, - max_chunk_length=None, - min_batch_size=1, - max_batch_size=None, - max_batch_length=None, - num_chunks_per_seg_epoch="auto", - num_segs_per_class=1, - num_chunks_per_seg=1, - weight_exponent=1.0, - weight_mode="custom", - seg_weight_mode="uniform", - num_hard_prototypes=0, - affinity_matrix=None, - class_name="class_id", - length_name="duration", - shuffle=False, - iters_per_epoch=None, - batch_size=None, - seed=1234, + seg_set: SegmentSet, + class_info: ClassInfo, + min_chunk_length: int, + max_chunk_length: Optional[int] = None, + min_batch_size: int = 1, + max_batch_size: Optional[int] = None, + max_batch_length: Optional[int] = None, + num_chunks_per_seg_epoch: Union[str, int] = "auto", + num_segs_per_class: int = 1, + num_chunks_per_seg: int = 1, + weight_exponent: float = 1.0, + weight_mode: str = "custom", + seg_weight_mode: str = "uniform", + num_hard_prototypes: int = 0, + affinity_matrix: Optional[torch.Tensor] = None, + class_name: str = "class_id", + length_name: str = "duration", + max_batches_per_epoch: Optional[int] = None, + shuffle: bool = False, + iters_per_epoch: Optional[int] = None, + batch_size: Optional[int] = None, + seed: int = 1234, ):
super().__init__( + max_batches_per_epoch=max_batches_per_epoch, shuffle=shuffle, seed=seed + ) self.class_name = class_name self.length_name = length_name self.seg_set = seg_set @@ -148,6 +153,8 @@ def _compute_len(self): / self.world_size ) ) + if self.max_batches_per_epoch is not None: + self._len = min(self._len, self.max_batches_per_epoch) def __len__(self): return self._len @@ -284,7 +291,10 @@ def _get_class_weights(self, chunk_length): def _sample_classes(self, num_classes, chunk_length): weights = self._get_class_weights(chunk_length) row_idx = torch.multinomial( - weights, num_samples=num_classes, replacement=True, generator=self.rng, + weights, + num_samples=num_classes, + replacement=True, + generator=self.rng, ).numpy() class_ids = self.class_info.iloc[row_idx].id.values @@ -417,29 +427,30 @@ def __next__(self): @staticmethod def filter_args(**kwargs): + return filter_func_args(ClassWeightedRandomSegChunkSampler.__init__, kwargs) + + # valid_args = ( + # "min_chunk_length", + # "max_chunk_length", + # "min_batch_size", + # "max_batch_size", + # "max_batch_length", + # "num_chunks_per_seg_epoch", + # "num_segs_per_class", + # "num_chunks_per_seg", + # "weight_exponent", + # "weight_mode", + # "seg_weight_mode", + # "num_hard_prototypes", + # "class_name", + # "length_name", + # "iters_per_epoch", + # "batch_size", + # "shuffle", + # "seed", + # ) - valid_args = ( - "min_chunk_length", - "max_chunk_length", - "min_batch_size", - "max_batch_size", - "max_batch_length", - "num_chunks_per_seg_epoch", - "num_segs_per_class", - "num_chunks_per_seg", - "weight_exponent", - "weight_mode", - "seg_weight_mode", - "num_hard_prototypes", - "class_name", - "length_name", - "iters_per_epoch", - "batch_size", - "shuffle", - "seed", - ) - - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + # return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod def add_class_args(parser, prefix=None): @@ -545,6 +556,13 @@ def add_class_args(parser, prefix=None): help=("number of hard prototype classes per batch"), ) + parser.add_argument( + "--max-batches-per-epoch", + type=int, + default=None, + help=("Max. 
batches per epoch"), + ) + parser.add_argument( "--shuffle", action=ActionYesNo, diff --git a/hyperion/torch/data/embed_sampler.py b/hyperion/torch/data/embed_sampler.py index 65adcba6..251ba917 100644 --- a/hyperion/torch/data/embed_sampler.py +++ b/hyperion/torch/data/embed_sampler.py @@ -5,20 +5,29 @@ import logging import math +from typing import Optional import numpy as np -from jsonargparse import ActionParser, ActionYesNo, ArgumentParser - import torch +from jsonargparse import ActionParser, ActionYesNo, ArgumentParser +from ...utils.misc import filter_func_args from .hyp_sampler import HypSampler class EmbedSampler(HypSampler): def __init__( - self, embed_set, batch_size=1, shuffle=False, drop_last=False, seed=1234, + self, + embed_set, + batch_size: int = 1, + max_batches_per_epoch: Optional[int] = None, + shuffle: bool = False, + drop_last: bool = False, + seed: int = 1234, ): - super().__init__(shuffle=shuffle, seed=seed) + super().__init__( + max_batches_per_epoch=max_batches_per_epoch, shuffle=shuffle, seed=seed + ) self.embed_set = embed_set self.batch_size = batch_size self.avg_batch_size = batch_size @@ -29,6 +38,9 @@ def __init__( else: self._len = int(math.ceil(num_batches)) + if self.max_batches_per_epoch is not None: + self._len = min(self._len, self.max_batches_per_epoch) + self._permutation = None def __len__(self): @@ -72,15 +84,15 @@ def __next__(self): @staticmethod def filter_args(**kwargs): + return filter_func_args(EmbedSampler.__init__, kwargs) + # valid_args = ( + # "batch_size", + # "shuffle", + # "drop_last", + # "seed", + # ) - valid_args = ( - "batch_size", - "shuffle", - "drop_last", - "seed", - ) - - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + # return dict((k, kwargs[k]) for k in valid_args if k in kwargs) @staticmethod def add_class_args(parser, prefix=None): @@ -89,11 +101,23 @@ def add_class_args(parser, prefix=None): parser = ArgumentParser(prog="") parser.add_argument( - "--batch-size", type=int, default=1, help=("minimum batch size per gpu"), + "--batch-size", + type=int, + default=1, + help=("minimum batch size per gpu"), + ) + + parser.add_argument( + "--drop-last", + action=ActionYesNo, + help="drops the last batch of the epoch", ) parser.add_argument( - "--drop-last", action=ActionYesNo, help="drops the last batch of the epoch", + "--max-batches-per-epoch", + type=int, + default=None, + help=("Max. 
batches per epoch"), ) parser.add_argument( diff --git a/hyperion/torch/data/hyp_sampler.py b/hyperion/torch/data/hyp_sampler.py index f8d0862b..61a922db 100644 --- a/hyperion/torch/data/hyp_sampler.py +++ b/hyperion/torch/data/hyp_sampler.py @@ -1,5 +1,6 @@ import logging import math +from typing import Optional import numpy as np import torch @@ -9,13 +10,19 @@ class HypSampler(Sampler): - def __init__(self, shuffle=False, seed=1234): + def __init__( + self, + max_batches_per_epoch: Optional[int] = None, + shuffle: bool = False, + seed: int = 1234, + ): super().__init__(None) self.epoch = 0 self.batch = 0 self.init_batch = 0 self.shuffle = shuffle self.seed = seed + self.max_batches_per_epoch = max_batches_per_epoch try: rank = dist.get_rank() diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py index da47c8ac..345ec287 100644 --- a/hyperion/torch/data/seg_chunk_sampler.py +++ b/hyperion/torch/data/seg_chunk_sampler.py @@ -5,6 +5,7 @@ import logging import math +from typing import Optional, Type import numpy as np import pandas as pd @@ -12,7 +13,8 @@ import torch.distributed as dist from jsonargparse import ActionParser, ArgumentParser -from ...utils.segment_set import SegmentSet +from ...utils import SegmentSet +from ...utils.misc import filter_func_args from .hyp_sampler import HypSampler from .seg_sampler import SegSampler @@ -20,13 +22,14 @@ class SegChunkSampler(HypSampler): def __init__( self, - seg_set, - min_chunk_length, - max_chunk_length=None, - base_sampler=SegSampler, - length_name="duration", - shuffle=False, - seed=1234, + seg_set: SegmentSet, + min_chunk_length: int, + max_chunk_length: Optional[int] = None, + base_sampler: Type[HypSampler] = SegSampler, + length_name: str = "duration", + max_batches_per_epoch: Optional[int] = None, + shuffle: bool = False, + seed: int = 1234, **base_kwargs, ): super().__init__(shuffle=shuffle, seed=seed) @@ -141,12 +144,17 @@ def __next__(self): @staticmethod def filter_args(**kwargs): - valid_args = ( - "min_chunk_length", - "max_chunk_length", - "length_name", - "shuffle", - "seed", - ) - - return dict((k, kwargs[k]) for k in valid_args if k in kwargs) + valid_args = filter_func_args(SegChunkSampler.__init__, kwargs) + base_args = filter_func_args(SegSampler.__init__, kwargs) + valid_args.update(base_args) + return valid_args + + # valid_args = ( + # "min_chunk_length", + # "max_chunk_length", + # "length_name", + # "shuffle", + # "seed", + # ) + + # return dict((k, kwargs[k]) for k in valid_args if k in kwargs) diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index a280c87e..5d988092 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -5,29 +5,32 @@ import logging import math +from typing import Optional import numpy as np import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser from ...utils.misc import filter_func_args +from ...utils import SegmentSet from .hyp_sampler import HypSampler class SegSampler(HypSampler): def __init__( self, - seg_set, - min_batch_size=1, - max_batch_size=None, - max_batch_length=None, - length_name="duration", - shuffle=False, - drop_last=False, - sort_by_length=True, - seed=1234, + seg_set: SegmentSet, + min_batch_size:int=1, + max_batch_size:Optional[int]=None, + max_batch_length:Optional[int]=None, + length_name:str="duration", + max_batches_per_epoch: Optional[int]=None, + shuffle:bool=False, + drop_last:bool=False, + sort_by_length:bool=True, + 
seed:int=1234, ): - super().__init__(shuffle=shuffle, seed=seed) + super().__init__(max_batches_per_epoch=max_batches_per_epoch,shuffle=shuffle, seed=seed) self.seg_set = seg_set self.min_batch_size = min_batch_size self.max_batch_size = max_batch_size @@ -49,6 +52,9 @@ def __init__( math.ceil((len(self.seg_set) // self.world_size) / avg_batch_size) ) + if self.max_batches_per_epoch is not None: + self._len = min(self._len, self.max_batches_per_epoch) + self._permutation = None def __len__(self): @@ -180,7 +186,16 @@ def add_class_args(parser, prefix=None): ) parser.add_argument( - "--drop-last", action=ActionYesNo, help="drops the last batch of the epoch", + "--drop-last", + action=ActionYesNo, + help="drops the last batch of the epoch", + ) + + parser.add_argument( + "--max-batches-per-epoch", + type=int, + default=None, + help=("Max. batches per epoch"), ) parser.add_argument( diff --git a/hyperion/torch/trainers/torch_trainer.py b/hyperion/torch/trainers/torch_trainer.py index bb0df6b6..4d8adcf4 100644 --- a/hyperion/torch/trainers/torch_trainer.py +++ b/hyperion/torch/trainers/torch_trainer.py @@ -744,7 +744,7 @@ def save_model_checkpoint( self, model_name: str, checkpoint: Dict[str, Any], partial: bool = False ): if partial: - file_path = "%s/%s_ep%04d_step%10d.pth" % ( + file_path = "%s/%s_ep%04d_step%010d.pth" % ( self.exp_path, model_name, self.cur_epoch, @@ -784,7 +784,7 @@ def old_save_checkpoint(self, logs=None, partial: bool = False): checkpoint = self.checkpoint(logs) if partial: - file_path = "%s/model_ep%04d_step%08d.pth" % ( + file_path = "%s/model_ep%04d_step%010d.pth" % ( self.exp_path, self.cur_epoch, self.global_step, @@ -1023,7 +1023,7 @@ def add_class_args(parser, prefix=None, train_modes=None, skip=set()): help="use tensorboard logger", ) parser.add_argument( - "--use-wandb", action="store_true", default=False, help="use wandb logger" + "--use-wandb", action=ActionYesNo, default=False, help="use wandb logger" ) parser.add_argument("--wandb.project", default=None, help="wandb project name") parser.add_argument("--wandb.group", default=None, help="wandb group name") From 91407ad32842e9b83ea6463c31aea1f5e21c85f5 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 13 May 2024 19:01:13 -0400 Subject: [PATCH 137/154] fix --- .../data/class_weighted_embed_sampler.py | 2 +- .../data/class_weighted_seg_chunk_sampler.py | 2 +- hyperion/torch/data/embed_sampler_factory.py | 16 ++++++++++-- hyperion/torch/data/seg_sampler.py | 26 ++++++++++--------- hyperion/torch/data/seg_sampler_factory.py | 8 ++++++ 5 files changed, 38 insertions(+), 16 deletions(-) diff --git a/hyperion/torch/data/class_weighted_embed_sampler.py b/hyperion/torch/data/class_weighted_embed_sampler.py index 264e561c..708e12ed 100644 --- a/hyperion/torch/data/class_weighted_embed_sampler.py +++ b/hyperion/torch/data/class_weighted_embed_sampler.py @@ -30,7 +30,7 @@ def __init__( num_hard_prototypes: int = 0, affinity_matrix: Optional[torch.Tensor] = None, class_name: str = "class_id", - max_batches_per_epoch: Optiona[int] = None, + max_batches_per_epoch: Optional[int] = None, shuffle: bool = False, seed: int = 1234, ): diff --git a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py index 0bb78901..7cadfee2 100644 --- a/hyperion/torch/data/class_weighted_seg_chunk_sampler.py +++ b/hyperion/torch/data/class_weighted_seg_chunk_sampler.py @@ -6,7 +6,7 @@ import logging import math import time -from typing import Optional +from typing import 
Optional, Union import numpy as np import pandas as pd diff --git a/hyperion/torch/data/embed_sampler_factory.py b/hyperion/torch/data/embed_sampler_factory.py index aea35ddf..6ead9daf 100644 --- a/hyperion/torch/data/embed_sampler_factory.py +++ b/hyperion/torch/data/embed_sampler_factory.py @@ -2,6 +2,7 @@ Copyright 2022 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import logging from typing import Optional, Union @@ -32,7 +33,7 @@ def create( """Functions that creates a sampler based on a dataset, sampler_type and sampler arguments. Args: - dataset: embeddings dataset object containing the data info + dataset: embeddings dataset object containing the data info sampler_type: string indicating the sampler type. """ @@ -60,6 +61,7 @@ def filter_args(**kwargs): "weight_mode", "num_hard_prototypes", "class_name", + "max_batches_per_epoch", "shuffle", "seed", ) @@ -73,7 +75,10 @@ def add_class_args(parser, prefix=None): parser = ArgumentParser(prog="") parser.add_argument( - "--batch-size", type=int, default=1, help=("batch size per gpu"), + "--batch-size", + type=int, + default=1, + help=("batch size per gpu"), ) parser.add_argument( @@ -102,6 +107,13 @@ def add_class_args(parser, prefix=None): help=("number of hard prototype classes per batch"), ) + parser.add_argument( + "--max-batches-per-epoch", + type=int, + default=None, + help=("Max. batches per epoch"), + ) + parser.add_argument( "--shuffle", action=ActionYesNo, diff --git a/hyperion/torch/data/seg_sampler.py b/hyperion/torch/data/seg_sampler.py index 5d988092..bb3a37ac 100644 --- a/hyperion/torch/data/seg_sampler.py +++ b/hyperion/torch/data/seg_sampler.py @@ -11,8 +11,8 @@ import torch from jsonargparse import ActionParser, ActionYesNo, ArgumentParser -from ...utils.misc import filter_func_args from ...utils import SegmentSet +from ...utils.misc import filter_func_args from .hyp_sampler import HypSampler @@ -20,17 +20,19 @@ class SegSampler(HypSampler): def __init__( self, seg_set: SegmentSet, - min_batch_size:int=1, - max_batch_size:Optional[int]=None, - max_batch_length:Optional[int]=None, - length_name:str="duration", - max_batches_per_epoch: Optional[int]=None, - shuffle:bool=False, - drop_last:bool=False, - sort_by_length:bool=True, - seed:int=1234, + min_batch_size: int = 1, + max_batch_size: Optional[int] = None, + max_batch_length: Optional[int] = None, + length_name: str = "duration", + max_batches_per_epoch: Optional[int] = None, + shuffle: bool = False, + drop_last: bool = False, + sort_by_length: bool = True, + seed: int = 1234, ): - super().__init__(max_batches_per_epoch=max_batches_per_epoch,shuffle=shuffle, seed=seed) + super().__init__( + max_batches_per_epoch=max_batches_per_epoch, shuffle=shuffle, seed=seed + ) self.seg_set = seg_set self.min_batch_size = min_batch_size self.max_batch_size = max_batch_size @@ -191,7 +193,7 @@ def add_class_args(parser, prefix=None): help="drops the last batch of the epoch", ) - parser.add_argument( + parser.add_argument( "--max-batches-per-epoch", type=int, default=None, diff --git a/hyperion/torch/data/seg_sampler_factory.py b/hyperion/torch/data/seg_sampler_factory.py index 8f6501b5..8a37344d 100644 --- a/hyperion/torch/data/seg_sampler_factory.py +++ b/hyperion/torch/data/seg_sampler_factory.py @@ -89,6 +89,7 @@ def filter_args(**kwargs): "length_name", "iters_per_epoch", "batch_size", + "max_batches_per_epoch", "shuffle", "drop_last", "sort_by_length", @@ -222,6 +223,13 @@ def add_class_args(parser, 
prefix=None): help="drops the last batch of the epoch", ) + parser.add_argument( + "--max-batches-per-epoch", + type=int, + default=None, + help=("Max. batches per epoch"), + ) + parser.add_argument( "--shuffle", action=ActionYesNo, From 6a88ee547c6423228b2a36a711a2d9a567d8270d Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 14 May 2024 08:33:01 -0400 Subject: [PATCH 138/154] fix --- ...config_fbank80_stmn_fwseresnet34.v1.2.2.sh | 1 + hyperion/bin/train_wav2rnn_transducer.py | 5 ++- hyperion/torch/data/audio_dataset.py | 39 +++++++++++++++++++ hyperion/torch/data/bucketing_seg_sampler.py | 2 +- 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh index e56d97cc..846e85f5 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh @@ -30,6 +30,7 @@ nnet_s1=$nnet_s1_dir/teacher_model_ep0067.pth nnet_s1=$nnet_s1_dir/teacher_model_ep0071.pth nnet_s1=$nnet_s1_dir/teacher_model_ep0077.pth nnet_s1=$nnet_s1_dir/teacher_model_ep0083.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0088.pth # clustering of dino embeddings cluster_method=cos_ahc_plda_ahc diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py index ebd23845..5a5e5717 100755 --- a/hyperion/bin/train_wav2rnn_transducer.py +++ b/hyperion/bin/train_wav2rnn_transducer.py @@ -90,7 +90,10 @@ def init_data(partition, rank, num_gpus, **kwargs): {"num_workers": num_workers_per_gpu, "pin_memory": True} if num_gpus > 0 else {} ) data_loader = torch.utils.data.DataLoader( - dataset, batch_sampler=sampler, **largs, collate_fn=transducer_collate + dataset, + batch_sampler=sampler, + **largs, + collate_fn=dataset.get_collator(), # collate_fn=transducer_collate ) return data_loader diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 83f314e1..62317d2b 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -12,6 +12,11 @@ import pandas as pd # import k2 +try: + import k2 +except: + from ..utils.dummy_k2 import k2 + import sentencepiece as spm import torch import torch.distributed as dist @@ -374,6 +379,40 @@ def __getitem__(self, segment): data.update(seg_info) return data + @staticmethod + def collate(self, batch): + from torch.nn.utils.rnn import pad_sequence + + audio = [] + audio_length = [] + target = [] + for record in batch: + audio_length.append(record["x"].shape[0]) + audio_length = torch.as_tensor(audio_length) + if not torch.all(audio_length[:-1] >= audio_length[1:]): + sort_idx = torch.argsort(audio_length, descending=True) + batch = [batch[i] for i in sort_idx] + + audio_length = [] + for record in batch: + wav = torch.as_tensor(record["x"]) + audio.append(wav) + audio_length.append(wav.shape[0]) + target.append(record["text"]) + audio = pad_sequence(audio) + + audio_length = torch.as_tensor(audio_length) + target = k2.RaggedTensor(target) + batch = { + "x": torch.transpose(audio, 0, 1), + "x_lengths": audio_length, + "text": target, + } + return batch + + def get_collator(self): + return lambda batch: AudioDataset(self, batch) + @staticmethod def filter_args(**kwargs): args = filter_func_args(AudioDataset.__init__, kwargs) diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index 
e73e7e44..f5db3a22 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -29,7 +29,7 @@ def __init__( **base_kwargs ): super().__init__( - max_batches_per_epoch=max_batches_per_epoch, maxshuffle=False, seed=seed + max_batches_per_epoch=max_batches_per_epoch, shuffle=False, seed=seed ) self.seg_set = seg_set self.base_sampler = base_sampler From f9f539f53705bbca31dc487eacfa87cf828082f8 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 14 May 2024 09:02:46 -0400 Subject: [PATCH 139/154] fix --- hyperion/torch/data/audio_dataset.py | 4 ++-- hyperion/torch/data/bucketing_seg_sampler.py | 2 +- hyperion/torch/data/seg_chunk_sampler.py | 12 +++++++----- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 62317d2b..e6c7b128 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -399,8 +399,8 @@ def collate(self, batch): audio.append(wav) audio_length.append(wav.shape[0]) target.append(record["text"]) - audio = pad_sequence(audio) + audio = pad_sequence(audio) audio_length = torch.as_tensor(audio_length) target = k2.RaggedTensor(target) batch = { @@ -411,7 +411,7 @@ def collate(self, batch): return batch def get_collator(self): - return lambda batch: AudioDataset(self, batch) + return lambda batch: AudioDataset.collate(self, batch) @staticmethod def filter_args(**kwargs): diff --git a/hyperion/torch/data/bucketing_seg_sampler.py b/hyperion/torch/data/bucketing_seg_sampler.py index f5db3a22..64d2928c 100644 --- a/hyperion/torch/data/bucketing_seg_sampler.py +++ b/hyperion/torch/data/bucketing_seg_sampler.py @@ -33,7 +33,7 @@ def __init__( ) self.seg_set = seg_set self.base_sampler = base_sampler - self.base_kwargs = base_kwargs + self.base_kwargs = base_sampler.filter_args(**base_kwargs) self.base_kwargs["seed"] = seed self.num_buckets = num_buckets self.length_column = length_column diff --git a/hyperion/torch/data/seg_chunk_sampler.py b/hyperion/torch/data/seg_chunk_sampler.py index 345ec287..e6c78775 100644 --- a/hyperion/torch/data/seg_chunk_sampler.py +++ b/hyperion/torch/data/seg_chunk_sampler.py @@ -45,9 +45,10 @@ def __init__( if "subbase_sampler" in base_kwargs: base_kwargs["base_sampler"] = base_kwargs.pop("subbase_sampler") - self.base_kwargs = base_kwargs + self.base_kwargs = base_sampler.filter_args(**base_kwargs) self.base_kwargs["seed"] = seed self.base_kwargs["shuffle"] = shuffle + self.base_kwargs["max_batches_per_epoch"] = max_batches_per_epoch self.__iter__() self.avg_batch_size = self._seg_sampler.avg_batch_size @@ -144,10 +145,11 @@ def __next__(self): @staticmethod def filter_args(**kwargs): - valid_args = filter_func_args(SegChunkSampler.__init__, kwargs) - base_args = filter_func_args(SegSampler.__init__, kwargs) - valid_args.update(base_args) - return valid_args + return kwargs + # valid_args = filter_func_args(SegChunkSampler.__init__, kwargs) + # base_args = filter_func_args(SegSampler.__init__, kwargs) + # valid_args.update(base_args) + # return valid_args # valid_args = ( # "min_chunk_length", From 6a733eeb82a6868f0465dba09c312a75b42f4f73 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 14 May 2024 09:11:34 -0400 Subject: [PATCH 140/154] fix --- hyperion/torch/data/audio_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index e6c7b128..40221fcb 100644 --- 
a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -15,7 +15,7 @@ try: import k2 except: - from ..utils.dummy_k2 import k2 + import ..utils.dummy_k2 as k2 import sentencepiece as spm import torch From cdb1ebbba765faae3ebf5b3f68e08287abafff17 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 14 May 2024 09:14:52 -0400 Subject: [PATCH 141/154] fix --- hyperion/torch/data/audio_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 40221fcb..e0c498e7 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -15,7 +15,7 @@ try: import k2 except: - import ..utils.dummy_k2 as k2 + from ..torch.utils import dummy_k2 as k2 import sentencepiece as spm import torch From 49c50c9161708fa02281b928d1ce07cbbc796d5b Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 14 May 2024 09:20:06 -0400 Subject: [PATCH 142/154] fix --- hyperion/torch/data/audio_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index e0c498e7..905b8533 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -15,7 +15,7 @@ try: import k2 except: - from ..torch.utils import dummy_k2 as k2 + from ..utils import dummy_k2 as k2 import sentencepiece as spm import torch From e03dc8cb0744c2d4a78b4906b38718af20a4ca05 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 15 May 2024 10:43:23 -0400 Subject: [PATCH 143/154] added first tokenizers --- ...mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml | 8 + egs/librispeech/v1/run_004_train_asr.sh | 3 +- ...config_fbank80_stmn_fwseresnet34.v1.2.2.sh | 1 + hyperion/bin/train_tokenizer.py | 6 +- hyperion/bin/train_wav2rnn_transducer.py | 55 +- hyperion/torch/data/audio_dataset.py | 155 ++++- hyperion/torch/data/hyp_sampler.py | 5 + hyperion/torch/models/tvector/__init__.py | 8 - .../torch/models/tvector/resnet_tvector.py | 196 ------ hyperion/torch/models/tvector/tvector.py | 567 ------------------ hyperion/torch/tokenizers/__init__.py | 7 + hyperion/torch/tokenizers/hyp_tokenizer.py | 44 ++ hyperion/torch/tokenizers/sp_tokenizer.py | 93 +++ hyperion/torch/trainers/transducer_trainer.py | 1 - hyperion/torch/utils/__init__.py | 7 +- hyperion/torch/utils/collation.py | 199 ++++-- hyperion/torch/utils/vad_utils.py | 4 +- 17 files changed, 494 insertions(+), 865 deletions(-) delete mode 100644 hyperion/torch/models/tvector/__init__.py delete mode 100644 hyperion/torch/models/tvector/resnet_tvector.py delete mode 100644 hyperion/torch/models/tvector/tvector.py create mode 100644 hyperion/torch/tokenizers/__init__.py create mode 100644 hyperion/torch/tokenizers/hyp_tokenizer.py create mode 100644 hyperion/torch/tokenizers/sp_tokenizer.py diff --git a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml index 683f85ee..baea17ab 100644 --- a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml +++ b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml @@ -4,6 +4,10 @@ data: wav_scale: 1 aug_cfgs: - conf/speed_reverb_noise10-20dB_aug.yaml + tokenizer_mappings: + - text->text + tokenizer_files: + - data/token_librispeech_train-960_unigram_512/tokenizer.yaml return_segment_info: - text sampler: @@ -16,6 +20,10 @@ data: 
val: dataset: wav_scale: 1 + tokenizer_mappings: + - text->text + tokenizer_files: + - data/token_librispeech_train-960_unigram_512/tokenizer.yaml return_segment_info: - text sampler: diff --git a/egs/librispeech/v1/run_004_train_asr.sh b/egs/librispeech/v1/run_004_train_asr.sh index d158689e..33b68ed2 100755 --- a/egs/librispeech/v1/run_004_train_asr.sh +++ b/egs/librispeech/v1/run_004_train_asr.sh @@ -37,11 +37,10 @@ if [ $stage -le 1 ]; then --cfg $nnet_s1_cfg \ --data.train.dataset.recordings-file $train_dir/recordings.csv \ --data.train.dataset.segments-file $train_dir/segments.csv \ - --data.train.dataset.bpe-model $token_model \ --data.val.dataset.recordings-file $val_dir/recordings.csv \ --data.val.dataset.segments-file $val_dir/segments.csv \ --trainer.exp-path $nnet_s1_dir $args \ --num-gpus $ngpu - + #--data.train.dataset.bpe-model $token_model \ fi diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh index 846e85f5..7aebfd69 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh @@ -31,6 +31,7 @@ nnet_s1=$nnet_s1_dir/teacher_model_ep0071.pth nnet_s1=$nnet_s1_dir/teacher_model_ep0077.pth nnet_s1=$nnet_s1_dir/teacher_model_ep0083.pth nnet_s1=$nnet_s1_dir/teacher_model_ep0088.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0094.pth # clustering of dino embeddings cluster_method=cos_ahc_plda_ahc diff --git a/hyperion/bin/train_tokenizer.py b/hyperion/bin/train_tokenizer.py index b3d28923..cd8ab9cf 100644 --- a/hyperion/bin/train_tokenizer.py +++ b/hyperion/bin/train_tokenizer.py @@ -62,6 +62,7 @@ def train_sentencepiece( uppercase_text: bool, tokenizer_path: PathLike, ): + from hyperion.torch.tokenizers import SPTokenizer tokenizer_path = Path(tokenizer_path) tokenizer_path.mkdir(exist_ok=True, parents=True) @@ -96,7 +97,10 @@ def train_sentencepiece( pad_piece=pad_piece, ) - generate_sentencepiece_tokens(model_file, tokenizer_path) + tokenizer = SPTokenizer.load(model_file) + tokenizer.save(model_file.with_suffix(".yaml")) + + # generate_sentencepiece_tokens(model_file, tokenizer_path) def generate_sentencepiece_tokens(model_file: PathLike, tokenizer_path: PathLike): diff --git a/hyperion/bin/train_wav2rnn_transducer.py b/hyperion/bin/train_wav2rnn_transducer.py index 5a5e5717..14fc8db3 100755 --- a/hyperion/bin/train_wav2rnn_transducer.py +++ b/hyperion/bin/train_wav2rnn_transducer.py @@ -98,13 +98,27 @@ def init_data(partition, rank, num_gpus, **kwargs): return data_loader -def init_model(blank_id, vocab_size, rank, model_class, **kwargs): +# def init_model(blank_id, vocab_size, rank, model_class, **kwargs): +# model_args = model_class.filter_args(**kwargs["model"]) +# if rank == 0: +# logging.info("model network args={}".format(model_args)) +# # TODO: check model_args +# model_args["transducer"]["decoder"]["blank_id"] = blank_id +# model_args["transducer"]["decoder"]["vocab_size"] = vocab_size +# model = model_class(**model_args) +# if rank == 0: +# logging.info("model={}".format(model)) +# return model + + +def init_model(rank, model_class, tokenizers, **kwargs): model_args = model_class.filter_args(**kwargs["model"]) if rank == 0: logging.info("model network args={}".format(model_args)) - # TODO: check model_args - model_args["transducer"]["decoder"]["blank_id"] = blank_id - model_args["transducer"]["decoder"]["vocab_size"] = vocab_size + + tokenizer = 
list(tokenizers.items())[0][1] + model_args["transducer"]["decoder"]["blank_id"] = tokenizer.blank_id + model_args["transducer"]["decoder"]["vocab_size"] = tokenizer.vocab_size model = model_class(**model_args) if rank == 0: logging.info("model={}".format(model))
@@ -129,9 +143,14 @@ def train_model(gpu_id, args): train_loader = init_data(partition="train", **kwargs) val_loader = init_data(partition="val", **kwargs) + # model = init_model( + # train_loader.dataset.sp.piece_to_id("<blk>"), + # train_loader.dataset.sp.get_piece_size(), + # **kwargs, + # ) + model = init_model( - train_loader.dataset.sp.piece_to_id("<blk>"), - train_loader.dataset.sp.get_piece_size(), + tokenizers=train_loader.dataset.tokenizers, **kwargs, )
@@ -180,22 +199,28 @@ def make_parser(model_class): data_parser.add_argument("--val", action=ActionParser(parser=val_parser)) parser.add_argument("--data", action=ActionParser(parser=data_parser)) - parser.add_argument( - "--data.train.dataset.text_file", - type=str, - ) + # parser.add_argument( + # "--data.train.dataset.text_file", + # type=str, + # ) - parser.add_argument("--data.val.dataset.text_file", type=str) + # parser.add_argument("--data.val.dataset.text_file", type=str) - parser.add_argument( - "--data.train.dataset.bpe_model", - type=str, - ) + # parser.add_argument( + # "--data.train.dataset.bpe_model", + # type=str, + # ) parser.link_arguments( "data.train.data_loader.num_workers", "data.val.data_loader.num_workers" ) + parser.link_arguments( + "data.train.dataset.tokenizer_mappings", "data.val.dataset.tokenizer_mappings" + ) + parser.link_arguments( + "data.train.dataset.tokenizer_files", "data.val.dataset.tokenizer_files" + ) parser.link_arguments("data.train.dataset.bpe_model", "data.val.dataset.bpe_model") model_class.add_class_args(parser, prefix="model")
diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index 905b8533..d555a118 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py
@@ -6,6 +6,7 @@ import logging import math import time +from collections import OrderedDict from typing import Dict, List, Optional import numpy as np
@@ -27,11 +28,12 @@ from ...io import RandomAccessAudioReader as AR from ...np.augment import SpeechAugment from ...np.preprocessing import Resampler -from ...utils.class_info import ClassInfo +from ...utils import ClassInfo, SegmentSet from ...utils.misc import filter_func_args -from ...utils.segment_set import SegmentSet from ...utils.text import read_text +from ..tokenizers import HypTokenizer from ..torch_defs import floatstr_torch +from ..utils import collate_seqs_1d, collate_seqs_nd, list_of_dicts_to_list class AudioDataset(Dataset):
@@ -42,9 +44,11 @@ class AudioDataset(Dataset): segments_file: segments manifest file (kaldi .scp or pandas .csv) class_names: list with the names of the types of classes in the datasets, e.g., speaker, language class_files: list of class info files - time_durs_file: (deprecated) segment to duration in secs file, if durations are not in segments_file - bpe_model: bpe model for the text label - text_file: text file with words labels for each utterances + tokenizer_mappings: list mapping the segment_set fields to the tokenizer name + that should be used with them, e.g., text->text-1, + this argument has to be in sync with tokenizer_files. + tokenizer_files: list of tokenizer configuration files + this argument has to be in sync with tokenizer_mappings. aug_cfgs: list of augmentation configuration files num_augs: number of augmentations per segment and augmentation type num_aug_mix: number of AugMix augmentations per segment
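The two new arguments are parallel lists: each "field->tokenizer" mapping routes one segment-set column through the tokenizer loaded from the matching config file. A minimal usage sketch, assuming AudioDataset is exported from hyperion.torch.data; the manifest paths are illustrative and the tokenizer file is the one from the librispeech config above:

from hyperion.torch.data import AudioDataset

dataset = AudioDataset(
    recordings_file="data/train/recordings.csv",
    segments_file="data/train/segments.csv",
    # route the "text" column through the tokenizer named "text"
    tokenizer_mappings=["text->text"],
    tokenizer_files=["data/token_librispeech_train-960_unigram_512/tokenizer.yaml"],
    return_segment_info=["text"],
)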
@@ -55,6 +59,9 @@ class AudioDataset(Dataset): wav_scale: make waves to be in [-wav_scale, wav_scale] is_val: is validation dataset. seed: random seed + time_durs_file: (deprecated) segment to duration in secs file, if durations are not in segments_file + text_file: (deprecated) text file with word labels for each utterance. + bpe_model: (deprecated) bpe model for the text label. """ def __init__(
@@ -63,9 +70,8 @@ def __init__( recordings_file: str, segments_file: str, class_names: Optional[List[str]] = None, class_files: Optional[List[str]] = None, - bpe_model: Optional[str] = None, - text_file: Optional[str] = None, - time_durs_file: Optional[str] = None, + tokenizer_mappings: Optional[List[str]] = None, + tokenizer_files: Optional[List[str]] = None, aug_cfgs: Optional[List[str]] = None, num_augs: int = 1, num_aug_mix: int = 0,
@@ -76,6 +82,9 @@ def __init__( wav_scale: float = 1, is_val: bool = False, seed: int = 112358, + time_durs_file: Optional[str] = None, + text_file: Optional[str] = None, + bpe_model: Optional[str] = None, ): super().__init__() try:
@@ -110,6 +119,9 @@ def __init__( logging.info("loading class-info files") self._load_class_infos(class_names, class_files, is_val) + logging.info("loading tokenizers") + self._load_tokenizers(tokenizer_mappings, tokenizer_files) + if bpe_model is not None: logging.info("loading bpe models") self._load_bpe_model(bpe_model, is_val)
@@ -161,7 +173,7 @@ def _load_text_infos(self, text_file, is_val): self.seg_set["text"] = text.loc[self.seg_set["id"]].text def _load_class_infos(self, class_names, class_files, is_val): - self.class_info = {} + self.class_info = OrderedDict() if class_names is None: assert class_files is None return
@@ -185,6 +197,27 @@ def _load_class_infos(self, class_names, class_files, is_val): "%s class: %s not present in dataset", name, c_id ) + def _load_tokenizers(self, tokenizer_mappings, tokenizer_files): + self.tokenizers = OrderedDict() + self.tokenizers_to_infos = OrderedDict() + if tokenizer_mappings is None: + assert tokenizer_files is None + return + + assert len(tokenizer_mappings) == len(tokenizer_files) + tokenizer_names = [] + for mapping in tokenizer_mappings: + info_name, tokenizer_name = mapping.split("->", maxsplit=1) + self.tokenizers_to_infos[tokenizer_name] = info_name + tokenizer_names.append(tokenizer_name) + + for name, file in zip(tokenizer_names, tokenizer_files): + assert self.tokenizers_to_infos[name] in self.seg_set, f"field {self.tokenizers_to_infos[name]} not present in the segment set" + if self.rank == 0: + logging.info("loading tokenizer file %s", file) + tokenizer = HypTokenizer.auto_load(file) + self.tokenizers[name] = tokenizer + def _create_augmenters(self, aug_cfgs): self.augmenters = [] self.reverb_context = 0
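A minimal sketch of the mapping resolution that _load_tokenizers performs, reusing the text->text-1 example from the docstring (the tokenizer file path is a placeholder):

mappings = ["text->text-1"]
files = ["exp/tokenizers/text-1/tokenizer.yaml"]
tokenizers_to_infos = {}
for mapping in mappings:
    info_name, tokenizer_name = mapping.split("->", maxsplit=1)
    tokenizers_to_infos[tokenizer_name] = info_name
# tokenizers_to_infos == {"text-1": "text"}: requesting "text-1" in
# return_segment_info reads the "text" column and encodes it with that tokenizer.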
@@ -244,9 +277,6 @@ def _parse_segment_item(self, segment): else: seg_id, start, duration = segment, 0, 0 - # if "start" in self.seg_set: - # start += self.seg_set.loc[seg_id].start - return seg_id, start, duration def _read_audio(self, seg_id, start, duration):
@@ -260,18 +290,6 @@ def _read_audio(self, seg_id, start, duration): x, fs = self.r.read([seg_id], time_offset=start, time_durs=read_duration) return x[0].astype(floatstr_torch(), copy=False), fs[0] - # def _read_audio0(self, seg_id, start, duration): - # # how much extra audio we need to load to - # # calculate the reverb of the first part of the audio - # reverb_context = min(self.reverb_context, start) - # start -= reverb_context - # read_duration = duration + reverb_context - - # # read audio - # recording_id = self.seg_set.recording_ids(seg_id) - # x, fs = self.r.read([recording_id], time_offset=start, time_durs=read_duration) - # return x[0].astype(floatstr_torch(), copy=False), fs[0] def _apply_aug_mix(self, x, x_augs, aug_idx): x_aug_mix = {} alpha_d = (self.aug_mix_alpha,) * len(x_augs)
@@ -328,6 +346,11 @@ def _get_segment_info(self, seg_id): seg_info = {} # converts the class_ids to integers for info_name in self.return_segment_info: + tokenizer_name = "" + if info_name in self.tokenizers_to_infos: + tokenizer_name = info_name + info_name = self.tokenizers_to_infos[tokenizer_name] + seg_info_i = self.seg_set.loc[seg_id, info_name] if info_name in self.class_info: # if the type of information is a class-id # convert from id to integer class_info = self.class_info[info_name] seg_info_i = class_info.loc[seg_info_i, "class_idx"] - - if info_name == "text": + elif tokenizer_name in self.tokenizers: + seg_info_i = self.tokenizers[tokenizer_name].encode(seg_info_i) + elif info_name == "text": seg_info_i = self.sp.encode(seg_info_i, out_type=int) seg_info[info_name] = seg_info_i
@@ -381,6 +405,66 @@ def __getitem__(self, segment): data.update(seg_info) return data + @staticmethod + def collate(self, batch): + + # sort batch by the length of x + audio_lengths = [] + for record in batch: + audio_lengths.append(record["x"].shape[0]) + audio_lengths = torch.as_tensor(audio_lengths) + if not torch.all(audio_lengths[:-1] >= audio_lengths[1:]): + sort_idx = torch.argsort(audio_lengths, descending=True) + batch = [batch[i] for i in sort_idx] + + del audio_lengths + + def _is_list_of_tensors(x): + return isinstance(x[0], (torch.Tensor, np.ndarray)) + + def _is_list_of_items(x): + return isinstance(x[0], (int, float)) + + def _is_list_of_strs(x): + return isinstance(x[0], str) + + def _is_list_of_strlists(x): + return isinstance(x[0], list) and isinstance(x[0][0], str) + + def _is_list_of_intlists(x): + return isinstance(x[0], list) and isinstance(x[0][0], int) + + output_batch = {} + batch_keys = batch[0].keys() + for key in batch_keys: + item_list = list_of_dicts_to_list(batch, key) + if key == "id": + # these are the segment ids + output_batch[key] = item_list + elif (key == "x" or key[:2] == "x_") and _is_list_of_tensors(item_list): + # these are input audios + data, data_lengths = collate_seqs_1d(item_list) + output_batch[key] = data + output_batch[f"{key}_lengths"] = data_lengths + elif _is_list_of_items(item_list): + # these should be things like class ids + output_batch[key] = torch.as_tensor(item_list) + elif _is_list_of_tensors(item_list): + # other tensor data + data, data_lengths = collate_seqs_nd(item_list) + output_batch[key] = data + output_batch[f"{key}_lengths"] = data_lengths + elif _is_list_of_intlists(item_list): + # we assume k2 ragged tensor for now + output_batch[key] = k2.RaggedTensor(item_list) + elif _is_list_of_strs(item_list): + # we just leave them as they are + output_batch[key] = item_list + else: + raise TypeError(f"we don't know how to collate this data={item_list}") + + return output_batch
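collate_seqs_1d and collate_seqs_nd come from hyperion.torch.utils (extended by this patch series); their implementation is not shown here, so the following is a self-contained sketch of the 1-d contract that the collate method above relies on:

import torch
from torch.nn.utils.rnn import pad_sequence

def collate_seqs_1d_sketch(seqs):
    # pad variable-length 1-d tensors into (batch, max_len) and keep the lengths
    lengths = torch.as_tensor([s.shape[0] for s in seqs])
    return pad_sequence(seqs, batch_first=True), lengths

x, x_lengths = collate_seqs_1d_sketch([torch.randn(16000), torch.randn(8000)])
# x.shape == torch.Size([2, 16000]); x_lengths == tensor([16000, 8000])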
+ + @staticmethod + def collate_old(self, batch): from torch.nn.utils.rnn import pad_sequence audio = [] audio_length = [] target = [] for record in batch: audio_length.append(record["x"].shape[0]) audio_length = torch.as_tensor(audio_length) if not torch.all(audio_length[:-1] >= audio_length[1:]): sort_idx = torch.argsort(audio_length, descending=True) batch = [batch[i] for i in sort_idx] audio_length = [] for record in batch: wav = torch.as_tensor(record["x"]) audio.append(wav) audio_length.append(wav.shape[0]) target.append(record["text"]) audio = pad_sequence(audio) audio_length = torch.as_tensor(audio_length) target = k2.RaggedTensor(target) batch = { "x": torch.transpose(audio, 0, 1), "x_lengths": audio_length, "text": target, } return batch def get_collator(self): return lambda batch: AudioDataset.collate(self, batch) @staticmethod def filter_args(**kwargs): args = filter_func_args(AudioDataset.__init__, kwargs)
@@ -454,6 +538,25 @@ def add_class_args(parser, prefix=None, skip=set()): help="list of class info files", ) + parser.add_argument( + "--tokenizer-mappings", + default=None, + nargs="+", + help="""list mapping the segment_set fields to the tokenizer name + that should be used with them, e.g., text->text-1, + this argument has to be in sync with tokenizer_files. + """, + ) + + parser.add_argument( + "--tokenizer-files", + default=None, + nargs="+", + help="""list of tokenizer configuration files + this argument has to be in sync with tokenizer_mappings. + """, + ) + parser.add_argument( "--time-durs-file", default=None,
diff --git a/hyperion/torch/data/hyp_sampler.py b/hyperion/torch/data/hyp_sampler.py index 61a922db..30010f5e 100644 --- a/hyperion/torch/data/hyp_sampler.py +++ b/hyperion/torch/data/hyp_sampler.py
@@ -1,3 +1,8 @@ +""" + Copyright 2023 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + import logging import math from typing import Optional
diff --git a/hyperion/torch/models/tvector/__init__.py b/hyperion/torch/models/tvector/__init__.py deleted file mode 100644 index 36999146..00000000 --- a/hyperion/torch/models/tvector/__init__.py +++ /dev/null
@@ -1,8 +0,0 @@ -""" - Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" - -from .resnet_tvector import ResNetTVector -# t-vectors -from .tvector import TVector
diff --git a/hyperion/torch/models/tvector/resnet_tvector.py b/hyperion/torch/models/tvector/resnet_tvector.py deleted file mode 100644 index c84a38fc..00000000 --- a/hyperion/torch/models/tvector/resnet_tvector.py +++ /dev/null
@@ -1,196 +0,0 @@ -""" - Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" - -import logging -from argparse import Namespace - -import torch -import torch.nn as nn - -from ..narchs import ResNetFactory as RNF -from .xvector import XVector - - -class ResNetXVector(XVector): - def __init__( - self, - in_feats, - num_classes, - resnet_cfg=Namespace( - resnet_type="resnet34", - in_channels=1, - conv_channels=64, - base_channels=64, - in_kernel_size=7, - in_stride=1, - zero_init_residual=False, - groups=1, - replace_stride_with_dilation=None, - do_maxpool=False, - hid_act={"name": "relu", "inplace": True}, - dropout_rate=0, - norm_layer=None, - use_norm=True, - norm_before=True, - in_norm=False, - se_r=16, - res2net_scale=4, - res2net_width_factor=1, - ), - conformer_cfg=Namespace( - d_model=256, - num_heads=4, - num_blocks=6, - attype="scaled-dot-prod-v1", - atcontext=25, - conv_repeats=1, - conv_kernel_sizes=31, - conv_strides=1, - ff_type="linear", - d_ff=2048, - ff_kernel_size=1, - dropourate=0.1, - pos_dropourate=0.1, - att_dropout_rate=0.0, - in_layer_type="conv2d-sub", - rel_pos_enc=True, - causal_pos_enc=False, - no_pos_enc=False, - hid_act="swish", - conv_norm_layer=None, - se_r=None, - ff_macaron=True, - red_lnorms=False, - concat_after=False, - ), - pool_net="mean+stddev", - head_cfg=Namespace( - embed_dim=256, - num_embed_layers=1, - head_hid_act={"name": "relu", "inplace": True}, - loss_type="arc-softmax", - s=64, - margin=0.3, - margin_warmup_epochs=0, - num_subcenters=2, - norm_layer=None, - use_norm=True, - norm_before=True, - dropout_rate=0, - embed_layer=0, - ), - ): - - logging.info("making %s encoder network" % (resnet_type)) - if isinstance(resnet_cfg, Namespace): - resnet_cfg = var(resnet_cfg) - - self.resnet_type = resnet_cfg["resnet_type"] - encoder_net = RNF.create(**resnet_cfg) - - super().__init__( - encoder_net, - num_classes, - conformer_cfg=conformer_cfg, - pool_net=pool_net, - head_cfg=head_cfg, - in_feats=in_feats, - proj_feats=None, - ) - - @property - def in_channels(self): - 
return self.encoder_net.in_channels - - @property - def conv_channels(self): - return self.encoder_net.conv_channels - - @property - def base_channels(self): - return self.encoder_net.base_channels - - @property - def in_kernel_size(self): - return self.encoder_net.in_kernel_size - - @property - def in_stride(self): - return self.encoder_net.in_stride - - @property - def zero_init_residual(self): - return self.encoder_net.zero_init_residual - - @property - def groups(self): - return self.encoder_net.groups - - @property - def replace_stride_with_dilation(self): - return self.encoder_net.replace_stride_with_dilation - - @property - def do_maxpool(self): - return self.encoder_net.do_maxpool - - @property - def in_norm(self): - return self.encoder_net.in_norm - - @property - def se_r(self): - return self.encoder_net.se_r - - @property - def res2net_scale(self): - return self.encoder_net.res2net_scale - - @property - def res2net_width_factor(self): - return self.encoder_net.res2net_width_factor - - def get_config(self): - - base_config = super().get_config() - del base_config["encoder_cfg"] - enc_cfg = self.encoder_net.get_config() - del enc_cfg["block"] - del enc_cfg["out_units"] - del enc_cfg["out_act"] - enc_cfg["resnet_type"] = self.resnet_type - - base_config["resnet_cfg"] = enc_cfg - - return base_config - - @classmethod - def load(cls, file_path=None, cfg=None, state_dict=None): - - cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict) - - model = cls(**cfg) - if state_dict is not None: - model.load_state_dict(state_dict) - - return model - - def filter_args(prefix=None, **kwargs): - - base_args = XVector.filter_args(prefix, **kwargs) - child_args = RNF.filter_args(prefix, **kwargs) - - base_args.update(child_args) - return base_args - - @staticmethod - def add_argparse_args(parser, prefix=None): - - XVector.add_argparse_args(parser, prefix) - if prefix is None: - prefix = "resnet" - else: - prefix = prefix + "-resnet" - RNF.add_argparse_args(parser, prefix) diff --git a/hyperion/torch/models/tvector/tvector.py b/hyperion/torch/models/tvector/tvector.py deleted file mode 100644 index a46fc324..00000000 --- a/hyperion/torch/models/tvector/tvector.py +++ /dev/null @@ -1,567 +0,0 @@ -""" - Copyright 2019 Johns Hopkins University (Author: Jesus Villalba) - Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" - -import logging - -from jsonargparse import ActionParser, ArgumentParser - -import torch -import torch.nn as nn - -from ...narchs import ClassifHead, ConformerEncoderV1, TorchNALoader -from ..layer_blocks import TDNNBlock -from ..layers import GlobalPool1dFactory as PF -from ..torch_model import TorchModel -from ..utils import eval_nnet_by_chunks - - -class TXVector(TorchModel): - """x-Vector base class""" - - def __init__( - self, - encoder_net, - num_classes, - conformer_net={}, - pool_net="mean+stddev", - classif_net={}, - in_feats=None, - ): - - super().__init__() - - # encoder network - self.encoder_net = encoder_net - - # infer input and output shapes of encoder network - in_shape = self.encoder_net.in_shape() - if len(in_shape) == 3: - # encoder based on 1d conv or transformer - in_feats = in_shape[1] - out_shape = self.encoder_net.out_shape(in_shape) - enc_feats = out_shape[1] - elif len(in_shape) == 4: - # encoder based in 2d convs - assert ( - in_feats is not None - ), "in_feats dimension must be given to calculate pooling dimension" - in_shape = list(in_shape) - in_shape[2] = in_feats - out_shape = self.encoder_net.out_shape(tuple(in_shape)) - 
enc_feats = out_shape[1] * out_shape[2] - - self.in_feats = in_feats - - logging.info("encoder input shape={}".format(in_shape)) - logging.info("encoder output shape={}".format(out_shape)) - - # create conformer net - if isinstance(conformer_net, nn.Module): - self.conformer_net = conformer_net - else: - logging.info("making conformer net") - conformer_net["in_layer_type"] = "linear" - self.conformer_net = ConformerEncoderV1( - enc_feats, in_time_dim=1, out_time_dim=1, **conformer_net - ) - - d_model = self.conformer_net.d_model - self.pool_net = self._make_pool_net(pool_cfg, d_model) - pool_feats = int(d_model * self.pool_net.size_multiplier) - logging.info("infer pooling dimension %d", pool_feats) - - # create classification head - if isinstance(classif_net, nn.Module): - self.classif_net = classif_net - else: - logging.info("making classification head net") - self.classif_net = ClassifHead(pool_feats, num_classes, **head_cfg) - - @property - def pool_feats(self): - return self.classif_net.in_feats - - @property - def num_classes(self): - return self.classif_net.num_classes - - @property - def embed_dim(self): - return self.classif_net.embed_dim - - @property - def num_embed_layers(self): - return self.classif_net.num_embed_layers - - @property - def s(self): - return self.classif_net.s - - @property - def margin(self): - return self.classif_net.margin - - @property - def margin_warmup_epochs(self): - return self.classif_net.margin_warmup_epochs - - @property - def num_subcenters(self): - return self.classif_net.num_subcenters - - @property - def loss_type(self): - return self.classif_net.loss_type - - def _make_pool_net(self, pool_net, enc_feats=None): - """Makes the pooling block - - Args: - pool_net: str or dict to pass to the pooling factory create function - enc_feats: dimension of the features coming from the encoder - - Returns: - GlobalPool1d object - """ - if isinstance(pool_net, str): - pool_net = {"pool_type": pool_net} - - if isinstance(pool_net, dict): - if enc_feats is not None: - pool_net["in_feats"] = enc_feats - - return PF.create(**pool_net) - elif isinstance(pool_net, nn.Module): - return pool_net - else: - raise Exception("Invalid pool_net argument") - - def update_loss_margin(self, epoch): - """Updates the value of the margin in AAM/AM-softmax losses - given the epoch number - - Args: - epoch: epoch which is about to start - """ - self.classif_net.update_margin(epoch) - - def _pre_enc(self, x): - if self.encoder_net.in_dim() == 4 and x.dim() == 3: - x = x.view(x.size(0), 1, x.size(1), x.size(2)) - return x - - def _post_enc(self, x): - if self.encoder_net.out_dim() == 4: - x = x.view(x.size(0), -1, x.size(-1)) - - if self.proj is not None: - x = self.proj(x) - - return x - - def forward( - self, - x, - y=None, - enc_layers=None, - classif_layers=None, - return_output=True, - use_amp=False, - ): - if enc_layers is None and classif_layers is None: - return self.forward_output(x, y) - - h = self.forward_hid_feats(x, y, enc_layers, classif_layers, return_output) - output = {} - if enc_layers is not None: - if classif_layers is None: - output["h_enc"] = h - else: - output["h_enc"] = h[0] - else: - output["h_enc"] = [] - if classif_layers is not None: - output["h_classif"] = h[1] - else: - output["h_classif"] = [] - if return_output: - output["output"] = h[2] - return output - - def forward_output(self, x, y=None): - """Forward function - - Args: - x: input features tensor with shape=(batch, in_feats, time) - y: target classes torch.long tensor with shape=(batch,) - - 
Returns: - class posteriors tensor with shape=(batch, num_classes) - """ - if self.encoder_net.in_dim() == 4 and x.dim() == 3: - x = x.view(x.size(0), 1, x.size(1), x.size(2)) - - x = self.encoder_net(x) - x = self.conformer_net(x) - - if self.encoder_net.out_dim() == 4: - x = x.view(x.size(0), -1, x.size(-1)) - - p = self.pool_net(x) - y = self.classif_net(p, y) - return y - - def forward_hid_feats( - self, - x, - y=None, - enc_layers=None, - conf_layers=None, - classif_layers=None, - return_output=False, - ): - """forwards hidden representations in the x-vector network""" - - if self.encoder_net.in_dim() == 4 and x.dim() == 3: - x = x.view(x.size(0), 1, x.size(1), x.size(2)) - - h_enc, x = self.encoder_net.forward_hid_feats(x, enc_layers, return_output=True) - - h_conf, x = self.conformer_net.forward_hid_feats( - x, conf_layers, return_output=True - ) - - if not return_output and classif_layers is None: - return h_enc - - if self.encoder_net.out_dim() == 4: - x = x.view(x.size(0), -1, x.size(-1)) - - if self.proj is not None: - x = self.proj(x) - - p = self.pool_net(x) - h_classif = self.classif_net.forward_hid_feats( - p, y, classif_layers, return_output=return_output - ) - if return_output: - h_classif, y = h_classif - return h_enc, h_classif, y - - return h_enc, h_classif - - def extract_embed(self, x, chunk_length=0, embed_layer=None, detach_chunks=False): - if embed_layer is None: - embed_layer = self.embed_layer - - x = self._pre_enc(x) - # if self.encoder_net.in_dim() == 4 and x.dim() == 3: - # x = x.view(x.size(0), 1, x.size(1), x.size(2)) - x = eval_nnet_by_chunks( - x, self.encoder_net, chunk_length, detach_chunks=detach_chunks - ) - - if x.device != self.device: - x = x.to(self.device) - - x = self._post_enc(x) - - # if self.encoder_net.out_dim() == 4: - # x = x.view(x.size(0), -1, x.size(-1)) - - # if self.proj is not None: - # x = self.proj(x) - - p = self.pool_net(x) - y = self.classif_net.extract_embed(p, embed_layer) - return y - - def extract_embed_slidwin( - self, - x, - win_length, - win_shift, - snip_edges=False, - feat_frame_length=None, - feat_frame_shift=None, - chunk_length=0, - embed_layer=None, - detach_chunks=False, - ): - - if feat_frame_shift is not None: - # assume win_length/shift are in secs, transform to frames - # pass feat times from msecs to secs - feat_frame_shift = feat_frame_shift / 1000 - feat_frame_length = feat_frame_length / 1000 - - # get length and shift in number of feature frames - win_shift = win_shift / feat_frame_shift # this can be a float - win_length = ( - win_length - feat_frame_length + feat_frame_shift - ) / feat_frame_shift - assert win_shift > 0.5, "win-length should be longer than feat-frame-length" - - if embed_layer is None: - embed_layer = self.embed_layer - - in_time = x.size(-1) - x = self._pre_enc(x) - x = eval_nnet_by_chunks( - x, self.encoder_net, chunk_length, detach_chunks=detach_chunks - ) - - if x.device != self.device: - x = x.to(self.device) - - x = self._post_enc(x) - pin_time = x.size(-1) # time dim before pooling - downsample_factor = float(pin_time) / in_time - p = self.pool_net.forward_slidwin( - x, - downsample_factor * win_length, - downsample_factor * win_shift, - snip_edges=snip_edges, - ) - # (batch, pool_dim, time) - - p = p.transpose(1, 2).contiguous().view(-1, p.size(1)) - y = ( - self.classif_net.extract_embed(p, embed_layer) - .view(x.size(0), -1, self.embed_dim) - .transpose(1, 2) - .contiguous() - ) - - return y - - def compute_slidwin_timestamps( - self, - num_windows, - win_length, - win_shift, - 
snip_edges=False, - feat_frame_length=25, - feat_frame_shift=10, - feat_snip_edges=False, - ): - - P = self.compute_slidwin_left_padding( - win_length, - win_shift, - snip_edges, - feat_frame_length, - feat_frame_shift, - feat_snip_edges, - ) - - tstamps = ( - torch.as_tensor( - [ - [i * win_shift, i * win_shift + win_length] - for i in range(num_windows) - ] - ) - - P - ) - tstamps[tstamps < 0] = 0 - return tstamps - - def compute_slidwin_left_padding( - self, - win_length, - win_shift, - snip_edges=False, - feat_frame_length=25, - feat_frame_shift=10, - feat_snip_edges=False, - ): - - # pass feat times from msecs to secs - feat_frame_shift = feat_frame_shift / 1000 - feat_frame_length = feat_frame_length / 1000 - - # get length and shift in number of feature frames - H = win_shift / feat_frame_shift - L = (win_length - feat_frame_length + feat_frame_shift) / feat_frame_shift - assert L > 0.5, "win-length should be longer than feat-frame-length" - - # compute left padding in case of snip_edges is False - if snip_edges: - P1 = 0 - else: - Q = ( - L - H - ) / 2 # left padding in frames introduced by x-vector sliding window - P1 = ( - Q * feat_frame_shift - ) # left padding in secs introduced by x-vector sliding window - - if feat_snip_edges: - # left padding introduced when computing acoustic feats - P2 = 0 - else: - P2 = (feat_frame_length - feat_frame_shift) / 2 - - # total left padding - return P1 + P2 - - def get_config(self): - - enc_cfg = self.encoder_net.get_config() - pool_cfg = PF.get_config(self.pool_net) - conformer_cfg = self.conformer_net.get_config() - classif_cfg = self.classif_net.get_config() - - config = { - "encoder_cfg": enc_cfg, - "num_classes": self.num_classes, - "conformer_net": self.conformer_cfg, - "pool_net": pool_cfg, - "classif_net": self.classif_cfg, - "in_feats": self.in_feats, - } - - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def load(cls, file_path=None, cfg=None, state_dict=None): - cfg, state_dict = cls._load_cfg_state_dict(file_path, cfg, state_dict) - encoder_net = TorchNALoader.load_from_cfg(cfg=cfg["encoder_cfg"]) - - for k in "encoder_cfg": - del cfg[k] - - model = cls(encoder_net, **cfg) - if state_dict is not None: - model.load_state_dict(state_dict) - - return model - - def rebuild_output_layer( - self, - num_classes=None, - loss_type="arc-softmax", - s=64, - margin=0.3, - margin_warmup_epochs=10, - ): - if (self.num_classes is not None and self.num_classes != num_classes) or ( - self.loss_type != loss_type - ): - # if we change the number of classes or the loss-type - # we need to reinitiate the last layer - self.classif_net.rebuild_output_layer( - num_classes, loss_type, s, margin, margin_warmup_epochs - ) - return - - # otherwise we just change the values of s, margin and margin_warmup - self.classif_net.set_margin(margin) - self.classif_net.set_margin_warmup_epochs(margin_warmup_epochs) - self.classif_net.set_s(s) - - def freeze_preembed_layers(self): - self.encoder_net.freeze() - if self.proj is not None: - self.proj.freeze() - - for param in self.pool_net.parameters(): - param.requires_grad = False - - layer_list = [l for l in range(self.embed_layer)] - self.classif_net.freeze_layers(layer_list) - - def train_mode(self, mode="ft-embed-affine"): - if mode == "ft-full" or mode == "train": - self.train() - return - - self.encoder_net.eval() - self.conformer_net.eval() - self.pool_net.eval() - self.classif_net.train() - layer_list = [l for l in range(self.embed_layer)] - 
self.classif_net.put_layers_in_eval_mode(layer_list) - - @staticmethod - def filter_args(**kwargs): - - valid_args = ("num_classes", "in_feats") - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - # get arguments for conformer - conformer_args = ConformerEncoderV1.filter_args(**kwargs["conformer_net"]) - args["corformer_net"] = conformer_args - # get arguments for pooling - pool_args = PF.filter_args(**kwargs["pool_net"]) - args["pool_net"] = pool_args - # get arguments for classif head - classif_args = ClassifHead.filter_args(**kwargs["classif_net"]) - args["classif_net"] = classif_args - - return args - - @staticmethod - def add_class_args(parser, prefix=None): - if prefix is not None: - outer_parser = parser - parser = ArgumentParser(prog="") - - CoformerEncoderV1.add_class_args(parser, prefix="conformer_net") - PF.add_class_args( - parser, prefix="pool_net", skip=["dim", "in_feats", "keepdim"] - ) - ClassifHead.add_class_args(parser, prefix="classif_net") - if prefix is not None: - outer_parser.add_argument( - "--" + prefix, - action=ActionParser(parser=parser), - help="xvector options", - ) - - @staticmethod - def filter_finetune_args(**kwargs): - valid_args = ("loss_type", "s", "margin", "margin_warmup_epochs") - args = dict((k, kwargs[k]) for k in valid_args if k in kwargs) - - return args - - @staticmethod - def add_finetune_args(parser, prefix=None): - if prefix is not None: - outer_parser = parser - parser = ArgumentParser(prog="") - - parser.add_argument( - "--loss-type", - default="arc-softmax", - choices=["softmax", "arc-softmax", "cos-softmax", "subcenter-arc-softmax"], - help="loss type: softmax, arc-softmax, cos-softmax, subcenter-arc-softmax", - ) - - parser.add_argument("--s", default=64, type=float, help="scale for arcface") - - parser.add_argument( - "--margin", default=0.3, type=float, help="margin for arcface, cosface,..." 
- ) - parser.add_argument( - "--margin-warmup-epochs", - default=10, - type=float, - help="number of epoch until we set the final margin", - ) - - parser.add_argument( - "--num-subcenters", - default=2, - type=float, - help="number of subcenters in subcenter losses", - ) - - if prefix is not None: - outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/tokenizers/__init__.py b/hyperion/torch/tokenizers/__init__.py new file mode 100644 index 00000000..42afcaf1 --- /dev/null +++ b/hyperion/torch/tokenizers/__init__.py @@ -0,0 +1,7 @@ +""" + Copyright 2024 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from .hyp_tokenizer import HypTokenizer +from .sp_tokenizer import SPTokenizer diff --git a/hyperion/torch/tokenizers/hyp_tokenizer.py b/hyperion/torch/tokenizers/hyp_tokenizer.py new file mode 100644 index 00000000..0d6e9efb --- /dev/null +++ b/hyperion/torch/tokenizers/hyp_tokenizer.py @@ -0,0 +1,44 @@ +""" + Copyright 2024 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from pathlib import Path + +import yaml + +from ...utils.misc import PathLike + + +class HypTokenizer: + """Base class for tokenizers in Hyperion""" + + registry = {} + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + HypTokenizer.registry[cls.__name__] = cls + + def normalize(self, text): + return text + + def encode(self, x): + pass + + def decode(self, x): + pass + + @staticmethod + def auto_load(file_path: PathLike): + file_path = Path(file_path) + with open(file_path, "r") as f: + cfg = yaml.safe_load(f) + + class_name = cfg["class_name"] + del cfg["class_name"] + if class_name in HypTokenizer.registry: + class_obj = HypTokenizer.registry[class_name] + else: + raise Exception("unknown object with class_name=%s" % (class_name)) + + return class_obj.load(file_path) diff --git a/hyperion/torch/tokenizers/sp_tokenizer.py b/hyperion/torch/tokenizers/sp_tokenizer.py new file mode 100644 index 00000000..c3fa35f9 --- /dev/null +++ b/hyperion/torch/tokenizers/sp_tokenizer.py @@ -0,0 +1,93 @@ +""" + Copyright 2024 Johns Hopkins University (Author: Jesus Villalba) + Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +""" + +from pathlib import Path + +from typing import Dict + +import sentencepiece as spm +import yaml + +from ...utils.misc import PathLike +from .hyp_tokenizer import HypTokenizer + + +class SPTokenizer(HypTokenizer): + """Sentence Piece Tokenizer""" + + def __init__( + self, sp_model: spm.SentencePieceProcessor, uppercase_text: bool = True + ): + super().__init__() + self.sp_model = sp_model + self.uppercase_text = uppercase_text + self.blank_id = self.sp_model.piece_to_id("") + self.vocab_size = self.sp_model.get_piece_size() + self._token2id = None + + @property + def token2id(self): + if self._token2id is not None: + return self._token2id + + token2id: Dict[str, int] = { + self.sp_model.id_to_piece(i): i for i in range(self.sp_model.vocab_size()) + } + self._token2id = token2id + return token2id + + def normalize(self, text): + if self.uppercase_text: + text = text.upper() + return text + + def encode(self, text): + return self.sp_model.encode(text, out_type=int) + + def decode(self, tokens): + return self.sp_model.decode(tokens) + + def save(self, file_path: PathLike, sp_model_prefix: str = "tokenizer"): + file_path = Path(file_path) + if file_path.suffix != ".yaml": + output_dir = file_path
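The new `HypTokenizer` base class uses `__init_subclass__` as a class registry, so any subclass becomes discoverable by name as soon as it is defined, and `auto_load` dispatches on the `class_name` field stored in the tokenizer's YAML file. A minimal sketch of the mechanics with a hypothetical subclass (`CharTokenizer` is invented purely for illustration):

```python
from hyperion.torch.tokenizers import HypTokenizer

class CharTokenizer(HypTokenizer):
    """Toy character tokenizer, defined only to show the registry mechanics."""

    def encode(self, text):
        return [ord(c) for c in text]

    def decode(self, tokens):
        return "".join(chr(t) for t in tokens)

# Defining the subclass is enough; __init_subclass__ registered it by name.
assert "CharTokenizer" in HypTokenizer.registry
# HypTokenizer.auto_load("tok.yaml") reads cfg["class_name"] from the YAML
# and calls HypTokenizer.registry[cfg["class_name"]].load("tok.yaml").
```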
+ file_path = output_dir / (sp_model_prefix + ".yaml") + else: + output_dir = file_path.parent + + output_dir.mkdir(parents=True, exist_ok=True) + sp_model_file = sp_model_prefix + ".model" + sp_tokens_file = sp_model_prefix + ".tokens" + cfg = { + "class_name": self.__class__.__name__, + "sp_model": sp_model_file, + "sp_tokens": sp_tokens_file, + "uppercase_text": self.uppercase_text, + } + with open(file_path, "w") as f: + yaml.dump(cfg, f) + + with open(output_dir / sp_tokens_file, "w", encoding="utf-8") as f: + for sym, i in self.token2id.items(): + f.write(f"{sym} {i}\n") + + @classmethod + def load(cls, file_path: PathLike): + file_path = Path(file_path) + if file_path.suffix == ".model": + sp_model = spm.SentencePieceProcessor() + sp_model.load(str(file_path)) + return cls(sp_model) + + with open(file_path, "r") as f: + cfg = yaml.safe_load(f) + + sp_model_file = Path(cfg["sp_model"]) + if not sp_model_file.is_file(): + sp_model_file = file_path.parent / sp_model_file + assert sp_model_file.is_file(), f"{sp_model_file} not found" + + sp_model = spm.SentencePieceProcessor() + sp_model.load(str(sp_model_file)) + return cls(sp_model) diff --git a/hyperion/torch/trainers/transducer_trainer.py b/hyperion/torch/trainers/transducer_trainer.py index 3c52b8bf..c9cbb60b 100644 --- a/hyperion/torch/trainers/transducer_trainer.py +++ b/hyperion/torch/trainers/transducer_trainer.py @@ -102,7 +102,6 @@ def train_epoch(self, data_loader): metric_acc = MetricAcc(device=self.device) batch_metrics = ODict() self.model.train() - self.sp = data_loader.dataset.sp for batch, data in enumerate(data_loader): self.loggers.on_batch_begin(batch) diff --git a/hyperion/torch/utils/__init__.py b/hyperion/torch/utils/__init__.py index 610a43e9..cbfab5ed 100644 --- a/hyperion/torch/utils/__init__.py +++ b/hyperion/torch/utils/__init__.py @@ -3,7 +3,12 @@ Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ -from .collation import collate_seq_1d, collate_seq_2d, collate_seq_nd +from .collation import ( + collate_seqs_1d, + collate_seqs_2d, + collate_seqs_nd, + list_of_dicts_to_list, +) from .data_parallel import TorchDataParallel from .ddp import FairFullyShardedDDP, FairShardedDDP, TorchDDP from .devices import ( diff --git a/hyperion/torch/utils/collation.py b/hyperion/torch/utils/collation.py index 25b3790b..2b18a87a 100644 --- a/hyperion/torch/utils/collation.py +++ b/hyperion/torch/utils/collation.py @@ -5,9 +5,21 @@ import torch import torch.nn as nn +from torch.nn.utils.rnn import pad_sequence -def collate_seq_1d(x, pad_value=0): +def list_of_dicts_to_list(list_of_dicts, key): + """Takes a list of dictionaries and a key, + and returns a list of the items corresponding to the key + """ + output = [] + for item in list_of_dicts: + output.append(item[key]) + + return output + + +def collate_seqs_1d(x, pad_value=0): """Combines a list/tuple of vectors with different lengths into a single tensor. @@ -18,17 +30,20 @@ def collate_seq_1d(x, pad_value=0): 2D tensor with shape (num_vectors, max_vector_length). 1D long tensor containing the vector lengths. 
""" - max_length = max([x_i.size(0) for x_i in x]) - y = pad_value * torch.ones(len(x), max_length, dtype=x[0].dtype, device=x[0].device) - y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device) - for i, x_i in enumerate(x): - y[i, : x_i.size(0)] = x_i - y_lengths[i] = x_i.size(0) + if not isinstance(x[0], torch.Tensor): + x = [torch.from_numpy(x_i) for x_i in x] + + assert x[0].dim() == 1 + x_lengths = [] + for x_i in x: + x_lengths.append(x_i.size(0)) - return y, y_lengths + x_lengths = torch.as_tensor(x_lengths) + x = pad_sequence(x, batch_first=True, padding_value=pad_value) + return x, x_lengths -def collate_seq_2d(x, pad_value=0, pad_dim=-1): +def collate_seqs_2d(x, pad_value=0, pad_dim=0): """Combines a list/tuple of matrices with different sizes in one of the dimensions into a single 3d tensor. Combines performing padding on the dimension which is not constant. @@ -41,24 +56,28 @@ def collate_seq_2d(x, pad_value=0, pad_dim=-1): 3D tensor with shape (num_vectors, max_length, feat_dim) or (num_vectors, feat_dim, length). 1D long tensor containing the dimensions lengths. """ - max_length = max([x_i.size(pad_dim) for x_i in x]) - y_size = list(x[0].size()) - y_size[pad_dim] = max_length - y = pad_value * torch.ones(*y_size, dtype=x[0].dtype, device=x[0].device) - y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device) - if pad_dim == -1 or pad_dim == 1: - for i, x_i in enumerate(x): - y[i, :, : x_i.size(pad_dim)] = x_i - y_lengths[i] = x_i.size(pad_dim) - else: - for i, x_i in enumerate(x): - y[i, : x_i.size(pad_dim)] = x_i - y_lengths[i] = x_i.size(pad_dim) - - return y, y_lengths - - -def collate_seq_nd(x, pad_value=0, pad_dim=-1): + if not isinstance(x[0], torch.Tensor): + x = [torch.from_numpy(x_i) for x_i in x] + assert x[0].dim() == 2 + if pad_dim < 0: + pad_dim = 2 + pad_dim + + if pad_dim != 0: + x = [x_i.transpose(pad_dim, 0) for x_i in x] + + x_lengths = [] + for x_i in x: + x_lengths.append(x_i.size(0)) + + x_lengths = torch.as_tensor(x_lengths) + x = pad_sequence(x, batch_first=True, padding_value=pad_value) + if pad_dim != 0: + x = x.transpose(1, pad_dim + 1) + + return x, x_lengths + + +def collate_seqs_nd(x, pad_value=0, pad_dim=0): """Combines a list/tuple of N-d tensors with different sizes in one of the dimensions into a single (N+1)-d tensor. Combines performing padding on the dimension which is not constant. @@ -68,25 +87,113 @@ def collate_seq_nd(x, pad_value=0, pad_dim=-1): pad_dim: padding dimension. Returns: - (N+1)-D combined tensor. + (N+1)-d combined tensor. 1D long tensor containing the dimensions lengths. 
""" + if not isinstance(x[0], torch.Tensor): + x = [torch.from_numpy(x_i) for x_i in x] + if x[0].dim() == 1: - return collate_seq_1d(x) - - if x[0].dim() == 2: - return collate_seq_2d(x) - - # here the general case - max_length = max([x_i.size(pad_dim) for x_i in x]) - y_trans_size = list(x[0].transpose(0, pad_dim).size()) - y = pad_value * torch.ones(*y_trans_size, dtype=x[0].dtype, device=x[0].device) - y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device) - for i, x_i in enumerate(x): - y[i, : x_i.size(pad_dim)] = x_i.transpose(0, pad_dim) - y_lengths[i] = x_i.size(pad_dim) - - if pad_dim > 0: - pad_dim = pad_dim + 1 - y = y.transpose(1, pad_dim).contiguous() - return y, y_lengths + return collate_seqs_1d(x, pad_value=pad_value) + + if pad_dim < 0: + pad_dim = x[0].dim() + pad_dim + + if pad_dim != 0: + x = [x_i.transpose(pad_dim, 0) for x_i in x] + + x_lengths = [] + for x_i in x: + x_lengths.append(x_i.size(0)) + + x_lengths = torch.as_tensor(x_lengths) + x = pad_sequence(x, batch_first=True, padding_value=pad_value) + if pad_dim != 0: + x = x.transpose(1, pad_dim + 1) + + return x, x_lengths + + +# def collate_seq_1d(x, pad_value=0): +# """Combines a list/tuple of vectors with different lengths +# into a single tensor. + +# Args: +# x: input lits/tuple of vectors. + +# Returns: +# 2D tensor with shape (num_vectors, max_vector_length). +# 1D long tensor containing the vector lengths. +# """ +# max_length = max([x_i.size(0) for x_i in x]) +# y = pad_value * torch.ones(len(x), max_length, dtype=x[0].dtype, device=x[0].device) +# y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device) +# for i, x_i in enumerate(x): +# y[i, : x_i.size(0)] = x_i +# y_lengths[i] = x_i.size(0) + +# return y, y_lengths + + +# def collate_seq_2d(x, pad_value=0, pad_dim=-1): +# """Combines a list/tuple of matrices with different sizes in one of +# the dimensions into a single 3d tensor. +# Combines performing padding on the dimension which is not constant. + +# Args: +# x: input lits/tuple of matrices. +# pad_dim: padding dimension. + +# Returns: +# 3D tensor with shape (num_vectors, max_length, feat_dim) or (num_vectors, feat_dim, length). +# 1D long tensor containing the dimensions lengths. +# """ +# max_length = max([x_i.size(pad_dim) for x_i in x]) +# y_size = list(x[0].size()) +# y_size[pad_dim] = max_length +# y = pad_value * torch.ones(*y_size, dtype=x[0].dtype, device=x[0].device) +# y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device) +# if pad_dim == -1 or pad_dim == 1: +# for i, x_i in enumerate(x): +# y[i, :, : x_i.size(pad_dim)] = x_i +# y_lengths[i] = x_i.size(pad_dim) +# else: +# for i, x_i in enumerate(x): +# y[i, : x_i.size(pad_dim)] = x_i +# y_lengths[i] = x_i.size(pad_dim) + +# return y, y_lengths + + +# def collate_seq_nd(x, pad_value=0, pad_dim=-1): +# """Combines a list/tuple of N-d tensors with different sizes in one of +# the dimensions into a single (N+1)-d tensor. +# Combines performing padding on the dimension which is not constant. + +# Args: +# x: input lits/tuple of matrices. +# pad_dim: padding dimension. + +# Returns: +# (N+1)-D combined tensor. +# 1D long tensor containing the dimensions lengths. 
+# """ +# if x[0].dim() == 1: +# return collate_seq_1d(x) + +# if x[0].dim() == 2: +# return collate_seq_2d(x) + +# # here the general case +# max_length = max([x_i.size(pad_dim) for x_i in x]) +# y_trans_size = list(x[0].transpose(0, pad_dim).size()) +# y = pad_value * torch.ones(*y_trans_size, dtype=x[0].dtype, device=x[0].device) +# y_lengths = torch.empty(len(x), dtype=torch.long, device=x[0].device) +# for i, x_i in enumerate(x): +# y[i, : x_i.size(pad_dim)] = x_i.transpose(0, pad_dim) +# y_lengths[i] = x_i.size(pad_dim) + +# if pad_dim > 0: +# pad_dim = pad_dim + 1 +# y = y.transpose(1, pad_dim).contiguous() +# return y, y_lengths diff --git a/hyperion/torch/utils/vad_utils.py b/hyperion/torch/utils/vad_utils.py index a47b92ef..4dc11ff7 100644 --- a/hyperion/torch/utils/vad_utils.py +++ b/hyperion/torch/utils/vad_utils.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from .collation import collate_seq_nd +from .collation import collate_seqs_nd def remove_silence(x, vad, x_lengths=None, time_dim=1, tol=0): @@ -52,7 +52,7 @@ def remove_silence(x, vad, x_lengths=None, time_dim=1, tol=0): for i in range(x.size(0)): y.append(x[i, vad[i]]) - y, y_lengths = collate_seq_nd(y, pad_dim=0) + y, y_lengths = collate_seqs_nd(y, pad_dim=0) if trans: y = y.transpose(1, time_dim).contigous() From 5fb459b46dc8194b9247be0f9cbe21bbc64da9e9 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Wed, 15 May 2024 17:04:59 -0400 Subject: [PATCH 144/154] rnnt decoder renamed --- README.md | 5 ++- ...mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml | 5 +-- hyperion/bin/train_wav2rnn_transducer.py | 4 +- .../transducer/conformer_v1_rnn_transducer.py | 8 ++-- .../models/transducer/rnn_rnn_transducer.py | 14 +++--- .../torch/models/transducer/rnn_transducer.py | 44 ++++++++++--------- 6 files changed, 40 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 71a0fbd3..6f7a8490 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,14 @@ The full API is described in the documentation page [https://hyperion-ml.readthe ``` conda create --name ${your_env} python=3.11 conda activate ${your_env} -conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia +# We used PyTorch 2.0.1, other versions may work too +conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.8 -c pytorch -c nvidia # If using k2 for ASR wget https://huggingface.co/csukuangfj/k2/resolve/main/ubuntu-cuda/k2-1.24.4.dev20240223+cuda11.8.torch2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl pip install k2-1.24.4.dev20240223+cuda11.8.torch2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl ``` -For systems with cuda 10.2 driver: +For older systems with cuda 10.2 driver: ``` conda create --name ${your_env} python=3.10 conda activate ${your_env} diff --git a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml index baea17ab..b0def8fc 100644 --- a/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml +++ b/egs/librispeech/v1/conf/train_fbank80_mn_conf16x144_rnnt_k2_pruned.v1.0p.s1.yaml @@ -44,7 +44,7 @@ model: num_blocks: 16 d_ff: 576 in_layer_type: conv2d-sub - decoder: + rnnt_decoder: rnnt_loss: k2_pruned simple_loss_scale: 0.2 predictor: @@ -71,8 +71,7 @@ trainer: warmup_steps: 25000 update_lr_on_opt_step: true # grad_clip: 100 - # grad_clip: 20 - grad_clip: 1 + grad_clip: 20 use_amp: true log_interval: 1000 epochs: 120 diff 
--git a/hyperion/bin/train_wav2rnn_transducer.py index 14fc8db3..6dc314ad 100755 --- a/hyperion/bin/train_wav2rnn_transducer.py +++ b/hyperion/bin/train_wav2rnn_transducer.py @@ -117,8 +117,8 @@ def init_model(rank, model_class, tokenizers, **kwargs): logging.info("model network args={}".format(model_args)) tokenizer = list(tokenizers.items())[0][1] - model_args["transducer"]["decoder"]["blank_id"] = tokenizer.blank_id - model_args["transducer"]["decoder"]["vocab_size"] = tokenizer.vocab_size + model_args["transducer"]["rnnt_decoder"]["blank_id"] = tokenizer.blank_id + model_args["transducer"]["rnnt_decoder"]["vocab_size"] = tokenizer.vocab_size model = model_class(**model_args) if rank == 0: logging.info("model={}".format(model)) diff --git a/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py b/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py index 89173eff..cf8bb91f 100644 --- a/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py +++ b/hyperion/torch/models/transducer/conformer_v1_rnn_transducer.py @@ -28,13 +28,13 @@ class ConformerV1RNNTransducer(RNNTransducer): """ - def __init__(self, encoder, decoder): + def __init__(self, encoder, rnnt_decoder): if isinstance(encoder, dict): encoder = ConformerEncoderV1(**encoder) else: assert isinstance(encoder, ConformerEncoderV1) - super().__init__(encoder, decoder) + super().__init__(encoder, rnnt_decoder) @staticmethod def filter_args(**kwargs): @@ -57,11 +57,11 @@ def add_class_args(parser, prefix=None, skip=set()): def change_config( self, encoder, - decoder, + rnnt_decoder, ): logging.info("changing transducer encoder config") self.encoder.change_config(**encoder) - super().chage_config(**decoder) + super().change_config(**rnnt_decoder) @staticmethod def filter_finetune_args(**kwargs): diff --git a/hyperion/torch/models/transducer/rnn_rnn_transducer.py b/hyperion/torch/models/transducer/rnn_rnn_transducer.py index 02d0c482..46438dbc 100644 --- a/hyperion/torch/models/transducer/rnn_rnn_transducer.py +++ b/hyperion/torch/models/transducer/rnn_rnn_transducer.py @@ -28,13 +28,13 @@ class RNNRNNTransducer(RNNTransducer): """ - def __init__(self, encoder, decoder): + def __init__(self, encoder, rnnt_decoder): if isinstance(encoder, dict): encoder = RNNEncoder(**encoder) else: assert isinstance(encoder, RNNEncoder) - super().__init__(encoder, decoder) + super().__init__(encoder, rnnt_decoder) @staticmethod def filter_args(**kwargs): @@ -53,17 +53,16 @@ def add_class_args(parser, prefix=None, skip=set()): RNNEncoder.add_class_args(parser, prefix="encoder", skip=skip) RNNTransducer.add_class_args(parser) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) def change_config( self, encoder, - decoder, + rnnt_decoder, ): logging.info("changing transducer encoder config") self.encoder.change_config(**encoder) - super().chage_config(**decoder) + super().change_config(**rnnt_decoder) @staticmethod def filter_finetune_args(**kwargs): @@ -82,5 +81,4 @@ def add_finetune_args(parser, prefix=None): RNNTransducer.add_finetune_args(parser) if prefix is not None: - outer_parser.add_argument("--" + prefix, - action=ActionParser(parser=parser)) + outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) diff --git a/hyperion/torch/models/transducer/rnn_transducer.py index
a9fa5830..c951818d 100644 --- a/hyperion/torch/models/transducer/rnn_transducer.py +++ b/hyperion/torch/models/transducer/rnn_transducer.py @@ -37,28 +37,28 @@ class RNNTransducer(TorchModel): Attributes: encoder: Encoder network module - decoder: RNN-T Decoder config. dictionary or module. + rnnt_decoder: RNN-T Decoder config. dictionary or module. """ def __init__( self, encoder: Union[TorchModel, None], - decoder: Union[Dict, RNNTransducerDecoder], - ctc_weight: float = 0.0, + rnnt_decoder: Union[Dict, RNNTransducerDecoder], rnnt_weight: float = 1.0, + ctc_weight: float = 0.0, ): super().__init__() if encoder is not None: assert isinstance(encoder, TorchModel) - if isinstance(decoder, dict): + if isinstance(rnnt_decoder, dict): if encoder is not None: - decoder["in_feats"] = encoder.out_shape()[-1] - decoder = RNNTransducerDecoder(**decoder) + rnnt_decoder["in_feats"] = encoder.out_shape()[-1] + rnnt_decoder = RNNTransducerDecoder(**rnnt_decoder) else: - assert isinstance(decoder, RNNTransducerDecoder) + assert isinstance(rnnt_decoder, RNNTransducerDecoder) self.encoder = encoder - self.decoder = decoder + self.rnnt_decoder = rnnt_decoder def forward( self, @@ -89,7 +89,7 @@ def forward( x, x_lengths = self.encoder(x, x_lengths) assert torch.all(x_lengths > 0) - dec_output = self.decoder(x, x_lengths, y) + dec_output = self.rnnt_decoder(x, x_lengths, y) output = RNNTransducerOutput(*dec_output) return output @@ -125,7 +125,7 @@ def infer( y = [] for i in range(batch_size): x_i = x[i : i + 1, : x_lengths[i]] - y_i = self.decoder.decode( + y_i = self.rnnt_decoder.decode( x_i, method=decoding_method, beam_width=beam_width, @@ -166,11 +166,11 @@ def get_config(self): enc_cfg = self.encoder.get_config() del enc_cfg["class_name"] - dec_cfg = self.decoder.get_config() + dec_cfg = self.rnnt_decoder.get_config() del dec_cfg["class_name"] config = { "encoder": enc_cfg, - "decoder": dec_cfg, + "rnnt_decoder": dec_cfg, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) @@ -179,8 +179,8 @@ def get_config(self): def filter_args(**kwargs): # get arguments for pooling args = {} - decoder_args = RNNTransducerDecoder.filter_args(**kwargs["decoder"]) - args["decoder"] = decoder_args + rnnt_decoder_args = RNNTransducerDecoder.filter_args(**kwargs["rnnt_decoder"]) + args["rnnt_decoder"] = rnnt_decoder_args return args @staticmethod @@ -189,23 +189,25 @@ def add_class_args(parser, prefix=None, skip=set()): outer_parser = parser parser = ArgumentParser(prog="") - RNNTransducerDecoder.add_class_args(parser, prefix="decoder") + RNNTransducerDecoder.add_class_args(parser, prefix="rnnt_decoder") if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) def change_config( self, - decoder: Dict, + rnnt_decoder: Dict, ): - logging.info("changing decoder config") - self.decoder.change_config(**decoder) + logging.info("changing rnnt_decoder config") + self.rnnt_decoder.change_config(**rnnt_decoder) @staticmethod def filter_finetune_args(**kwargs): args = {} - decoder_args = RNNTransducerDecoder.filter_finetune_args(**kwargs["decoder"]) - args["decoder"] = decoder_args + rnnt_decoder_args = RNNTransducerDecoder.filter_finetune_args( + **kwargs["rnnt_decoder"] + ) + args["rnnt_decoder"] = rnnt_decoder_args return args @staticmethod @@ -214,7 +216,7 @@ def add_finetune_args(parser, prefix=None): outer_parser = parser parser = ArgumentParser(prog="") - RNNTransducerDecoder.add_finetune_args(parser, prefix="decoder") + 
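With this rename, everything that used to address the decoder configuration under the `decoder` key now uses `rnnt_decoder`, from the YAML configs down to the CLI argument groups. A minimal sketch of the pattern used in `train_wav2rnn_transducer.py` above; the literal values are stand-ins for `tokenizer.blank_id` and `tokenizer.vocab_size`:

```python
# Illustrative values only; the training script fills these from the tokenizer.
model_args = {"transducer": {"rnnt_decoder": {"rnnt_loss": "k2_pruned"}}}
model_args["transducer"]["rnnt_decoder"]["blank_id"] = 0
model_args["transducer"]["rnnt_decoder"]["vocab_size"] = 500
```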
RNNTransducerDecoder.add_finetune_args(parser, prefix="rnnt_decoder") if prefix is not None: outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser)) From dae863314fd925e12ab4ed876ec5f4d055dbbeb8 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 24 May 2024 11:17:52 -0400 Subject: [PATCH 145/154] some fixes --- .../conf/train_fwseresnet34_dino_v1.2.2.yaml | 2 +- ...config_fbank80_stmn_ecapatdnn512x3.v1.2.sh | 4 +-- ...config_fbank80_stmn_fwseresnet34.v1.2.1.sh | 4 +-- ...config_fbank80_stmn_fwseresnet34.v1.2.2.sh | 30 +++++-------------- .../config_fbank80_stmn_fwseresnet34.v1.2.sh | 4 +-- .../config_fbank80_stmn_lresnet34.v1.2.sh | 4 +-- hyperion/np/feats/feature_windows.py | 2 +- hyperion/torch/data/audio_dataset.py | 3 ++ .../torch/trainers/xvector_adv_trainer.py | 4 +-- .../trainers/xvector_adv_trainer_from_wav.py | 4 +-- hyperion/torch/trainers/xvector_trainer.py | 4 +-- .../xvector_trainer_deep_feat_reg_from_wav.py | 4 +-- .../trainers/xvector_trainer_from_wav.py | 4 +-- hyperion/utils/hyp_dataset.py | 11 +++++++ hyperion/utils/info_table.py | 6 +++- 15 files changed, 47 insertions(+), 43 deletions(-) diff --git a/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml b/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml index 37bada36..a12e05f0 100644 --- a/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml +++ b/egs/voxceleb/ssl.v1/conf/train_fwseresnet34_dino_v1.2.2.yaml @@ -80,7 +80,7 @@ trainer: decay_rate: 0.5 decay_steps: 60000 hold_steps: 15000 - min_lr: 1.0e-05 + min_lr: 1.0e-04 warmup_steps: 15000 update_lr_on_opt_step: true teacher_optim: diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v1.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v1.2.sh index de643f1e..0ecf904d 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v1.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_ecapatdnn512x3.v1.2.sh @@ -43,7 +43,7 @@ nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 1.2 cluster_ft_s1_method=cos_ahc_plda_ahc cluster_ft_s1_cfg=conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml -cluster_ft_s1_name=${cluster_method} +cluster_ft_s1_name=${cluster_ft_s1_method} cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name @@ -62,7 +62,7 @@ nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 1.2 cluster_ft_s2_method=cos_ahc_plda_ahc cluster_ft_s2_cfg=conf/cluster_ecapatdnn512x3_v1.2_ft1_cos_ahc_plda_ahc.yaml -cluster_ft_s2_name=${cluster_method} +cluster_ft_s2_name=${cluster_ft_s2_method} cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh index 102fbaef..c4f5c8c7 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.1.sh @@ -43,7 +43,7 @@ nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 1.2 cluster_ft_s1_method=cos_ahc cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml -cluster_ft_s1_name=${cluster_method_ft_s1_method} +cluster_ft_s1_name=${cluster_ft_s1_method} cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name # finetuning stage 2.1 @@ -61,6 +61,6 @@ 
nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 2.2 cluster_ft_s2_method=cos_ahc cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml -cluster_ft_s2_name=${cluster_method_ft_s2_method} +cluster_ft_s2_name=${cluster_ft_s2_method} cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh index 7aebfd69..e3ba0c3a 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh @@ -17,21 +17,7 @@ nnet_name=${feat_type}_fwseresnet34_dino.v1.2.2 nnet_s1_base_cfg=conf/train_fwseresnet34_dino_v1.2.2.yaml nnet_s1_name=$nnet_name.s1 nnet_s1_dir=exp/xvector_nnets/$nnet_s1_name -nnet_s1=$nnet_s1_dir/teacher_model_ep0034.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0038.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0043.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0044.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0046.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0049.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0054.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0058.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0064.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0067.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0071.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0077.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0083.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0088.pth -nnet_s1=$nnet_s1_dir/teacher_model_ep0094.pth +nnet_s1=$nnet_s1_dir/teacher_model_ep0100.pth # clustering of dino embeddings cluster_method=cos_ahc_plda_ahc @@ -43,13 +29,13 @@ cluster_dir=exp/clustering/$nnet_s1_name/$cluster_name plda_cfg=conf/plda.yaml # finetuning stage 1.1 -nnet_ft_s1_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s1_1_base_cfg=conf/train_fwseresnet34_xvec_stage1.1_v1.2.2.yaml nnet_ft_s1_1_name=$nnet_name.s1.ft.s1.1 nnet_ft_s1_1_dir=exp/xvector_nnets/$nnet_ft_s1_1_name -nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0030.pth +nnet_ft_s1_1=$nnet_ft_s1_1_dir/model_ep0025.pth # finetuning stage 1.2 -nnet_ft_s1_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s1_2_base_cfg=conf/train_fwseresnet34_xvec_stage1.2_v1.2.2.yaml nnet_ft_s1_2_name=$nnet_name.s1.ft.s1.2 nnet_ft_s1_2_dir=exp/xvector_nnets/$nnet_ft_s1_2_name nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth @@ -57,17 +43,17 @@ nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 1.2 cluster_ft_s1_method=cos_ahc cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml -cluster_ft_s1_name=${cluster_method_ft_s1_method} +cluster_ft_s1_name=${cluster_ft_s1_method} cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name # finetuning stage 2.1 -nnet_ft_s2_1_base_cfg=conf/train_lresnet34_xvec_stage1.1_v1.2.yaml +nnet_ft_s2_1_base_cfg=conf/train_fwseresnet34_xvec_stage1.1_v1.2.2.yaml nnet_ft_s2_1_name=$nnet_name.s1.ft.s2.1 nnet_ft_s2_1_dir=exp/xvector_nnets/$nnet_ft_s2_1_name nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0030.pth # finetuning stage 2.2 -nnet_ft_s2_2_base_cfg=conf/train_lresnet34_xvec_stage1.2_v1.2.yaml +nnet_ft_s2_2_base_cfg=conf/train_fwseresnet34_xvec_stage1.2_v1.2.2.yaml nnet_ft_s2_2_name=$nnet_name.s1.ft.s2.2 nnet_ft_s2_2_dir=exp/xvector_nnets/$nnet_ft_s2_2_name nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth @@ -75,6 +61,6 @@ nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth # clustering of ft 
embeddings from stage 2.2 cluster_ft_s2_method=cos_ahc cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc.yaml -cluster_ft_s2_name=${cluster_method_ft_s2_method} +cluster_ft_s2_name=${cluster_ft_s2_method} cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.sh index b3a6e963..9fecaa96 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.sh @@ -43,7 +43,7 @@ nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 1.2 cluster_ft_s1_method=cos_ahc_plda_ahc cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml -cluster_ft_s1_name=${cluster_method} +cluster_ft_s1_name=${cluster_ft_s1_method} cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name # finetuning stage 2.1 @@ -61,6 +61,6 @@ nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 2.2 cluster_ft_s2_method=cos_ahc_plda_ahc cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml -cluster_ft_s2_name=${cluster_method} +cluster_ft_s2_name=${cluster_ft_s2_method} cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.sh index 788b3b4b..4d02e22d 100644 --- a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_lresnet34.v1.2.sh @@ -43,7 +43,7 @@ nnet_ft_s1_2=$nnet_ft_s1_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 1.2 cluster_ft_s1_method=cos_ahc_plda_ahc cluster_ft_s1_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml -cluster_ft_s1_name=${cluster_method} +cluster_ft_s1_name=${cluster_ft_s1_method} cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name # finetuning stage 2.1 @@ -61,5 +61,5 @@ nnet_ft_s2_2=$nnet_ft_s2_2_dir/model_ep0070.pth # clustering of ft embeddings from stage 2.2 cluster_ft_s2_method=cos_ahc_plda_ahc cluster_ft_s2_cfg=conf/cluster_lresnet34_v1.2_ft1_cos_ahc_plda_ahc.yaml -cluster_ft_s2_name=${cluster_method} +cluster_ft_s2_name=${cluster_ft_s2_method} cluster_ft_s2_dir=exp/clustering/$nnet_ft_s2_2_name/$cluster_ft_s2_name diff --git a/hyperion/np/feats/feature_windows.py b/hyperion/np/feats/feature_windows.py index 000831ac..ef8fe7b4 100644 --- a/hyperion/np/feats/feature_windows.py +++ b/hyperion/np/feats/feature_windows.py @@ -6,7 +6,7 @@ import logging import numpy as np -from scipy.signal import blackman, hamming, hann +from scipy.signal.windows import blackman, hamming, hann from ...hyp_defs import float_cpu diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py index d555a118..9d8bebc6 100644 --- a/hyperion/torch/data/audio_dataset.py +++ b/hyperion/torch/data/audio_dataset.py @@ -183,6 +183,9 @@ def _load_class_infos(self, class_names, class_files, is_val): assert ( name in self.seg_set ), f"class_name {name} not present in the segment set" + self.seg_set.convert_col_to_str( + name + ) # make sure that class ids are strings if self.rank == 0: logging.info("loading class-info file %s", file) table = ClassInfo.load(file) diff --git a/hyperion/torch/trainers/xvector_adv_trainer.py
b/hyperion/torch/trainers/xvector_adv_trainer.py index 9d5a8bae..12ff506a 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer.py +++ b/hyperion/torch/trainers/xvector_adv_trainer.py @@ -154,7 +154,7 @@ def train_epoch(self, data_loader): batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -198,7 +198,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_metrics["loss"] = loss.item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py index f63c532b..01676300 100644 --- a/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_adv_trainer_from_wav.py @@ -159,7 +159,7 @@ def train_epoch(self, data_loader): batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -203,7 +203,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_metrics["loss"] = loss.item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/xvector_trainer.py b/hyperion/torch/trainers/xvector_trainer.py index 15c5bd42..e8a91bb0 100644 --- a/hyperion/torch/trainers/xvector_trainer.py +++ b/hyperion/torch/trainers/xvector_trainer.py @@ -137,7 +137,7 @@ def train_epoch(self, data_loader): batch_metrics["loss"] = loss_acc * self.grad_acc_steps for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -185,7 +185,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_metrics["loss"] = loss_acc for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py index 98c74af3..3d1a8ccf 100644 --- a/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_deep_feat_reg_from_wav.py @@ -193,7 +193,7 @@ def train_epoch(self, data_loader): self.save_checkpoint(partial=True) for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -235,7 +235,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_metrics["loss"] = loss.item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/torch/trainers/xvector_trainer_from_wav.py 
b/hyperion/torch/trainers/xvector_trainer_from_wav.py index ada74bb6..2f1fd18a 100644 --- a/hyperion/torch/trainers/xvector_trainer_from_wav.py +++ b/hyperion/torch/trainers/xvector_trainer_from_wav.py @@ -133,7 +133,7 @@ def train_epoch(self, data_loader): batch_metrics["loss"] = loss.item() * self.grad_acc_steps for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) logs = metric_acc.metrics @@ -177,7 +177,7 @@ def validation_epoch(self, data_loader, swa_update_bn=False): batch_metrics["loss"] = loss.mean().item() for k, metric in self.metrics.items(): - batch_metrics[k] = metric(output, target) + batch_metrics[k] = metric(output.logits, target) metric_acc.update(batch_metrics, batch_size) diff --git a/hyperion/utils/hyp_dataset.py b/hyperion/utils/hyp_dataset.py index ba137b65..dda4231e 100644 --- a/hyperion/utils/hyp_dataset.py +++ b/hyperion/utils/hyp_dataset.py @@ -95,6 +95,16 @@ def __init__( self.sparse_trials = sparse_trials self.table_sep = table_sep self._files_to_delete = [] + self.fix_segments_dtypes() + + def fix_segments_dtypes(self): + if self._segments is not None: + self._fix_segments_dtypes(self._segments) + + def _fix_segments_dtypes(self, segments): + # ids in class_infos should be strings in segment set columns + for k in self.classes_keys(): + segments.convert_col_to_str(k) def get_dataset_files(self): file_paths = [] @@ -149,6 +159,7 @@ def segments(self, keep_loaded: bool = True): if self._segments is None: assert self._segments_path is not None segments = SegmentSet.load(self._segments_path, sep=self.table_sep) + self._fix_segments_dtypes(segments) if keep_loaded: self._segments = segments return segments diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index a813a467..ad8d3d68 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -33,7 +33,11 @@ def __init__(self, df): def fix_dtypes(self): if infer_dtype(self.df.id) != "string": - self.df[:, "id"] = self.df["id"].apply(str) + self.df.loc[:, "id"] = self.df["id"].apply(str) + + def convert_col_to_str(self, column): + if infer_dtype(self.df[column]) != "string": + self.df.loc[:, column] = self.df[column].apply(str) def copy(self): """Makes a copy of the object.""" From b62824af7e3eac17883fae7cddc3f2b8c9d3a815 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Fri, 24 May 2024 11:36:48 -0400 Subject: [PATCH 146/154] some fixes --- egs/voxceleb/ssl.v1/README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/egs/voxceleb/ssl.v1/README.md b/egs/voxceleb/ssl.v1/README.md index 03b2e1c4..0d12c86b 100644 --- a/egs/voxceleb/ssl.v1/README.md +++ b/egs/voxceleb/ssl.v1/README.md @@ -137,6 +137,12 @@ run_xxx_xxxx.sh --config-file global_conf/other_config.sh | | | | | FT-2 | PLDA | 1.67 | 0.137 | 0.193 | | config_fbank80_stmn_fwseresnet34.v1.2.1.sh | FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 1.49 | 0.101 | 0.161 | | | | | | FT-2 | PLDA | 1.53 | 0.109 | 0.168| +| config_fbank80_stmn_fwseresnet34.v1.2.2.sh | FW-SE ResNet34 / 0.1 x Cos Reg. 
| Cos+AHC+PLDA+AHC | Cos+AHC | DINO | Cosine | 3.96 | 0.232 | 0.358 | +| | | | | | PLDA | 4.04 | 0.185 | 0.291 | +| | | | | FT-1 | Cosine | 2.03 | 0.125 | 0.203 | +| | | | | FT-1 | PLDA | 2.44 | 0.149 | 0.231 | +| | | | | FT-2 | Cosine | +| | | | | FT-2 | PLDA | ### VoxCeleb 1 Entire-Clean trial list @@ -161,6 +167,12 @@ run_xxx_xxxx.sh --config-file global_conf/other_config.sh | | | | | FT-2 | PLDA | 1.77 | 0.121 | 0.208 | | config_fbank80_stmn_fwseresnet34.v1.2.1.sh | FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 1.83 | 0.106 | 0.170 | | | | | | FT-2 | PLDA | 1.68 | 0.109 | 0.188 | +| config_fbank80_stmn_fwseresnet34.v1.2.2.sh | FW-SE ResNet34 / 0.1 x Cos Reg. | Cos+AHC+PLDA+AHC | Cos+AHC | DINO | Cosine | 4.31 | 0.250 | 0.387 | +| | | | | | PLDA | 4.32 | 0.166 | 0.263 | +| | | | | FT-1 | Cosine | 2.61 | 0.138 | 0.210 | +| | | | | FT-1 | PLDA | 2.72 | 0.1366 | 0.216 | +| | | | | FT-2 | Cosine | +| | | | | FT-2 | PLDA | ### VoxCeleb 1 Hard-Clean trial list @@ -185,4 +197,10 @@ run_xxx_xxxx.sh --config-file global_conf/other_config.sh | | | | | FT-2 | PLDA | 3.51 | 0.219 | 0.351 | | config_fbank80_stmn_fwseresnet34.v1.2.1.sh | FW-SE ResNet34 | Cos+AHC+PLDA+AHC | Cos+AHC | FT-2 | Cosine | 3.11 | 0.172 | 0.270 | | | | | | FT-2 | PLDA | 3.15 | 0.186 | 0.294 | +| config_fbank80_stmn_fwseresnet34.v1.2.2.sh | FW-SE ResNet34 / 0.1 x Cos Reg. | Cos+AHC+PLDA+AHC | Cos+AHC | DINO | Cosine | 7.41 | 0.377 | 0.526 | +| | | | | | PLDA | 5.95 | 0.269 | 0.438 | +| | | | | FT-1 | Cosine | 4.38 | 0.222 | 0.337 | +| | | | | FT-1 | PLDA | 4.68 | 0.237 | 0.375 | +| | | | | FT-2 | Cosine | +| | | | | FT-2 | PLDA | From d7540bc0552b001b79c998f0121fdbbe9d301588 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 27 May 2024 13:22:01 -0400 Subject: [PATCH 147/154] updated readme --- egs/voxceleb/ssl.v1/README.md | 15 ++++++++------- .../config_fbank80_stmn_fwseresnet34.v1.2.2.sh | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/egs/voxceleb/ssl.v1/README.md b/egs/voxceleb/ssl.v1/README.md index 0d12c86b..73b1d039 100644 --- a/egs/voxceleb/ssl.v1/README.md +++ b/egs/voxceleb/ssl.v1/README.md @@ -141,8 +141,9 @@ run_xxx_xxxx.sh --config-file global_conf/other_config.sh | | | | | | PLDA | 4.04 | 0.185 | 0.291 | | | | | | FT-1 | Cosine | 2.03 | 0.125 | 0.203 | | | | | | FT-1 | PLDA | 2.44 | 0.149 | 0.231 | -| | | | | FT-2 | Cosine | -| | | | | FT-2 | PLDA | +| | | | | FT-2 | Cosine | 1.88 | 0.115 | 0.198 | +| | | | | FT-2 | PLDA | 2.57 | 0.147 | 0.234 | + ### VoxCeleb 1 Entire-Clean trial list @@ -171,8 +172,9 @@ run_xxx_xxxx.sh --config-file global_conf/other_config.sh | | | | | | PLDA | 4.32 | 0.166 | 0.263 | | | | | | FT-1 | Cosine | 2.61 | 0.138 | 0.210 | | | | | | FT-1 | PLDA | 2.72 | 0.1366 | 0.216 | -| | | | | FT-2 | Cosine | -| | | | | FT-2 | PLDA | +| | | | | FT-2 | Cosine | 2.41 | 0.121 | 0.193 | +| | | | | FT-2 | PLDA | 2.82 | 0.140 | 0.219 | + ### VoxCeleb 1 Hard-Clean trial list @@ -201,6 +203,5 @@ run_xxx_xxxx.sh --config-file global_conf/other_config.sh | | | | | | PLDA | 5.95 | 0.269 | 0.438 | | | | | | FT-1 | Cosine | 4.38 | 0.222 | 0.337 | | | | | | FT-1 | PLDA | 4.68 | 0.237 | 0.375 | -| | | | | FT-2 | Cosine | -| | | | | FT-2 | PLDA | - +| | | | | FT-2 | Cosine | 4.07 | 0.197 | 0.301 | +| | | | | FT-2 | PLDA | 4.75 | 0.229 | 0.352 | diff --git a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh index e3ba0c3a..11aab111 100644 --- 
a/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh +++ b/egs/voxceleb/ssl.v1/global_conf/config_fbank80_stmn_fwseresnet34.v1.2.2.sh @@ -50,7 +50,7 @@ cluster_ft_s1_dir=exp/clustering/$nnet_ft_s1_2_name/$cluster_ft_s1_name nnet_ft_s2_1_base_cfg=conf/train_fwseresnet34_xvec_stage1.1_v1.2.2.yaml nnet_ft_s2_1_name=$nnet_name.s1.ft.s2.1 nnet_ft_s2_1_dir=exp/xvector_nnets/$nnet_ft_s2_1_name -nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0030.pth +nnet_ft_s2_1=$nnet_ft_s2_1_dir/model_ep0025.pth # finetuning stage 2.2 nnet_ft_s2_2_base_cfg=conf/train_fwseresnet34_xvec_stage1.2_v1.2.2.yaml From 955fb02a7aa5be25bda05a48333c2274332edc59 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 10 Jun 2024 18:03:07 -0400 Subject: [PATCH 148/154] fix kmeans call in mixture models --- hyperion/np/pdfs/mixtures/gmm.py | 13 +++++++------ hyperion/np/pdfs/mixtures/gmm_diag_cov.py | 4 ++-- hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py | 9 +++++---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/hyperion/np/pdfs/mixtures/gmm.py b/hyperion/np/pdfs/mixtures/gmm.py index 7b080dae..934c6749 100644 --- a/hyperion/np/pdfs/mixtures/gmm.py +++ b/hyperion/np/pdfs/mixtures/gmm.py @@ -2,6 +2,7 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import h5py import numpy as np import scipy.linalg as la @@ -150,8 +151,8 @@ def _initialize_kmeans(self, num_comp, x): self.Lambda[0] = invert_pdmat(S, return_inv=True)[-1] return - kmeans = KMeans(num_clusters=num_comp) - loss, cluster_index = kmeans.fit(x, epochs=100) + kmeans = KMeans(num_clusters=num_comp, epochs=100) + loss, cluster_index = kmeans.fit(x) self.mu = kmeans.mu self.pi = np.zeros((self.num_comp,), dtype=float_cpu()) @@ -253,7 +254,7 @@ def split_comp(self, K=2): """ num_comp = self.num_comp * K pi = np.repeat(self.pi, K) / K - Lambda = np.repeat(self.Lambda, K, axis=0) * (K ** 2) + Lambda = np.repeat(self.Lambda, K, axis=0) * (K**2) mu = np.repeat(self.mu, K, axis=0) for g in range(self.num_comp): @@ -400,7 +401,7 @@ def load_from_kaldi(cls, file_path): x_dim = len(fields) eta1 = np.zeros((num_comp, x_dim), dtype=float_cpu()) eta2 = np.zeros( - (num_comp, int((x_dim ** 2 + 3 * x_dim) / 2)), + (num_comp, int((x_dim**2 + 3 * x_dim) / 2)), dtype=float_cpu(), ) @@ -436,7 +437,7 @@ def _validate_Lambda(self): def _validate_eta(self): assert self.eta.shape[0] == self.num_comp - assert self.eta.shape[1] == (self.x_dim ** 2 + 3 * self.x_dim) / 2 + assert self.eta.shape[1] == (self.x_dim**2 + 3 * self.x_dim) / 2 def validate(self): """Validates the parameters of the distribution.""" @@ -454,7 +455,7 @@ def validate(self): def compute_eta(mu, Lambda): """Computes nat param. 
from mean and precision.""" x_dim = mu.shape[-1] - eta_dim = int((x_dim ** 2 + 3 * x_dim) / 2) + eta_dim = int((x_dim**2 + 3 * x_dim) / 2) eta = np.zeros((mu.shape[0], eta_dim), dtype=float_cpu()) for k in range(mu.shape[0]): eta[k] = Normal.compute_eta(mu[k], Lambda[k]) diff --git a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py index ecc7bad7..a5135190 100644 --- a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py +++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py @@ -121,8 +121,8 @@ def _initialize_kmeans(self, num_comp, x): self.Lambda = 1 / np.std(x, axis=0, keepdims=True) ** 2 return - kmeans = KMeans(num_clusters=num_comp) - loss, cluster_index = kmeans.fit(x, epochs=100) + kmeans = KMeans(num_clusters=num_comp, epochs=100) + loss, cluster_index = kmeans.fit(x) self.mu = kmeans.mu self.pi = np.zeros((self.num_comp,), dtype=float_cpu()) diff --git a/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py index 6ef7c891..d696bbac 100644 --- a/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py +++ b/hyperion/np/pdfs/mixtures/gmm_tied_diag_cov.py @@ -2,6 +2,7 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ + import h5py import numpy as np from scipy.special import erf @@ -83,8 +84,8 @@ def _initialize_kmeans(self, num_comp, x): self.Lambda = 1 / np.std(x, axis=0, keepdims=True) ** 2 return - kmeans = KMeans(num_clusters=num_comp) - loss, cluster_index = kmeans.fit(x, epochs=100) + kmeans = KMeans(num_clusters=num_comp, epochs=100) + loss, cluster_index = kmeans.fit(x) self.mu = kmeans.mu self.pi = np.zeros((self.num_comp,), dtype=float_cpu()) @@ -93,7 +94,7 @@ def _initialize_kmeans(self, num_comp, x): r = cluster_index == k self.pi[k] = np.sum(r) / x.shape[0] delta = x[r] - self.mu[k] - C += np.sum(delta ** 2, axis=0) + C += np.sum(delta**2, axis=0) self.Lambda = x.shape[0] / C @@ -111,7 +112,7 @@ def Mstep(self, N, u_x): self.mu = F / N[:, None] if self.update_Lambda: - S = S / N[:, None] - self.mu ** 2 + S = S / N[:, None] - self.mu**2 S_floor = self.var_floor * np.mean(S[N > self.min_N], axis=0) S = np.maximum(S, S_floor) Spool = np.sum(N[:, None] * S, axis=0) / np.sum(N) From b8fe5cb79013ac2022450d314f7c9848bae40f9a Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Mon, 10 Jun 2024 18:22:26 -0400 Subject: [PATCH 149/154] np.bool -> bool --- ...vec_cosine_scoring_from_transfer_adv_test_wav.py | 6 ++---- .../bin/generate_adv_attacks_xvector_classif.py | 13 +++---------- hyperion/bin/generate_adv_attacks_xvector_verif.py | 2 +- hyperion/np/classifiers/greedy_fusion.py | 2 +- hyperion/np/diarization/diar_ahc_plda.py | 4 ++-- hyperion/np/pdfs/jfa/jfa_total.py | 2 +- hyperion/utils/info_table.py | 2 +- hyperion/utils/rttm.py | 2 +- hyperion/utils/vad_utils.py | 4 ++-- 9 files changed, 14 insertions(+), 23 deletions(-) diff --git a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py index a6f8efa4..5cd4b864 100755 --- a/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py +++ b/hyperion/bin/eval_xvec_cosine_scoring_from_transfer_adv_test_wav.py @@ -243,7 +243,7 @@ def eval_cosine_scoring( vad = v_reader.read([key.seg_set[j]])[0] tot_frames = len(vad) speech_frames = np.sum(vad) - vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( + vad = torch.as_tensor(vad.astype(bool, copy=False), dtype=torch.bool).to( device ) 
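This patch tracks NumPy's deprecation cycle: the `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24, where accessing it raises an `AttributeError`, so masks are now built and cast with the builtin `bool` instead. A minimal sketch of the replacement pattern:

```python
import numpy as np
import torch

vad = np.array([1, 0, 1, 1], dtype=bool)  # np.bool would fail on NumPy >= 1.24
vad_t = torch.as_tensor(vad.astype(bool, copy=False), dtype=torch.bool)
```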
model.vad_t = vad @@ -361,9 +361,7 @@ def main(): parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) diff --git a/hyperion/bin/generate_adv_attacks_xvector_classif.py b/hyperion/bin/generate_adv_attacks_xvector_classif.py index 4d0e762a..6f36e3d3 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_classif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_classif.py @@ -208,18 +208,13 @@ def generate_attacks( vad = v_reader.read([key])[0] tot_frames = len(vad) speech_frames = np.sum(vad) - vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( + vad = torch.as_tensor(vad.astype(bool, copy=False), dtype=torch.bool).to( device ) model.vad = vad logging.info( "utt %s detected %d/%d (%.2f %%) speech frames" - % ( - key, - speech_frames, - tot_frames, - speech_frames / tot_frames * 100, - ) + % (key, speech_frames, tot_frames, speech_frames / tot_frames * 100,) ) t2 = time.time() @@ -329,9 +324,7 @@ def main(): parser.add_argument("--vad", dest="vad_spec", default=None) parser.add_argument( - "--vad-path-prefix", - default=None, - help=("scp file_path prefix for vad"), + "--vad-path-prefix", default=None, help=("scp file_path prefix for vad"), ) parser.add_argument("--model-path", required=True) diff --git a/hyperion/bin/generate_adv_attacks_xvector_verif.py b/hyperion/bin/generate_adv_attacks_xvector_verif.py index f858ea22..ae78ea5b 100755 --- a/hyperion/bin/generate_adv_attacks_xvector_verif.py +++ b/hyperion/bin/generate_adv_attacks_xvector_verif.py @@ -217,7 +217,7 @@ def generate_attacks( vad = v_reader.read([key.seg_set[j]])[0] tot_frames = len(vad) speech_frames = np.sum(vad) - vad = torch.as_tensor(vad.astype(np.bool, copy=False), dtype=torch.bool).to( + vad = torch.as_tensor(vad.astype(bool, copy=False), dtype=torch.bool).to( device ) model.vad_t = vad diff --git a/hyperion/np/classifiers/greedy_fusion.py b/hyperion/np/classifiers/greedy_fusion.py index f03a05a0..646af8d3 100644 --- a/hyperion/np/classifiers/greedy_fusion.py +++ b/hyperion/np/classifiers/greedy_fusion.py @@ -226,7 +226,7 @@ def fit(self, x, class_ids, sample_weights=None): num_cands = len(cand_systems) cand_min_dcf = np.zeros((num_cands,), dtype=float_cpu()) cand_act_dcf = np.zeros((num_cands,), dtype=float_cpu()) - all_pos = np.zeros((num_cands,), dtype=np.bool) + all_pos = np.zeros((num_cands,), dtype=bool) cand_weights = [] for j in range(num_cands): system_idx_ij = np.concatenate( diff --git a/hyperion/np/diarization/diar_ahc_plda.py b/hyperion/np/diarization/diar_ahc_plda.py index 4bfbc06b..7bffa633 100644 --- a/hyperion/np/diarization/diar_ahc_plda.py +++ b/hyperion/np/diarization/diar_ahc_plda.py @@ -66,7 +66,7 @@ def _plot_score_hist(scores, output_file, thr=None, gmm=None): output_dir = Path(output_file).parent output_dir.mkdir(parents=True, exist_ok=True) - mask = np.triu(np.ones(scores.shape, dtype=np.bool), 1) + mask = np.triu(np.ones(scores.shape, dtype=bool), 1) scores_r = scores[mask].ravel() _, bins, _ = plt.hist( @@ -96,7 +96,7 @@ def _plot_score_hist(scores, output_file, thr=None, gmm=None): @staticmethod def _unsup_gmm_calibration(scores): """Performs unsupervised calibration on the scores by training a GMM.""" - mask = np.triu(np.ones(scores.shape, dtype=np.bool), 1) + mask = np.triu(np.ones(scores.shape, dtype=bool), 1) scores_r = 
scores[mask].ravel()[:, None] # N x 1 gmm_1c = GMM(num_comp=1) gmm_1c.fit(scores_r, epochs=1) diff --git a/hyperion/np/pdfs/jfa/jfa_total.py b/hyperion/np/pdfs/jfa/jfa_total.py index 6e2b79e3..97450e0e 100644 --- a/hyperion/np/pdfs/jfa/jfa_total.py +++ b/hyperion/np/pdfs/jfa/jfa_total.py @@ -281,7 +281,7 @@ def TT(self): def _upptr(self): """Upper triangular mask.""" if self.__upptr is None: - self.__upptr = np.triu(np.ones(self.y_dim, dtype=np.bool)) + self.__upptr = np.triu(np.ones(self.y_dim, dtype=bool)) return self.__upptr @staticmethod diff --git a/hyperion/utils/info_table.py b/hyperion/utils/info_table.py index ad8d3d68..70ec49a0 100644 --- a/hyperion/utils/info_table.py +++ b/hyperion/utils/info_table.py @@ -347,7 +347,7 @@ def get_loc(self, keys): if isinstance(loc, int): return loc - if isinstance(loc, np.ndarray) and loc.dtype == np.bool: + if isinstance(loc, np.ndarray) and loc.dtype == bool: return np.nonzero(loc)[0] return list(range(loc.start, loc.stop, loc.step)) diff --git a/hyperion/utils/rttm.py b/hyperion/utils/rttm.py index c691fc17..db7c0fae 100644 --- a/hyperion/utils/rttm.py +++ b/hyperion/utils/rttm.py @@ -636,7 +636,7 @@ def get_bin_sample_mask_for_spk( tend[tend > max_samples] = max_samples - vad = np.zeros((max_samples,), dtype=np.bool) + vad = np.zeros((max_samples,), dtype=bool) for i, j in zip(tbeg, tend): if j > i: vad[i:j] = True diff --git a/hyperion/utils/vad_utils.py b/hyperion/utils/vad_utils.py index 2d68bc5c..4f3f980e 100644 --- a/hyperion/utils/vad_utils.py +++ b/hyperion/utils/vad_utils.py @@ -135,7 +135,7 @@ def vad_timestamps_to_bin( if max_frames is not None and num_frames < max_frames: num_frames = max_frames - vad = np.zeros((num_frames,), dtype=np.bool) + vad = np.zeros((num_frames,), dtype=bool) frame_start = np.ceil( (in_timestamps[:, 0] - (pad + frame_center)) / frame_shift ).astype(dtype=np.int) @@ -242,7 +242,7 @@ def intersect_segment_timestamps_with_vad(in_timestamps, vad_timestamps): vad_start = vad_timestamps[:, 0] vad_end = vad_timestamps[:, 1] num_vad_segs = len(vad_start) - speech_idx = np.zeros((in_timestamps.shape[0],), dtype=np.bool) + speech_idx = np.zeros((in_timestamps.shape[0],), dtype=bool) out_timestamps = [] out_timestamps2speech_segs = [] count_speech = 0 From ad9de1c5f449219856e57a6f37410b1bcc1d45c8 Mon Sep 17 00:00:00 2001 From: Jesus Villalba Date: Tue, 11 Jun 2024 09:13:47 -0400 Subject: [PATCH 150/154] debug gmm --- hyperion/np/clustering/kmeans.py | 4 ++-- hyperion/np/pdfs/mixtures/exp_family_mixture.py | 3 +++ hyperion/np/pdfs/mixtures/gmm_diag_cov.py | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hyperion/np/clustering/kmeans.py b/hyperion/np/clustering/kmeans.py index 82d257d1..59983cae 100644 --- a/hyperion/np/clustering/kmeans.py +++ b/hyperion/np/clustering/kmeans.py @@ -188,14 +188,14 @@ def _compute_centroids(self, x, index): for k in range(self.num_clusters): r = index == k if np.sum(r) > 0: - mu[k] = np.mean(x[index == k], axis=0) + mu[k] = np.mean(x[r], axis=0) return mu @staticmethod def _compute_centroid(x, index, k): r = index == k if np.sum(r) > 0: - return np.mean(x[index == k], axis=0) + return np.mean(x[r], axis=0) else: return None diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py index d1cf7f68..91ca19a2 100644 --- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py +++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py @@ -2,6 +2,7 @@ Copyright 2018 Johns Hopkins University (Author: Jesus Villalba) Apache 2.0 
diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
index d1cf7f68..91ca19a2 100644
--- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py
+++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
@@ -2,6 +2,7 @@
  Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
+
 import logging
 
 import numpy as np
@@ -104,6 +105,8 @@ def fit(
             )
             elbo_val[epoch] = self.elbo(None, N=N, u_x=u_x, log_h=log_h_val)
 
+        print(self.log_prob(x, mode="nat"), self.log_prob(x, mode="std"))
+
         if x_val is None:
             return elbo, elbo / x.shape[0]
         else:
diff --git a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
index a5135190..f8fab693 100644
--- a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
+++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
@@ -180,9 +180,11 @@ def Mstep(self, N, u_x):
 
         if self.update_Lambda:
             S = S / N[:, None] - self.mu**2
+            print("1", S)
             S_floor = self.var_floor * np.mean(S[N > self.min_N], axis=0)
             S_floor = np.maximum(S_floor, 1e-10)
             S = np.maximum(S, S_floor)
+            print("2", S)
             print(np.min(S))
             self.Lambda = 1 / S
             self._Sigma = S

From e27af1b86cea33d190a9368b98814352d9b2d21b Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Tue, 11 Jun 2024 09:26:56 -0400
Subject: [PATCH 151/154] debug gmm

---
 hyperion/np/pdfs/mixtures/exp_family_mixture.py | 6 +++++-
 hyperion/np/pdfs/mixtures/gmm_diag_cov.py       | 9 +++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
index 91ca19a2..6fd2a5b1 100644
--- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py
+++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
@@ -105,7 +105,11 @@ def fit(
             )
             elbo_val[epoch] = self.elbo(None, N=N, u_x=u_x, log_h=log_h_val)
 
-        print(self.log_prob(x, mode="nat"), self.log_prob(x, mode="std"))
+        print(
+            elbo[epoch],
+            np.mean(self.log_prob(x, mode="nat")),
+            np.mean(self.log_prob(x, mode="std")),
+        )
 
         if x_val is None:
             return elbo, elbo / x.shape[0]
diff --git a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
index f8fab693..29c3a571 100644
--- a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
+++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
@@ -180,12 +180,9 @@ def Mstep(self, N, u_x):
 
         if self.update_Lambda:
             S = S / N[:, None] - self.mu**2
-            print("1", S)
-            S_floor = self.var_floor * np.mean(S[N > self.min_N], axis=0)
-            S_floor = np.maximum(S_floor, 1e-10)
-            S = np.maximum(S, S_floor)
-            print("2", S)
-            print(np.min(S))
+            # S_floor = self.var_floor * np.mean(S[N > self.min_N], axis=0)
+            # S_floor = np.maximum(S_floor, 1e-10)
+            # S = np.maximum(S, S_floor)
             self.Lambda = 1 / S
             self._Sigma = S
             self._cholLambda = None
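PATCH 151 disables the variance floor to expose the numerical problem being debugged; PATCH 153 below restores it. The floor matters because in the M-step above `Lambda = 1 / S`, so a component that collapses onto very few points can drive a variance to numerical zero and its precision to infinity. A minimal sketch of the flooring step, assuming `S` has shape `(num_comp, x_dim)` and `N` holds the per-component soft counts:

```python
import numpy as np

def floor_variances(S, N, var_floor, min_N, abs_floor=1e-10):
    # Floor each dimension at var_floor times the average variance of
    # well-populated components, with a tiny absolute floor as a backstop,
    # so the precisions 1/S stay finite.
    S_floor = var_floor * np.mean(S[N > min_N], axis=0)
    S_floor = np.maximum(S_floor, abs_floor)
    return np.maximum(S, S_floor)
```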
From 44f7abba12dfc83806982ad9a15eb19e11890e87 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Tue, 11 Jun 2024 09:44:03 -0400
Subject: [PATCH 152/154] debug gmm

---
 hyperion/np/pdfs/mixtures/exp_family_mixture.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
index 6fd2a5b1..01181b61 100644
--- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py
+++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
@@ -106,7 +106,7 @@ def fit(
 
         print(
-            elbo[epoch],
+            elbo[epoch] / x.shape[0],
             np.mean(self.log_prob(x, mode="nat")),
             np.mean(self.log_prob(x, mode="std")),
         )
@@ -210,7 +210,6 @@ def _accum_suff_stats_1batch(self, x, u_x=None, sample_weight=None):
         N = np.sum(z, axis=0)
         acc_u_x = np.dot(z.T, u_x)
-        # L_z=gmm.ElnP_z_w(N,gmm.lnw)-gmm.Elnq_z(z);
         return N, acc_u_x
 
     def _accum_suff_stats_nbatches(self, x, sample_weight, batch_size):
@@ -473,8 +472,8 @@ def sum_suff_stats(self, N, u_x):
           Accumalted N and u_x.
         """
         assert len(N) == len(u_x)
-        acc_N = N[1]
-        acc_u_x = u_x[1]
+        acc_N = N[0]
+        acc_u_x = u_x[0]
         for i in range(1, len(N)):
             acc_N += N[i]
             acc_u_x += u_x[i]
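Besides normalizing the debug print by the number of samples, this patch fixes an off-by-one in `sum_suff_stats`: seeding the reduction with `N[1]`/`u_x[1]` dropped the first batch of statistics and double-counted the second. A minimal sketch of the corrected accumulation, with defensive copies added here (an assumption, not in the patch) so the in-place `+=` does not mutate the caller's first batch:

```python
import numpy as np

def sum_suff_stats(N, u_x):
    """Sum per-batch sufficient statistics into single accumulators."""
    assert len(N) == len(u_x)
    acc_N = N[0].copy()      # seed from element 0, not element 1
    acc_u_x = u_x[0].copy()
    for i in range(1, len(N)):
        acc_N += N[i]
        acc_u_x += u_x[i]
    return acc_N, acc_u_x
```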
From 025a1376bee1bccfae88bbc3eff2f03c2b1b7643 Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Tue, 11 Jun 2024 09:56:04 -0400
Subject: [PATCH 153/154] debug gmm

---
 hyperion/np/pdfs/mixtures/exp_family_mixture.py | 6 ------
 hyperion/np/pdfs/mixtures/gmm_diag_cov.py       | 6 +++---
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/hyperion/np/pdfs/mixtures/exp_family_mixture.py b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
index 01181b61..e1355dc5 100644
--- a/hyperion/np/pdfs/mixtures/exp_family_mixture.py
+++ b/hyperion/np/pdfs/mixtures/exp_family_mixture.py
@@ -105,12 +105,6 @@ def fit(
             )
             elbo_val[epoch] = self.elbo(None, N=N, u_x=u_x, log_h=log_h_val)
 
-        print(
-            elbo[epoch] / x.shape[0],
-            np.mean(self.log_prob(x, mode="nat")),
-            np.mean(self.log_prob(x, mode="std")),
-        )
-
         if x_val is None:
             return elbo, elbo / x.shape[0]
         else:
diff --git a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
index 29c3a571..c3985aef 100644
--- a/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
+++ b/hyperion/np/pdfs/mixtures/gmm_diag_cov.py
@@ -180,9 +180,9 @@ def Mstep(self, N, u_x):
 
         if self.update_Lambda:
             S = S / N[:, None] - self.mu**2
-            # S_floor = self.var_floor * np.mean(S[N > self.min_N], axis=0)
-            # S_floor = np.maximum(S_floor, 1e-10)
-            # S = np.maximum(S, S_floor)
+            S_floor = self.var_floor * np.mean(S[N > self.min_N], axis=0)
+            S_floor = np.maximum(S_floor, 1e-10)
+            S = np.maximum(S, S_floor)
             self.Lambda = 1 / S
             self._Sigma = S
             self._cholLambda = None

From 9303c0d4f141ae249d05a7f4cfbe811216ee422d Mon Sep 17 00:00:00 2001
From: Jesus Villalba
Date: Sat, 20 Jul 2024 12:34:41 -0400
Subject: [PATCH 154/154] remove nan assert from glob pool

---
 README.md                            | 17 +------------
 hyperion/torch/data/audio_dataset.py |  2 +-
 hyperion/torch/layers/global_pool.py | 33 ++++++++++++++--------------
 hyperion/utils/class_info.py         |  2 +-
 hyperion/utils/trial_key.py          |  4 ++--
 5 files changed, 22 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 6f7a8490..04f4d269 100644
--- a/README.md
+++ b/README.md
@@ -52,27 +52,12 @@ conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit
 git clone https://github.com/hyperion-ml/hyperion.git
 ```
 
-- You can choose to install hyperion in the environment
+- Then install hyperion in the environment
 ```bash
 cd hyperion
 pip install -e .
 ```
 
-- Or add the hyperion toolkit to the PYTHONPATH envirnoment variable
-  This option will allow you to share the same environment if you are working with several hyperion branches
-  at the same time, while installing it requires to have an enviroment per branch.
-  For this, you need to install the requirements
-```bash
-cd hyperion
-pip install -r requirements.txt
-```
-Then add these lines to your `~/.bashrc` or to each script that uses hyperion
-```bash
-HYP_ROOT= #substitute this by your hyperion location
-export PYTHONPATH=${HYP_ROOT}:$PYTHONPATH
-export PATH=${HYP_ROOT}/bin:$PATH
-```
-
 ## Recipes
 
 There are recipes for several tasks in the `./egs` directory.
diff --git a/hyperion/torch/data/audio_dataset.py b/hyperion/torch/data/audio_dataset.py
index 9d8bebc6..a8e45bda 100644
--- a/hyperion/torch/data/audio_dataset.py
+++ b/hyperion/torch/data/audio_dataset.py
@@ -153,7 +153,7 @@ def _load_legacy_durations(self, time_durs_file):
         time_durs = SegmentSet.load(time_durs_file)
         self.seg_set["duration"] = time_durs.loc[
             self.seg_set["id"]
-        ].class_id.values.astype(np.float, copy=False)
+        ].class_id.values.astype(float, copy=False)
 
     def _load_bpe_model(self, bpe_model, is_val):
         if self.rank == 0:
diff --git a/hyperion/torch/layers/global_pool.py b/hyperion/torch/layers/global_pool.py
index aa14f743..f4174e3d 100644
--- a/hyperion/torch/layers/global_pool.py
+++ b/hyperion/torch/layers/global_pool.py
@@ -2,6 +2,7 @@
  Copyright 2019 Johns Hopkins University (Author: Jesus Villalba)
  Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 """
+
 import logging
 import math
 
@@ -201,7 +202,7 @@ def forward(self, x, x_lengths=None, weights=None):
             # this can produce slightly negative variance when relu6 saturates in all time steps
             # add 1e-5 for stability
             s = torch.sqrt(
-                torch.mean(delta ** 2, dim=self.dim, keepdim=False).clamp(min=SQRT_EPS)
+                torch.mean(delta**2, dim=self.dim, keepdim=False).clamp(min=SQRT_EPS)
             )
 
             mus = torch.cat((mu, s), dim=1)
@@ -214,7 +215,7 @@ def forward(self, x, x_lengths=None, weights=None):
         wbar = torch.mean(weights, dim=self.dim, keepdim=True)
         mu = xbar / wbar
         delta = x - mu
-        var = torch.mean(weights * delta ** 2, dim=self.dim, keepdim=True) / wbar
+        var = torch.mean(weights * delta**2, dim=self.dim, keepdim=True) / wbar
         s = torch.sqrt(var.clamp(min=SQRT_EPS))
         mu = mu.squeeze(self.dim)
         s = s.squeeze(self.dim)
@@ -254,9 +255,9 @@ def _forward_slidwin_int(self, x, win_length, win_shift, snip_edges):
 
         c_x = torch.cumsum(x, dim=-1).view(-1, x.shape[-1])
         m_x = (c_x[:, win_shift:] - c_x[:, :-win_shift]) / win_length
-        c_x = torch.cumsum(x ** 2, dim=-1).view(-1, x.shape[-1])
+        c_x = torch.cumsum(x**2, dim=-1).view(-1, x.shape[-1])
         m_x2 = (c_x[:, win_shift:] - c_x[:, :-win_shift]) / win_length
-        s_x = torch.sqrt(m_x2 - m_x ** 2).clamp(min=SQRT_EPS)
+        s_x = torch.sqrt(m_x2 - m_x**2).clamp(min=SQRT_EPS)
 
         mus = self._post_slidwin(m_x, s_x, out_shape)
         return mus
@@ -265,7 +266,7 @@ def _forward_slidwin_float(self, x, win_length, win_shift, snip_edges):
         x, out_shape = self._pre_slidwin(x, win_length, win_shift, snip_edges)
         num_frames = out_shape[-1]
         c_x = torch.cumsum(x, dim=-1).view(-1, x.shape[-1])
-        c_x2 = torch.cumsum(x ** 2, dim=-1).view(-1, x.shape[-1])
+        c_x2 = torch.cumsum(x**2, dim=-1).view(-1, x.shape[-1])
 
         # xx = x.view(-1, x.shape[-1])
         # print(xx.shape[1])
@@ -309,7 +310,7 @@ def _forward_slidwin_float(self, x, win_length, win_shift, snip_edges):
 
             k += win_shift
 
-        var_x = (m_x2 - m_x ** 2).clamp(min=SQRT_EPS)
+        var_x = (m_x2 - m_x**2).clamp(min=SQRT_EPS)
         s_x = torch.sqrt(var_x)
         # idx = torch.isnan(s_x) #.any(dim=1)
         # if torch.sum(idx) > 0:
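The global_pool hunks above are mostly black-style reformatting (`x ** 2` to `x**2`), but they sit next to the file's real NaN defense: every variance is clamped before a sqrt or log, because with relu-like activations E[x^2] - E[x]^2 can come out slightly negative in floating point, and one NaN poisons the pooled embedding (the hard asserts are commented out in the hunks that follow). A minimal sketch of the pattern, assuming `SQRT_EPS` is a small constant such as 1e-5:

```python
import torch

SQRT_EPS = 1e-5  # assumed value; plays the role of the module's constant

def mean_std_pool(x, dim=-1):
    # Mean/std pooling over time with the variance clamped before sqrt,
    # so tiny negative values from floating-point cancellation cannot
    # produce NaNs in the pooled statistics.
    mu = torch.mean(x, dim=dim)
    delta = x - mu.unsqueeze(dim)
    var = torch.mean(delta**2, dim=dim).clamp(min=SQRT_EPS)
    return torch.cat((mu, torch.sqrt(var)), dim=-1)
```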
@@ -400,14 +401,14 @@ def forward(self, x, x_lengths=None, weights=None):
         weights = self._standardize_weights(x, x_lengths, weights)
         if weights is None:
             mu = torch.mean(x, dim=self.dim, keepdim=self.keepdim)
-            x2bar = torch.mean(x ** 2, dim=self.dim, keepdim=self.keepdim)
+            x2bar = torch.mean(x**2, dim=self.dim, keepdim=self.keepdim)
             logvar = torch.log(x2bar - mu * mu + 1e-5)  # for stability in case var=0
             return torch.cat((mu, logvar), dim=-1)
 
         xbar = torch.mean(weights * x, dim=self.dim, keepdim=self.keepdim)
         wbar = torch.mean(weights, dim=self.dim, keepdim=self.keepdim)
         mu = xbar / wbar
-        x2bar = torch.mean(weights * x ** 2, dim=self.dim, keepdim=self.keepdim) / wbar
+        x2bar = torch.mean(weights * x**2, dim=self.dim, keepdim=self.keepdim) / wbar
         var = (x2bar - mu * mu).clamp(min=1e-5)
         logvar = torch.log(var)
 
@@ -444,7 +445,7 @@ def __init__(
         if dist_pow == 1:
             self.dist_f = lambda x: torch.norm(x, p=2, dim=-1)
         else:
-            self.dist_f = lambda x: torch.sum(x ** 2, dim=-1)
+            self.dist_f = lambda x: torch.sum(x**2, dim=-1)
 
         self.size_multiplier = num_comp
 
@@ -503,7 +504,7 @@ def forward(self, x, x_lengths=None, weights=None):
 
         delta = x - self.mu  # (batch, time, num_comp, feat_dim)
         dist = self.dist_f(delta)  # (batch, time, num_comp)
-        llk = -self.prec ** 2 * dist + self.bias
+        llk = -self.prec**2 * dist + self.bias
         r = nnf.softmax(llk, dim=-1)  # (batch, time, num_comp)
         if weights is not None:
             r *= weights
@@ -778,9 +779,9 @@ def forward(self, x, x_lengths=None, weights=None):
         # x = (batch, feat_dim, time)
         weights = self._standardize_weights(x, x_lengths, weights)  # (batch, 1, time)
         x_inner = self.conv1(x)  # (batch, inner_dim, time)
-        assert not torch.any(
-            torch.isnan(x_inner)
-        ), f"xinner is nan {torch.sum(torch.isnan(x_inner))} {torch.sum(torch.isnan(x))} {torch.mean(x)} {torch.sum(torch.isinf(x))} {x.size()}"
+        # assert not torch.any(
+        #     torch.isnan(x_inner)
+        # ), f"xinner is nan {torch.sum(torch.isnan(x_inner))} {torch.sum(torch.isnan(x))} {torch.mean(x)} {torch.sum(torch.isinf(x))} {x.size()}"
         # assert not torch.any(
         #     torch.isinf(x_inner)
         # ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(x))}"
@@ -788,9 +789,9 @@ def forward(self, x, x_lengths=None, weights=None):
         if self.use_global_context:
             global_mus = self.stats_pool(x, weights=weights)
             x_inner = x_inner + self.lin_global(global_mus).unsqueeze(-1)
-        assert not torch.any(
-            torch.isnan(x_inner)
-        ), f"xinner is nan {torch.sum(torch.isnan(x_inner))} {torch.sum(torch.isnan(global_mus))}"
+        # assert not torch.any(
+        #     torch.isnan(x_inner)
+        # ), f"xinner is nan {torch.sum(torch.isnan(x_inner))} {torch.sum(torch.isnan(global_mus))}"
         # assert not torch.any(
         #     torch.isinf(x_inner)
         # ), f"xinner is inf {torch.sum(torch.isinf(x_inner))} {torch.sum(torch.isinf(global_mus))}"
diff --git a/hyperion/utils/class_info.py b/hyperion/utils/class_info.py
index 3cb03659..b3a08178 100644
--- a/hyperion/utils/class_info.py
+++ b/hyperion/utils/class_info.py
@@ -76,7 +76,7 @@ def load(cls, file_path, sep=None):
                 sep=" ",
                 header=None,
                 names=["id"],
-                dtype={"id": np.str},
+                dtype={"id": str},
             )
 
         return cls(df)
diff --git a/hyperion/utils/trial_key.py b/hyperion/utils/trial_key.py
index 5d8019b6..539a049d 100644
--- a/hyperion/utils/trial_key.py
+++ b/hyperion/utils/trial_key.py
@@ -12,7 +12,7 @@
 import pandas as pd
 
 # from .list_utils import *
-from .list_utils import sort, intersect, ismember, split_list, list2ndarray
+from .list_utils import intersect, ismember, list2ndarray, sort, split_list
 
 from .trial_ndx import TrialNdx
 
@@ -421,7 +421,7 @@ def filter(self, model_set, seg_set, keep=True):
             assert np.all(f)
 
         model_set = self.model_set[mod_idx]
-        set_set = self.seg_set[seg_idx]
+        seg_set = self.seg_set[seg_idx]
         ix = np.ix_(mod_idx, seg_idx)
         tar = self.tar[ix]
         non = self.non[ix]
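The trial_key.py change at the end fixes a silent typo: the filtered segment list was assigned to `set_set`, so `seg_set` kept its unfiltered value while `tar` and `non` had already been subset, leaving the key internally inconsistent. A minimal sketch of the intended filtering, assuming the sets are numpy arrays of ids and using plain index lookups in place of the toolkit's `ismember` helper:

```python
import numpy as np

def filter_key(model_set, seg_set, tar, non, keep_models, keep_segs):
    # Indices of the kept models/segments in the original ordering.
    mod_idx = np.array([np.flatnonzero(model_set == m)[0] for m in keep_models])
    seg_idx = np.array([np.flatnonzero(seg_set == s)[0] for s in keep_segs])
    ix = np.ix_(mod_idx, seg_idx)  # outer product of row/column indices
    # Filter the ids and the tar/non matrices consistently.
    return model_set[mod_idx], seg_set[seg_idx], tar[ix], non[ix]
```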